Merge branch 'akamai_pv' of https://github.com/remitamine/youtube-dl into remitamine...
[youtube-dl] / youtube_dl / extractor / mgtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import int_or_none
6
7
class MGTVIE(InfoExtractor):
    """Information extractor for video pages on www.mgtv.com."""

    # The numeric file name at the end of the path is used as the video id.
    _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
    IE_DESC = '芒果TV'

    _TEST = {
        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
        'md5': '',
        'info_dict': {
            'id': '3116640',
            'ext': 'mp4',
            'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
            'description': '我是歌手第四季双年巅峰会',
            'duration': 7461,
            # Raw string: same value as before, but no invalid-escape warning
            # for ``\.`` on newer Python versions.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,  # m3u8 download
        },
    }

    # Maps the stream name reported by the API to a
    # (format_id, preference) pair; a larger preference is assigned to the
    # higher quality labels.
    _FORMAT_MAP = {
        '标清': ('Standard', 0),   # "standard definition"
        '高清': ('High', 1),       # "high definition"
        '超清': ('SuperHigh', 2),  # "super high definition"
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Queries the player API for the video metadata and its stream list,
        then resolves every stream entry to its final media URL with one
        additional JSON request per stream.

        Raises KeyError if the API response lacks the 'data', 'info',
        'stream', 'title', per-stream 'url' or per-format 'info' keys.
        """
        video_id = self._match_id(url)
        api_data = self._download_json(
            'http://v.api.mgtv.com/player/video', video_id,
            query={'video_id': video_id})['data']
        info = api_data['info']

        formats = []
        for idx, stream in enumerate(api_data['stream']):
            format_name = stream.get('name')
            # Names missing from _FORMAT_MAP yield no format_id/preference.
            format_id, preference = self._FORMAT_MAP.get(format_name, (None, None))
            # Resolve the fallback label *before* interpolating it: '%' binds
            # tighter than 'or', so the previous one-line expression always
            # produced "format None" and never reached the '#<index>' branch.
            format_label = format_id or '#%d' % idx
            format_info = self._download_json(
                stream['url'], video_id,
                note='Download video info for format %s' % format_label)
            formats.append({
                'format_id': format_id,
                # The per-stream JSON carries the media URL under 'info'.
                'url': format_info['info'],
                'ext': 'mp4',  # These are m3u8 playlists
                'preference': preference,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['title'].strip(),
            'formats': formats,
            'description': info.get('desc'),
            'duration': int_or_none(info.get('duration')),
            'thumbnail': info.get('thumb'),
        }