Merge remote-tracking branch 'Boris-de/wdrmaus_fix#8562'
[youtube-dl] / youtube_dl / extractor / vlive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     dict_get,
9     ExtractorError,
10     float_or_none,
11     int_or_none,
12 )
13 from ..compat import compat_urllib_parse_urlencode
14
15
class VLiveIE(InfoExtractor):
    """Extractor for vlive.tv video pages (live broadcasts and replays)."""

    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Download the video page and dispatch on the broadcast status.

        The page embeds a ``vlive.video.init(...)`` call whose quoted,
        comma-separated arguments carry the status (field 2), the live
        stream parameters (field 5), the long video id (field 6) and the
        playback key (field 7).

        Returns the info dict built by ``_live`` or ``_replay``.
        Raises ExtractorError when the arguments cannot be parsed or the
        status does not correspond to a playable video.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s' % video_id, video_id)

        video_params = self._search_regex(
            r'\bvlive\.video\.init\(([^)]+)\)',
            webpage, 'video params')
        params = re.split(r'"\s*,\s*"', video_params)
        # Fail with an extractor error rather than an opaque unpacking
        # ValueError when the init call has fewer fields than expected.
        if len(params) < 8:
            raise ExtractorError(
                'Unable to parse video params (got %d fields)' % len(params))
        status, live_params, long_video_id, key = (
            params[2], params[5], params[6], params[7])

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            # The field is the contents of a JSON string literal that itself
            # holds JSON text: re-quote and decode the string first, then
            # decode the document it contains.
            live_params = self._parse_json('"%s"' % live_params, video_id)
            live_params = self._parse_json(live_params, video_id)
            return self._live(video_id, webpage, live_params)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            if long_video_id and key:
                return self._replay(video_id, webpage, long_video_id, key)
            # Without both the id and the key there is nothing to request
            # yet, so report it the same way as COMING_SOON below.
            status = 'COMING_SOON'

        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        """Return the title, creator and thumbnail scraped from the page.

        The creator lookup is non-fatal, so 'creator' may be None.
        """
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage, live_params):
        """Build the info dict for a broadcast that is currently on air.

        live_params is the decoded live parameters object; each entry of its
        'resolutions' list is expected to carry an m3u8 'cdnUrl' and
        optionally a 'name' used as the format id prefix.
        """
        formats = []
        # `or []` also covers an explicit null, not just a missing key.
        for vid in live_params.get('resolutions') or []:
            cdn_url = vid.get('cdnUrl')
            if not cdn_url:
                # Skip unusable entries instead of aborting the whole
                # extraction; the m3u8 download below is best-effort too.
                continue
            formats.extend(self._extract_m3u8_formats(
                cdn_url, video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        return dict(self._get_common_fields(webpage),
                    id=video_id,
                    formats=formats,
                    is_live=True)

    def _replay(self, video_id, webpage, long_video_id, key):
        """Build the info dict for a recorded video.

        Queries the play info JSON API with the long video id and key taken
        from the page, then collects formats, the view count and VTT
        subtitles from the response.
        """
        playinfo = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
            % compat_urllib_parse_urlencode({
                'videoId': long_video_id,
                'key': key,
                'ptc': 'http',
                'doct': 'json',  # document type (xml or json)
                'cpt': 'vtt',  # captions type (vtt or ttml)
            }), video_id)

        # Every nested lookup uses `or {}` / `or []` so that a null value in
        # the response is treated the same as a missing key.
        formats = []
        for vid in (playinfo.get('videos') or {}).get('list') or []:
            source = vid.get('source')
            if not source:
                continue
            encoding = vid.get('encodingOption') or {}
            bitrate = vid.get('bitrate') or {}
            formats.append({
                'url': source,
                'format_id': encoding.get('name'),
                'abr': float_or_none(bitrate.get('audio')),
                'vbr': float_or_none(bitrate.get('video')),
                'width': int_or_none(encoding.get('width')),
                'height': int_or_none(encoding.get('height')),
                'filesize': int_or_none(vid.get('size')),
            })
        self._sort_formats(formats)

        view_count = int_or_none((playinfo.get('meta') or {}).get('count'))

        subtitles = {}
        for caption in (playinfo.get('captions') or {}).get('list') or []:
            # Use the first available of these keys as the language code.
            lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
            if lang and caption.get('source'):
                subtitles[lang] = [{
                    'ext': 'vtt',
                    'url': caption['source']}]

        return dict(self._get_common_fields(webpage),
                    id=video_id,
                    formats=formats,
                    view_count=view_count,
                    subtitles=subtitles)