[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / melonvod.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     urljoin,
8 )
9
10
class MelonVODIE(InfoExtractor):
    """Information extractor for vod.melon.com video detail pages.

    The numeric ``mvId`` query parameter of the page URL is used as the
    video id.  Metadata comes from two JSON endpoints (player info and
    streaming info); the formats are read from the HLS manifest referenced
    by the streaming info.
    """

    _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
        'info_dict': {
            'id': '50158734',
            'ext': 'mp4',
            'title': "Jessica 'Wonderland' MV Making Film",
            'thumbnail': r're:^https?://.*\.jpg$',
            'artist': 'Jessica (제시카)',
            'upload_date': '20161212',
            'duration': 203,
        },
        'params': {
            'skip_download': 'm3u8 download',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*.

        Raises KeyError if the mandatory fields (title, streaming info,
        manifest URL) are absent from the JSON responses.  Optional fields
        (artist, thumbnail, upload_date, duration) are None when missing.
        """
        video_id = self._match_id(url)

        play_info = self._download_json(
            'http://vod.melon.com/video/playerInfo.json', video_id,
            note='Downloading player info JSON', query={'mvId': video_id})

        # Mandatory field: let the lookup fail loudly if it is missing.
        title = play_info['mvInfo']['MVTITLE']

        info = self._download_json(
            'http://vod.melon.com/delivery/streamingInfo.json', video_id,
            note='Downloading streaming info JSON',
            query={
                'contsId': video_id,
                'contsType': 'VIDEO',
            })

        stream_info = info['streamingInfo']

        formats = self._extract_m3u8_formats(
            stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        artist = None
        artist_list = play_info.get('artistList')
        if isinstance(artist_list, list):
            names = [
                entry['ARTISTNAMEWEBLIST']
                for entry in artist_list
                # Skip malformed (non-dict) entries and empty names.
                if isinstance(entry, dict) and entry.get('ARTISTNAMEWEBLIST')]
            # An empty join would yield '', report "no artist" as None.
            artist = ', '.join(names) or None

        thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))

        duration = int_or_none(stream_info.get('playTime'))
        # ``or ''`` also covers a key that is present with a null value,
        # which a dict.get() default alone would not.  Only the first eight
        # characters of the value are kept.
        upload_date = (stream_info.get('mvSvcOpenDt') or '')[:8] or None

        return {
            'id': video_id,
            'title': title,
            'artist': artist,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats
        }