[kakao] improve extraction
[youtube-dl] / youtube_dl / extractor / kakao.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6 from ..compat import compat_str
7 from ..utils import (
8     int_or_none,
9     strip_or_none,
10     unified_timestamp,
11     update_url_query,
12 )
13
14
class KakaoIE(InfoExtractor):
    """Information extractor for clips on tv.kakao.com.

    Handles channel clip pages and embedded player URLs (see _VALID_URL).
    Metadata comes from the ``impress`` API endpoint, the list of available
    encoding profiles from ``raw``, and the media URL of every profile from
    ``raw/videolocation``.
    """

    _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
    _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'

    _TESTS = [{
        'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
        'md5': '702b2fbdeb51ad82f5c904e8c0766340',
        'info_dict': {
            'id': '301965083',
            'ext': 'mp4',
            'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
            'uploader_id': 2671005,
            'uploader': '그랑그랑이',
            'timestamp': 1488160199,
            'upload_date': '20170227',
        }
    }, {
        'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
        'md5': 'a8917742069a4dd442516b86e7d66529',
        'info_dict': {
            'id': '300103180',
            'ext': 'mp4',
            'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
            'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
            'uploader_id': 2653210,
            'uploader': '쇼! 음악중심',
            'timestamp': 1485684628,
            'upload_date': '20170129',
        }
    }]

    def _real_extract(self, url):
        """Extract metadata, formats and thumbnails for the clip at *url*.

        Returns the info dict for the clip. Raises KeyError when the
        ``impress`` response lacks ``clipLink`` or ``clipLink.clip``; the
        download and sorting helpers may raise their own errors.
        """
        video_id = self._match_id(url)

        # Remove the literal '@my' suffix only.  str.rstrip('@my') would
        # strip any trailing run of the characters '@', 'm' and 'y', so an
        # id such as 'tommy@my' would be mangled into 'to'.
        my_suffix = '@my'
        if video_id.endswith(my_suffix):
            display_id = video_id[:-len(my_suffix)]
        else:
            display_id = video_id

        api_base = self._API_BASE_TMPL % video_id

        # All API requests are sent with the embedded player page as Referer.
        player_header = {
            'Referer': update_url_query(
                'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
                    'service': 'kakao_tv',
                    'autoplay': '1',
                    'profile': 'HIGH',
                    'wmode': 'transparent',
                })
        }

        # 'fields' restricts the response to the attributes read below.
        query = {
            'player': 'monet_html5',
            'referer': url,
            'uuid': '',
            'service': 'kakao_tv',
            'section': '',
            'dteType': 'PC',
            'fields': ','.join([
                '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
                'description', 'channelId', 'createTime', 'duration', 'playCount',
                'likeCount', 'commentCount', 'tagList', 'channel', 'name',
                'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault'])
        }

        impress = self._download_json(
            api_base + 'impress', display_id, 'Downloading video info',
            query=query, headers=player_header)

        clip_link = impress['clipLink']
        clip = clip_link['clip']

        title = clip.get('title') or clip_link.get('displayTitle')

        # The tid returned by 'impress' is passed on to the follow-up requests.
        tid = impress.get('tid', '')

        query.update({
            'fields': '-*,outputList,profile,width,height,label,filesize',
            'tid': tid,
            'profile': 'HIGH',
        })
        raw = self._download_json(
            api_base + 'raw', display_id, 'Downloading video formats info',
            query=query, headers=player_header)

        formats = []
        # 'or []' also covers an explicit null value, which a .get() default
        # would not.
        for fmt in raw.get('outputList') or []:
            profile_name = fmt.get('profile')
            if not profile_name:
                # The profile name is needed to request the media URL.
                continue

            # One extra request per profile resolves the actual media URL.
            query.update({
                'profile': profile_name,
                'fields': '-*,url',
            })
            fmt_url_json = self._download_json(
                api_base + 'raw/videolocation', display_id,
                'Downloading video URL for profile %s' % profile_name,
                query=query, headers=player_header, fatal=False)

            # The request is non-fatal: anything that is not a JSON object
            # means this profile could not be resolved, so skip it.
            if not isinstance(fmt_url_json, dict):
                continue

            fmt_url = fmt_url_json.get('url')
            if not fmt_url:
                continue

            formats.append({
                'url': fmt_url,
                'format_id': profile_name,
                'width': int_or_none(fmt.get('width')),
                'height': int_or_none(fmt.get('height')),
                'format_note': fmt.get('label'),
                'filesize': int_or_none(fmt.get('filesize'))
            })
        self._sort_formats(formats)

        thumbs = []
        for thumb in clip.get('clipChapterThumbnailList') or []:
            thumb_url = thumb.get('thumbnailUrl')
            if not thumb_url:
                # A thumbnail entry without a URL carries no usable data.
                continue
            thumb_info = {
                'url': thumb_url,
                'preference': -1 if thumb.get('isDefault') else 0
            }
            time_in_sec = thumb.get('timeInSec')
            if time_in_sec is not None:
                # Avoid producing the bogus id 'None' when the offset is absent.
                thumb_info['id'] = compat_str(time_in_sec)
            thumbs.append(thumb_info)
        top_thumbnail = clip.get('thumbnailUrl')
        if top_thumbnail:
            # The clip-level thumbnail gets the highest preference.
            thumbs.append({
                'url': top_thumbnail,
                'preference': 10,
            })

        # 'channel' may be missing or null; fall back to an empty mapping.
        channel = clip_link.get('channel') or {}

        return {
            'id': display_id,
            'title': title,
            'description': strip_or_none(clip.get('description')),
            'uploader': channel.get('name'),
            'uploader_id': clip_link.get('channelId'),
            'thumbnails': thumbs,
            'timestamp': unified_timestamp(clip_link.get('createTime')),
            'duration': int_or_none(clip.get('duration')),
            'view_count': int_or_none(clip.get('playCount')),
            'like_count': int_or_none(clip.get('likeCount')),
            'comment_count': int_or_none(clip.get('commentCount')),
            'formats': formats,
            'tags': clip.get('tagList'),
        }
152         }