[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / kakao.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6 from ..compat import compat_str
7 from ..utils import (
8     int_or_none,
9     strip_or_none,
10     unified_timestamp,
11     update_url_query,
12 )
13
14
class KakaoIE(InfoExtractor):
    """Extractor for clip links hosted on tv.kakao.com / play-tv.kakao.com.

    Matches both channel pages (``/channel/<n>/cliplink/<id>``) and embedded
    player URLs (``/embed/player/cliplink/<id>``).  The clip id is either
    purely numeric or an arbitrary token followed by the ``@my`` suffix.
    """
    _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
    _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'

    _TESTS = [{
        'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
        'md5': '702b2fbdeb51ad82f5c904e8c0766340',
        'info_dict': {
            'id': '301965083',
            'ext': 'mp4',
            'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
            'uploader_id': 2671005,
            'uploader': '그랑그랑이',
            'timestamp': 1488160199,
            'upload_date': '20170227',
        }
    }, {
        'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
        'md5': 'a8917742069a4dd442516b86e7d66529',
        'info_dict': {
            'id': '300103180',
            'ext': 'mp4',
            'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
            'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
            'uploader_id': 2653210,
            'uploader': '쇼! 음악중심',
            'timestamp': 1485684628,
            'upload_date': '20170129',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip referenced by *url*.

        First requests the ``impress`` endpoint for the clip metadata, then
        requests ``raw/videolocation`` once per non-audio output profile to
        resolve the actual media URL of each format.

        Returns a dict with the clip id, title, description, uploader data,
        thumbnails, counters, tags and the list of formats.
        """
        video_id = self._match_id(url)
        # Remove the literal '@my' suffix only.  str.rstrip('@my') treats its
        # argument as a *set* of characters and would also eat trailing
        # 'm'/'y'/'@' characters that belong to the id itself
        # (e.g. 'tommy@my' -> 'to').
        my_suffix = '@my'
        if video_id.endswith(my_suffix):
            display_id = video_id[:-len(my_suffix)]
        else:
            display_id = video_id
        # The API is addressed with the full id, suffix included.
        api_base = self._API_BASE_TMPL % video_id

        player_header = {
            'Referer': update_url_query(
                'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
                    'service': 'kakao_tv',
                    'autoplay': '1',
                    'profile': 'HIGH',
                    'wmode': 'transparent',
                })
        }

        query = {
            'player': 'monet_html5',
            'referer': url,
            'uuid': '',
            'service': 'kakao_tv',
            'section': '',
            'dteType': 'PC',
            'fields': ','.join([
                '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
                'description', 'channelId', 'createTime', 'duration', 'playCount',
                'likeCount', 'commentCount', 'tagList', 'channel', 'name',
                'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
                'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
        }

        impress = self._download_json(
            api_base + 'impress', display_id, 'Downloading video info',
            query=query, headers=player_header)

        clip_link = impress['clipLink']
        clip = clip_link['clip']

        title = clip.get('title') or clip_link.get('displayTitle')

        # Query shared by every per-profile request: same parameters as the
        # metadata request, plus the returned tid, asking only for the URL.
        location_query = dict(query)
        location_query.update({
            'tid': impress.get('tid', ''),
            'fields': '-*,url',
        })

        formats = []
        # 'or []' also covers an explicit null in the JSON response.
        for fmt in clip.get('videoOutputList') or []:
            profile_name = fmt.get('profile')
            # Entries without a profile cannot be requested; audio-only
            # profiles are deliberately left out.
            if not profile_name or profile_name == 'AUDIO':
                continue

            fmt_query = dict(location_query)
            fmt_query['profile'] = profile_name
            fmt_url_json = self._download_json(
                api_base + 'raw/videolocation', display_id,
                'Downloading video URL for profile %s' % profile_name,
                query=fmt_query, headers=player_header, fatal=False)

            # Non-fatal download: a failed request just drops this profile.
            if not fmt_url_json:
                continue
            fmt_url = fmt_url_json.get('url')
            if not fmt_url:
                continue

            formats.append({
                'url': fmt_url,
                'format_id': profile_name,
                'width': int_or_none(fmt.get('width')),
                'height': int_or_none(fmt.get('height')),
                'format_note': fmt.get('label'),
                'filesize': int_or_none(fmt.get('filesize')),
                'tbr': int_or_none(fmt.get('kbps')),
            })
        self._sort_formats(formats)

        thumbs = []
        for thumb in clip.get('clipChapterThumbnailList') or []:
            thumb_url = thumb.get('thumbnailUrl')
            # A thumbnail entry without a URL is unusable.
            if not thumb_url:
                continue
            thumbs.append({
                'url': thumb_url,
                'id': compat_str(thumb.get('timeInSec')),
                'preference': -1 if thumb.get('isDefault') else 0
            })
        # The clip-level thumbnail is ranked above all chapter thumbnails.
        top_thumbnail = clip.get('thumbnailUrl')
        if top_thumbnail:
            thumbs.append({
                'url': top_thumbnail,
                'preference': 10,
            })

        # 'channel' may be missing or null; fall back to an empty mapping.
        channel = clip_link.get('channel') or {}

        return {
            'id': display_id,
            'title': title,
            'description': strip_or_none(clip.get('description')),
            'uploader': channel.get('name'),
            'uploader_id': clip_link.get('channelId'),
            'thumbnails': thumbs,
            'timestamp': unified_timestamp(clip_link.get('createTime')),
            'duration': int_or_none(clip.get('duration')),
            'view_count': int_or_none(clip.get('playCount')),
            'like_count': int_or_none(clip.get('likeCount')),
            'comment_count': int_or_none(clip.get('commentCount')),
            'formats': formats,
            'tags': clip.get('tagList'),
        }