[daum] improve info extraction
[youtube-dl] / youtube_dl / extractor / daum.py
1 # encoding: utf-8
2
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6 from ..compat import compat_urllib_parse
7 from ..utils import int_or_none
8
9
class DaumIE(InfoExtractor):
    """Extractor for tvpot.daum.net ``/v/<vid>`` video pages.

    Metadata is read from the ClipInfoXml.do endpoint, the list of
    available profiles from IntegratedMovieData.json, and the media URL of
    each profile from MovieLocation.apixml.
    """

    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
    IE_NAME = 'daum.net'

    _TESTS = [{
        'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
        'info_dict': {
            'id': 'vab4dyeDBysyBssyukBUjBz',
            'ext': 'mp4',
            'title': '마크 헌트 vs 안토니오 실바',
            'description': 'Mark Hunt vs Antonio Silva',
            'upload_date': '20131217',
            'duration': 2117,
        },
    }, {
        'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
        'only_matching': True,
    }]

    @staticmethod
    def _daum_node_text(doc, path):
        """Return the text of the first element matching ``path``.

        Returns None when no element matches (or when the element has no
        text), so optional fields can be read without raising.
        """
        node = doc.find(path)
        return node.text if node is not None else None

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for a ``/v/`` URL."""
        # The id is captured verbatim from the URL and may be percent-encoded
        # (e.g. "07dXWRka62Y%24").  Decode it once here, otherwise urlencode()
        # below escapes the "%" again and the API is queried with "%2524".
        video_id = compat_urllib_parse.unquote(self._match_id(url))
        query = compat_urllib_parse.urlencode({'vid': video_id})
        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            'Downloading video info')
        movie_data = self._download_json(
            'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
            video_id, 'Downloading video formats info')

        formats = []
        for format_el in movie_data['output_list']['output_list']:
            profile = format_el['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': video_id,
                'profile': profile,
            })
            url_doc = self._download_xml(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note='Downloading video data for %s format' % profile)
            format_url = self._daum_node_text(url_doc, 'result/url')
            if not format_url:
                # No location for this profile: skip it instead of aborting
                # the extraction of the remaining profiles.
                continue
            formats.append({
                'url': format_url,
                'format_id': profile,
                'width': int_or_none(format_el.get('width')),
                'height': int_or_none(format_el.get('height')),
                'filesize': int_or_none(format_el.get('filesize')),
            })
        self._sort_formats(formats)

        # Only the first eight characters of REGDTTM are used as the date;
        # a missing or empty element yields None instead of raising.
        registered = self._daum_node_text(info, 'REGDTTM')

        return {
            'id': video_id,
            # The title is treated as mandatory, so it is still read directly.
            'title': info.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._daum_node_text(info, 'THUMB_URL'),
            'description': self._daum_node_text(info, 'CONTENTS'),
            'duration': int_or_none(self._daum_node_text(info, 'DURATION')),
            'upload_date': registered[:8] if registered else None,
        }
68
69
class DaumClipIE(InfoExtractor):
    """Extractor for tvpot.daum.net URLs that carry a numeric ``clipid``.

    The clip id is resolved through GetClipInfo.do to a ``vid``; the result
    is returned as a ``url_transparent`` entry pointing at the matching
    ``/v/<vid>`` page, with the clip metadata attached.
    """

    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = 'daum.net'

    _TESTS = [{
        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        'info_dict': {
            'id': '52554690',
            'ext': 'mp4',
            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'upload_date': '20130831',
            'duration': 3868,
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip in ``url``."""
        video_id = self._match_id(url)
        clip_info = self._download_json(
            'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
            video_id)['clip_bean']

        # 'up_date' is read with .get() like the other optional fields, so it
        # may be absent; guard the slice so a missing value gives None
        # instead of a TypeError.
        up_date = clip_info.get('up_date')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
            'title': clip_info['title'],
            'thumbnail': clip_info.get('thumb_url'),
            'description': clip_info.get('contents'),
            'duration': int_or_none(clip_info.get('duration')),
            'upload_date': up_date[:8] if up_date else None,
            'view_count': int_or_none(clip_info.get('play_count')),
            # Hands the resolved URL to the extractor keyed 'Daum'
            # (presumably DaumIE in this module).
            'ie_key': 'Daum',
        }