_ Git - youtube-dl/blob - youtube_dl/extractor/daum.py

   1 # encoding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 from .common import InfoExtractor
   6 from ..compat import compat_urllib_parse
   7 from ..utils import int_or_none
   8
   9
  10 class DaumIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
  12     IE_NAME = 'daum.net'
  13
  14     _TESTS = [{
  15         'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
  16         'info_dict': {
  17             'id': 'vab4dyeDBysyBssyukBUjBz',
  18             'ext': 'mp4',
  19             'title': '마크 헌트 vs 안토니오 실바',
  20             'description': 'Mark Hunt vs Antonio Silva',
  21             'upload_date': '20131217',
  22             'duration': 2117,
  23         },
  24     }, {
  25         'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
  26         'only_matching': True,
  27     }]
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31         query = compat_urllib_parse.urlencode({'vid': video_id})
  32         info = self._download_xml(
  33             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
  34             'Downloading video info')
  35         movie_data = self._download_json(
  36             'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
  37             video_id, 'Downloading video formats info')
  38
  39         formats = []
  40         for format_el in movie_data['output_list']['output_list']:
  41             profile = format_el['profile']
  42             format_query = compat_urllib_parse.urlencode({
  43                 'vid': video_id,
  44                 'profile': profile,
  45             })
  46             url_doc = self._download_xml(
  47                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
  48                 video_id, note='Downloading video data for %s format' % profile)
  49             format_url = url_doc.find('result/url').text
  50             formats.append({
  51                 'url': format_url,
  52                 'format_id': profile,
  53                 'width': int_or_none(format_el.get('width')),
  54                 'height': int_or_none(format_el.get('height')),
  55                 'filesize': int_or_none(format_el.get('filesize')),
  56             })
  57         self._sort_formats(formats)
  58
  59         return {
  60             'id': video_id,
  61             'title': info.find('TITLE').text,
  62             'formats': formats,
  63             'thumbnail': info.find('THUMB_URL').text,
  64             'description': info.find('CONTENTS').text,
  65             'duration': int_or_none(info.find('DURATION').text),
  66             'upload_date': info.find('REGDTTM').text[:8],
  67         }
  68
  69
  70 class DaumClipIE(InfoExtractor):
  71     _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
  72     IE_NAME = 'daum.net'
  73
  74     _TESTS = [{
  75         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
  76         'info_dict': {
  77             'id': '52554690',
  78             'ext': 'mp4',
  79             'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
  80             'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
  81             'upload_date': '20130831',
  82             'duration': 3868,
  83             'view_count': int,
  84         },
  85     }]
  86
  87     def _real_extract(self, url):
  88         video_id = self._match_id(url)
  89         clip_info = self._download_json('http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, video_id)['clip_bean']
  90
  91         return {
  92             '_type': 'url_transparent',
  93             'id': video_id,
  94             'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
  95             'title': clip_info['title'],
  96             'thumbnail': clip_info.get('thumb_url'),
  97             'description': clip_info.get('contents'),
  98             'duration': int_or_none(clip_info.get('duration')),
  99             'upload_date': clip_info.get('up_date')[:8],
 100             'view_count': int_or_none(clip_info.get('play_count')),
 101             'ie_key': 'Daum',
 102         }