2 from __future__ import unicode_literals
6 from .common import InfoExtractor
7 from ..compat import compat_urllib_parse_unquote_plus
8 from ..utils import int_or_none
11 class KUSIIE(InfoExtractor):
12 _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
14 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
15 'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
19 'title': 'Turko Files: Case Closed! & Put On Hold!',
24 def _real_extract(self, url):
25 mobj = re.match(self._VALID_URL, url)
27 if mobj.group('clipId') is not None:
28 video_id = mobj.group('clipId')
30 webpage = self._download_webpage(url, mobj.group('path'))
31 video_id = self._html_search_regex(r'"clipId", "(\d+)"', webpage,
34 xml_url = 'http://www.kusi.com/build.asp?buildtype=buildfeaturexml'\
35 'request&featureType=Clip&featureid={0}&affiliateno=956&'\
36 'clientgroupid=1&rnd=562461'.format(video_id)
37 doc = self._download_xml(xml_url, video_id,
38 note='Downloading video info',
39 errnote='Failed to download video info')
41 video_title = doc.find('HEADLINE').text
42 duration = int_or_none(doc.find('DURATION'), get_attr='text')
43 description = doc.find('ABSTRACT')
45 quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
47 for quality in quality_options:
48 if 'height' in quality.attrib:
50 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
51 'height': quality.attrib['height'],
53 self._sort_formats(formats)
58 'description': description,