class KikaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|sendung)(?P<id>\d+).*'
+ _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'
_TESTS = [
{
- 'url': 'http://www.kika.de/baumhaus/videos/video9572.html',
- 'md5': '94fc748cf5d64916571d275a07ffe2d5',
+ 'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
+ 'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
- 'id': '9572',
+ 'id': '19636',
'ext': 'mp4',
- 'title': 'Baumhaus vom 29. Oktober 2014',
- 'description': None
- }
+ 'title': 'Baumhaus vom 30. Oktober 2015',
+ 'description': None,
+ },
},
{
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
- 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd'
- }
+ 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
+ },
},
{
- 'url': 'http://www.kika.de/videos/allevideos/video9572_zc-32ca94ad_zs-3f535991.html',
- 'md5': '94fc748cf5d64916571d275a07ffe2d5',
+ 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
+ 'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
- 'id': '9572',
+ 'id': '19636',
'ext': 'mp4',
- 'title': 'Baumhaus vom 29. Oktober 2014',
- 'description': None
- }
+ 'title': 'Baumhaus vom 30. Oktober 2015',
+ 'description': None,
+ },
},
{
- 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/sendung81244_zc-81d703f8_zs-f82d5e31.html',
+ 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
- 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd'
- }
- }
+ 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
+ },
+ },
]
def _real_extract(self, url):
xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'
video_id = self._search_regex(xml_re, webpage, "xml_url", default=None)
if not video_id:
- # Video is not available online
err_msg = 'Video %s is not available online' % broadcast_id
raise ExtractorError(err_msg, expected=True)
broadcast_elem = xml_tree.find('broadcast')
description = broadcast_elem.find('broadcastDescription').text
except AttributeError:
- # No description available
description = None
# duration string format is mm:ss (even if it is >= 1 hour, e.g. 78:42)
tmp = xml_tree.find('duration').text.split(':')
duration = int(tmp[0]) * 60 + int(tmp[1])
- formats_list = []
- for elem in xml_tree.find('assets'):
- format_dict = {}
- format_dict['url'] = elem.find('progressiveDownloadUrl').text
- format_dict['ext'] = elem.find('mediaType').text.lower()
- format_dict['format'] = elem.find('profileName').text
- width = int(elem.find('frameWidth').text)
- height = int(elem.find('frameHeight').text)
- format_dict['width'] = width
- format_dict['height'] = height
- format_dict['resolution'] = '%dx%d' % (width, height)
- format_dict['abr'] = int(elem.find('bitrateAudio').text)
- format_dict['vbr'] = int(elem.find('bitrateVideo').text)
- format_dict['tbr'] = format_dict['abr'] + format_dict['vbr']
- format_dict['filesize'] = int(elem.find('fileSize').text)
-
- # append resolution and dict for sorting by resolution
- formats_list.append((width * height, format_dict))
-
- # Sort by resolution (=quality)
- formats_list.sort()
-
- out_list = [x[1] for x in formats_list]
+ formats = [{
+ 'url': elem.find('progressiveDownloadUrl').text,
+ 'ext': elem.find('mediaType').text.lower(),
+ 'format': elem.find('profileName').text,
+ 'width': int(elem.find('frameWidth').text),
+ 'height': int(elem.find('frameHeight').text),
+ 'abr': int(elem.find('bitrateAudio').text),
+ 'vbr': int(elem.find('bitrateVideo').text),
+ 'filesize': int(elem.find('fileSize').text),
+ } for elem in xml_tree.find('assets')]
+ self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
- 'formats': out_list,
+ 'formats': formats,
'duration': duration,
- 'webpage_url': webpage_url
+ 'webpage_url': webpage_url,
}