class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
- _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
_TESTS = [{
- # MDR regularily deletes its videos
+ # MDR regularly deletes its videos
'url': 'http://www.mdr.de/fakt/video189002.html',
'only_matching': True,
- }, {
+ }, {
# audio
'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
+ }, {
+ 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
data_url = self._search_regex(
- r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
- webpage, 'data url', group='url')
+ # No default= here: the result is post-processed with .replace(), so a
+ # missing match must raise the regular extraction error instead of
+ # returning None (which would fail with AttributeError).
+ r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
+ # The URL may be embedded with JSON-style escaped slashes ("\/"); undo that.
+ webpage, 'data url', group='url').replace(r'\/', '/')
doc = self._download_xml(
compat_urlparse.urljoin(url, data_url), video_id)
- title = (xpath_text(doc, './title', 'title', default=None) or
- xpath_text(doc, './broadcast/broadcastName', 'title'))
+ title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
formats = []
processed_urls = []
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
- url_formats = []
-
ext = determine_ext(url_el.text)
if ext == 'm3u8':
url_formats = self._extract_m3u8_formats(
'height': height,
})
- url_formats.append(f)
+ url_formats = [f]
+
+ if not url_formats:
+ continue
if not vbr:
for f in url_formats:
'vcodec': 'none',
})
- if url_formats:
- formats.extend(url_formats)
+ formats.extend(url_formats)
+
self._sort_formats(formats)
description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
timestamp = parse_iso8601(
- xpath_text(doc, './broadcast/broadcastDate', 'timestamp', default=None) or
- xpath_text(doc, './broadcast/broadcastStartDate', 'timestamp', default=None))
+ xpath_text(
+ doc, [
+ './broadcast/broadcastDate',
+ './broadcast/broadcastStartDate',
+ './broadcast/broadcastEndDate'],
+ 'timestamp', default=None))
duration = parse_duration(xpath_text(doc, './duration', 'duration'))
uploader = xpath_text(doc, './rights', 'uploader')