1 from __future__ import unicode_literals
5 from .common import InfoExtractor
8 class MDRIE(InfoExtractor):
9 _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
11 # No tests, MDR regularly deletes its videos
13 'url': 'http://www.mdr.de/fakt/video189002.html',
14 'only_matching': True,
17 def _real_extract(self, url):
18 m = re.match(self._VALID_URL, url)
19 video_id = m.group('video_id')
20 domain = m.group('domain')
22 # determine title and media streams from webpage
23 html = self._download_webpage(url, video_id)
25 title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
26 xmlurl = self._search_regex(
27 r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
29 doc = self._download_xml(domain + xmlurl, video_id)
31 for a in doc.findall('./assets/asset'):
32 url_el = a.find('./progressiveDownloadUrl')
35 abr = int(a.find('bitrateAudio').text) // 1000
36 media_type = a.find('mediaType').text
39 'filesize': int(a.find('fileSize').text),
43 vbr_el = a.find('bitrateVideo')
47 'format_id': '%s-%d' % (media_type, abr),
50 vbr = int(vbr_el.text) // 1000
53 'width': int(a.find('frameWidth').text),
54 'height': int(a.find('frameHeight').text),
55 'format_id': '%s-%d' % (media_type, vbr),
57 formats.append(format)
58 self._sort_formats(formats)