X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmdr.py;h=322e5b45a141bacd327288ac58ed658a46e628f7;hb=HEAD;hp=541ddd9099fec7a3997585e478f8bf89a9907aed;hpb=2b1b2d83cacfdce19cae5eea2f9bbfd142efc7f1;p=youtube-dl diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 541ddd909..322e5b45a 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -14,12 +14,24 @@ from ..utils import ( class MDRIE(InfoExtractor): IE_DESC = 'MDR.DE and KiKA' - _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P\d+)(?:_.+?)?\.html' + _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P\d+)(?:_.+?)?\.html' _TESTS = [{ - # MDR regularily deletes its videos + # MDR regularly deletes its videos 'url': 'http://www.mdr.de/fakt/video189002.html', 'only_matching': True, + }, { + # audio + 'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html', + 'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa', + 'info_dict': { + 'id': '1312272', + 'ext': 'mp3', + 'title': 'Feuilleton vom 30. Oktober 2015', + 'duration': 250, + 'uploader': 'MITTELDEUTSCHER RUNDFUNK', + }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/baumhaus/videos/video19636.html', 'md5': '4930515e36b06c111213e80d1e4aad0e', @@ -30,6 +42,7 @@ class MDRIE(InfoExtractor): 'duration': 134, 'uploader': 'KIKA', }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', @@ -38,17 +51,30 @@ class MDRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Beutolomäus und der geheime Weihnachtswunsch', 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', - 'timestamp': 1419047100, - 'upload_date': '20141220', + 'timestamp': 1482541200, + 'upload_date': '20161224', 'duration': 4628, 'uploader': 'KIKA', }, + }, { + # audio with alternative playerURL pattern + 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', + 'info_dict': { + 'id': '100', + 'ext': 'mp4', + 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson', + 'duration': 3239, + 'uploader': 'MITTELDEUTSCHER RUNDFUNK', + }, }, { 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', 'only_matching': True, }, { 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html', 'only_matching': True, + }, { + 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -57,14 +83,13 @@ class MDRIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_url = self._search_regex( - r'dataURL\s*:\s*(["\'])(?P/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1', - webpage, 'data url', group='url') + r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P.+?-avCustom\.xml)\1', + webpage, 'data url', group='url').replace(r'\/', '/') doc = self._download_xml( compat_urlparse.urljoin(url, data_url), video_id) - title = (xpath_text(doc, './title', 'title', default=None) or - xpath_text(doc, './broadcast/broadcastName', 'title')) + title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True) formats = [] processed_urls = [] @@ -86,8 +111,6 @@ class MDRIE(InfoExtractor): vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) - url_formats = [] - ext = determine_ext(url_el.text) if ext == 'm3u8': url_formats = self._extract_m3u8_formats( @@ -120,7 +143,10 @@ class MDRIE(InfoExtractor): 'height': height, }) - url_formats.append(f) + url_formats = [f] + + if not url_formats: + continue if not vbr: for f in url_formats: @@ -132,14 +158,18 @@ class MDRIE(InfoExtractor): 'vcodec': 'none', }) - if url_formats: - formats.extend(url_formats) + formats.extend(url_formats) + self._sort_formats(formats) description = xpath_text(doc, './broadcast/broadcastDescription', 'description') timestamp = parse_iso8601( - xpath_text(doc, './broadcast/broadcastDate', 'timestamp', default=None) or - xpath_text(doc, './broadcast/broadcastStartDate', 'timestamp', default=None)) + xpath_text( + doc, [ + './broadcast/broadcastDate', + './broadcast/broadcastStartDate', + './broadcast/broadcastEndDate'], + 'timestamp', default=None)) duration = parse_duration(xpath_text(doc, './duration', 'duration')) uploader = xpath_text(doc, './rights', 'uploader')