X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdreisat.py;h=908c9e514c41ea72bac0e6f6ede41def4ba0b20b;hb=e2628fb6a028bd48f39ee556ca5ecb07aceba7f5;hp=765cb1f377df132ee91deac5872877777187cd6a;hpb=85748629912aff950f8945b273e9809fc8991cfe;p=youtube-dl diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 765cb1f37..908c9e514 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -1,85 +1,38 @@ -# coding: utf-8 +from __future__ import unicode_literals import re -import xml.etree.ElementTree -from .common import InfoExtractor -from ..utils import ( - determine_ext, - unified_strdate, -) +from .zdf import ZDFIE -class DreiSatIE(InfoExtractor): +class DreiSatIE(ZDFIE): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' - _TEST = { - u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", - u'file': u'36983.webm', - u'md5': u'57c97d0469d71cf874f6815aa2b7c944', - u'info_dict': { - u"title": u"Kaffeeland Schweiz", - u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. 
SCHWEIZWEIT nimmt die Kaffeekultur unter die...", - u"uploader": u"3sat", - u"upload_date": u"20130622" - } - } - + _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _TESTS = [ + { + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', + 'info_dict': { + 'id': '45918', + 'ext': 'mp4', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', + 'uploader': 'SCHWEIZWEIT', + 'uploader_id': '100000210', + 'upload_date': '20140913' + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } + }, + { + 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details') - details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8')) - - thumbnail_els = details_doc.findall('.//teaserimage') - thumbnails = [{ - 'width': te.attrib['key'].partition('x')[0], - 'height': te.attrib['key'].partition('x')[2], - 'url': te.text, - } for te in thumbnail_els] - - information_el = details_doc.find('.//information') - video_title = information_el.find('./title').text - video_description = information_el.find('./detail').text - - details_el = details_doc.find('.//details') - video_uploader = details_el.find('./channel').text - upload_date = unified_strdate(details_el.find('./airtime').text) - - format_els = details_doc.findall('.//formitaet') - formats = [{ - 'format_id': fe.attrib['basetype'], - 'width': int(fe.find('./width').text), - 'height': int(fe.find('./height').text), - 'url': fe.find('./url').text, - 'ext': determine_ext(fe.find('./url').text), - 'filesize': 
int(fe.find('./filesize').text), - 'video_bitrate': int(fe.find('./videoBitrate').text), - '3sat_qualityname': fe.find('./quality').text, - } for fe in format_els - if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] - - def _sortkey(format): - qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname']) - prefer_http = 1 if 'rtmp' in format['url'] else 0 - return (qidx, prefer_http, format['video_bitrate']) - formats.sort(key=_sortkey) - - info = { - '_type': 'video', - 'id': video_id, - 'title': video_title, - 'formats': formats, - 'description': video_description, - 'thumbnails': thumbnails, - 'thumbnail': thumbnails[-1]['url'], - 'uploader': video_uploader, - 'upload_date': upload_date, - } - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info + return self.extract_from_xml_url(video_id, details_url)