X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fard.py;h=fd45b3e42b374ec6a7077454d238932c66d16d46;hb=73c4ac2c95caf535c9177d930921ae42816d669f;hp=fdb94af6b3ad1747f6e8e155e8d8ece8aca739ae;hpb=86b4e98ac6f57c3703cd2082a69351ece797111c;p=youtube-dl diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index fdb94af6b..fd45b3e42 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,14 +8,13 @@ from .generic import GenericIE from ..utils import ( determine_ext, ExtractorError, - get_element_by_attribute, qualities, int_or_none, parse_duration, unified_strdate, xpath_text, - parse_xml, ) +from ..compat import compat_etree_fromstring class ARDMediathekIE(InfoExtractor): @@ -23,17 +22,28 @@ class ARDMediathekIE(InfoExtractor): _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ - 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', - 'only_matching': True, + 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114', + 'info_dict': { + 'id': '29582122', + 'ext': 'mp4', + 'title': 'Ich liebe das Leben trotzdem', + 'description': 'md5:45e4c225c72b27993314b31a84a5261c', + 'duration': 4557, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { - 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', + 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916', + 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e', 'info_dict': { - 'id': '22490580', + 'id': '29522730', 'ext': 'mp4', - 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)', - 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.', + 'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)', + 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1', + 'duration': 5252, }, - 'skip': 'Blocked outside of Germany', }, { # audio 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', @@ -45,6 +55,9 @@ class ARDMediathekIE(InfoExtractor): 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef', 'duration': 3240, }, + }, { + 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', + 'only_matching': True, }] def _extract_media_info(self, media_info_url, webpage, video_id): @@ -69,7 +82,7 @@ class ARDMediathekIE(InfoExtractor): subtitle_url = media_info.get('_subtitleUrl') if subtitle_url: subtitles['de'] = [{ - 'ext': 'srt', + 'ext': 'ttml', 'url': subtitle_url, }] @@ -96,13 +109,15 @@ class ARDMediathekIE(InfoExtractor): server = stream.get('_server') for stream_url in stream_urls: ext = determine_ext(stream_url) + if quality != 'auto' and ext in ('f4m', 'm3u8'): + continue if ext == 'f4m': formats.extend(self._extract_f4m_formats( stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', - video_id, preference=-1, f4m_id='hds')) + video_id, preference=-1, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', preference=1, m3u8_id='hls')) + stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False)) else: if server and server.startswith('rtmp'): f = { @@ -147,7 +162,7 @@ class ARDMediathekIE(InfoExtractor): raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True) if re.search(r'[\?&]rss($|[=&])', url): - doc = parse_xml(webpage) + doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return GenericIE()._extract_rss(url, video_id, doc) @@ -258,41 +273,3 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } - - -class SportschauIE(ARDMediathekIE): - IE_NAME = 'Sportschau' - _VALID_URL = r'(?Phttps?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P[^/#?]+))\.html' - _TESTS = [{ - 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', - 'info_dict': { - 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', - 'ext': 'mp4', - 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - base_url = mobj.group('baseurl') - - webpage = self._download_webpage(url, video_id) - title = get_element_by_attribute('class', 'headline', webpage) - description = self._html_search_meta('description', webpage, 'description') - - info = self._extract_media_info( - base_url + '-mc_defaultQuality-h.json', webpage, video_id) - - info.update({ - 'title': title, - 'description': description, - }) - - return info