class ARDMediathekIE(InfoExtractor):
IE_NAME = 'ARD:mediathek'
- _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+ _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
# available till 26.07.2022
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
'only_matching': True,
+ }, {
+ 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
+ 'only_matching': True,
}]
def _extract_media_info(self, media_info_url, webpage, video_id):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
- r'<h4 class="headline">(.*?)</h4>'],
+ r'<h4 class="headline">(.*?)</h4>',
+ r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
- 'description', webpage, 'meta description')
+ 'description', webpage, 'meta description', default=None)
+ if description is None:
+ description = self._html_search_regex(
+ r'<p\s+class="teasertext">(.+?)</p>',
+ webpage, 'teaser text', default=None)
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL