X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fndr.py;h=2447c812e021e73991082aefab4bd98e6dd000a1;hb=a6211d237b4e7051ca018cc09440502561fedaa7;hp=e3cc6fde80ea545a0c98a26854afa13c81cbc9b9;hpb=d5d38d16ae434a65b4335616fc4617904f7ca34a;p=youtube-dl diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index e3cc6fde8..2447c812e 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -7,14 +7,18 @@ from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, + merge_dicts, parse_iso8601, qualities, + try_get, + urljoin, ) class NDRBaseIE(InfoExtractor): def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + display_id = next(group for group in mobj.groups() if group) webpage = self._download_webpage(url, display_id) return self._extract_embed(webpage, display_id) @@ -22,7 +26,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -77,31 +81,38 @@ class NDRIE(NDRBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html', + 'only_matching': True, }] def _extract_embed(self, webpage, display_id): embed_url = self._html_search_meta( - 'embedURL', webpage, 'embed URL', fatal=True) + 'embedURL', webpage, 'embed URL', + default=None) or self._search_regex( + r'\bembedUrl["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'embed URL', group='url') description = self._search_regex( r']+itemprop="description">([^<]+)

', - webpage, 'description', fatal=False) + webpage, 'description', default=None) or self._og_search_description(webpage) timestamp = parse_iso8601( self._search_regex( - r'