From: Sergey M․ Date: Sun, 1 May 2016 01:15:23 +0000 (+0600) Subject: [tagesschau] Restrict playlist entry regex X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=68bb2fef9565159eba4a47f464b6b420cf2d5cda;p=youtube-dl [tagesschau] Restrict playlist entry regex --- diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 499bd260b..136e18f96 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor): }, { 'url': 'http://www.tagesschau.de/100sekunden/index.html', 'only_matching': True, + }, { + # playlist article with collapsing sections + 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html', + 'only_matching': True, }] @classmethod @@ -275,7 +279,7 @@ class TagesschauIE(InfoExtractor): if webpage_type == 'website': # Article entries = [] for num, (entry_title, media_kind, download_text) in enumerate(re.findall( - r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, + r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX, webpage), 1): entries.append({ 'id': '%s-%d' % (display_id, num),