[tagesschau] Restrict playlist entry regex

author Sergey M․ <dstftw@gmail.com>

Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)

committer Sergey M․ <dstftw@gmail.com>

Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)
author Sergey M․ <dstftw@gmail.com>
Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)
committer Sergey M․ <dstftw@gmail.com>
Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py

index 499bd260b0d3999f7e3f369b5ce174a0c226ba33..136e18f96cadf7bd5701e32b0a3bc7c8767e324e 100644 (file)
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor):
      }, {
          'url': 'http://www.tagesschau.de/100sekunden/index.html',
          'only_matching': True,
+    }, {
+        # playlist article with collapsing sections
+        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
+        'only_matching': True,
      }]
  
      @classmethod
@@ -275,7 +279,7 @@ class TagesschauIE(InfoExtractor):
          if webpage_type == 'website':  # Article
              entries = []
              for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                    r'(?s)<p[^>]+class="infotext"[^>]*>.*?<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
+                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
                      webpage), 1):
                  entries.append({
                      'id': '%s-%d' % (display_id, num),
author	Sergey M․ <dstftw@gmail.com>
	Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Sun, 1 May 2016 01:15:23 +0000 (07:15 +0600)