[nzz] Relax kaltura regex
author Alexander Seiler <seileralex@gmail.com>
Tue, 20 Nov 2018 19:50:40 +0000 (20:50 +0100)
committer Sergey M <dstftw@gmail.com>
Tue, 20 Nov 2018 19:50:40 +0000 (02:50 +0700)
youtube_dl/extractor/nzz.py

index 2d352f53f2908e0732d62e42cc09d457bfd5c77e..61ee77adbd3f47123d844c3731b8e8132afd1813 100644 (file)
@@ -11,20 +11,27 @@ from ..utils import (
 
 class NZZIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
         'info_dict': {
             'id': '9153',
         },
         'playlist_mincount': 6,
-    }
+    }, {
+        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+        'info_dict': {
+            'id': '1368112',
+        },
+        'playlist_count': 1,
+    }]
 
     def _real_extract(self, url):
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
 
         entries = []
-        for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+        for player_element in re.findall(
+                r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
             player_params = extract_attributes(player_element)
             if player_params.get('data-type') not in ('kaltura_singleArticle',):
                 self.report_warning('Unsupported player type')