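Fix some regexes (npo.nl live/radio lookahead, zapp.nl/npo3.nl paths, optional npo.nl/live id, escaped dot in hetklokhuis.nl)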
[youtube-dl] / youtube_dl / extractor / npo.py
index be10fc48613a7646fe13a572565b9b3f0ff7c013..b8fe244071d05e1daac7514b932be148802c21a7 100644 (file)
@@ -28,17 +28,17 @@ class NPOBaseIE(InfoExtractor):
 
 class NPOIE(NPOBaseIE):
     IE_NAME = 'npo'
-    IE_DESC = 'npo.nl and ntr.nl'
+    IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
     _VALID_URL = r'''(?x)
                     (?:
                         npo:|
                         https?://
                             (?:www\.)?
                             (?:
-                                npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+                                npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
                                 ntr\.nl/(?:[^/]+/){2,}|
                                 omroepwnl\.nl/video/fragment/[^/]+__|
-                                zapp\.nl/[^/]+/[^/]+/
+                                (?:zapp|npo3)\.nl/(?:[^/]+/){2}
                             )
                         )
                         (?P<id>[^/?#]+)
@@ -125,6 +125,18 @@ class NPOIE(NPOBaseIE):
         'params': {
             'skip_download': True,
         }
+    }, {
+        # audio
+        'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
+        'info_dict': {
+            'id': 'RBX_FUNX_6683215',
+            'ext': 'mp3',
+            'title': 'Jouw Stad Rotterdam',
+            'description': 'md5:db251505244f097717ec59fabc372d9f',
+        },
+        'params': {
+            'skip_download': True,
+        }
     }, {
         'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
         'only_matching': True,
@@ -134,10 +146,16 @@ class NPOIE(NPOBaseIE):
     }, {
         'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
         'only_matching': True,
+    }, {
+        'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
+        'only_matching': True,
     }, {
         # live stream
         'url': 'npo:LI_NL1_4188102',
         'only_matching': True,
+    }, {
+        'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -193,7 +211,7 @@ class NPOIE(NPOBaseIE):
                 })
 
             # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
-            if item.get('contentType') == 'url':
+            if item.get('contentType') in ('url', 'audio'):
                 add_format_url(item_url)
                 continue
 
@@ -201,7 +219,7 @@ class NPOIE(NPOBaseIE):
                 stream_info = self._download_json(
                     item_url + '&type=json', video_id,
                     'Downloading %s stream JSON'
-                    % item.get('label') or format_id or num)
+                    % item.get('label') or item.get('format') or format_id or num)
             except ExtractorError as ee:
                 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
                     error = (self._parse_json(
@@ -301,9 +319,9 @@ class NPOIE(NPOBaseIE):
 
 class NPOLiveIE(NPOBaseIE):
     IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.npo.nl/live/npo-1',
         'info_dict': {
             'id': 'LI_NL1_4188102',
@@ -315,15 +333,18 @@ class NPOLiveIE(NPOBaseIE):
         'params': {
             'skip_download': True,
         }
-    }
+    }, {
+        'url': 'http://www.npo.nl/live',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        display_id = self._match_id(url) or 'npo-1'
 
         webpage = self._download_webpage(url, display_id)
 
         live_id = self._search_regex(
-            r'data-prid="([^"]+)"', webpage, 'live id')
+            [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
 
         return {
             '_type': 'url_transparent',
@@ -448,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
 
 class HetKlokhuisIE(NPODataMidEmbedIE):
     IE_NAME = 'hetklokhuis'
-    _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
 
     _TEST = {
         'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',