https?://
(?:www\.)?
(?:
- npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
- ntr\.nl/(?:[^/]+/){2,}|
+ npo\.nl/(?:[^/]+/)*|
+ (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
)
}, {
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://npo.nl/KN_1698996',
+ 'only_matching': True,
}]
# Decide whether NPOIE should handle `url`.
# Returns False as soon as NPOLiveIE, NPORadioIE or NPORadioFragmentIE
# reports the URL as suitable; otherwise the decision is delegated to the
# parent class's suitable().
# NOTE(review): presumably required because the npo.nl URL pattern added in
# this change no longer carries the (?!(?:live|radio)/) lookahead, so live
# and radio URLs would otherwise be claimed here too -- confirm.
+ @classmethod
+ def suitable(cls, url):
+ return (False if any(ie.suitable(url)
+ for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
+ else super(NPOIE, cls).suitable(url))
+
# Extraction entry point: pull the video id out of `url` with _match_id()
# and return whatever _get_info() produces for that id.
def _real_extract(self, url):
video_id = self._match_id(url)
return self._get_info(video_id)
formats = []
urls = set()
# Check whether `format_url` should be considered as a new format URL.
# The result is truthy only when the URL is non-empty, is not already
# present in the enclosing `urls` set, and starts with "http://",
# "https://" or a scheme-relative "//".
# The returned value is whatever the and-chain yields (a falsy operand or
# the re.match result), so it is meant for boolean tests only.
# This helper never modifies `urls`; recording the URL is left to callers.
+ def is_legal_url(format_url):
+ return format_url and format_url not in urls and re.match(
+ r'^(?:https?:)?//', format_url)
+
QUALITY_LABELS = ('Laag', 'Normaal', 'Hoog')
QUALITY_FORMATS = ('adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std')
})['items'][0]
for num, item in enumerate(items):
item_url = item.get('url')
- if not item_url or item_url in urls:
+ if not is_legal_url(item_url):
continue
urls.add(item_url)
format_id = self._search_regex(
quality = quality_from_format_id(format_id)
f_id = format_id
else:
- quality, f_id = None
+ quality, f_id = [None] * 2
formats.append({
'url': format_url,
'format_id': f_id,
video_url = stream_info.get('url')
if not video_url or video_url in urls:
continue
- urls.add(item_url)
+ urls.add(video_url)
if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, ext='mp4',
if not is_live:
for num, stream in enumerate(metadata.get('streams', [])):
stream_url = stream.get('url')
- if not stream_url or stream_url in urls:
+ if not is_legal_url(stream_url):
continue
urls.add(stream_url)
# smooth streaming is not supported
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://www.npo.nl/live/npo-1',
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/live/npo-1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
class NPORadioIE(InfoExtractor):
IE_NAME = 'npo.nl:radio'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.npo.nl/radio/radio-1',
}
}
# Decide whether NPORadioIE should handle `url`.
# Returns False when NPORadioFragmentIE accepts the URL, otherwise falls
# back to the parent class's suitable().
# NOTE(review): presumably needed because this change drops the trailing
# '/?$' from _VALID_URL, so longer radio URLs could match here as well --
# verify against NPORadioFragmentIE's pattern.
+ @classmethod
+ def suitable(cls, url):
+ return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
+
# Build a regex source string for an HTML attribute with a single-quoted
# value: `attribute`, optional whitespace around '=', then the quoted value.
# Group 1 captures the (non-empty) text between the single quotes.
# `attribute` is interpolated as-is, without regex escaping.
@staticmethod
def _html_get_attribute_regex(attribute):
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)