[npo] Add support for anderetijden.nl (Closes #10754)
author Sergey M. <dstftw@gmail.com>
Sun, 25 Sep 2016 15:19:00 +0000 (22:19 +0700)
committer Sergey M. <dstftw@gmail.com>
Sun, 25 Sep 2016 15:26:14 +0000 (22:26 +0700)
youtube_dl/extractor/npo.py

index ff02d03090a5b805613425f61c798e725dd752fa..66035a77c5ada26ca94fe4d3863c052ea253832e 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
+    orderedSet,
     parse_duration,
     qualities,
     strip_jsonp,
@@ -446,7 +447,7 @@ class NPOPlaylistBaseIE(NPOIE):
 
         entries = [
             self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
-            for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage)
+            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
         ]
 
         playlist_title = self._html_search_regex(
@@ -508,3 +509,18 @@ class WNLIE(NPOPlaylistBaseIE):
         },
         'playlist_count': 4,
     }]
+
+
+class AndereTijdenIE(NPOPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
+    _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)</h1>'
+    _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']'
+
+    _TESTS = [{
+        'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
+        'info_dict': {
+            'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
+            'title': 'Duitse soldaten over de Slag bij Arnhem',
+        },
+        'playlist_count': 3,
+    }]