[nrk:playlist] Relax video id regex and improve _VALID_URL
[youtube-dl] / youtube_dl / extractor / nrk.py
index d17b7ed49771530a713e9b69590c6b56504e7ec9..e91d3a248ec3367af8b7e9a8f60be0d503877481 100644 (file)
@@ -25,6 +25,7 @@ class NRKIE(InfoExtractor):
                 'ext': 'flv',
                 'title': 'Dompap og andre fugler i Piip-Show',
                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+                'duration': 263,
             }
         },
         {
@@ -35,6 +36,7 @@ class NRKIE(InfoExtractor):
                 'ext': 'flv',
                 'title': 'Slik høres internett ut når du er blind',
                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+                'duration': 20,
             }
         },
     ]
@@ -53,6 +55,8 @@ class NRKIE(InfoExtractor):
 
         video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
 
+        duration = parse_duration(data.get('duration'))
+
         images = data.get('images')
         if images:
             thumbnails = images['webImages']
@@ -67,10 +71,51 @@ class NRKIE(InfoExtractor):
             'ext': 'flv',
             'title': data['title'],
             'description': data['description'],
+            'duration': duration,
             'thumbnail': thumbnail,
         }
 
 
+class NRKPlaylistIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+        'info_dict': {
+            'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+            'title': 'Gjenopplev den historiske solformørkelsen',
+            'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+        'info_dict': {
+            'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+            'title': 'Rivertonprisen til Karin Fossum',
+            'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('nrk:%s' % video_id, 'NRK')
+            for video_id in re.findall(
+                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+                webpage)
+        ]
+
+        playlist_title = self._og_search_title(webpage)
+        playlist_description = self._og_search_description(webpage)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
+
+
 class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'