[nrktv:episodes] Add support for episodes (#11571)
[youtube-dl] / youtube_dl / extractor / nrk.py
index c89aac63ee90f133074d8ade8b7af23cf020f148..ea7be005a9640b631c03381778792251fca574a5 100644 (file)
@@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor):
 
         entries = []
 
+        conviva = data.get('convivaStatistics') or {}
+        live = (data.get('mediaElementType') == 'Live' or
+                data.get('isLive') is True or conviva.get('isLive'))
+
+        def make_title(t):
+            return self._live_title(t) if live else t
+
         media_assets = data.get('mediaAssets')
         if media_assets and isinstance(media_assets, list):
             def video_id_and_title(idx):
@@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
                 if not formats:
                     continue
                 self._sort_formats(formats)
+
+                # Some f4m streams may not work with hdcore in fragments' URLs
+                for f in formats:
+                    extra_param = f.get('extra_param_to_segment_url')
+                    if extra_param and 'hdcore' in extra_param:
+                        del f['extra_param_to_segment_url']
+
                 entry_id, entry_title = video_id_and_title(num)
                 duration = parse_duration(asset.get('duration'))
                 subtitles = {}
@@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor):
                         })
                 entries.append({
                     'id': asset.get('carrierId') or entry_id,
-                    'title': entry_title,
+                    'title': make_title(entry_title),
                     'duration': duration,
                     'subtitles': subtitles,
                     'formats': formats,
@@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor):
                 duration = parse_duration(data.get('duration'))
                 entries = [{
                     'id': video_id,
-                    'title': title,
+                    'title': make_title(title),
                     'duration': duration,
                     'formats': formats,
                 }]
@@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor):
                     message_type, message_type)),
                 expected=True)
 
-        conviva = data.get('convivaStatistics') or {}
         series = conviva.get('seriesName') or data.get('seriesTitle')
         episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
 
@@ -194,7 +207,15 @@ class NRKIE(NRKBaseIE):
 
 class NRKTVIE(NRKBaseIE):
     IE_DESC = 'NRK TV and NRK Radio'
-    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+    _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:tv|radio)\.nrk(?:super)?\.no/
+                            (?:serie/[^/]+|program)/
+                            (?![Ee]pisodes)%s
+                            (?:/\d{2}-\d{2}-\d{4})?
+                            (?:\#del=(?P<part_id>\d+))?
+                    ''' % _EPISODE_RE
     _API_HOST = 'psapi-we.nrk.no'
 
     _TESTS = [{
@@ -260,9 +281,43 @@ class NRKTVIE(NRKBaseIE):
     }]
 
 
-class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+class NRKTVDirekteIE(NRKTVIE):
+    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
 
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/direkte/nrk1',
+        'only_matching': True,
+    }, {
+        'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+        'only_matching': True,
+    }]
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+    def _extract_description(self, webpage):
+        pass
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+            for video_id in re.findall(self._ITEM_RE, webpage)
+        ]
+
+        playlist_title = self. _extract_title(webpage)
+        playlist_description = self._extract_description(webpage)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+    _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
     _TESTS = [{
         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
         'info_dict': {
@@ -281,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor):
         'playlist_count': 5,
     }]
 
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+    def _extract_title(self, webpage):
+        return self._og_search_title(webpage, fatal=False)
 
-        webpage = self._download_webpage(url, playlist_id)
+    def _extract_description(self, webpage):
+        return self._og_search_description(webpage)
 
-        entries = [
-            self.url_result('nrk:%s' % video_id, 'NRK')
-            for video_id in re.findall(
-                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
-                webpage)
-        ]
 
-        playlist_title = self._og_search_title(webpage)
-        playlist_description = self._og_search_description(webpage)
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+    _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+    _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+        'info_dict': {
+            'id': '69031',
+            'title': 'Nytt på nytt, sesong: 201210',
+        },
+        'playlist_count': 4,
+    }]
 
-        return self.playlist_result(
-            entries, playlist_id, playlist_title, playlist_description)
+    def _extract_title(self, webpage):
+        return self._html_search_regex(
+            r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
 
 
 class NRKSkoleIE(InfoExtractor):