[nrktv:episodes] Add support for episodes (#11571)

[youtube-dl] / youtube_dl / extractor / nrk.py
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index c89aac63ee90f133074d8ade8b7af23cf020f148..ea7be005a9640b631c03381778792251fca574a5 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -48,6 +48,13 @@ class NRKBaseIE(InfoExtractor):
  
          entries = []
  
+        conviva = data.get('convivaStatistics') or {}
+        live = (data.get('mediaElementType') == 'Live' or
+                data.get('isLive') is True or conviva.get('isLive'))
+
+        def make_title(t):
+            return self._live_title(t) if live else t
+
          media_assets = data.get('mediaAssets')
          if media_assets and isinstance(media_assets, list):
              def video_id_and_title(idx):
@@ -61,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
                  if not formats:
                      continue
                  self._sort_formats(formats)
+
+                # Some f4m streams may not work with hdcore in fragments' URLs
+                for f in formats:
+                    extra_param = f.get('extra_param_to_segment_url')
+                    if extra_param and 'hdcore' in extra_param:
+                        del f['extra_param_to_segment_url']
+
                  entry_id, entry_title = video_id_and_title(num)
                  duration = parse_duration(asset.get('duration'))
                  subtitles = {}
@@ -72,7 +86,7 @@ class NRKBaseIE(InfoExtractor):
                          })
                  entries.append({
                      'id': asset.get('carrierId') or entry_id,
-                    'title': entry_title,
+                    'title': make_title(entry_title),
                      'duration': duration,
                      'subtitles': subtitles,
                      'formats': formats,
@@ -87,7 +101,7 @@ class NRKBaseIE(InfoExtractor):
                  duration = parse_duration(data.get('duration'))
                  entries = [{
                      'id': video_id,
-                    'title': title,
+                    'title': make_title(title),
                      'duration': duration,
                      'formats': formats,
                  }]
@@ -111,7 +125,6 @@ class NRKBaseIE(InfoExtractor):
                      message_type, message_type)),
                  expected=True)
  
-        conviva = data.get('convivaStatistics') or {}
          series = conviva.get('seriesName') or data.get('seriesTitle')
          episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
  
@@ -194,7 +207,15 @@ class NRKIE(NRKBaseIE):
  
  class NRKTVIE(NRKBaseIE):
      IE_DESC = 'NRK TV and NRK Radio'
-    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
+    _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:tv|radio)\.nrk(?:super)?\.no/
+                            (?:serie/[^/]+|program)/
+                            (?![Ee]pisodes)%s
+                            (?:/\d{2}-\d{2}-\d{4})?
+                            (?:\#del=(?P<part_id>\d+))?
+                    ''' % _EPISODE_RE
      _API_HOST = 'psapi-we.nrk.no'
  
      _TESTS = [{
@@ -260,9 +281,43 @@ class NRKTVIE(NRKBaseIE):
      }]
  
  
-class NRKPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+class NRKTVDirekteIE(NRKTVIE):
+    IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+    _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
  
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/direkte/nrk1',
+        'only_matching': True,
+    }, {
+        'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+        'only_matching': True,
+    }]
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+    def _extract_description(self, webpage):
+        pass
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+            for video_id in re.findall(self._ITEM_RE, webpage)
+        ]
+
+        playlist_title = self. _extract_title(webpage)
+        playlist_description = self._extract_description(webpage)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+    _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
      _TESTS = [{
          'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
          'info_dict': {
@@ -281,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor):
          'playlist_count': 5,
      }]
  
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+    def _extract_title(self, webpage):
+        return self._og_search_title(webpage, fatal=False)
  
-        webpage = self._download_webpage(url, playlist_id)
+    def _extract_description(self, webpage):
+        return self._og_search_description(webpage)
  
-        entries = [
-            self.url_result('nrk:%s' % video_id, 'NRK')
-            for video_id in re.findall(
-                r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
-                webpage)
-        ]
  
-        playlist_title = self._og_search_title(webpage)
-        playlist_description = self._og_search_description(webpage)
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+    _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+    _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+        'info_dict': {
+            'id': '69031',
+            'title': 'Nytt på nytt, sesong: 201210',
+        },
+        'playlist_count': 4,
+    }]
  
-        return self.playlist_result(
-            entries, playlist_id, playlist_title, playlist_description)
+    def _extract_title(self, webpage):
+        return self._html_search_regex(
+            r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
  
  
  class NRKSkoleIE(InfoExtractor):