[tv2] Fix and improve extraction (closes #22787)

[youtube-dl] / youtube_dl / extractor / tv2.py
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py

index e4b4ac0e7acc3b1f74b53611a84f8e099207aaf2..1b6590767e346059217f7171f971d941aa6f6d71 100644 (file)
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
  from __future__ import unicode_literals
  
  import re
@@ -11,6 +11,7 @@ from ..utils import (
      js_to_json,
      parse_iso8601,
      remove_end,
+    try_get,
  )
  
  
@@ -44,7 +45,14 @@ class TV2IE(InfoExtractor):
              data = self._download_json(
                  'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
                  video_id, 'Downloading play JSON')['playback']
-            for item in data['items']['item']:
+            items = try_get(data, lambda x: x['items']['item'])
+            if not items:
+                continue
+            if not isinstance(items, list):
+                items = [items]
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
                  video_url = item.get('url')
                  if not video_url or video_url in format_urls:
                      continue
@@ -55,10 +63,11 @@ class TV2IE(InfoExtractor):
                  ext = determine_ext(video_url)
                  if ext == 'f4m':
                      formats.extend(self._extract_f4m_formats(
-                        video_url, video_id, f4m_id=format_id))
+                        video_url, video_id, f4m_id=format_id, fatal=False))
                  elif ext == 'm3u8':
                      formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id=format_id, fatal=False))
                  elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                      pass
                  else:
@@ -125,7 +134,7 @@ class TV2ArticleIE(InfoExtractor):
  
          if not assets:
              # New embed pattern
-            for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage):
+            for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage):
                  video = self._parse_json(
                      v, playlist_id, transform_source=js_to_json, fatal=False)
                  if not video: