[tv2] Fix and improve extraction (closes #22787)
[youtube-dl] / youtube_dl / extractor / tv2.py
index e4b4ac0e7acc3b1f74b53611a84f8e099207aaf2..1b6590767e346059217f7171f971d941aa6f6d71 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -11,6 +11,7 @@ from ..utils import (
     js_to_json,
     parse_iso8601,
     remove_end,
+    try_get,
 )
 
 
@@ -44,7 +45,14 @@ class TV2IE(InfoExtractor):
             data = self._download_json(
                 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
                 video_id, 'Downloading play JSON')['playback']
-            for item in data['items']['item']:
+            items = try_get(data, lambda x: x['items']['item'])
+            if not items:
+                continue
+            if not isinstance(items, list):
+                items = [items]
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
                 video_url = item.get('url')
                 if not video_url or video_url in format_urls:
                     continue
@@ -55,10 +63,11 @@ class TV2IE(InfoExtractor):
                 ext = determine_ext(video_url)
                 if ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
-                        video_url, video_id, f4m_id=format_id))
+                        video_url, video_id, f4m_id=format_id, fatal=False))
                 elif ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
-                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id=format_id, fatal=False))
                 elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                     pass
                 else:
@@ -125,7 +134,7 @@ class TV2ArticleIE(InfoExtractor):
 
         if not assets:
             # New embed pattern
-            for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage):
+            for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage):
                 video = self._parse_json(
                     v, playlist_id, transform_source=js_to_json, fatal=False)
                 if not video: