[svtplay:series] Improve extraction (closes #16059)
author Sergey M․ <dstftw@gmail.com>
Wed, 4 Apr 2018 16:52:00 +0000 (23:52 +0700)
committer Sergey M․ <dstftw@gmail.com>
Wed, 4 Apr 2018 17:29:02 +0000 (00:29 +0700)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/svt.py

index b46a304acdae8e139d60591b71716b27cad2f6a3..c9f60114dac8ab4941d511cd4ed682620d74ad57 100644 (file)
@@ -1031,7 +1031,7 @@ from .sunporno import SunPornoIE
 from .svt import (
     SVTIE,
     SVTPlayIE,
-    SVTPlaylistIE,
+    SVTSeriesIE,
 )
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
index d02fd945081c80cebad517c46849ed5e831a3887..45b4b8bf7e268a149e3649dd915e9bf2815c4d37 100644 (file)
@@ -193,10 +193,8 @@ class SVTPlayIE(SVTBaseIE):
             return info_dict
 
 
-class SVTPlaylistIE(InfoExtractor):
-    IE_DESC = 'SVT Play serie'
+class SVTSeriesIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
-    IE_NAME = 'svtplay:serie'
     _TESTS = [{
         'url': 'https://www.svtplay.se/rederiet',
         'info_dict': {
@@ -209,33 +207,28 @@ class SVTPlaylistIE(InfoExtractor):
 
     @classmethod
     def suitable(cls, url):
-        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url)
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        page = self._download_webpage(
-            url, video_id,
-            note='Downloading serie page',
-            errnote='unable to fetch serie page')
+        webpage = self._download_webpage(
+            url, video_id, 'Downloading serie page')
 
-        root_json = self._search_regex(
-            r'root\[\'__svtplay\'\]\s*=(.+);\n',
-            page, 'root')
-        root = self._parse_json(root_json, video_id)
-
-        metadata = root.get('metaData', {})
-        related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion']
+        root = self._parse_json(
+            self._search_regex(
+                r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n',
+                webpage, 'content', group='json'),
+            video_id)
 
         entries = []
-        for season in related_videos_accordion:
+        for season in root['relatedVideoContent']['relatedVideosAccordion']:
             videos = season.get('videos')
             if not isinstance(videos, list):
                 continue
-
             for video in videos:
                 content_url = video.get('contentUrl')
-                if not isinstance(content_url, compat_str):
+                if not content_url or not isinstance(content_url, compat_str):
                     continue
                 entries.append(
                     self.url_result(
@@ -244,5 +237,10 @@ class SVTPlaylistIE(InfoExtractor):
                         video_title=video.get('title')
                     ))
 
+        metadata = root.get('metaData')
+        if not isinstance(metadata, dict):
+            metadata = {}
+
         return self.playlist_result(
-            entries, video_id, metadata.get('title'), metadata.get('description'))
+            entries, video_id, metadata.get('title'),
+            metadata.get('description'))