From b71bb3ba8be711abab4c05527d28c4b5e4552401 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Apr 2018 23:52:00 +0700 Subject: [PATCH] [svtplay:series] Improve extraction (closes #16059) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/svt.py | 36 ++++++++++++++---------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b46a304ac..c9f60114d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1031,7 +1031,7 @@ from .sunporno import SunPornoIE from .svt import ( SVTIE, SVTPlayIE, - SVTPlaylistIE, + SVTSeriesIE, ) from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index d02fd9450..45b4b8bf7 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -193,10 +193,8 @@ class SVTPlayIE(SVTBaseIE): return info_dict -class SVTPlaylistIE(InfoExtractor): - IE_DESC = 'SVT Play serie' +class SVTSeriesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)' - IE_NAME = 'svtplay:serie' _TESTS = [{ 'url': 'https://www.svtplay.se/rederiet', 'info_dict': { @@ -209,33 +207,28 @@ class SVTPlaylistIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url) + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( - url, video_id, - note='Downloading serie page', - errnote='unable to fetch serie page') + webpage = self._download_webpage( + url, video_id, 'Downloading serie page') - root_json = self._search_regex( - r'root\[\'__svtplay\'\]\s*=(.+);\n', - page, 'root') - root = self._parse_json(root_json, video_id) - - metadata = 
root.get('metaData', {}) - related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion'] + root = self._parse_json( + self._search_regex( + r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n', + webpage, 'content', group='json'), + video_id) entries = [] - for season in related_videos_accordion: + for season in root['relatedVideoContent']['relatedVideosAccordion']: videos = season.get('videos') if not isinstance(videos, list): continue - for video in videos: content_url = video.get('contentUrl') - if not isinstance(content_url, compat_str): + if not content_url or not isinstance(content_url, compat_str): continue entries.append( self.url_result( @@ -244,5 +237,10 @@ class SVTPlaylistIE(InfoExtractor): video_title=video.get('title') )) + metadata = root.get('metaData') + if not isinstance(metadata, dict): + metadata = {} + return self.playlist_result( - entries, video_id, metadata.get('title'), metadata.get('description')) + entries, video_id, metadata.get('title'), + metadata.get('description')) -- 2.39.5