X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsvt.py;h=b544da414e1113a88eef68e9b4652c3164033e89;hb=cae5d9705c28ffc0bf5e149a5f92d31a48208e49;hp=d02fd945081c80cebad517c46849ed5e831a3887;hpb=fd97fa7bfc59983d315892c26f861842820a9579;p=youtube-dl diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index d02fd9450..b544da414 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( determine_ext, dict_get, @@ -124,7 +128,11 @@ class SVTIE(SVTBaseIE): return info_dict -class SVTPlayIE(SVTBaseIE): +class SVTPlayBaseIE(SVTBaseIE): + _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n' + + +class SVTPlayIE(SVTPlayBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' _TESTS = [{ @@ -159,8 +167,8 @@ class SVTPlayIE(SVTBaseIE): data = self._parse_json( self._search_regex( - r'root\["__svtplay"\]\s*=\s*([^;]+);', - webpage, 'embedded data', default='{}'), + self._SVTPLAY_RE, webpage, 'embedded data', default='{}', + group='json'), video_id, fatal=False) thumbnail = self._og_search_thumbnail(webpage) @@ -193,10 +201,8 @@ class SVTPlayIE(SVTBaseIE): return info_dict -class SVTPlaylistIE(InfoExtractor): - IE_DESC = 'SVT Play serie' +class SVTSeriesIE(SVTPlayBaseIE): _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)' - IE_NAME = 'svtplay:serie' _TESTS = [{ 'url': 'https://www.svtplay.se/rederiet', 'info_dict': { @@ -205,37 +211,53 @@ class SVTPlaylistIE(InfoExtractor): 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', }, 'playlist_mincount': 318, + }, { + 'url': 'https://www.svtplay.se/rederiet?tab=sasong2', + 'info_dict': { + 'id': 'rederiet-sasong2', + 'title': 'Rederiet - Säsong 2', + 
'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', + }, + 'playlist_count': 12, }] @classmethod def suitable(cls, url): - return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPlaylistIE, cls).suitable(url) + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) def _real_extract(self, url): - video_id = self._match_id(url) + series_id = self._match_id(url) - page = self._download_webpage( - url, video_id, - note='Downloading serie page', - errnote='unable to fetch serie page') + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + season_slug = qs.get('tab', [None])[0] - root_json = self._search_regex( - r'root\[\'__svtplay\'\]\s*=(.+);\n', - page, 'root') - root = self._parse_json(root_json, video_id) + if season_slug: + series_id += '-%s' % season_slug - metadata = root.get('metaData', {}) - related_videos_accordion = root['relatedVideoContent']['relatedVideosAccordion'] + webpage = self._download_webpage( + url, series_id, 'Downloading series page') + + root = self._parse_json( + self._search_regex( + self._SVTPLAY_RE, webpage, 'content', group='json'), + series_id) + + season_name = None entries = [] - for season in related_videos_accordion: + for season in root['relatedVideoContent']['relatedVideosAccordion']: + if not isinstance(season, dict): + continue + if season_slug: + if season.get('slug') != season_slug: + continue + season_name = season.get('name') videos = season.get('videos') if not isinstance(videos, list): continue - for video in videos: content_url = video.get('contentUrl') - if not isinstance(content_url, compat_str): + if not content_url or not isinstance(content_url, compat_str): continue entries.append( self.url_result( @@ -244,5 +266,17 @@ class SVTPlaylistIE(InfoExtractor): video_title=video.get('title') )) + metadata = root.get('metaData') + if not isinstance(metadata, dict): + metadata = {} + + title = metadata.get('title') + season_name = 
season_name or season_slug + + if title and season_name: + title = '%s - %s' % (title, season_name) + elif season_slug: + title = season_slug + return self.playlist_result( - entries, video_id, metadata.get('title'), metadata.get('description')) + entries, series_id, title, metadata.get('description'))