X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsvt.py;h=f71eab8b25014501aa6d123e70fba4506c095cea;hb=4c76aa06665621c7689938afd7bbdbc797b5c7ea;hp=45b4b8bf7e268a149e3649dd915e9bf2815c4d37;hpb=b71bb3ba8be711abab4c05527d28c4b5e4552401;p=youtube-dl diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 45b4b8bf7..f71eab8b2 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( determine_ext, dict_get, @@ -18,6 +22,8 @@ class SVTBaseIE(InfoExtractor): _GEO_COUNTRIES = ['SE'] def _extract_video(self, video_info, video_id): + is_live = dict_get(video_info, ('live', 'simulcast'), default=False) + m3u8_protocol = 'm3u8' if is_live else 'm3u8_native' formats = [] for vr in video_info['videoReferences']: player_type = vr.get('playerType') or vr.get('format') @@ -26,7 +32,7 @@ class SVTBaseIE(InfoExtractor): if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( vurl, video_id, - ext='mp4', entry_protocol='m3u8_native', + ext='mp4', entry_protocol=m3u8_protocol, m3u8_id=player_type, fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( @@ -86,6 +92,7 @@ class SVTBaseIE(InfoExtractor): 'season_number': season_number, 'episode': episode, 'episode_number': episode_number, + 'is_live': is_live, } @@ -124,9 +131,13 @@ class SVTIE(SVTBaseIE): return info_dict -class SVTPlayIE(SVTBaseIE): +class SVTPlayBaseIE(SVTBaseIE): + _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n' + + +class SVTPlayIE(SVTPlayBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', @@ -150,6 +161,9 @@ class SVTPlayIE(SVTBaseIE): }, { 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', 'only_matching': True, + }, { + 'url': 'https://www.svtplay.se/kanaler/svt1', + 'only_matching': True, }] def _real_extract(self, url): @@ -159,12 +173,16 @@ class SVTPlayIE(SVTBaseIE): data = self._parse_json( self._search_regex( - r'root\["__svtplay"\]\s*=\s*([^;]+);', - webpage, 'embedded data', default='{}'), + self._SVTPLAY_RE, webpage, 'embedded data', default='{}', + group='json'), video_id, fatal=False) thumbnail = self._og_search_thumbnail(webpage) + def adjust_title(info): + if info['is_live']: + info['title'] = self._live_title(info['title']) + if data: video_info = try_get( data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], @@ -175,6 +193,7 @@ class SVTPlayIE(SVTBaseIE): 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], 'thumbnail': thumbnail, }) + adjust_title(info_dict) return info_dict video_id = self._search_regex( @@ -190,10 +209,11 @@ class SVTPlayIE(SVTBaseIE): info_dict['title'] = re.sub( r'\s*\|\s*.+?$', '', info_dict.get('episode') or self._og_search_title(webpage)) + adjust_title(info_dict) return info_dict -class SVTSeriesIE(InfoExtractor): +class SVTSeriesIE(SVTPlayBaseIE): _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.svtplay.se/rederiet', @@ -203,6 +223,14 @@ class SVTSeriesIE(InfoExtractor): 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', }, 'playlist_mincount': 318, + }, { + 'url': 'https://www.svtplay.se/rederiet?tab=sasong2', + 'info_dict': { + 'id': 'rederiet-sasong2', + 'title': 'Rederiet - Säsong 2', + 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e', + }, + 'playlist_count': 12, }] @classmethod @@ -210,19 +238,32 @@ class SVTSeriesIE(InfoExtractor): return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) def _real_extract(self, url): - video_id = self._match_id(url) + series_id = self._match_id(url) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + season_slug = qs.get('tab', [None])[0] + + if season_slug: + series_id += '-%s' % season_slug webpage = self._download_webpage( - url, video_id, 'Downloading serie page') + url, series_id, 'Downloading series page') root = self._parse_json( self._search_regex( - r'root\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P{.+?})\s*;\s*\n', - webpage, 'content', group='json'), - video_id) + self._SVTPLAY_RE, webpage, 'content', group='json'), + series_id) + + season_name = None entries = [] for season in root['relatedVideoContent']['relatedVideosAccordion']: + if not isinstance(season, dict): + continue + if season_slug: + if season.get('slug') != season_slug: + continue + season_name = season.get('name') videos = season.get('videos') if not isinstance(videos, list): continue @@ -241,6 +282,13 @@ class SVTSeriesIE(InfoExtractor): if not isinstance(metadata, dict): metadata = {} + title = metadata.get('title') + season_name = season_name or season_slug + + if title and season_name: + title = '%s - %s' % (title, season_name) + elif season_slug: + title = season_slug + return self.playlist_result( - entries, video_id, metadata.get('title'), - metadata.get('description')) + entries, series_id, title, metadata.get('description'))