X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsvt.py;h=f2a2200bf87180f47ada72857a7eb617e0e0138e;hb=71631862f4de5a10223642ebdbd5e10db374d270;hp=6526a63459ae7139e59dddb2ee5447795cb00359;hpb=e4f90ea0a72711f6577d4cde1dd145f03ab34803;p=youtube-dl

diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index 6526a6345..f2a2200bf 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -7,16 +7,16 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     dict_get,
+    int_or_none,
+    try_get,
 )
 
 
 class SVTBaseIE(InfoExtractor):
-    def _extract_video(self, info, video_id):
-        video_info = self._get_video_info(info)
-
+    def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
-            player_type = vr.get('playerType')
+            player_type = vr.get('playerType') or vr.get('format')
             vurl = vr['url']
             ext = determine_ext(vurl)
             if ext == 'm3u8':
@@ -37,6 +37,9 @@ class SVTBaseIE(InfoExtractor):
                     'format_id': player_type,
                     'url': vurl,
                 })
+        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
+            self.raise_geo_restricted(
+                'This video is only available in Sweden', countries=['SE'])
         self._sort_formats(formats)
 
         subtitles = {}
@@ -52,15 +55,32 @@ class SVTBaseIE(InfoExtractor):
 
                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
 
-        duration = video_info.get('materialLength')
-        age_limit = 18 if video_info.get('inappropriateForChildren') else 0
+        title = video_info.get('title')
+
+        series = video_info.get('programTitle')
+        season_number = int_or_none(video_info.get('season'))
+        episode = video_info.get('episodeTitle')
+        episode_number = int_or_none(video_info.get('episodeNumber'))
+
+        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+        age_limit = None
+        adult = dict_get(
+            video_info, ('inappropriateForChildren', 'blockedForChildren'),
+            skip_false_values=False)
+        if adult is not None:
+            age_limit = 18 if adult else 0
 
         return {
             'id': video_id,
+            'title': title,
             'formats': formats,
             'subtitles': subtitles,
             'duration': duration,
             'age_limit': age_limit,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
         }
 
 
@@ -85,9 +105,6 @@ class SVTIE(SVTBaseIE):
         if mobj:
             return mobj.group('url')
 
-    def _get_video_info(self, info):
-        return info['video']
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         widget_id = mobj.group('widget_id')
@@ -97,15 +114,15 @@ class SVTIE(SVTBaseIE):
             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
             article_id)
 
-        info_dict = self._extract_video(info, article_id)
+        info_dict = self._extract_video(info['video'], article_id)
         info_dict['title'] = info['context']['title']
         return info_dict
 
 
 class SVTPlayIE(SVTBaseIE):
     IE_DESC = 'SVT Play and Öppet arkiv'
-    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
         'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
         'info_dict': {
@@ -113,7 +130,7 @@ class SVTPlayIE(SVTBaseIE):
             'ext': 'mp4',
             'title': 'Flygplan till Haile Selassie',
             'duration': 3527,
-            'thumbnail': 're:^https?://.*[\.-]jpg$',
+            'thumbnail': r're:^https?://.*[\.-]jpg$',
             'age_limit': 0,
             'subtitles': {
                 'sv': [{
@@ -121,25 +138,50 @@ class SVTPlayIE(SVTBaseIE):
                 }]
             },
         },
-    }
-
-    def _get_video_info(self, info):
-        return info['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video']
+    }, {
+        # geo restricted to Sweden
+        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        data = self._parse_json(self._search_regex(
-            r'root\["__svtplay"\]\s*=\s*([^;]+);', webpage, 'embedded data'), video_id)
+        data = self._parse_json(
+            self._search_regex(
+                r'root\["__svtplay"\]\s*=\s*([^;]+);',
+                webpage, 'embedded data', default='{}'),
+            video_id, fatal=False)
 
         thumbnail = self._og_search_thumbnail(webpage)
 
-        info_dict = self._extract_video(data, video_id)
-        info_dict.update({
-            'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
-            'thumbnail': thumbnail,
-        })
-
-        return info_dict
+        if data:
+            video_info = try_get(
+                data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+                dict)
+            if video_info:
+                info_dict = self._extract_video(video_info, video_id)
+                info_dict.update({
+                    'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+                    'thumbnail': thumbnail,
+                })
+                return info_dict
+
+        video_id = self._search_regex(
+            r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+            webpage, 'video id', default=None)
+
+        if video_id:
+            data = self._download_json(
+                'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+            info_dict = self._extract_video(data, video_id)
+            if not info_dict.get('title'):
+                info_dict['title'] = re.sub(
+                    r'\s*\|\s*.+?$', '',
+                    info_dict.get('episode') or self._og_search_title(webpage))
+            return info_dict