X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fruutu.py;h=f984040aa07b08f56b1028d0dde1dc3fc78137cb;hb=HEAD;hp=ffea438cc4645c267c87b54a761394e0c1eca247;hpb=2098aee7d6facb9de2253f9b71a2dfa9b932b4cb;p=youtube-dl diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index ffea438cc..f984040aa 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, + ExtractorError, int_or_none, xpath_attr, xpath_text, @@ -12,7 +13,7 @@ from ..utils import ( class RuutuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)' _TESTS = [ { 'url': 'http://www.ruutu.fi/video/2058907', @@ -22,7 +23,7 @@ class RuutuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, }, @@ -34,19 +35,52 @@ class RuutuIE(InfoExtractor): 'id': '2057306', 'ext': 'mp4', 'title': 'Superpesis: katso koko kausi Ruudussa', - 'description': 'md5:da2736052fef3b2bd5e0005e63c25eac', - 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, }, }, + { + 'url': 'http://www.supla.fi/supla/2231370', + 'md5': 'df14e782d49a2c0df03d3be2a54ef949', + 'info_dict': { + 'id': '2231370', + 'ext': 'mp4', + 'title': 'Osa 1: Mikael Jungner', + 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + }, + # Episode where <SourceFile> is "NOT-USED", but has other + # downloadable sources available. 
+ { + 'url': 'http://www.ruutu.fi/video/3193728', + 'only_matching': True, + }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } ] def _real_extract(self, url): video_id = self._match_id(url) video_xml = self._download_xml( - 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id) + 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id, + query={'id': video_id}) formats = [] processed_urls = [] @@ -57,9 +91,9 @@ class RuutuIE(InfoExtractor): extract_formats(child) elif child.tag.endswith('File'): video_url = child.text - if (not video_url or video_url in processed_urls or - any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): - return + if (not video_url or video_url in processed_urls + or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): + continue processed_urls.append(video_url) ext = determine_ext(video_url) if ext == 'm3u8': @@ -68,6 +102,18 @@ class RuutuIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mpd': + # video-only and audio-only streams are of different + # duration resulting in out of sync issue + continue + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': @@ -89,6 +135,11 @@ class RuutuIE(InfoExtractor): }) extract_formats(video_xml.find('./Clip')) + + drm = xpath_text(video_xml, './Clip/DRM', 
default=None) + if not formats and drm: + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) return {