X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Frts.py;h=12639f08bbc24b2c520b5d93a36b61dbb5e7d831;hb=11bed5827dace09b5483b159476ce9f8c29d6078;hp=5e84c109802e34ce8f57496ee3b7e2cd409c0788;hpb=121c09c7be1ac2944f3432122104c1952bfd1f04;p=youtube-dl diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index 5e84c1098..12639f08b 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -6,18 +6,29 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, + compat_urllib_parse_urlparse, ) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, + xpath_text, ) class RTSIE(InfoExtractor): IE_DESC = 'RTS.ch' - _VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html|play/tv/[^/]+/video/(?P.+?)\?id=(?P[0-9]+))' + _VALID_URL = r'''(?x) + (?: + rts:(?P\d+)| + https?:// + (?:www\.)?rts\.ch/ + (?: + (?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html| + play/tv/[^/]+/video/(?P.+?)\?id=(?P[0-9]+) + ) + )''' _TESTS = [ { @@ -120,6 +131,15 @@ class RTSIE(InfoExtractor): 'view_count': int, }, }, + { + # article with videos on rhs + 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html', + 'info_dict': { + 'id': '6693917', + 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', + }, + 'playlist_mincount': 5, + }, { 'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280', 'only_matching': True, @@ -128,7 +148,7 @@ class RTSIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) - video_id = m.group('id') or m.group('id_new') + video_id = m.group('rts_id') or m.group('id') or m.group('id_new') display_id = m.group('display_id') or m.group('display_id_new') def download_json(internal_id): @@ -141,6 +161,15 @@ class RTSIE(InfoExtractor): # video_id extracted out of URL is not always a real id if 'video' not in all_info and 'audio' not in all_info: page = self._download_webpage(url, display_id) + + # article with videos on rhs + videos = re.findall( + r']+class="content-item"[^>]*>\s*]+data-video-urn="urn:rts:video:(\d+)"', + page) + if videos: + entries = [self.url_result('rts:%s' % video_urn, 'RTS') for video_urn in videos] + return self.playlist_result(entries, video_id, self._og_search_title(page)) + internal_id = self._html_search_regex( r'<(?:video|audio) data-id="([0-9]+)"', page, 'internal video id') @@ -159,11 +188,27 @@ class RTSIE(InfoExtractor): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) - formats = [{ - 'format_id': fid, - 'url': furl, - 'tbr': extract_bitrate(furl), - } for fid, furl in info['streams'].items()] + formats = [] + for format_id, format_url in info['streams'].items(): + if format_url.endswith('.f4m'): + token = self._download_xml( + 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path, + video_id, 'Downloading %s token' % format_id) + auth_params = xpath_text(token, './/authparams', 'auth params') + if not auth_params: + continue + formats.extend(self._extract_f4m_formats( + '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), + video_id, f4m_id=format_id)) + elif format_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'tbr': extract_bitrate(format_url), + }) if 'media' in info: formats.extend([{ @@ -172,6 +217,7 @@ class RTSIE(InfoExtractor): 'tbr': media['rate'] or extract_bitrate(media['url']), } for media in info['media'] if media.get('rate')]) + self._check_formats(formats, video_id) self._sort_formats(formats) return {