# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unescapeHTML,
)


class RTBFIE(InfoExtractor):
    """Information extractor for rtbf.be ``video`` and ``ouftivi`` pages.

    The numeric video id is taken from the ``id`` query parameter of
    ``/video/...`` URLs or from the ``videoId`` query parameter of
    ``/ouftivi/...`` URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '799f334ddf2c0a582ba80c44655be570',
        'info_dict': {
            # NOTE(review): two lines of this dict sit between the hunks of the
            # change this module was reconstructed from and were not visible.
            # 'id' is restored because it must equal the id in the test URL;
            # confirm the remaining entry against the upstream file.
            'id': '1921274',
            'title': 'Les Diables au coeur (épisode 2)',
            'duration': 3099,
        }
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }]

    # (key inside the media JSON's "sources" mapping, format_id to report).
    # Formats are emitted in exactly this order.
    # NOTE(review): presumably ordered from lowest to highest quality - confirm.
    _QUALITIES = [
        ('mobile', 'mobile'),
        ('web', 'SD'),
        ('url', 'MD'),
        ('high', 'HD'),
    ]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The embed page ``http://www.rtbf.be/video/embed?id=<id>`` is
        downloaded and the HTML-escaped JSON stored in its ``data-media``
        attribute is decoded.  When that JSON names YouTube as provider the
        extraction is delegated to the ``Youtube`` extractor; otherwise one
        format is produced for every ``_QUALITIES`` key that has a non-empty
        URL under ``sources``.

        Returns the ``url_result`` for YouTube-hosted media, or a dict with
        ``id``, ``formats``, ``title`` and optional metadata.

        Raises ``KeyError`` if the media JSON has no ``title``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)

        data = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-media="([^"]+)"', webpage, 'data video')),
            video_id)

        # 'provider' may be missing or null; only call .lower() on a real value.
        provider = data.get('provider')
        if provider and provider.lower() == 'youtube':
            video_url = data.get('downloadUrl') or data.get('url')
            return self.url_result(video_url, 'Youtube')

        # 'sources' may be missing or null; treat both as "no sources" instead
        # of failing with KeyError/AttributeError.
        sources = data.get('sources') or {}
        formats = [
            {
                'format_id': format_id,
                'url': sources[key],
            }
            for key, format_id in self._QUALITIES
            if sources.get(key)
        ]

        return {
            'id': video_id,
            'formats': formats,
            'title': data['title'],
            'description': data.get('description') or data.get('subtitle'),
            'thumbnail': data.get('thumbnail'),
            'duration': data.get('duration') or data.get('realDuration'),
            'timestamp': int_or_none(data.get('created')),
            'view_count': int_or_none(data.get('viewCount')),
        }