[vgtv] Add new extractor
[youtube-dl] / youtube_dl / extractor / rtbf.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import json
6
7 from .common import InfoExtractor
8
9
class RTBFIE(InfoExtractor):
    """Information extractor for videos hosted on www.rtbf.be.

    The numeric video id is read from the ``id`` query parameter of the
    page URL; the metadata comes from a JSON blob embedded in the site's
    embed page for that id.
    """

    # The dots in the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'https?://www\.rtbf\.be/video/[^?]+\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '799f334ddf2c0a582ba80c44655be570',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': 'Football - Diables Rouges',
            'duration': 3099,
            'timestamp': 1398456336,
            'upload_date': '20140425',
        }
    }

    def _real_extract(self, url):
        """Extract the video information for an RTBF video page.

        Downloads the embed page for the video id found in *url*, decodes
        the JSON stored in the ``data-video`` attribute of the
        ``js-player-embed`` element and builds the result from its
        ``data`` member.

        Returns either a URL result delegating to the Youtube extractor
        (when the provider is YouTube) or an info dict with the keys
        ``id``, ``url``, ``title``, ``description``, ``thumbnail``,
        ``duration``, ``timestamp`` and ``view_count``.

        ``title`` is treated as mandatory and raises ``KeyError`` when
        absent; the descriptive fields are optional and become ``None``
        when the site does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(
            'https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)

        # NOTE(review): relies on _html_search_regex returning the attribute
        # value in a form json.loads can parse (entities unescaped) - confirm.
        data = json.loads(self._html_search_regex(
            r'<div class="js-player-embed" data-video="([^"]+)"',
            page, 'data video'))['data']

        # Prefer the direct download link, fall back to the regular URL.
        video_url = data.get('downloadUrl') or data.get('url')

        # The provider may be missing or null; treat that as "not YouTube"
        # instead of failing on None.lower().
        provider = data.get('provider') or ''
        if provider.lower() == 'youtube':
            return self.url_result(video_url, 'Youtube')

        # The thumbnail is optional metadata: do not abort the whole
        # extraction when it is absent or not shaped as a mapping.
        thumbnails = data.get('thumbnail')
        thumbnail = (
            thumbnails.get('large') if isinstance(thumbnails, dict) else None)

        return {
            'id': video_id,
            'url': video_url,
            'title': data['title'],
            'description': data.get('description') or data.get('subtitle'),
            'thumbnail': thumbnail,
            'duration': data.get('duration') or data.get('realDuration'),
            'timestamp': data.get('created'),
            'view_count': data.get('viewCount'),
        }