Merge branch 'soompi' of https://github.com/ping/youtube-dl into ping-soompi
[youtube-dl] / youtube_dl / extractor / rtbf.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     unescapeHTML,
8 )
9
10
class RTBFIE(InfoExtractor):
    """Extractor for video pages on www.rtbf.be.

    The numeric ``id`` query parameter of the page URL is used as the video
    id. Metadata is read from the JSON stored in the ``data-video``
    attribute of the corresponding embed page.
    """

    # Dots in the host name are escaped so that they only match a literal
    # '.', not an arbitrary character.
    _VALID_URL = r'https?://www\.rtbf\.be/video/[^\?]+\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '799f334ddf2c0a582ba80c44655be570',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'duration': 3099,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Returns either a ``url_result`` delegating to the 'Youtube'
        extractor (when the embedded JSON names YouTube as the provider) or
        an info dict with id, url, title and optional metadata fields.
        A missing ``title`` key in the JSON raises ``KeyError``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)

        # The attribute value is HTML-escaped JSON; unescape before parsing.
        data = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-video="([^"]+)"', webpage, 'data video')),
            video_id)

        video_url = data.get('downloadUrl') or data.get('url')

        # 'provider' may be absent or null in the JSON; treat that as
        # "not YouTube" instead of failing on None.lower().
        provider = data.get('provider') or ''
        if provider.lower() == 'youtube':
            return self.url_result(video_url, 'Youtube')

        return {
            'id': video_id,
            'url': video_url,
            'title': data['title'],
            'description': data.get('description') or data.get('subtitle'),
            'thumbnail': data.get('thumbnail'),
            'duration': data.get('duration') or data.get('realDuration'),
            'timestamp': int_or_none(data.get('created')),
            'view_count': int_or_none(data.get('viewCount')),
        }