Merge remote-tracking branch 'upstream/master' into bliptv
[youtube-dl] / youtube_dl / extractor / rtbf.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     unescapeHTML,
8 )
9
10
class RTBFIE(InfoExtractor):
    """Information extractor for videos published on rtbf.be.

    Accepts ``/video/...?id=<id>`` URLs as well as ``/ouftivi/...`` URLs that
    carry the numeric id in a ``videoId`` query parameter. All metadata is
    read from the JSON stored in the ``data-media`` attribute of the embed
    page for that id.
    """

    _VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '799f334ddf2c0a582ba80c44655be570',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'duration': 3099,
        }
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }]

    # Pairs of (key looked up in the media JSON ``sources`` mapping,
    # ``format_id`` reported for that source). Formats are emitted in this
    # order -- presumably lowest to highest quality; confirm before
    # reordering.
    _QUALITIES = [
        ('mobile', 'mobile'),
        ('web', 'SD'),
        ('url', 'MD'),
        ('high', 'HD'),
    ]

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by ``url``.

        Downloads the embed page for the matched video id and parses the
        JSON held in its ``data-media`` attribute. When that JSON names
        YouTube as the provider, extraction is delegated to the ``Youtube``
        extractor through ``url_result``; otherwise one format is built for
        every entry of ``_QUALITIES`` that has a non-empty URL in
        ``data['sources']``.

        Raises ``KeyError`` if the media JSON lacks ``sources`` (for
        non-YouTube media) or ``title``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)

        # The attribute value is HTML-escaped JSON, so it has to be
        # unescaped before it can be parsed.
        data = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-media="([^"]+)"', webpage, 'data video')),
            video_id)

        # ``provider`` may be missing or null in the media JSON; guard the
        # lookup so ``.lower()`` is never called on ``None``.
        provider = data.get('provider')
        if provider and provider.lower() == 'youtube':
            video_url = data.get('downloadUrl') or data.get('url')
            return self.url_result(video_url, 'Youtube')

        sources = data['sources']
        formats = []
        for key, format_id in self._QUALITIES:
            format_url = sources.get(key)
            # Skip qualities that are absent or have an empty URL.
            if format_url:
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                })

        return {
            'id': video_id,
            'formats': formats,
            'title': data['title'],
            'description': data.get('description') or data.get('subtitle'),
            'thumbnail': data.get('thumbnail'),
            'duration': data.get('duration') or data.get('realDuration'),
            'timestamp': int_or_none(data.get('created')),
            'view_count': int_or_none(data.get('viewCount')),
        }