[rts] Add thumbnail support
[youtube-dl] / youtube_dl / extractor / rts.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     int_or_none,
9     parse_duration,
10     parse_iso8601,
11     unescapeHTML,
12 )
13
14
class RTSIE(InfoExtractor):
    """Information extractor for videos in the RTS.ch TV archives.

    Metadata is fetched from the site's JSON article endpoint, and one
    format entry is produced per item in the article's ``streams`` mapping.
    """

    IE_DESC = 'RTS.ch'
    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'

    _TEST = {
        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
        'md5': '753b877968ad8afaeddccc374d4256a5',
        'info_dict': {
            'id': '3449373',
            'ext': 'mp4',
            'duration': 1488,
            'title': 'Les Enfants Terribles',
            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
            'uploader': 'Divers',
            'upload_date': '19680921',
            'timestamp': -40280400,
            # Raw string: the pattern contains a regex escape (``\.``) that
            # would otherwise be an invalid string escape sequence.
            'thumbnail': r're:^https?://.*\.image',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the archive page at *url*.

        The numeric video id is taken from the URL, the matching JSON
        article is downloaded, and its ``video.JSONinfo`` object is mapped
        onto the returned dictionary.

        ``title`` and ``streams`` are required in the JSON (a missing key
        raises ``KeyError``); every other field is optional and is looked
        up with ``.get()``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        all_info = self._download_json(
            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
        info = all_info['video']['JSONinfo']

        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
        duration = parse_duration(info.get('duration'))

        # The preview image is optional metadata: only unescape it when it
        # is actually present, so a missing key never hands None to the
        # HTML-unescaping helper.
        thumbnail = info.get('preview_image_url')
        if thumbnail is not None:
            thumbnail = unescapeHTML(thumbnail)

        # The bitrate is read from a "-<digits>k." fragment of the stream
        # URL; when the URL has no such fragment the regex search falls
        # back to its default of None.
        formats = [{
            'format_id': fid,
            'url': furl,
            'tbr': int_or_none(self._search_regex(
                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
        } for fid, furl in info['streams'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': info['title'],
            'description': info.get('intro'),
            'duration': duration,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
            'thumbnail': thumbnail,
        }