[YoutubeDL] write raw subtitle files
[youtube-dl] / youtube_dl / extractor / clipfish.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     unified_strdate,
8 )
9
10
class ClipfishIE(InfoExtractor):
    # Extractor for clipfish.de video pages. The numeric video id is taken
    # from the ".../video/<id>" part of the page URL.
    _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
        'md5': 'b9a5dc46294154c1193e2d10e0c95693',
        'info_dict': {
            'id': '4343170',
            'ext': 'mp4',
            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
            'upload_date': '20161005',
            'duration': 1291,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        # Build the info dict for a single clipfish.de video page URL.
        video_id = self._match_id(url)

        # The devapi endpoint answers with a JSON document; only the first
        # entry of its 'items' list is used.
        api_url = 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id
        item = self._download_json(api_url, video_id)['items'][0]

        hls_url = item.get('media_videourl_hls')
        progressive_url = item.get('media_videourl')

        # Each format is only offered when the API supplies a URL for it.
        # HLS is listed first, the direct MP4 second.
        formats = []
        if hls_url:
            # The 'de.' host prefix is dropped from the HLS URL
            # (NOTE(review): presumably the prefixed host is region-bound
            # or unreachable -- confirm).
            formats.append({
                'url': hls_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
                'ext': 'mp4',
                'format_id': 'hls',
            })
        if progressive_url:
            formats.append({
                'url': progressive_url,
                'format_id': 'mp4',
                'width': int_or_none(item.get('width')),
                'height': int_or_none(item.get('height')),
                'tbr': int_or_none(item.get('bitrate')),
            })

        # Strip surrounding whitespace from a non-empty description; a
        # missing or empty value is passed through unchanged.
        raw_description = item.get('descr')
        description = raw_description.strip() if raw_description else raw_description

        # Prefer the large thumbnail and fall back to the regular one.
        thumbnail = item.get('media_content_thumbnail_large') or item.get('media_thumbnail')

        return {
            'id': video_id,
            'title': item['title'],
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': int_or_none(item.get('media_length')),
            'upload_date': unified_strdate(item.get('pubDate')),
            'view_count': int_or_none(item.get('media_views'))
        }