a3a2915998dc1cc2fca8f5ccdf6cec6cac0d528b
[youtube-dl] / youtube_dl / extractor / fktv.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5     clean_html,
6     determine_ext,
7     js_to_json,
8 )
9
10
class FKTVIE(InfoExtractor):
    """Extractor for fernsehkritik.tv episode pages.

    Handles URLs of the form ``http(s)://[www.]fernsehkritik.tv/folge-<N>``
    (optionally followed by a further path), where ``<N>`` is the numeric
    episode id.
    """

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            # Raw string: '\.' in a plain literal is an invalid escape
            # sequence (warning on Python 3.6+, SyntaxWarning on 3.12+).
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by ``url``.

        Downloads the episode's ``/play`` page and reads from it:

        * the title, from the first ``<h3>`` element;
        * the thumbnail, from a ``POSTER = "..."`` assignment (looked up
          with ``fatal=False``, so a missing poster is not treated as fatal);
        * the media sources, from a ``MEDIA = [...];`` assignment.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(
            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)

        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)

        # MEDIA is embedded in the page as a script-side array literal;
        # js_to_json is handed to _parse_json so it can be converted before
        # parsing (presumably as the source transform -- confirm against
        # InfoExtractor._parse_json).
        media_literal = self._search_regex(
            r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media')
        sources = self._parse_json(media_literal, episode, js_to_json)

        formats = []
        for source in sources:
            furl = source.get('src')
            if not furl:
                # Entries without a usable URL cannot become a format.
                continue
            formats.append({
                'url': furl,
                # The file extension doubles as the format identifier.
                'format_id': determine_ext(furl),
            })
        self._sort_formats(formats)

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
50             'thumbnail': thumbnail,
51         }