]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/helsinki.py
[teachable] Extract chapter metadata (closes #24421)
[youtube-dl] / youtube_dl / extractor / helsinki.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6 from ..utils import js_to_json
7
8
class HelsinkiIE(InfoExtractor):
    """Extractor for video.helsinki.fi archive pages (Arkisto/flash.php?id=N)."""

    IE_DESC = 'helsinki.fi'
    # The numeric ``id`` query parameter is used as the video id.
    _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
        'info_dict': {
            'id': '20258',
            'ext': 'mp4',
            'title': 'Tietotekniikkafoorumi-iltapäivä',
            'description': 'md5:f5c904224d43c133225130fe156a5ee0',
        },
        'params': {
            'skip_download': True,  # RTMP
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single archive page.

        The page is downloaded, the object literal passed to
        ``jwplayer("player").setup(...)`` is converted to JSON and parsed,
        and one format entry is created for every source that has a
        ``file`` value. Title and description are taken from the
        ``_og_search_title`` / ``_og_search_description`` helpers.

        Returns a dict with the keys ``id``, ``title``, ``description``
        and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The dot before ``setup`` is escaped so that only the literal
        # method call ``jwplayer("player").setup(`` is matched.
        player_code = self._html_search_regex(
            r'(?s)jwplayer\("player"\)\.setup\((\{.*?\})\);',
            webpage, 'player code')
        # The setup argument is a JavaScript object literal, not strict
        # JSON, hence the js_to_json transformation before parsing.
        params = self._parse_json(
            player_code, video_id, transform_source=js_to_json)

        # Entries without a 'file' value cannot yield a usable URL, so they
        # are skipped instead of raising KeyError; a missing 'sources' list
        # is treated as empty and left for _sort_formats to deal with.
        formats = [{
            'url': source['file'],
            'ext': 'mp4',
        } for source in params.get('sources') or [] if source.get('file')]
        self._sort_formats(formats)

        return {
            'id': video_id,
            # Drop the 'Video: ' marker from the Open Graph title.
            'title': self._og_search_title(webpage).replace('Video: ', ''),
            'description': self._og_search_description(webpage),
            'formats': formats,
        }