[sportbox] PEP 8
[youtube-dl] / youtube_dl / extractor / sportbox.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import js_to_json
8
9
class SportBoxEmbedIE(InfoExtractor):
    """Extractor for the embeddable news.sportbox.ru video player.

    Matches both path-style player URLs (``/vdl/player/<section>/<id>``) and
    query-style ones (``/vdl/player?...id=<id>`` or ``...nid=<id>``).
    """

    _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
        'info_dict': {
            'id': '211355',
            'ext': 'mp4',
            'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the player URLs of all sportbox iframes found in webpage.

        The iframe ``src`` attribute may be wrapped in either double or
        single quotes. URLs are returned verbatim (no unescaping), as a
        list of strings in the order they appear in the page.
        """
        # The opening quote is captured and back-referenced so that the URL
        # ends at the matching quote character, whichever one the page uses.
        # (?s) keeps the URL body equivalent to the former [^"]+ class.
        return [
            mobj.group('url') for mobj in re.finditer(
                r'(?s)<iframe[^>]+src=(?P<q>["\'])(?P<url>https?://news\.sportbox\.ru/vdl/player(?:(?!(?P=q)).)+)(?P=q)',
                webpage)]

    def _real_extract(self, url):
        """Download the player page and build the info dict for the video.

        The player settings are read from the ``player.setup({...});`` call
        embedded in the page. The result contains ``id``, ``title``,
        ``thumbnail`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []

        def cleanup_js(code):
            # desktop_advert_config contains complex Javascripts and we don't
            # need it: drop everything from that key to the end of its line
            # before passing the settings object to js_to_json.
            return js_to_json(re.sub(r'desktop_advert_config.*', '', code))

        jwplayer_data = self._parse_json(self._search_regex(
            r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
            transform_source=cleanup_js)

        # Both stream URLs are optional; collect whichever ones are present.
        hls_url = jwplayer_data.get('hls_url')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, ext='mp4', m3u8_id='hls'))

        rtsp_url = jwplayer_data.get('rtsp_url')
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        # The title is mandatory (direct indexing fails if it is missing),
        # while the thumbnail is optional.
        title = jwplayer_data['node_title']
        thumbnail = jwplayer_data.get('image_url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }