[picarto] Add extractor
[youtube-dl] / youtube_dl / extractor / picarto.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError, js_to_json, urlencode_postdata
6
7
class PicartoIE(InfoExtractor):
    """Extractor for live channel pages on picarto.tv.

    The channel page is downloaded to read the ``playerSettings[1]`` object,
    then the load balancer endpoint is queried for the edge server that
    serves the HLS and MP4 variants of the stream.
    """

    # The dot after "www" is escaped so it only matches a literal dot.
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)[^/]*$'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the live stream of the channel in *url*.

        Raises ExtractorError when the channel does not exist, when the
        stream is offline, or when the load balancer response does not
        contain the preferred edge server.
        """
        channel_id = self._match_id(url)
        stream_page = self._download_webpage(url, channel_id)

        if 'This channel does not exist.' in stream_page:
            raise ExtractorError('Channel does not exist', expected=True)

        # The settings are a JavaScript object literal, not HTML, so they are
        # taken verbatim (no tag stripping or entity unescaping) and only
        # converted to JSON by js_to_json.
        player_settings_js = self._search_regex(
            r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)',
            stream_page, 'player-settings')
        player_settings = self._parse_json(
            player_settings_js, channel_id, transform_source=js_to_json)
        if not player_settings.get('online'):
            raise ExtractorError('Stream is offline', expected=True)

        cdn_data = self._download_json(
            'https://picarto.tv/process/channel', channel_id,
            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
            note='Fetching load balancer info')

        # Pick the endpoint of the edge flagged as preferred; fail with a
        # readable message instead of an IndexError/KeyError when the
        # response does not have the expected content.
        preferred_edge_id = cdn_data.get('preferedEdge')
        edge = None
        if preferred_edge_id is not None:
            edge = next(
                (candidate.get('ep')
                 for candidate in cdn_data.get('edges') or []
                 if candidate.get('id') == preferred_edge_id),
                None)
        if not edge:
            raise ExtractorError(
                'Unable to find the preferred edge server')

        formats = self._extract_m3u8_formats(
            'https://%s/hls/%s/index.m3u8' % (edge, channel_id),
            channel_id, 'mp4')
        formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)})
        self._sort_formats(formats)

        return {
            'id': channel_id,
            'formats': formats,
            'ext': 'mp4',
            'title': self._live_title(channel_id),
            'is_live': True,
            'thumbnail': player_settings.get('vodThumb'),
            'age_limit': 18 if player_settings.get('mature') else None,
        }
57
58
class PicartoVodIE(InfoExtractor):
    """Extractor for recorded videos on picarto.tv ``videopopout`` pages.

    The page is downloaded and the object literal passed next to
    ``"#vod-player"`` is parsed to obtain the video and thumbnail URLs.
    """

    # Dots after "www" and before "flv" are escaped so they only match a
    # literal dot.
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z0-9_\-\.]+)\.flv'
    _TEST = {
        'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv',
        'md5': '80765b67813053ff31d4df2bd5e900ce',
        'info_dict': {
            'id': 'Carrot_2018.01.11.07.55.12',
            'ext': 'mp4',
            'title': 'Carrot_2018.01.11.07.55.12',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the recorded video referenced by *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The third argument is the field name used in error messages, so a
        # descriptive label is passed rather than the video id. The match is
        # JavaScript, hence the plain (non-HTML) regex helper.
        vod_info_js = self._search_regex(
            r'(?s)"#vod-player",\s*(\{.+?\})\)', webpage, 'vod info')
        vod_info = self._parse_json(
            vod_info_js, video_id, transform_source=js_to_json)

        return {
            'id': video_id,
            'title': video_id,
            'ext': 'mp4',
            'protocol': 'm3u8',
            'url': vod_info['vod'],
            'thumbnail': vod_info.get('vodThumb'),
        }