[picarto] Use API and add token support
[youtube-dl] / youtube_dl / extractor / picarto.py
1 # coding: utf-8
from __future__ import unicode_literals

import re
import time

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    js_to_json,
    update_url_query,
    urlencode_postdata,
)
14
15
class PicartoIE(InfoExtractor):
    """Extractor for live Picarto channels.

    Handles URLs of the form ``picarto.tv/<channel>`` with an optional
    trailing ``/<token>`` path component. The token, when present, is
    forwarded to the CDN as the ``token`` query parameter; otherwise the
    literal ``public`` is sent.
    """

    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        # The channel pattern would also match ``/videopopout/...`` URLs,
        # so defer to the VOD extractor whenever it claims the URL.
        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict for a live channel.

        Queries the channel metadata API, then the load balancing endpoint,
        and emits one format per (edge, tech) pair for the HLS and MP4
        techs.

        Raises:
            ExtractorError: if the metadata explicitly reports the channel
                as offline (``online`` is ``False``).
        """
        channel_id = self._match_id(url)
        metadata = self._download_json(
            'https://api.picarto.tv/v1/channel/name/' + channel_id,
            channel_id)

        if metadata.get('online') is False:
            raise ExtractorError('Stream is offline', expected=True)

        cdn_data = self._download_json(
            'https://picarto.tv/process/channel', channel_id,
            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
            note='Downloading load balancing info')

        # Match against _VALID_URL directly instead of depending on the
        # _VALID_URL_RE cache attribute, which only exists as a side effect
        # of an earlier suitable()/_match_id() call.
        token = re.match(self._VALID_URL, url).group('token') or 'public'
        params = {
            'con': int(time.time() * 1000),
            'token': token,
        }

        prefered_edge = cdn_data.get('preferedEdge')
        formats = []

        for edge in cdn_data['edges']:
            edge_ep = edge.get('ep')
            if not edge_ep or not isinstance(edge_ep, compat_str):
                continue
            edge_id = edge.get('id')
            # Both values depend only on the edge, so compute them once per
            # edge rather than once per tech.
            preference = 1 if edge_id == prefered_edge else 0
            format_id_prefix = [edge_id] if edge_id else []
            for tech in cdn_data['techs']:
                tech_label = tech.get('label')
                tech_type = tech.get('type')
                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
                    formats.extend(self._extract_m3u8_formats(
                        update_url_query(
                            'https://%s/hls/%s/index.m3u8'
                            % (edge_ep, channel_id), params),
                        channel_id, 'mp4', preference=preference,
                        m3u8_id='-'.join(format_id_prefix + ['hls']),
                        fatal=False))
                elif tech_type == 'video/mp4' or tech_label == 'MP4':
                    formats.append({
                        'url': update_url_query(
                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
                            params),
                        'format_id': '-'.join(format_id_prefix + ['mp4']),
                        'preference': preference,
                    })
                # Any other tech is skipped: rtmp format does not seem to
                # work.
        self._sort_formats(formats)

        mature = metadata.get('adult')
        if mature is None:
            age_limit = None
        else:
            age_limit = 18 if mature is True else 0

        # 'thumbnails' may be present but null (or not a mapping), in which
        # case a plain .get('thumbnails', {}) default would not help.
        thumbnails = metadata.get('thumbnails')
        thumbnail = thumbnails.get('web') if isinstance(thumbnails, dict) else None

        return {
            'id': channel_id,
            'title': self._live_title(channel_id),
            'is_live': True,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'formats': formats,
        }
108
109
class PicartoVodIE(InfoExtractor):
    """Extractor for Picarto recorded videos (``picarto.tv/videopopout/<id>``).

    The video id is the path component after ``videopopout/`` and is also
    used as the title.
    """

    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg'
        },
    }, {
        'url': 'https://picarto.tv/videopopout/Plague',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a recorded video.

        Downloads the popout page, parses the JavaScript object passed
        alongside ``#vod-player`` and extracts HLS formats from its ``vod``
        entry.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The third argument of _search_regex is the human-readable field
        # name used in error messages, so pass a description rather than
        # the video id.
        vod_info = self._parse_json(
            self._search_regex(
                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
                'vod player info'),
            video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(
            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': vod_info.get('vodThumb'),
            'formats': formats,
        }