[youtube] fix extraction for embed restricted live streams (fixes #16433)
[youtube-dl] / youtube_dl / extractor / picarto.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import time
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     js_to_json,
11     try_get,
12     update_url_query,
13     urlencode_postdata,
14 )
15
16
class PicartoIE(InfoExtractor):
    """Extractor for live Picarto channel pages (picarto.tv/<channel>)."""

    # The dot after "www" is escaped so only the literal "www." prefix is
    # accepted; an unescaped "." would match any single character.
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        VOD pop-out URLs (/videopopout/...) also match _VALID_URL because
        their first path segment is alphanumeric, so anything PicartoVodIE
        accepts is explicitly rejected here.
        """
        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build the info dict for a live channel.

        Raises ExtractorError (marked as expected) when the channel page
        says the channel does not exist or the embedded player settings
        report the stream as offline.
        """
        channel_id = self._match_id(url)
        stream_page = self._download_webpage(url, channel_id)

        if '>This channel does not exist' in stream_page:
            raise ExtractorError(
                'Channel %s does not exist' % channel_id, expected=True)

        # The player settings are embedded in the page as a JS object
        # literal; js_to_json turns it into parseable JSON.
        player = self._parse_json(
            self._search_regex(
                r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
                'player settings'),
            channel_id, transform_source=js_to_json)

        # Only an explicit False counts as offline; a missing key does not.
        if player.get('online') is False:
            raise ExtractorError('Stream is offline', expected=True)

        cdn_data = self._download_json(
            'https://picarto.tv/process/channel', channel_id,
            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
            note='Downloading load balancing info')

        def get_event(key):
            # Missing or non-string event fields fall back to ''.
            return try_get(player, lambda x: x['event'][key], compat_str) or ''

        # Query string appended to every media URL built below.
        params = {
            'token': player.get('token') or '',
            'ticket': get_event('ticket'),
            'con': int(time.time() * 1000),
            # NOTE(review): 'type' is filled from the 'ticket' event field,
            # which looks like a copy/paste slip (get_event('type')?).
            # Left unchanged - confirm against the site's player code.
            'type': get_event('ticket'),
            'scope': get_event('scope'),
        }

        prefered_edge = cdn_data.get('preferedEdge')
        default_tech = player.get('defaultTech')

        # Read the lists defensively: a response without them now yields no
        # formats (reported by _sort_formats) instead of a bare KeyError.
        edges = try_get(cdn_data, lambda x: x['edges'], list) or []
        techs = try_get(cdn_data, lambda x: x['techs'], list) or []

        formats = []

        for edge in edges:
            if not isinstance(edge, dict):
                continue
            edge_ep = edge.get('ep')
            if not edge_ep or not isinstance(edge_ep, compat_str):
                continue
            edge_id = edge.get('id')
            for tech in techs:
                if not isinstance(tech, dict):
                    continue
                tech_label = tech.get('label')
                tech_type = tech.get('type')
                # One point each for the preferred edge and the default tech.
                preference = 0
                if edge_id == prefered_edge:
                    preference += 1
                if tech_type == default_tech:
                    preference += 1
                format_id = []
                if edge_id:
                    # Coerce to text so a non-string id cannot break join().
                    format_id.append(compat_str(edge_id))
                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
                    format_id.append('hls')
                    formats.extend(self._extract_m3u8_formats(
                        update_url_query(
                            'https://%s/hls/%s/index.m3u8'
                            % (edge_ep, channel_id), params),
                        channel_id, 'mp4', preference=preference,
                        m3u8_id='-'.join(format_id), fatal=False))
                elif tech_type == 'video/mp4' or tech_label == 'MP4':
                    format_id.append('mp4')
                    formats.append({
                        'url': update_url_query(
                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
                            params),
                        'format_id': '-'.join(format_id),
                        'preference': preference,
                    })
                # Any other tech is skipped: rtmp format does not seem to work
        self._sort_formats(formats)

        # age_limit stays None when the player gives no 'mature' flag.
        mature = player.get('mature')
        if mature is None:
            age_limit = None
        else:
            age_limit = 18 if mature is True else 0

        return {
            'id': channel_id,
            'title': self._live_title(channel_id),
            'is_live': True,
            'thumbnail': player.get('vodThumb'),
            'age_limit': age_limit,
            'formats': formats,
        }
126
127
class PicartoVodIE(InfoExtractor):
    """Extractor for Picarto VOD pop-out pages (picarto.tv/videopopout/<id>)."""

    # The dot after "www" is escaped so only the literal "www." prefix is
    # accepted; an unescaped "." would match any single character.
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg'
        },
    }, {
        'url': 'https://picarto.tv/videopopout/Plague',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a VOD from its pop-out page.

        The page embeds a JS object passed alongside the "#vod-player"
        selector; its 'vod' entry is used as the HLS manifest URL and
        'vodThumb' as the thumbnail. The video id doubles as the title.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        vod_info = self._parse_json(
            self._search_regex(
                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
                # Descriptive field name rather than the video id, in line
                # with the 'player settings' name used by PicartoIE.
                'vod player settings'),
            video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(
            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': vod_info.get('vodThumb'),
            'formats': formats,
        }