[peertube] Improve generic support (closes #16733)
[youtube-dl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     int_or_none,
10     parse_resolution,
11     try_get,
12     unified_timestamp,
13     urljoin,
14 )
15
16
class PeerTubeIE(InfoExtractor):
    """Extractor for videos hosted on PeerTube instances.

    Two URL forms are accepted (see ``_VALID_URL``):

    * ``http(s)://<known instance>/videos/watch|embed/<uuid>`` and
      ``http(s)://<known instance>/api/v<N>/videos/<uuid>`` for the hosts
      listed in ``_INSTANCES_RE``;
    * the internal ``peertube:<host>:<uuid>`` form, produced by
      ``_extract_peertube_url`` for pages that look like PeerTube but whose
      host is not in the hard-coded list.
    """

    # Verbose-mode regex fragment (contains whitespace and a comment), so
    # every pattern embedding it must be compiled with the (?x) flag.
    _INSTANCES_RE = r'''(?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )'''
    # Canonical 8-4-4-4-12 hexadecimal UUID, either letter case.
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    # The host is captured as ``host`` for the internal peertube: form and
    # as ``host_2`` for regular URLs of known instances.
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }, {
        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_peertube_url(webpage, source_url):
        """Detect a PeerTube watch page on a host missing from _INSTANCES_RE.

        Returns a ``peertube:<host>:<uuid>`` URL when ``source_url`` has the
        ``/videos/watch/<uuid>`` shape AND ``webpage`` contains one of the
        known PeerTube marker strings; returns None otherwise.
        """
        mobj = re.match(
            r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
            % PeerTubeIE._UUID_RE, source_url)
        # The URL shape alone is not specific enough, so additionally
        # require a PeerTube marker string in the page markup.
        if mobj and any(p in webpage for p in (
                '<title>PeerTube<',
                'There will be other non JS-based clients to access PeerTube',
                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
            return 'peertube:%s:%s' % mobj.group('host', 'id')

    @staticmethod
    def _extract_urls(webpage, source_url):
        """Collect PeerTube video URLs referenced by ``webpage``.

        Looks first for <iframe> embeds pointing at known instances (the
        matched URLs may be protocol-relative).  When none is found, falls
        back to treating the page itself as a PeerTube watch page via
        ``_extract_peertube_url``.

        Returns a list of URL strings, possibly empty.
        """
        # (?x) is mandatory here because _INSTANCES_RE is a verbose regex.
        entries = re.findall(
            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
        if not entries:
            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
            if peertube_url:
                entries = [peertube_url]
        return entries

    def _real_extract(self, url):
        """Query the instance's videos API and build the info dict.

        ``url`` must match ``_VALID_URL``.  The video's ``name`` field is
        mandatory (KeyError if absent); every other metadata field is
        optional and yields None when missing or of an unexpected type.
        """
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

        # Relative paths returned by the API are resolved against this base
        # rather than against `url`: `url` may be the internal
        # peertube:<host>:<id> form, which is not an HTTP URL and so cannot
        # produce an absolute thumbnail URL.
        base_url = 'https://%s' % host

        video = self._download_json(
            '%s/api/v1/videos/%s' % (base_url, video_id), video_id)

        title = video['name']

        formats = []
        # Tolerate a missing/null file list so that such a response yields
        # no formats here instead of raising a bare KeyError.
        for file_ in video.get('files') or []:
            if not isinstance(file_, dict):
                continue
            file_url = file_.get('fileUrl')
            if not file_url or not isinstance(file_url, compat_str):
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and is also
            # parsed for dimensions by parse_resolution.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            # String value of video['account'][field], or None.
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        # Must be a type check: `nsfw is bool` compares the value against
        # the bool type object itself and is never true for True/False.
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': urljoin(base_url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }