[peertube] Add extractor (closes #16301, closes #16329)
[youtube-dl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7     int_or_none,
8     parse_resolution,
9     try_get,
10     unified_timestamp,
11     urljoin,
12 )
13
14
class PeerTubeIE(InfoExtractor):
    """Extractor for ``/videos/watch/<id>`` pages on known PeerTube hosts.

    Only the hosts enumerated in ``_VALID_URL`` are matched; the metadata
    and media URLs are read from the host's ``/api/v1/videos/<id>`` JSON.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )
                        /videos/watch/(?P<id>[^/?#&]+)
                    '''
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            # Key must match the one emitted by _real_extract below.
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the PeerTube video at *url*.

        The video id is taken from the URL and the metadata is downloaded
        as JSON from ``/api/v1/videos/<id>`` on the same host.

        A response without a 'name' or 'files' entry raises KeyError; all
        other metadata fields are optional.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)

        title = video['name']

        formats = []
        for file_ in video['files']:
            # Skip malformed entries instead of failing the whole extraction.
            if not isinstance(file_, dict):
                continue
            file_url = file_.get('fileUrl')
            if not file_url or not isinstance(file_url, compat_str):
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and is also fed
            # to parse_resolution to seed the format dict.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            # Look up a string field of the uploading account, if present.
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        # Only an actual boolean is trusted; a missing or oddly typed value
        # leaves the age limit unknown. (Comparing with ``is bool`` would
        # test identity against the type object and never succeed.)
        nsfw = video.get('nsfw')
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': urljoin(url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            # The API spells the key 'licence'; the result field is 'license'.
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }