[peertube] Add support for generic embeds
[youtube-dl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     int_or_none,
10     parse_resolution,
11     try_get,
12     unified_timestamp,
13     urljoin,
14 )
15
16
class PeerTubeIE(InfoExtractor):
    """Extractor for videos hosted on a fixed list of PeerTube instances.

    Matches watch pages, embed pages and direct API video URLs on the hosts
    enumerated in ``_INSTANCES_RE`` and builds the info dict from the
    instance's ``/api/v1/videos/<id>`` JSON document.
    """

    # Alternation of known instance host names.  The pattern contains a
    # regex comment and layout whitespace, so it must only ever be embedded
    # in a pattern compiled in verbose mode ((?x)).
    _INSTANCES_RE = r'''(?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )'''
    # Watch page, embed page or API endpoint on one of the known instances;
    # the last path segment is captured as the video id.
    _VALID_URL = r'''(?x)
                    https?://
                        %s
                        /(?:videos/(?:watch|embed)|api/v\d/videos)/
                        (?P<id>[^/?\#&]+)
                    ''' % _INSTANCES_RE
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the PeerTube embed URLs referenced by iframes in *webpage*.

        Only ``<iframe src=...>`` values pointing at ``/videos/embed/<id>``
        on one of the known instances are returned.  Scheme-less
        (``//host/...``) URLs are matched as well and returned unchanged.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
                % PeerTubeIE._INSTANCES_RE, webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        The video metadata is downloaded as JSON from
        ``/api/v1/videos/<id>`` on the same host as *url*.  ``name`` and
        ``files`` are required keys of that document (a missing key raises
        ``KeyError``); every other field is optional and yields ``None``
        when absent or of an unexpected type.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)

        title = video['name']

        formats = []
        for file_ in video['files']:
            # Skip malformed entries instead of failing the whole extraction.
            if not isinstance(file_, dict):
                continue
            file_url = file_.get('fileUrl')
            if not file_url or not isinstance(file_url, compat_str):
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and as the input
            # from which the resolution fields are derived.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            # String-valued field of the uploading account, or None.
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        # Only trust an actual boolean flag; anything else (missing key,
        # unexpected type) leaves the age limit unknown.
        nsfw = video.get('nsfw')
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': urljoin(url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }