X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpeertube.py;h=e03c3d1d3d61ec2fd981776fba2775464b9658d1;hb=a61ce71468cb222338ccd8039dc631f3619dc585;hp=b086f6f5a806047731da5ccdf08ece752738271b;hpb=c561b75c82247188e010b6b53c118bb26b4daaf0;p=youtube-dl diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index b086f6f5a..e03c3d1d3 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -8,14 +10,13 @@ from ..utils import ( parse_resolution, try_get, unified_timestamp, + url_or_none, urljoin, ) class PeerTubeIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?: + _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances tube\.openalgeria\.org| peertube\.pointsecu\.fr| @@ -115,9 +116,15 @@ class PeerTubeIE(InfoExtractor): peertube2\.cpy\.re| videos\.tcit\.fr| peertube\.cpy\.re - ) - /videos/watch/(?P<id>[^/?#&]+) - ''' + )''' + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _VALID_URL = r'''(?x) + (?: + peertube:(?P<host>[^:]+):| + https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/ + ) + (?P<id>%s) + ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', 'md5': '80f24ff364cc9d333529506a263e7feb', @@ -147,13 +154,46 @@ class PeerTubeIE(InfoExtractor): # nsfw 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39', 'only_matching': True, + }, { + 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7', + 'only_matching': True, + }, { + 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8', + 'only_matching': True, + }, { + 'url': 
'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', + 'only_matching': True, }] + @staticmethod + def _extract_peertube_url(webpage, source_url): + mobj = re.match( + r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)' + % PeerTubeIE._UUID_RE, source_url) + if mobj and any(p in webpage for p in ( + '<title>PeerTube<', + 'There will be other non JS-based clients to access PeerTube', + '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): + return 'peertube:%s:%s' % mobj.group('host', 'id') + + @staticmethod + def _extract_urls(webpage, source_url): + entries = re.findall( + r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)''' + % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage) + if not entries: + peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url) + if peertube_url: + entries = [peertube_url] + return entries + def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') or mobj.group('host_2') + video_id = mobj.group('id') video = self._download_json( - urljoin(url, '/api/v1/videos/%s' % video_id), video_id) + 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) title = video['name'] @@ -161,8 +201,8 @@ class PeerTubeIE(InfoExtractor): for file_ in video['files']: if not isinstance(file_, dict): continue - file_url = file_.get('fileUrl') - if not file_url or not isinstance(file_url, compat_str): + file_url = url_or_none(file_.get('fileUrl')) + if not file_url: continue file_size = int_or_none(file_.get('size')) format_id = try_get(