X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fopenload.py;h=6415b8fdcb7451e6b499d1d4d3515af2775c66c2;hb=971e3b7520563936f6e6946f5c08d64f65ab6f42;hp=71021d573f1fbe7af75b278a9ed519cb73f02517;hpb=2bfeee69b976fe049761dd3012e30b637ee05a58;p=youtube-dl diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 71021d573..6415b8fdc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -5,21 +5,41 @@ import re from .common import InfoExtractor from ..compat import compat_chr -from ..utils import encode_base_n +from ..utils import ( + determine_ext, + encode_base_n, + ExtractorError, + mimetype2ext, +) class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.co/f/(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' - _TEST = { + _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', 'md5': 'bf1c059b004ebc7a256f89408e65c36e', 'info_dict': { 'id': 'kUEfGclsU9o', 'ext': 'mp4', 'title': 'skyrim_no-audio_1080.mp4', + 'thumbnail': 're:^https?://.*\.jpg$', }, - } + }, { + 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', + 'only_matching': True, + }, { + 'url': 'https://openload.io/f/ZAn6oz-VZGE/', + 'only_matching': True, + }, { + 'url': 'https://openload.co/f/_-ztPaZtMhM/', + 'only_matching': True, + }, { + # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout + # for title and ext + 'url': 'https://openload.co/embed/Sxz5sADo82g/', + 'only_matching': True, + }] @staticmethod def openload_level2_debase(m): @@ -78,15 +98,33 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + + if 'File not found' in webpage: + raise ExtractorError('File not found', expected=True) + code = self._search_regex( - r']+>\s*]+>([^<]+)', + r'\s*\s*]+>[^>]+\s*]+>([^<]+)', webpage, 'JS code') + decoded = 
self.openload_decode(code) + video_url = self._search_regex( - r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL') + r'return\s+"(https?://[^"]+)"', decoded, 'video URL') + + title = self._og_search_title(webpage, default=None) or self._search_regex( + r']+class=["\']title["\'][^>]*>([^<]+)', webpage, + 'title', default=None) or self._html_search_meta( + 'description', webpage, 'title', fatal=True) + + ext = mimetype2ext(self._search_regex( + r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded, + 'mimetype', default=None, group='mimetype')) or determine_ext( + video_url, 'mp4') return { 'id': video_id, - 'title': self._og_search_title(webpage), + 'title': title, + 'ext': ext, + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, }