X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fslutload.py;h=661f9e59d0310a6c6def25e20de33a9cbc65e09f;hb=HEAD;hp=095adfc15fb074226a03be575dcba9a406c6f99d;hpb=1476b497ebda9155d442065bb2a987d86c29026f;p=youtube-dl diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 095adfc15..661f9e59d 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,46 +1,65 @@ -import re +from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) + class SlutloadIE(InfoExtractor): - _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' - _TEST = { - u'url': u'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - u'file': u'TD73btpBqSxc.mp4', - u'md5': u'0cf531ae8006b530bd9df947a6a0df77', - u'info_dict': { - u"title": u"virginie baisee en cam", - u"age_limit": 18, - } - } + _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P[^/]+)' + _TESTS = [{ + 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', + 'md5': '868309628ba00fd488cf516a113fd717', + 'info_dict': { + 'id': 'TD73btpBqSxc', + 'ext': 'mp4', + 'title': 'virginie baisee en cam', + 'age_limit': 18, + 'thumbnail': r're:https?://.*?\.jpg' + }, + }, { + # mobile site + 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', + 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/', + 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('videoid') - - # Get webpage content - webpage = self._download_webpage(url, video_id) - - # Get the video title - video_title = self._html_search_regex(r'

([^<]+)', - webpage, u'title').strip() + video_id = self._match_id(url) - # Get the video url - result = re.compile(r'
(?:(?!\1).)+)\1' % what, + embed_page, 'video %s' % what, default=None, group='url') - info = {'id': video_id, - 'url': video_url, - 'title': video_title, - 'thumbnail': video_thumb, - 'ext': 'mp4', - 'age_limit': 18} + video_url = extract('url') + if video_url: + title = self._html_search_regex( + r'([^<]+)', embed_page, 'title', default=video_id) + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': extract('preview'), + 'age_limit': 18 + } - return [info] + webpage = self._download_webpage( + 'http://www.slutload.com/video/_/%s/' % video_id, video_id) + title = self._html_search_regex( + r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip() + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + info.update({ + 'id': video_id, + 'title': title, + 'age_limit': 18, + }) + return info