X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fspankbang.py;h=e040ada29b24542582f72f08f31b843d928af251;hb=826dcff99cd0a44ec5fa94f0e0201f5115d097ef;hp=fbe6ef31a0741b95aab21af18d9c31ab74f2e67f;hpb=49bd993fd9adbcf6b5c11a7ec11c2b4a552e49c5;p=youtube-dl diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index fbe6ef31a..e040ada29 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,10 +5,13 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + merge_dicts, orderedSet, parse_duration, parse_resolution, str_to_int, + url_or_none, + urlencode_postdata, ) @@ -24,6 +27,8 @@ class SpankBangIE(InfoExtractor): 'description': 'dillion harper masturbates on a bed', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'silly2587', + 'timestamp': 1422571989, + 'upload_date': '20150129', 'age_limit': 18, } }, { @@ -64,38 +69,76 @@ class SpankBangIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) formats = [] - for mobj in re.finditer( - r'stream_url_(?P[^\s=]+)\s*=\s*(["\'])(?P(?:(?!\2).)+)\2', - webpage): - format_id, format_url = mobj.group('id', 'url') + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return f = parse_resolution(format_id) f.update({ - 'url': format_url, + 'url': f_url, 'format_id': format_id, }) formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P[^\s=]+)\s*=\s*(["\'])(?P(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + extract_format(mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + if format_url and isinstance(format_url, list): + format_url = format_url[0] + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + self._sort_formats(formats) + info = self._search_json_ld(webpage, video_id, default={}) + title = self._html_search_regex( - r'(?s)]*>(.+?)', webpage, 'title') + r'(?s)]*>(.+?)', webpage, 'title', default=None) description = self._search_regex( r']+\bclass=["\']bottom[^>]+>\s*

[^<]*

\s*

([^<]+)', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r'class="user"[^>]*>]+>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage, default=None) + uploader = self._html_search_regex( + (r'(?s)]+class=["\']profile[^>]+>(.+?)', + r'class="user"[^>]*>]+>([^<]+)'), webpage, 'uploader', default=None) duration = parse_duration(self._search_regex( r']+\bclass=["\']right_side[^>]+>\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( - r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + r'([\d,.]+)\s+plays', webpage, 'view count', default=None)) age_limit = self._rta_search(webpage) - return { + return merge_dicts({ 'id': video_id, - 'title': title, + 'title': title or video_id, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -103,7 +146,8 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - } + }, info + ) class SpankBangPlaylistIE(InfoExtractor):