X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundgasm.py;h=3d78a9d76cce8aa28902111fe3f9018dafbb856a;hb=HEAD;hp=e568ff18cd7002df8c9dfa99c3bc162498833f0e;hpb=93b5071f73738d788c878b38a57f2b6efe0da883;p=youtube-dl diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index e568ff18c..3d78a9d76 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -4,73 +4,74 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import clean_html class SoundgasmIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' + IE_NAME = 'soundgasm' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' _TEST = { 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', 'md5': '010082a2c802c5275bb00030743e75ad', 'info_dict': { 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', 'ext': 'm4a', - 'title': 'ytdl_Piano-sample', - 'description': 'Royalty Free Sample Music' + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('title') - audio_title = mobj.group('user') + '_' + mobj.group('title') + display_id = mobj.group('display_id') + webpage = self._download_webpage(url, display_id) + audio_url = self._html_search_regex( - r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') - audio_id = re.split('\/|\.', audio_url)[-2] + r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'audio URL', group='url') + + title = self._search_regex( + r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', + webpage, 'title', default=display_id) + description = self._html_search_regex( - r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', - fatal=False) + (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>', + 
r'(?s)<li>Description:\s(.*?)<\/li>'), + webpage, 'description', fatal=False) + + audio_id = self._search_regex( + r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id) return { 'id': audio_id, 'display_id': display_id, 'url': audio_url, - 'title': audio_title, - 'description': description + 'vcodec': 'none', + 'title': title, + 'description': description, + 'uploader': mobj.group('user'), } + class SoundgasmProfileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[0-9a-zA-Z_\-]+)/?$' + IE_NAME = 'soundgasm:profile' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' _TEST = { 'url': 'http://soundgasm.net/u/ytdl', - 'playlist_count': 1, 'info_dict': { - '_type': 'playlist', 'id': 'ytdl', - 'title': 'ytdl' - } + }, + 'playlist_count': 1, } def _real_extract(self, url): profile_id = self._match_id(url) + webpage = self._download_webpage(url, profile_id) - ids = re.findall(r'''<a\s+href=".+?/u/%s/([^/]+)">''' % re.escape(profile_id), webpage) - ids = [clean_html(id) for id in ids] - - entries = [] - for id in ids: - entries.append({ - '_type': 'url', - 'url': ('http://soundgasm.net/u/%s/%s' % (profile_id, id)) - }) - - info_dict = { - '_type': 'playlist', - 'id': profile_id, - 'title': profile_id, - 'entries': entries - } + entries = [ + self.url_result(audio_url, 'Soundgasm') + for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] - return info_dict; + return self.playlist_result(entries, profile_id)