X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundgasm.py;h=e004e2c5ab12705c8d9ff5e12b25f53579539c72;hb=ec85ded83cbfa652ba94cb080aab52d8b270212a;hp=e568ff18cd7002df8c9dfa99c3bc162498833f0e;hpb=543ec2136b2c461b339bc9dcd50ab706cdffb907;p=youtube-dl diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index e568ff18c..e004e2c5a 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -4,10 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import clean_html class SoundgasmIE(InfoExtractor): + IE_NAME = 'soundgasm' _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P[0-9a-zA-Z_\-]+)/(?P[0-9a-zA-Z_\-]+)' _TEST = { 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', @@ -27,7 +27,7 @@ class SoundgasmIE(InfoExtractor): webpage = self._download_webpage(url, display_id) audio_url = self._html_search_regex( r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') - audio_id = re.split('\/|\.', audio_url)[-2] + audio_id = re.split(r'\/|\.', audio_url)[-2] description = self._html_search_regex( r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', fatal=False) @@ -40,37 +40,25 @@ class SoundgasmIE(InfoExtractor): 'description': description } + class SoundgasmProfileIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[0-9a-zA-Z_\-]+)/?$' + IE_NAME = 'soundgasm:profile' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' _TEST = { 'url': 'http://soundgasm.net/u/ytdl', - 'playlist_count': 1, 'info_dict': { - '_type': 'playlist', 'id': 'ytdl', - 'title': 'ytdl' - } + }, + 'playlist_count': 1, } def _real_extract(self, url): profile_id = self._match_id(url) - webpage = self._download_webpage(url, profile_id) - - ids = re.findall(r'''<a\s+href=".+?/u/%s/([^/]+)">''' % re.escape(profile_id), webpage) - ids = [clean_html(id) for id in ids] - entries = [] - for id in ids: - entries.append({ - '_type': 'url', - 'url': ('http://soundgasm.net/u/%s/%s' % (profile_id, id)) - }) + webpage = self._download_webpage(url, profile_id) - info_dict = { - '_type': 'playlist', - 'id': profile_id, - 'title': profile_id, - 'entries': entries - } + entries = [ + self.url_result(audio_url, 'Soundgasm') + for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] - return info_dict; + return self.playlist_result(entries, profile_id)