X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundcloud.py;h=02e64e09436a5299c5d4f87f1a3ba871c63af230;hb=78653a33aa00ba5205940c2baac5d9f019795b88;hp=f1307dc8397c75631d7d7c2de1083a18e04f06f7;hpb=80fb6d4aa47154a1e963b28a17a065dc40a436b8;p=youtube-dl diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index f1307dc83..02e64e094 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -4,13 +4,17 @@ from __future__ import unicode_literals import re import itertools -from .common import InfoExtractor +from .common import ( + InfoExtractor, + SearchInfoExtractor +) from ..compat import ( compat_str, compat_urlparse, compat_urllib_parse, ) from ..utils import ( + encode_dict, ExtractorError, int_or_none, unified_strdate, @@ -29,7 +33,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P[\w\d-]+)/ - (?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -113,7 +117,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' + _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' def report_resolve(self, video_id): @@ -293,7 +297,15 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m)\.)?soundcloud\.com/ + (?P<user>[^/]+) + (?:/ + (?P<rsrc>tracks|sets|reposts|likes|spotlight) + )? + /?(?:[?#].*)?$ + ''' IE_NAME = 'soundcloud:user' _TESTS = [{ 'url': 'https://soundcloud.com/the-akashic-chronicler', @@ -301,7 +313,7 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 112, + 'playlist_mincount': 111, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { @@ -322,14 +334,14 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (Reposts)', }, - 'playlist_mincount': 9, + 'playlist_mincount': 7, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Likes)', }, - 'playlist_mincount': 333, + 'playlist_mincount': 321, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { @@ -461,3 +473,60 @@ class SoundcloudPlaylistIE(SoundcloudIE): 'description': data.get('description'), 'entries': entries, } + + +class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): + IE_NAME = 'soundcloud:search' + IE_DESC = 'Soundcloud search' + _MAX_RESULTS = float('inf') + _TESTS = [{ + 'url': 'scsearch15:post-avant jazzcore', + 'info_dict': { + 'title': 'post-avant jazzcore', + }, + 'playlist_count': 15, + }] + + _SEARCH_KEY = 'scsearch' + _MAX_RESULTS_PER_PAGE = 200 + _DEFAULT_RESULTS_PER_PAGE = 50 + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _get_collection(self, endpoint, collection_id, **query): + limit = min( + query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), + self._MAX_RESULTS_PER_PAGE) + query['limit'] = limit + query['client_id'] = self._CLIENT_ID + query['linked_partitioning'] = '1' + query['offset'] = 0 + data = compat_urllib_parse.urlencode(encode_dict(query)) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + + collected_results = 0 + + for i in itertools.count(1): + response = self._download_json( + next_url, collection_id, 'Downloading page {0}'.format(i), + 'Unable to download API page') + + collection = response.get('collection', []) + if not collection: + break + + collection = list(filter(bool, collection)) + collected_results += len(collection) + + for item in collection: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) + + if not collection or collected_results >= limit: + break + + next_url = response.get('next_href') + if not next_url: + break + + def _get_n_results(self, query, n): + tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + return self.playlist_result(tracks, playlist_title=query)