X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundcloud.py;h=194dabc71d84072fc64afd50baa3b80467c0808f;hb=612b5f403e33d5c164b5c0bbad9f01ef6d38d050;hp=f1307dc8397c75631d7d7c2de1083a18e04f06f7;hpb=80fb6d4aa47154a1e963b28a17a065dc40a436b8;p=youtube-dl diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index f1307dc83..194dabc71 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals import re import itertools -from .common import InfoExtractor +from .common import ( + InfoExtractor, + SearchInfoExtractor +) from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -29,7 +32,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|sets|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? 
(?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -113,7 +116,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' + _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' def report_resolve(self, video_id): @@ -218,7 +221,7 @@ class SoundcloudIE(InfoExtractor): full_title = track_id token = mobj.group('secret_token') if token: - info_json_url += "&secret_token=" + token + info_json_url += '&secret_token=' + token elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) real_url = query['url'][0] @@ -293,7 +296,15 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|sets|reposts|likes|spotlight)/?)?(\?.*)?$' + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m)\.)?soundcloud\.com/ + (?P<user>[^/]+) + (?:/ + (?P<rsrc>tracks|sets|reposts|likes|spotlight) + )? 
+ /?(?:[?#].*)?$ + ''' IE_NAME = 'soundcloud:user' _TESTS = [{ 'url': 'https://soundcloud.com/the-akashic-chronicler', @@ -301,7 +312,7 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 112, + 'playlist_mincount': 111, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { @@ -322,14 +333,14 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (Reposts)', }, - 'playlist_mincount': 9, + 'playlist_mincount': 7, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Likes)', }, - 'playlist_mincount': 333, + 'playlist_mincount': 321, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { @@ -372,27 +383,24 @@ class SoundcloudUserIE(SoundcloudIE): resource = mobj.group('rsrc') or 'all' base_url = self._BASE_URL_MAP[resource] % user['id'] - next_href = None + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) entries = [] for i in itertools.count(): - if not next_href: - data = compat_urllib_parse.urlencode({ - 'offset': i * 50, - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - 'representation': 'speedy', - }) - next_href = base_url + '?' 
+ data - response = self._download_json( next_href, uploader, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - self.to_screen('%s: End page received' % uploader) break def resolve_permalink_url(candidates): @@ -407,12 +415,15 @@ class SoundcloudUserIE(SoundcloudIE): if permalink_url: entries.append(self.url_result(permalink_url)) - if 'next_href' in response: - next_href = response['next_href'] - if not next_href: - break - else: - next_href = None + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) return { '_type': 'playlist', @@ -448,7 +459,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): if token: data_dict['secret_token'] = token - data = compat_urllib_parse.urlencode(data_dict) + data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') @@ -461,3 +472,60 @@ class SoundcloudPlaylistIE(SoundcloudIE): 'description': data.get('description'), 'entries': entries, } + + +class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): + IE_NAME = 'soundcloud:search' + IE_DESC = 'Soundcloud search' + _MAX_RESULTS = float('inf') + _TESTS = [{ + 'url': 'scsearch15:post-avant jazzcore', + 'info_dict': { + 'title': 'post-avant jazzcore', + }, + 'playlist_count': 15, + }] + + _SEARCH_KEY = 'scsearch' + _MAX_RESULTS_PER_PAGE = 200 + _DEFAULT_RESULTS_PER_PAGE = 50 + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _get_collection(self, endpoint, collection_id, **query): + limit = min( + query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), + self._MAX_RESULTS_PER_PAGE) + query['limit'] = limit + query['client_id'] = self._CLIENT_ID + query['linked_partitioning'] = '1' + 
query['offset'] = 0 + data = compat_urllib_parse_urlencode(query) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + + collected_results = 0 + + for i in itertools.count(1): + response = self._download_json( + next_url, collection_id, 'Downloading page {0}'.format(i), + 'Unable to download API page') + + collection = response.get('collection', []) + if not collection: + break + + collection = list(filter(bool, collection)) + collected_results += len(collection) + + for item in collection: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) + + if not collection or collected_results >= limit: + break + + next_url = response.get('next_href') + if not next_url: + break + + def _get_n_results(self, query, n): + tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + return self.playlist_result(tracks, playlist_title=query)