X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundcloud.py;h=1efb2b980cc83bc5ffa3b14c912188973b4574b7;hb=611c1dd96efc36a788475e14cc4de64d554d28a0;hp=a5c40514b6fb66de6ab6aa035268f389c99a989d;hpb=7e3472758bfaa75aa413368b29a26c2615e5231b;p=youtube-dl diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index a5c40514b..1efb2b980 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -14,6 +14,7 @@ from ..compat import ( compat_urllib_parse, ) from ..utils import ( + encode_dict, ExtractorError, int_or_none, unified_strdate, @@ -221,7 +222,7 @@ class SoundcloudIE(InfoExtractor): full_title = track_id token = mobj.group('secret_token') if token: - info_json_url += "&secret_token=" + token + info_json_url += '&secret_token=' + token elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) real_url = query['url'][0] @@ -383,27 +384,24 @@ class SoundcloudUserIE(SoundcloudIE): resource = mobj.group('rsrc') or 'all' base_url = self._BASE_URL_MAP[resource] % user['id'] - next_href = None + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse.urlencode(query) entries = [] for i in itertools.count(): - if not next_href: - data = compat_urllib_parse.urlencode({ - 'offset': i * 50, - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - 'representation': 'speedy', - }) - next_href = base_url + '?' + data - response = self._download_json( next_href, uploader, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - self.to_screen('%s: End page received' % uploader) break def resolve_permalink_url(candidates): @@ -418,12 +416,15 @@ class SoundcloudUserIE(SoundcloudIE): if permalink_url: entries.append(self.url_result(permalink_url)) - if 'next_href' in response: - next_href = response['next_href'] - if not next_href: - break - else: - next_href = None + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True))) return { '_type': 'playlist', @@ -492,50 +493,40 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): - query['limit'] = results_per_page = min( + limit = min( query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), self._MAX_RESULTS_PER_PAGE) + query['limit'] = limit query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' + query['offset'] = 0 + data = compat_urllib_parse.urlencode(encode_dict(query)) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) - total_results = None collected_results = 0 - next_url = None - - for i in itertools.count(): - if not next_url: - query['offset'] = i * results_per_page - data = compat_urllib_parse.urlencode(query) - next_url = '{0}{1}?{2}'.format( - self._API_V2_BASE, endpoint, data) - + for i in itertools.count(1): response = self._download_json( - next_url, collection_id, 'Downloading page {0}'.format(i + 1), + next_url, collection_id, 'Downloading page {0}'.format(i), 'Unable to download API page') - total_results = int(response.get( - 'total_results', total_results)) + collection = response.get('collection', []) + if not collection: + break - collection = response['collection'] + collection = list(filter(bool, collection)) collected_results += len(collection) - for item in filter(bool, collection): - yield item + for item in collection: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) - if (total_results is not None and collected_results >= total_results) or not collection: + if not collection or collected_results >= limit: break next_url = response.get('next_href') + if not next_url: + break def _get_n_results(self, query, n): - tracks = self._get_collection( - '/search/tracks', collection_id='Query "{0}"'.format(query), limit=n, q=query) - - results = [self.url_result(track['uri']) for track in itertools.islice(tracks, n)] - - if not results: - raise ExtractorError( - 'Soundcloud said: No track results', expected=True) - - return self.playlist_result(results, playlist_title=query) + tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + return self.playlist_result(tracks, playlist_title=query)