from ..compat import (
compat_str,
compat_urlparse,
- compat_urllib_parse,
+ compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
+ @staticmethod
+ def _extract_urls(webpage):
+     """Return the src URLs of all SoundCloud player iframes in webpage.
+
+     The result is a list of the ``url`` group of every match, in document
+     order; it is empty when the page embeds no SoundCloud player.
+     """
+     player_iframe_re = r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1'
+     urls = []
+     for match in re.finditer(player_iframe_re, webpage):
+         urls.append(match.group('url'))
+     return urls
+
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen('%s: Resolving id' % video_id)
full_title = track_id
token = mobj.group('secret_token')
if token:
- info_json_url += "&secret_token=" + token
+ info_json_url += '&secret_token=' + token
elif mobj.group('player'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
real_url = query['url'][0]
resource = mobj.group('rsrc') or 'all'
base_url = self._BASE_URL_MAP[resource] % user['id']
- next_href = None
+ COMMON_QUERY = {
+ 'limit': 50,
+ 'client_id': self._CLIENT_ID,
+ 'linked_partitioning': '1',
+ }
+
+ query = COMMON_QUERY.copy()
+ query['offset'] = 0
+
+ next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
entries = []
for i in itertools.count():
- if not next_href:
- data = compat_urllib_parse.urlencode({
- 'offset': i * 50,
- 'limit': 50,
- 'client_id': self._CLIENT_ID,
- 'linked_partitioning': '1',
- 'representation': 'speedy',
- })
- next_href = base_url + '?' + data
-
response = self._download_json(
next_href, uploader, 'Downloading track page %s' % (i + 1))
collection = response['collection']
-
if not collection:
- self.to_screen('%s: End page received' % uploader)
break
def resolve_permalink_url(candidates):
if permalink_url:
entries.append(self.url_result(permalink_url))
- if 'next_href' in response:
- next_href = response['next_href']
- if not next_href:
- break
- else:
- next_href = None
+ next_href = response.get('next_href')
+ if not next_href:
+ break
+
+ parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+ qs = compat_urlparse.parse_qs(parsed_next_href.query)
+ qs.update(COMMON_QUERY)
+ next_href = compat_urlparse.urlunparse(
+ parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
return {
'_type': 'playlist',
if token:
data_dict['secret_token'] = token
- data = compat_urllib_parse.urlencode(data_dict)
+ data = compat_urllib_parse_urlencode(data_dict)
data = self._download_json(
base_url + data, playlist_id, 'Downloading playlist')
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
IE_NAME = 'soundcloud:search'
IE_DESC = 'Soundcloud search'
- _MAX_RESULTS = 200
+ _MAX_RESULTS = float('inf')
_TESTS = [{
'url': 'scsearch15:post-avant jazzcore',
'info_dict': {
}]
_SEARCH_KEY = 'scsearch'
- _RESULTS_PER_PAGE = 50
+ _MAX_RESULTS_PER_PAGE = 200
+ _DEFAULT_RESULTS_PER_PAGE = 50
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query):
-     query['limit'] = self._RESULTS_PER_PAGE
+     """Yield url_result entries for the items of a paged API collection.
+
+     endpoint is appended to _API_V2_BASE; collection_id is passed to
+     _download_json as the id used for each page download. The remaining
+     keyword arguments become the query string of the first request.
+     A 'limit' keyword is treated as the total number of results wanted
+     (default _DEFAULT_RESULTS_PER_PAGE); the per-request page size sent
+     to the API is that value capped at _MAX_RESULTS_PER_PAGE.
+
+     Paging follows the 'next_href' of each response and stops when a
+     page is empty, when the requested total has been yielded, or when
+     the response carries no 'next_href'.
+     """
+     # Keep the requested total separate from the page size: the page
+     # size is capped, the total is not (it may be float('inf')).
+     requested = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
+     query['limit'] = min(requested, self._MAX_RESULTS_PER_PAGE)
    query['client_id'] = self._CLIENT_ID
    query['linked_partitioning'] = '1'
+     query['offset'] = 0
+     data = compat_urllib_parse_urlencode(query)
+     next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
-     total_results = self._MAX_RESULTS
    collected_results = 0
-     next_url = None
-
-     for i in itertools.count():
-         if not next_url:
-             query['offset'] = i * self._RESULTS_PER_PAGE
-             data = compat_urllib_parse.urlencode(query)
-             next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
-
-         response = self._download_json(next_url,
-                 video_id=collection_id,
-                 note='Downloading page {0}'.format(i+1),
-                 errnote='Unable to download API page')
+     for i in itertools.count(1):
+         response = self._download_json(
+             next_url, collection_id, 'Downloading page {0}'.format(i),
+             'Unable to download API page')
-         total_results = int(response.get(
-             'total_results', total_results))
+         collection = response.get('collection', [])
+         if not collection:
+             break
-         collection = response['collection']
+         collection = list(filter(bool, collection))
+         # Trim the final page so no more than the requested total is
+         # yielded. The comparison is done before slicing because
+         # `requested` may be float('inf'), which cannot be a slice bound.
+         remaining = requested - collected_results
+         if remaining < len(collection):
+             collection = collection[:int(remaining)]
        collected_results += len(collection)
-         for item in filter(bool, collection):
-             yield item
+         for item in collection:
+             yield self.url_result(item['uri'], SoundcloudIE.ie_key())
-         if collected_results >= total_results or not collection:
+         if not collection or collected_results >= requested:
            break
-         next_url = response.get('next_href', None)
+         next_url = response.get('next_href')
+         if not next_url:
+             break
def _get_n_results(self, query, n):
-     tracks = self._get_collection('/search/tracks',
-                                   collection_id='Query "{0}"'.format(query),
-                                   q=query.encode('utf-8'))
-
-     results = [self.url_result(url=track['uri'])
-                for track in itertools.islice(tracks, n)]
-
-     if not results:
-         raise ExtractorError(
-             '[soundcloud] No track results', expected=True)
-
-     return self.playlist_result(results[:n], playlist_title=query)
-
+     """Return a playlist result for the track search `query`.
+
+     The entries come from _get_collection on '/search/tracks', called
+     with limit=n and q=query; the playlist title is the query itself.
+     """
+     return self.playlist_result(
+         self._get_collection('/search/tracks', query, limit=n, q=query),
+         playlist_title=query)