X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundcloud.py;h=47b84809f1b53bb2c020bd94d1cbd23b23292948;hb=4bfd294e2f83301921494c02e497cccf1a26cfd5;hp=a5c40514b6fb66de6ab6aa035268f389c99a989d;hpb=7e3472758bfaa75aa413368b29a26c2615e5231b;p=youtube-dl diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index a5c40514b..47b84809f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -11,7 +11,7 @@ from .common import ( from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P[\w\d-]+)/ - (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'E.T. ExTerrestrial Music', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'duration': 143, + 'license': 'all-rights-reserved', } }, # not streamable song @@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'The Royal Concept', 'upload_date': '20120521', 'duration': 227, + 'license': 'all-rights-reserved', }, 'params': { # rtmp @@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # private link (alt format) @@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # downloadable song @@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'oddsamples', 'upload_date': '20140109', 'duration': 17, + 'license': 'cc-by-sa', }, }, ] @@ -119,6 +124,12 @@ class SoundcloudIE(InfoExtractor): _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', + webpage)] + def report_resolve(self, video_id): """Report information extraction.""" self.to_screen('%s: Resolving id' % video_id) @@ -132,8 +143,8 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] + track_license = info['license'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' @@ -146,6 +157,7 @@ class SoundcloudIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), + 'license': track_license, } formats = [] if info.get('downloadable', False): @@ -216,12 +228,13 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') token = None + if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id token = mobj.group('secret_token') if token: - info_json_url += "&secret_token=" + token + info_json_url += '&secret_token=' + token elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) real_url = query['url'][0] @@ -259,6 +272,9 @@ class SoundcloudSetIE(SoundcloudIE): 'title': 'The Royal Concept EP', }, 'playlist_mincount': 6, + }, { + 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', + 'only_matching': True, }] def _real_extract(self, url): @@ -383,27 +399,24 @@ class SoundcloudUserIE(SoundcloudIE): resource = mobj.group('rsrc') or 'all' base_url = self._BASE_URL_MAP[resource] % user['id'] - next_href = None + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) entries = [] for i in itertools.count(): - if not next_href: - data = compat_urllib_parse.urlencode({ - 'offset': i * 50, - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - 'representation': 'speedy', - }) - next_href = base_url + '?' + data - response = self._download_json( next_href, uploader, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - self.to_screen('%s: End page received' % uploader) break def resolve_permalink_url(candidates): @@ -418,12 +431,15 @@ class SoundcloudUserIE(SoundcloudIE): if permalink_url: entries.append(self.url_result(permalink_url)) - if 'next_href' in response: - next_href = response['next_href'] - if not next_href: - break - else: - next_href = None + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) return { '_type': 'playlist', @@ -459,7 +475,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): if token: data_dict['secret_token'] = token - data = compat_urllib_parse.urlencode(data_dict) + data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') @@ -492,50 +508,40 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): _API_V2_BASE = 'https://api-v2.soundcloud.com' def _get_collection(self, endpoint, collection_id, **query): - query['limit'] = results_per_page = min( + limit = min( query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), self._MAX_RESULTS_PER_PAGE) + query['limit'] = limit query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' + query['offset'] = 0 + data = compat_urllib_parse_urlencode(query) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) - total_results = None collected_results = 0 - next_url = None - - for i in itertools.count(): - if not next_url: - query['offset'] = i * results_per_page - data = compat_urllib_parse.urlencode(query) - next_url = '{0}{1}?{2}'.format( - self._API_V2_BASE, endpoint, data) - + for i in itertools.count(1): response = self._download_json( - next_url, collection_id, 'Downloading page {0}'.format(i + 1), + next_url, collection_id, 'Downloading page {0}'.format(i), 'Unable to download API page') - total_results = int(response.get( - 'total_results', total_results)) + collection = response.get('collection', []) + if not collection: + break - collection = response['collection'] + collection = list(filter(bool, collection)) collected_results += len(collection) - for item in filter(bool, collection): - yield item + for item in collection: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) - if (total_results is not None and collected_results >= total_results) or not collection: + if not collection or collected_results >= limit: break next_url = response.get('next_href') + if not next_url: + break def _get_n_results(self, query, n): - tracks = self._get_collection( - '/search/tracks', collection_id='Query "{0}"'.format(query), limit=n, q=query) - - results = [self.url_result(track['uri']) for track in itertools.islice(tracks, n)] - - if not results: - raise ExtractorError( - 'Soundcloud said: No track results', expected=True) - - return self.playlist_result(results, playlist_title=query) + tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + return self.playlist_result(tracks, playlist_title=query)