[soundcloud] automatically update client id on failing requests
[youtube-dl] / youtube_dl / extractor / soundcloud.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5 import re
6
7 from .common import (
8     InfoExtractor,
9     SearchInfoExtractor
10 )
11 from ..compat import (
12     compat_HTTPError,
13     compat_kwargs,
14     compat_str,
15     compat_urlparse,
16 )
17 from ..utils import (
18     ExtractorError,
19     float_or_none,
20     HEADRequest,
21     int_or_none,
22     KNOWN_EXTENSIONS,
23     mimetype2ext,
24     str_or_none,
25     try_get,
26     unified_timestamp,
27     update_url_query,
28     url_or_none,
29 )
30
31
class SoundcloudEmbedIE(InfoExtractor):
    """Extractor for SoundCloud player widget (embed) URLs.

    The widget URL carries the actual target URL in its ``url`` query
    parameter; extraction simply hands that URL on via ``url_result``.
    """

    _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
    _TEST = {
        # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
        'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
        'only_matching': True,
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` value of every SoundCloud player iframe in *webpage*."""
        iframe_re = r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1'
        found = []
        for match in re.finditer(iframe_re, webpage):
            found.append(match.group('url'))
        return found

    def _real_extract(self, url):
        """Delegate extraction to the URL given in the widget's ``url`` parameter."""
        params = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        target_url = params['url'][0]
        # Carry the widget's secret_token (when present) over to the target URL.
        tokens = params.get('secret_token')
        if tokens:
            target_url = update_url_query(
                target_url, {'secret_token': tokens[0]})
        return self.url_result(target_url)
54
55
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com tracks.

    Track metadata is fetched as JSON from the API (v2 first, v1 as a
    fallback).  Formats are then collected from the download URL, the
    ``media.transcodings`` list, the legacy ``/streams`` endpoint and, as a
    last resort, the plain stream URL.  Every JSON request carries a
    ``client_id``; when a request fails with HTTP 401 a fresh id is scraped
    from the soundcloud.com scripts and the request is retried once.
    """

    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?!stations/track)
                            (?P<uploader>[\w\d-]+)/
                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                    )
                    '''
    IE_NAME = 'soundcloud'
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
            'info_dict': {
                'id': '62986583',
                'ext': 'mp3',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
                'uploader_id': '1571244',
                'timestamp': 1349920598,
                'upload_date': '20121011',
                'duration': 143.216,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            }
        },
        # not streamable song
        {
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'uploader_id': '9615865',
                'timestamp': 1337635207,
                'upload_date': '20120521',
                'duration': 30,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                # rtmp
                'skip_download': True,
            },
            'skip': 'Preview',
        },
        # private link
        {
            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'uploader_id': '69767071',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link (alt format)
        {
            'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'uploader_id': '69767071',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # downloadable song
        {
            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
            'md5': '7624f2351f8a3b2e7cd51522496e7631',
            'info_dict': {
                'id': '128590877',
                'ext': 'mp3',
                'title': 'Bus Brakes',
                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
                'uploader': 'oddsamples',
                'uploader_id': '73680509',
                'timestamp': 1389232924,
                'upload_date': '20140109',
                'duration': 17.346,
                'license': 'cc-by-sa',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link, downloadable format
        {
            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
            'md5': '64a60b16e617d41d0bef032b7f55441e',
            'info_dict': {
                'id': '340344461',
                'ext': 'wav',
                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
                'uploader': 'Ori Uplift Music',
                'uploader_id': '12563093',
                'timestamp': 1504206263,
                'upload_date': '20170831',
                'duration': 7449.096,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # no album art, use avatar pic for thumbnail
        {
            'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
            'md5': '59c7872bc44e5d99b7211891664760c2',
            'info_dict': {
                'id': '309699954',
                'ext': 'mp3',
                'title': 'Sideways (Prod. Mad Real)',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'uploader': 'garyvee',
                'uploader_id': '2366352',
                'timestamp': 1488152409,
                'upload_date': '20170226',
                'duration': 207.012,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        # not available via api.soundcloud.com/i1/tracks/id/streams
        {
            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
            'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
            'info_dict': {
                'id': '583011102',
                'ext': 'mp3',
                'title': 'Mezzo Valzer',
                'description': 'md5:4138d582f81866a530317bae316e8b61',
                'uploader': 'Giovanni Sarani',
                'uploader_id': '3352531',
                'timestamp': 1551394171,
                'upload_date': '20190228',
                'duration': 180.157,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'expected_warnings': ['Unable to download JSON metadata'],
        }
    ]

    _API_BASE = 'https://api.soundcloud.com/'
    _API_V2_BASE = 'https://api-v2.soundcloud.com/'
    _BASE_URL = 'https://soundcloud.com/'
    # Matches the size suffix of an image URL (e.g. "-large.jpg")
    _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'

    # Image size identifier -> edge length in pixels (0: size not reported)
    _ARTWORK_MAP = {
        'mini': 16,
        'tiny': 20,
        'small': 32,
        'badge': 47,
        't67x67': 67,
        'large': 100,
        't300x300': 300,
        'crop': 400,
        't500x500': 500,
        'original': 0,
    }

    def _update_client_id(self):
        """Scrape a fresh client id from the soundcloud.com scripts.

        The scripts referenced by the front page are searched last to
        first; the first 32-character ``client_id`` found is stored on the
        instance and in the downloader cache.

        Raises ExtractorError if no script yields a client id.
        """
        webpage = self._download_webpage('https://soundcloud.com/', None)
        for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
            script = self._download_webpage(src, None, fatal=False)
            if script:
                client_id = self._search_regex(
                    r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
                    script, 'client id', default=None)
                if client_id:
                    self._CLIENT_ID = client_id
                    self._downloader.cache.store('soundcloud', 'client_id', client_id)
                    return
        raise ExtractorError('Unable to extract client id')

    def _download_json(self, *args, **kwargs):
        """Download JSON with the current client id added to the query.

        Accepts the same arguments as the parent implementation.  If the
        request fails with HTTP 401 the client id is refreshed and the
        request is retried once; a second 401 (or any other error) is
        re-raised to the caller instead of being swallowed.
        """
        # Work on a copy so the caller's query dict is never modified
        query = kwargs.get('query', {}).copy()
        attempts = 2
        for attempt in range(attempts):
            query['client_id'] = self._CLIENT_ID
            kwargs['query'] = query
            try:
                return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
            except ExtractorError as e:
                unauthorized = isinstance(e.cause, compat_HTTPError) and e.cause.code == 401
                # NOTE(review): calls made with fatal=False presumably never
                # reach this handler (the parent is expected to warn rather
                # than raise), so they do not trigger a refresh - confirm.
                if not unauthorized or attempt == attempts - 1:
                    raise
                self._update_client_id()

    def _real_initialize(self):
        """Load the cached client id, falling back to the built-in one."""
        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'

    @classmethod
    def _resolv_url(cls, url):
        """Return the API v2 ``resolve`` endpoint URL for *url*."""
        return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url

    def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
        """Build the info dict for a track from its API metadata.

        info -- track metadata dict as returned by the API
        full_title -- unused here; kept for the existing call signature
        secret_token -- token for private tracks, added to API queries
        version -- API version *info* came from; with version 2 the v1
                   track metadata is fetched separately to describe the
                   downloadable original file
        """
        track_id = compat_str(info['id'])
        title = info['title']
        track_base_url = self._API_BASE + 'tracks/%s' % track_id

        # URLs already turned into formats, used to skip duplicates
        format_urls = set()
        formats = []
        query = {'client_id': self._CLIENT_ID}
        if secret_token:
            query['secret_token'] = secret_token

        if info.get('downloadable') and info.get('has_downloads_left'):
            format_url = update_url_query(
                info.get('download_url') or track_base_url + '/download', query)
            format_urls.add(format_url)
            if version == 2:
                v1_info = self._download_json(
                    track_base_url, track_id, query=query, fatal=False) or {}
            else:
                v1_info = info
            formats.append({
                'format_id': 'download',
                'ext': v1_info.get('original_format') or 'mp3',
                'filesize': int_or_none(v1_info.get('original_content_size')),
                'url': format_url,
                'preference': 10,
            })

        def invalid_url(url):
            # Reject missing URLs, duplicates and preview/playlist snippets
            return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url)

        def add_format(f, protocol):
            # Fill in bitrate/extension from the format's own URL when the
            # caller did not supply them
            mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', f['url'])
            if mobj:
                for k, v in mobj.groupdict().items():
                    if not f.get(k):
                        f[k] = v
            format_id_list = []
            if protocol:
                format_id_list.append(protocol)
            for k in ('ext', 'abr'):
                v = f.get(k)
                if v:
                    format_id_list.append(v)
            # abr is still a string while the format id is assembled above
            abr = f.get('abr')
            if abr:
                f['abr'] = int(abr)
            f.update({
                'format_id': '_'.join(format_id_list),
                'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
            })
            formats.append(f)

        # New API
        transcodings = try_get(
            info, lambda x: x['media']['transcodings'], list) or []
        for t in transcodings:
            if not isinstance(t, dict):
                continue
            format_url = url_or_none(t.get('url'))
            if not format_url or t.get('snipped') or '/preview/' in format_url:
                continue
            # The transcoding URL returns JSON holding the actual stream URL
            stream = self._download_json(
                format_url, track_id, query=query, fatal=False)
            if not isinstance(stream, dict):
                continue
            stream_url = url_or_none(stream.get('url'))
            if invalid_url(stream_url):
                continue
            format_urls.add(stream_url)
            stream_format = t.get('format') or {}
            protocol = stream_format.get('protocol')
            if protocol != 'hls' and '/hls' in format_url:
                protocol = 'hls'
            ext = None
            preset = str_or_none(t.get('preset'))
            if preset:
                ext = preset.split('_')[0]
            if ext not in KNOWN_EXTENSIONS:
                ext = mimetype2ext(stream_format.get('mime_type'))
            add_format({
                'url': stream_url,
                'ext': ext,
            }, 'http' if protocol == 'progressive' else protocol)

        if not formats:
            # Old API, does not work for some tracks (e.g.
            # https://soundcloud.com/giovannisarani/mezzo-valzer)
            # and might serve preview URLs (e.g.
            # http://www.soundcloud.com/snbrn/ele)
            format_dict = self._download_json(
                track_base_url + '/streams', track_id,
                'Downloading track url', query=query, fatal=False) or {}

            for key, stream_url in format_dict.items():
                if invalid_url(stream_url):
                    continue
                format_urls.add(stream_url)
                mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
                if mobj:
                    protocol, ext, abr = mobj.groups()
                    add_format({
                        'abr': abr,
                        'ext': ext,
                        'url': stream_url,
                    }, protocol)

        if not formats:
            # We fallback to the stream_url in the original info, this
            # cannot be always used, sometimes it can give an HTTP 404 error
            urlh = self._request_webpage(
                HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
                track_id, query=query, fatal=False)
            if urlh:
                stream_url = urlh.geturl()
                if not invalid_url(stream_url):
                    add_format({'url': stream_url}, 'http')

        for f in formats:
            f['vcodec'] = 'none'

        self._sort_formats(formats)

        user = info.get('user') or {}

        thumbnails = []
        artwork_url = info.get('artwork_url')
        # Use the uploader's avatar when the track has no artwork
        thumbnail = artwork_url or user.get('avatar_url')
        if isinstance(thumbnail, compat_str):
            if re.search(self._IMAGE_REPL_RE, thumbnail):
                # Offer every known size by swapping the size suffix
                for image_id, size in self._ARTWORK_MAP.items():
                    i = {
                        'id': image_id,
                        'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
                    }
                    if image_id == 'tiny' and not artwork_url:
                        # 'tiny' is reported as 18 when the avatar is used
                        size = 18
                    elif image_id == 'original':
                        i['preference'] = 10
                    if size:
                        i.update({
                            'width': size,
                            'height': size,
                        })
                    thumbnails.append(i)
            else:
                thumbnails = [{'url': thumbnail}]

        def extract_count(key):
            return int_or_none(info.get('%s_count' % key))

        return {
            'id': track_id,
            'uploader': user.get('username'),
            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
            'uploader_url': user.get('permalink_url'),
            'timestamp': unified_timestamp(info.get('created_at')),
            'title': title,
            'description': info.get('description'),
            'thumbnails': thumbnails,
            # float_or_none divides by the given scale (1000)
            'duration': float_or_none(info.get('duration'), 1000),
            'webpage_url': info.get('permalink_url'),
            'license': info.get('license'),
            'view_count': extract_count('playback'),
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
            'genre': info.get('genre'),
            'formats': formats
        }

    def _real_extract(self, url):
        """Fetch the track metadata for *url* and return its info dict.

        API track URLs are queried directly by id; regular page URLs go
        through the ``resolve`` endpoint.  The v2 API is tried first
        (non-fatally) and the v1 API is used if that yields nothing.
        """
        mobj = re.match(self._VALID_URL, url)

        track_id = mobj.group('track_id')

        query = {}
        if track_id:
            info_json_url = self._API_V2_BASE + 'tracks/' + track_id
            full_title = track_id
            token = mobj.group('secret_token')
            if token:
                query['secret_token'] = token
        else:
            full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
            token = mobj.group('token')
            if token:
                # The token is part of the page path for private tracks
                resolve_title += '/%s' % token
            info_json_url = self._resolv_url(self._BASE_URL + resolve_title)

        version = 2
        info = self._download_json(
            info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
        if not info:
            info = self._download_json(
                info_json_url.replace(self._API_V2_BASE, self._API_BASE),
                full_title, 'Downloading info JSON', query=query)
            version = 1

        return self._extract_info_dict(info, full_title, token, version)
509
510
class SoundcloudPlaylistBaseIE(SoundcloudIE):
    """Base class providing track-list to playlist-entry conversion."""

    def _extract_track_entries(self, tracks, token=None):
        """Return a list of url_result entries, one per usable track.

        A track's ``permalink_url`` is used when present; otherwise an API
        v2 track URL is built from its id (with *token* appended as
        ``secret_token`` if given).  Tracks with neither are skipped.
        """
        results = []
        for item in tracks:
            item_id = str_or_none(item.get('id'))
            target = item.get('permalink_url')
            if not target:
                if not item_id:
                    # Neither a permalink nor an id to build a URL from
                    continue
                target = self._API_V2_BASE + 'tracks/' + item_id
                if token:
                    target += '?secret_token=' + token
            result = self.url_result(target, SoundcloudIE.ie_key(), item_id)
            results.append(result)
        return results
526
527
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    """Extractor for ``<uploader>/sets/<slug>`` pages (optionally with a token)."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
    IE_NAME = 'soundcloud:set'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
        'info_dict': {
            'id': '2284613',
            'title': 'The Royal Concept EP',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the set page through the API and return it as a playlist."""
        match = re.match(self._VALID_URL, url)
        token = match.group('token')

        set_path = '%s/sets/%s' % match.group('uploader', 'slug_title')
        if token:
            set_path += '/' + token

        resolve_url = self._resolv_url(self._BASE_URL + set_path)
        info = self._download_json(resolve_url, set_path)

        # The API may answer with an error list instead of set metadata
        if 'errors' in info:
            messages = ','.join(
                compat_str(err['error_message']) for err in info['errors'])
            raise ExtractorError('unable to download video webpage: %s' % messages)

        track_entries = self._extract_track_entries(info['tracks'], token)
        set_id = str_or_none(info.get('id'))
        return self.playlist_result(track_entries, set_id, info.get('title'))
562
563
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
    """Base class for playlists served page by page via ``next_href``."""

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Collect all entries of a paged collection into a playlist dict.

        base_url -- API URL of the first page
        playlist_id -- id used for the playlist and for progress messages
        playlist_title -- title of the resulting playlist

        Pages are followed through the ``next_href`` of each response until
        none is returned.
        """
        COMMON_QUERY = {
            'limit': 2000000000,
            'linked_partitioning': '1',
        }

        def resolve_entry(candidates):
            # Return a url_result for the first candidate that is a dict
            # with a usable permalink_url, or None if there is no such one
            for cand in candidates:
                if not isinstance(cand, dict):
                    continue
                permalink_url = url_or_none(cand.get('permalink_url'))
                if not permalink_url:
                    continue
                return self.url_result(
                    permalink_url,
                    SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                    str_or_none(cand.get('id')), cand.get('title'))

        query = COMMON_QUERY.copy()
        query['offset'] = 0

        next_href = base_url

        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id,
                'Downloading track page %s' % (i + 1), query=query)

            collection = response.get('collection')

            # Empty collection may be returned, in this case we proceed
            # straight to next_href
            if not isinstance(collection, list):
                collection = []

            for e in collection:
                # Skip null/malformed items rather than failing on e.get()
                if not isinstance(e, dict):
                    continue
                # The item itself or its nested track/playlist may carry
                # the permalink
                entry = resolve_entry((e, e.get('track'), e.get('playlist')))
                if entry:
                    entries.append(entry)

            next_href = response.get('next_href')
            if not next_href:
                break

            # Reuse the paging parameters of next_href, then re-apply the
            # common ones on top
            parsed_next_href = compat_urlparse.urlparse(next_href)
            query = compat_urlparse.parse_qs(parsed_next_href.query)
            query.update(COMMON_QUERY)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
622
623
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for user pages and their per-resource sub-pages."""

    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Sets)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name from the URL -> API v2 path template (filled with user id)
    _BASE_URL_MAP = {
        'all': 'stream/users/%s',
        'tracks': 'users/%s/tracks',
        'albums': 'users/%s/albums',
        'sets': 'users/%s/playlists',
        'reposts': 'stream/users/%s/reposts',
        'likes': 'users/%s/likes',
        'spotlight': 'users/%s/spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user and return the requested resource as a playlist."""
        match = re.match(self._VALID_URL, url)
        user_slug = match.group('user')

        profile_url = self._resolv_url(self._BASE_URL + user_slug)
        user = self._download_json(
            profile_url, user_slug, 'Downloading user info')

        # A bare profile URL lists everything
        resource = match.group('rsrc') or 'all'

        endpoint = self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id']
        playlist_id = str_or_none(user.get('id'))
        playlist_title = '%s (%s)' % (user['username'], resource.capitalize())
        return self._extract_playlist(endpoint, playlist_id, playlist_title)
710
711
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for ``stations/track/<user>/<track>`` pages."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Resolve the station and return its tracks as a playlist."""
        station_slug = self._match_id(url)

        station = self._download_json(self._resolv_url(url), station_slug)
        station_urn = station['id']
        # The numeric part of the station id becomes the playlist id
        numeric_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', station_urn, 'track id')

        tracks_url = self._API_V2_BASE + 'stations/%s/tracks' % station['id']
        playlist_title = 'Track station: %s' % station['title']
        return self._extract_playlist(tracks_url, numeric_id, playlist_title)
734
735
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extractor for API playlist URLs (``api[-v2].soundcloud.com/playlists/<id>``)."""

    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Fetch the playlist from API v2 and return its tracks as a playlist."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('id')
        token = match.group('token')

        # Only send secret_token when the URL actually carried one
        query = {'secret_token': token} if token else {}

        playlist_url = self._API_V2_BASE + 'playlists/' + playlist_id
        data = self._download_json(
            playlist_url, playlist_id, 'Downloading playlist', query=query)

        track_entries = self._extract_track_entries(data['tracks'], token)
        return self.playlist_result(
            track_entries, playlist_id,
            data.get('title'), data.get('description'))
766
767
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Search extractor (``scsearch``) backed by the API v2 track search."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    _MAX_RESULTS_PER_PAGE = 200
    _DEFAULT_RESULTS_PER_PAGE = 50

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield url_result entries for the items of a paged API collection.

        endpoint -- API v2 path to query
        collection_id -- id used in progress messages
        query -- query parameters; ``limit`` is the total number of
                 results wanted (default: _DEFAULT_RESULTS_PER_PAGE) and
                 may be ``float('inf')`` for no bound

        At most ``limit`` entries are yielded.  The per-request page size
        is capped at _MAX_RESULTS_PER_PAGE and further pages are fetched
        through ``next_href`` until enough results were produced.
        """
        # Keep the requested total apart from the per-page size: the page
        # size is capped, the total must not be
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        page_size = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query.update({
            'limit': page_size,
            'linked_partitioning': 1,
            'offset': 0,
        })
        next_url = update_url_query(self._API_V2_BASE + endpoint, query)

        yielded = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection') or []
            if not collection:
                break

            for item in collection:
                # Falsy (e.g. null) items are skipped
                if not item:
                    continue
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                yielded += 1
                if yielded >= wanted:
                    return

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist of up to *n* tracks matching *query*."""
        tracks = self._get_collection('search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)