[soundcloud] Add support for new rendition and improve extraction (closes #20699)
[youtube-dl] / youtube_dl / extractor / soundcloud.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5 import re
6
7 from .common import (
8     InfoExtractor,
9     SearchInfoExtractor
10 )
11 from ..compat import (
12     compat_str,
13     compat_urlparse,
14     compat_urllib_parse_urlencode,
15 )
16 from ..utils import (
17     ExtractorError,
18     float_or_none,
19     int_or_none,
20     KNOWN_EXTENSIONS,
21     merge_dicts,
22     mimetype2ext,
23     str_or_none,
24     try_get,
25     unified_timestamp,
26     update_url_query,
27     url_or_none,
28 )
29
30
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com

    Three URL shapes are accepted (see ``_VALID_URL``):

    * track pages: ``soundcloud.com/<uploader>/<title>[/<token>]``
    * API track URLs: ``api.soundcloud.com/tracks/<id>[?secret_token=...]``
    * embedded player URLs, which are redirected to the URL carried in
      their ``url`` query parameter

    Track metadata is fetched as JSON from api.soundcloud.com and formats
    are collected from the direct download link, the ``i1/.../streams``
    endpoint and the ``media.transcodings`` list of the metadata.
    """

    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?!stations/track)
                            (?P<uploader>[\w\d-]+)/
                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
    IE_NAME = 'soundcloud'
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
            'info_dict': {
                'id': '62986583',
                'ext': 'mp3',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
                'timestamp': 1349920598,
                'upload_date': '20121011',
                'duration': 143.216,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            }
        },
        # not streamable song
        {
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'timestamp': 1337635207,
                'upload_date': '20120521',
                'duration': 30,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                # rtmp
                'skip_download': True,
            },
        },
        # private link
        {
            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link (alt format)
        {
            'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9.927,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # downloadable song
        {
            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
            'md5': '7624f2351f8a3b2e7cd51522496e7631',
            'info_dict': {
                'id': '128590877',
                'ext': 'mp3',
                'title': 'Bus Brakes',
                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
                'uploader': 'oddsamples',
                'timestamp': 1389232924,
                'upload_date': '20140109',
                'duration': 17.346,
                'license': 'cc-by-sa',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link, downloadable format
        {
            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
            'md5': '64a60b16e617d41d0bef032b7f55441e',
            'info_dict': {
                'id': '340344461',
                'ext': 'wav',
                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
                'uploader': 'Ori Uplift Music',
                'timestamp': 1504206263,
                'upload_date': '20170831',
                'duration': 7449.096,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # no album art, use avatar pic for thumbnail
        {
            'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
            'md5': '59c7872bc44e5d99b7211891664760c2',
            'info_dict': {
                'id': '309699954',
                'ext': 'mp3',
                'title': 'Sideways (Prod. Mad Real)',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'uploader': 'garyvee',
                'timestamp': 1488152409,
                'upload_date': '20170226',
                'duration': 207.012,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        # not available via api.soundcloud.com/i1/tracks/id/streams
        {
            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
            'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
            'info_dict': {
                'id': '583011102',
                'ext': 'mp3',
                'title': 'Mezzo Valzer',
                'description': 'md5:4138d582f81866a530317bae316e8b61',
                'uploader': 'Giovanni Sarani',
                'timestamp': 1551394171,
                'upload_date': '20190228',
                'duration': 180.157,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'expected_warnings': ['Unable to download JSON metadata'],
        }
    ]

    _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7'

    @staticmethod
    def _extract_urls(webpage):
        """Return the SoundCloud player URLs embedded in ``webpage``.

        Looks for ``<iframe>`` tags whose quoted ``src`` points at
        ``soundcloud.com/player`` (optionally prefixed with ``w.``).
        """
        return [m.group('url') for m in re.finditer(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
            webpage)]

    @classmethod
    def _resolv_url(cls, url):
        """Build the ``resolve.json`` API request URL for a page URL.

        ``url`` is sent as a percent-encoded query parameter; a raw ``#``
        or ``&`` inside it would otherwise truncate the request or push
        ``client_id`` out of the query string.
        """
        return update_url_query(
            'https://api.soundcloud.com/resolve.json',
            {'url': url, 'client_id': cls._CLIENT_ID})

    def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
        """Build the result dict (metadata plus formats) for one track.

        info -- track metadata dict; 'id' and 'title' are mandatory
        full_title -- name used when reporting (falls back to the track id)
        quiet -- controls the extraction report, see the note below
        secret_token -- token of a private track, added to format requests

        Raises KeyError if 'id' or 'title' is missing, or if no format was
        found and ``info`` has no 'stream_url' to fall back to.
        """
        track_id = compat_str(info['id'])
        title = info['title']
        name = full_title or track_id
        # NOTE(review): the report is emitted only when ``quiet`` is set,
        # which looks inverted - left unchanged pending confirmation.
        if quiet:
            self.report_extraction(name)
        # try_get tolerates a missing or null 'user' object
        thumbnail = info.get('artwork_url') or try_get(
            info, lambda x: x['user']['avatar_url'])
        if isinstance(thumbnail, compat_str):
            thumbnail = thumbnail.replace('-large', '-t500x500')
        username = try_get(info, lambda x: x['user']['username'], compat_str)

        def extract_count(key):
            return int_or_none(info.get('%s_count' % key))

        like_count = extract_count('favoritings')
        if like_count is None:
            like_count = extract_count('likes')

        result = {
            'id': track_id,
            'uploader': username,
            'timestamp': unified_timestamp(info.get('created_at')),
            'title': title,
            'description': info.get('description'),
            'thumbnail': thumbnail,
            # 'duration' is divided by 1000 before being reported
            'duration': float_or_none(info.get('duration'), 1000),
            'webpage_url': info.get('permalink_url'),
            'license': info.get('license'),
            'view_count': extract_count('playback'),
            'like_count': like_count,
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
            'genre': info.get('genre'),
        }

        # URLs already turned into a format, used to skip duplicates that
        # are offered by more than one source
        format_urls = set()
        formats = []
        query = {'client_id': self._CLIENT_ID}
        if secret_token is not None:
            query['secret_token'] = secret_token
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = update_url_query(
                'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
            format_urls.add(format_url)
            formats.append({
                'format_id': 'download',
                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
                'preference': 10,
            })

        # Old API, does not work for some tracks (e.g.
        # https://soundcloud.com/giovannisarani/mezzo-valzer)
        format_dict = self._download_json(
            'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
            track_id, 'Downloading track url', query=query, fatal=False)

        if format_dict:
            for key, stream_url in format_dict.items():
                if stream_url in format_urls:
                    continue
                format_urls.add(stream_url)
                ext, abr = 'mp3', None
                # Keys shaped like <protocol>_<ext>_<abr>_url carry the
                # container and the bitrate
                mobj = re.search(r'_([^_]+)_(\d+)_url', key)
                if mobj:
                    ext, abr = mobj.groups()
                    abr = int(abr)
                if key.startswith('http'):
                    stream_formats = [{
                        'format_id': key,
                        'ext': ext,
                        'url': stream_url,
                    }]
                elif key.startswith('rtmp'):
                    # The url doesn't have an rtmp app, we have to extract the playpath
                    url, path = stream_url.split('mp3:', 1)
                    stream_formats = [{
                        'format_id': key,
                        'url': url,
                        'play_path': 'mp3:' + path,
                        'ext': 'flv',
                    }]
                elif key.startswith('hls'):
                    stream_formats = self._extract_m3u8_formats(
                        stream_url, track_id, ext, entry_protocol='m3u8_native',
                        m3u8_id=key, fatal=False)
                else:
                    continue

                if abr:
                    for f in stream_formats:
                        f['abr'] = abr

                formats.extend(stream_formats)

        # New API
        transcodings = try_get(
            info, lambda x: x['media']['transcodings'], list) or []
        for t in transcodings:
            if not isinstance(t, dict):
                continue
            format_url = url_or_none(t.get('url'))
            if not format_url:
                continue
            # The transcoding URL returns JSON that holds the real stream URL
            stream = self._download_json(
                update_url_query(format_url, query), track_id, fatal=False)
            if not isinstance(stream, dict):
                continue
            stream_url = url_or_none(stream.get('url'))
            if not stream_url:
                continue
            if stream_url in format_urls:
                continue
            format_urls.add(stream_url)
            protocol = try_get(t, lambda x: x['format']['protocol'], compat_str)
            if protocol != 'hls' and '/hls' in format_url:
                protocol = 'hls'
            ext = None
            preset = str_or_none(t.get('preset'))
            if preset:
                ext = preset.split('_')[0]
            # Also reached when there is no preset at all: ext must end up
            # a string, otherwise joining the format id below raises
            # TypeError
            if ext not in KNOWN_EXTENSIONS:
                mimetype = try_get(
                    t, lambda x: x['format']['mime_type'], compat_str)
                ext = mimetype2ext(mimetype) or 'mp3'
            format_id_list = []
            if protocol:
                format_id_list.append(protocol)
            format_id_list.append(ext)
            format_id = '_'.join(format_id_list)
            formats.append({
                'url': stream_url,
                'format_id': format_id,
                'ext': ext,
                'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
            })

        if not formats:
            # We fallback to the stream_url in the original info, this
            # cannot be always used, sometimes it can give an HTTP 404 error
            formats.append({
                'format_id': 'fallback',
                'url': update_url_query(info['stream_url'], query),
                'ext': 'mp3',
            })
            self._check_formats(formats, track_id)

        # Everything collected here is audio only
        for f in formats:
            f['vcodec'] = 'none'

        self._sort_formats(formats)
        result['formats'] = formats

        return result

    def _real_extract(self, url):
        """Extract a single track from any URL shape matched by _VALID_URL.

        Player URLs are delegated to the URL they embed; for the other
        shapes the track JSON is downloaded and passed, merged with any
        data found in the webpage, to ``_extract_info_dict``.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        new_info = {}

        if track_id is not None:
            info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
            full_title = track_id
            token = mobj.group('secret_token')
            if token:
                info_json_url += '&secret_token=' + token
        elif mobj.group('player'):
            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            real_url = query['url'][0]
            # If the token is in the query of the original url we have to
            # manually add it
            if 'secret_token' in query:
                real_url += '?secret_token=' + query['secret_token'][0]
            return self.url_result(real_url)
        else:
            # extract uploader (which is in the url)
            uploader = mobj.group('uploader')
            # extract simple title (uploader + slug of song title)
            slug_title = mobj.group('title')
            token = mobj.group('token')
            full_title = resolve_title = '%s/%s' % (uploader, slug_title)
            if token:
                resolve_title += '/%s' % token

            webpage = self._download_webpage(url, full_title, fatal=False)
            if webpage:
                entries = self._parse_json(
                    self._search_regex(
                        r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
                        'data', default='[]'), full_title, fatal=False)
                if entries:
                    for e in entries:
                        if not isinstance(e, dict):
                            continue
                        # Only the entry with id 67 is read for track data
                        if e.get('id') != 67:
                            continue
                        data = try_get(e, lambda x: x['data'][0], dict)
                        if data:
                            new_info = data
                            break
            # Set unconditionally: the webpage download above is not fatal,
            # so the API lookup must still work when it failed
            info_json_url = self._resolv_url(
                'https://soundcloud.com/%s' % resolve_title)

        # Contains some additional info missing from new_info
        info = self._download_json(
            info_json_url, full_title, 'Downloading info JSON')

        return self._extract_info_dict(
            merge_dicts(info, new_info), full_title, secret_token=token)
450
451
class SoundcloudPlaylistBaseIE(SoundcloudIE):
    """Helpers shared by the extractors that return lists of tracks."""

    @staticmethod
    def _extract_id(e):
        """Return ``e['id']`` as a string, or None if it is absent or falsy."""
        if not e.get('id'):
            return None
        return compat_str(e['id'])

    def _extract_track_entries(self, tracks):
        """Turn track dicts into url results handled by SoundcloudIE.

        Tracks without a 'permalink_url' are left out.
        """
        results = []
        for track in tracks:
            if not track.get('permalink_url'):
                continue
            results.append(self.url_result(
                track['permalink_url'], SoundcloudIE.ie_key(),
                video_id=self._extract_id(track)))
        return results
463
464
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    """Extractor for set pages: soundcloud.com/<uploader>/sets/<slug>."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
    IE_NAME = 'soundcloud:set'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
        'info_dict': {
            'id': '2284613',
            'title': 'The Royal Concept EP',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the set through the API and return it as a playlist.

        Raises ExtractorError when the API response carries 'errors'.
        """
        mobj = re.match(self._VALID_URL, url)

        # uploader and set slug both come from the url; the optional token
        # is appended to the title and to the canonical url alike
        uploader, slug_title, token = mobj.group(
            'uploader', 'slug_title', 'token')
        path_parts = [uploader, 'sets', slug_title]
        if token:
            path_parts.append(token)
        full_title = '/'.join(path_parts)

        info = self._download_json(
            self._resolv_url('https://soundcloud.com/' + full_title),
            full_title)

        if 'errors' in info:
            messages = [
                compat_str(error['error_message']) for error in info['errors']]
            raise ExtractorError(
                'unable to download video webpage: %s' % ','.join(messages))

        return {
            '_type': 'playlist',
            'entries': self._extract_track_entries(info['tracks']),
            'id': '%s' % info['id'],
            'title': info['title'],
        }
510
511
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
    """Base for extractors that walk a paginated api-v2 collection."""

    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Collect every page of ``base_url`` into one playlist dict.

        base_url -- API endpoint without query string
        playlist_id -- id of the resulting playlist, also used for reporting
        playlist_title -- title of the resulting playlist

        Pages are followed through 'next_href' until it is missing.
        """
        COMMON_QUERY = {
            'limit': 50,
            'client_id': self._CLIENT_ID,
            'linked_partitioning': '1',
        }

        query = COMMON_QUERY.copy()
        query['offset'] = 0

        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)

        def resolve_entry(candidates):
            # Use the first candidate that is a dict with a valid
            # permalink_url; returns None when there is none
            for cand in candidates:
                if not isinstance(cand, dict):
                    continue
                permalink_url = url_or_none(cand.get('permalink_url'))
                if not permalink_url:
                    continue
                return self.url_result(
                    permalink_url,
                    ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                    video_id=self._extract_id(cand),
                    video_title=cand.get('title'))

        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id, 'Downloading track page %s' % (i + 1))

            # Empty (or missing/malformed) collection may be returned, in
            # this case we proceed straight to next_href
            collection = response.get('collection')
            if not isinstance(collection, list):
                collection = []

            for e in collection:
                # Skip items that are not dicts, they cannot be queried
                # for a nested track or playlist
                if not isinstance(e, dict):
                    continue
                entry = resolve_entry((e, e.get('track'), e.get('playlist')))
                if entry:
                    entries.append(entry)

            next_href = response.get('next_href')
            if not next_href:
                break

            # Re-apply the common parameters to the returned next_href
            parsed_next_href = compat_urlparse.urlparse(next_href)
            qs = compat_urlparse.parse_qs(parsed_next_href.query)
            qs.update(COMMON_QUERY)
            next_href = compat_urlparse.urlunparse(
                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
574
575
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for user pages and their per-resource sub pages."""

    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Playlists)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name -> api-v2 endpoint; the remaining %s takes the user id
    _BASE_URL_MAP = {
        'all': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/stream/users/%s',
        'tracks': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/users/%s/tracks',
        'albums': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/users/%s/albums',
        'sets': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/users/%s/playlists',
        'reposts': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/stream/users/%s/reposts',
        'likes': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/users/%s/likes',
        'spotlight': SoundcloudPagedPlaylistBaseIE._API_V2_BASE + '/users/%s/spotlight',
    }

    # Resource name -> suffix shown in the playlist title
    _TITLE_MAP = {
        'all': 'All',
        'tracks': 'Tracks',
        'albums': 'Albums',
        'sets': 'Playlists',
        'reposts': 'Reposts',
        'likes': 'Likes',
        'spotlight': 'Spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user, then page through the requested resource.

        Without a resource in the url the 'all' stream is used.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group('user')

        user = self._download_json(
            self._resolv_url('https://soundcloud.com/%s/' % uploader),
            uploader, 'Downloading user info')

        resource = mobj.group('rsrc') or 'all'
        endpoint = self._BASE_URL_MAP[resource] % user['id']
        playlist_id = compat_str(user['id'])
        playlist_title = '%s (%s)' % (
            user['username'], self._TITLE_MAP[resource])

        return self._extract_playlist(endpoint, playlist_id, playlist_title)
672
673
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for soundcloud.com/stations/track/<user>/<track> pages."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your-text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Read the station's track id from the page and list its tracks."""
        track_name = self._match_id(url)
        webpage = self._download_webpage(url, track_name)

        # The numeric id only shows up in the page source
        track_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', webpage, 'track id')

        tracks_url = '%s/stations/soundcloud:track-stations:%s/tracks' % (
            self._API_V2_BASE, track_id)
        playlist_title = 'Track station: %s' % track_name

        return self._extract_playlist(tracks_url, track_id, playlist_title)
698
699
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extractor for api.soundcloud.com/playlists/<id> URLs."""

    _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Download the playlist JSON and return its tracks as a playlist."""
        mobj = re.match(self._VALID_URL, url)
        playlist_id, token = mobj.group('id', 'token')

        query = {'client_id': self._CLIENT_ID}
        if token:
            # secret token taken from the url, forwarded to the API
            query['secret_token'] = token

        api_url = '%s//api.soundcloud.com/playlists/%s.json?%s' % (
            self.http_scheme(), playlist_id,
            compat_urllib_parse_urlencode(query))
        playlist = self._download_json(
            api_url, playlist_id, 'Downloading playlist')

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('title'),
            'description': playlist.get('description'),
            'entries': self._extract_track_entries(playlist['tracks']),
        }
739
740
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Search extractor answering ``scsearch[N|all]:<query>`` requests."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    _MAX_RESULTS_PER_PAGE = 200
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield url results for the items of a paginated api-v2 collection.

        endpoint -- path appended to ``_API_V2_BASE``
        collection_id -- id used when reporting the downloads
        query -- extra request parameters; 'limit' is the total number of
                 results wanted (default ``_DEFAULT_RESULTS_PER_PAGE``)

        At most 'limit' results are yielded. The page size sent to the API
        is capped at ``_MAX_RESULTS_PER_PAGE`` and further pages are
        followed through 'next_href'.
        """
        # Keep the requested total apart from the page size: reusing the
        # capped page size as total would stop after the first full page
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        query['limit'] = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse_urlencode(query)
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            # Drop falsy placeholder items before reading their 'uri'
            collection = list(filter(bool, collection))
            if not collection:
                break

            for item in collection:
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                collected_results += 1
                # Stop mid-page so that never more than requested is yielded
                if collected_results >= wanted:
                    return

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist of up to ``n`` tracks matching ``query``."""
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)