Merge pull request #7045 from remitamine/ign

[youtube-dl] / youtube_dl / extractor / soundcloud.py
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 6ce86cbcda9aeb2c8ebe0a8d2ce3063e67d9123c..02e64e09436a5299c5d4f87f1a3ba871c63af230 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -4,13 +4,17 @@ from __future__ import unicode_literals
  import re
  import itertools
  
-from .common import InfoExtractor
+from .common import (
+    InfoExtractor,
+    SearchInfoExtractor
+)
  from ..compat import (
      compat_str,
      compat_urlparse,
      compat_urllib_parse,
  )
  from ..utils import (
+    encode_dict,
      ExtractorError,
      int_or_none,
      unified_strdate,
@@ -113,7 +117,7 @@ class SoundcloudIE(InfoExtractor):
          },
      ]
  
-    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
+    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
      _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
  
      def report_resolve(self, video_id):
@@ -309,7 +313,7 @@ class SoundcloudUserIE(SoundcloudIE):
              'id': '114582580',
              'title': 'The Akashic Chronicler (All)',
          },
-        'playlist_mincount': 112,
+        'playlist_mincount': 111,
      }, {
          'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
          'info_dict': {
@@ -330,14 +334,14 @@ class SoundcloudUserIE(SoundcloudIE):
              'id': '114582580',
              'title': 'The Akashic Chronicler (Reposts)',
          },
-        'playlist_mincount': 9,
+        'playlist_mincount': 7,
      }, {
          'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
          'info_dict': {
              'id': '114582580',
              'title': 'The Akashic Chronicler (Likes)',
          },
-        'playlist_mincount': 333,
+        'playlist_mincount': 321,
      }, {
          'url': 'https://soundcloud.com/grynpyret/spotlight',
          'info_dict': {
@@ -469,3 +473,77 @@ class SoundcloudPlaylistIE(SoundcloudIE):
              'description': data.get('description'),
              'entries': entries,
          }
+
+
+class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
+    """Search extractor yielding SoundCloud tracks that match a query."""
+
+    IE_NAME = 'soundcloud:search'
+    IE_DESC = 'Soundcloud search'
+    _MAX_RESULTS = float('inf')
+    _TESTS = [{
+        'url': 'scsearch15:post-avant jazzcore',
+        'info_dict': {
+            'title': 'post-avant jazzcore',
+        },
+        'playlist_count': 15,
+    }]
+
+    _SEARCH_KEY = 'scsearch'
+    # Largest page size requested from the API in one call.
+    _MAX_RESULTS_PER_PAGE = 200
+    # Total number of results collected when no 'limit' is supplied.
+    _DEFAULT_RESULTS_PER_PAGE = 50
+    _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+    def _get_collection(self, endpoint, collection_id, **query):
+        """Yield a url_result for each item of a paginated API collection.
+
+        endpoint is appended to _API_V2_BASE and collection_id is the id
+        handed to _download_json.  The remaining keyword arguments form the
+        query string.  'limit' is the total number of results wanted and
+        may exceed _MAX_RESULTS_PER_PAGE (e.g. float('inf'), cf.
+        _MAX_RESULTS); further pages are then fetched through each
+        response's 'next_href'.
+        """
+        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
+        # The request's 'limit' is a page size.  Keep it separate from the
+        # total so that asking for more than one page's worth of results is
+        # not silently cut down to a single page.
+        query['limit'] = min(wanted, self._MAX_RESULTS_PER_PAGE)
+        query['client_id'] = self._CLIENT_ID
+        query['linked_partitioning'] = '1'
+        query['offset'] = 0
+        data = compat_urllib_parse.urlencode(encode_dict(query))
+        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
+
+        collected_results = 0
+
+        for i in itertools.count(1):
+            response = self._download_json(
+                next_url, collection_id, 'Downloading page {0}'.format(i),
+                'Unable to download API page')
+
+            # Drop falsy entries so they are not counted as results; a
+            # missing or null 'collection' is treated as an empty page.
+            collection = [
+                item for item in (response.get('collection') or []) if item]
+            if not collection:
+                break
+
+            for item in collection:
+                # A later page may hold more items than are still needed.
+                if collected_results >= wanted:
+                    return
+                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
+                collected_results += 1
+
+            if collected_results >= wanted:
+                break
+
+            next_url = response.get('next_href')
+            if not next_url:
+                break
+
+    def _get_n_results(self, query, n):
+        """Return a playlist titled query with up to n matching tracks."""
+        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+        return self.playlist_result(tracks, playlist_title=query)