Merge remote-tracking branch 'upstream/master' into bliptv

[youtube-dl] / youtube_dl / extractor / soundcloud.py
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py

index 3fe991849b67fb7ebf447715c7c34301ebc190ca..02e64e09436a5299c5d4f87f1a3ba871c63af230 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -14,6 +14,7 @@ from ..compat import (
      compat_urllib_parse,
  )
  from ..utils import (
+    encode_dict,
      ExtractorError,
      int_or_none,
      unified_strdate,
@@ -477,7 +478,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
  class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
      IE_NAME = 'soundcloud:search'
      IE_DESC = 'Soundcloud search'
-    _MAX_RESULTS = 200
+    _MAX_RESULTS = float('inf')
      _TESTS = [{
          'url': 'scsearch15:post-avant jazzcore',
          'info_dict': {
@@ -487,63 +488,45 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
      }]
  
      _SEARCH_KEY = 'scsearch'
-    _RESULTS_PER_PAGE = 50
+    _MAX_RESULTS_PER_PAGE = 200
+    _DEFAULT_RESULTS_PER_PAGE = 50
      _API_V2_BASE = 'https://api-v2.soundcloud.com'
  
      def _get_collection(self, endpoint, collection_id, **query):
-        query['limit'] = self._RESULTS_PER_PAGE
+        limit = min(
+            query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
+            self._MAX_RESULTS_PER_PAGE)
+        query['limit'] = limit
          query['client_id'] = self._CLIENT_ID
          query['linked_partitioning'] = '1'
+        query['offset'] = 0
+        data = compat_urllib_parse.urlencode(encode_dict(query))
+        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
  
-        total_results = self._MAX_RESULTS
          collected_results = 0
  
-        next_url = None
-
-        for i in itertools.count():
-            if not next_url:
-                query['offset'] = i * self._RESULTS_PER_PAGE
-                data = compat_urllib_parse.urlencode(query)
-                next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
-
-            response = self._download_json(next_url,
-                    video_id=collection_id,
-                    note='Downloading page {0}'.format(i+1),
-                    errnote='Unable to download API page')
+        for i in itertools.count(1):
+            response = self._download_json(
+                next_url, collection_id, 'Downloading page {0}'.format(i),
+                'Unable to download API page')
  
-            total_results = int(response.get(
-                'total_results', total_results))
+            collection = response.get('collection', [])
+            if not collection:
+                break
  
-            collection = response['collection']
+            collection = list(filter(bool, collection))
              collected_results += len(collection)
  
-            for item in filter(bool, collection):
-                yield item
+            for item in collection:
+                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
  
-            if collected_results >= total_results or not collection:
+            if not collection or collected_results >= limit:
                  break
  
-            next_url = response.get('next_href', None)
-
-    def _get_n_results(self, query, n):
-        results = []
-
-        tracks = self._get_collection('/search/tracks',
-            collection_id='Query "{0}"'.format(query),
-            q=query.encode('utf-8'))
-
-        for _ in range(n):
-            try:
-                track = next(tracks)
-            except StopIteration:
+            next_url = response.get('next_href')
+            if not next_url:
                  break
-            uri = track['uri']
-            title = track['title']
-            results.append(self.url_result(url=uri))
-
-        if not results:
-            raise ExtractorError(
-                '[soundcloud] No track results', expected=True)
-        
-        return self.playlist_result(results[:n], playlist_title=query)
  
+    def _get_n_results(self, query, n):
+        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+        return self.playlist_result(tracks, playlist_title=query)