X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsoundcloud.py;h=47b84809f1b53bb2c020bd94d1cbd23b23292948;hb=4bfd294e2f83301921494c02e497cccf1a26cfd5;hp=a5c40514b6fb66de6ab6aa035268f389c99a989d;hpb=7e3472758bfaa75aa413368b29a26c2615e5231b;p=youtube-dl

diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index a5c40514b..47b84809f 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -11,7 +11,7 @@ from .common import (
 from ..compat import (
     compat_str,
     compat_urlparse,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     ExtractorError,
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+                            (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'E.T. ExTerrestrial Music',
                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                 'duration': 143,
+                'license': 'all-rights-reserved',
             }
         },
         # not streamable song
@@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'The Royal Concept',
                 'upload_date': '20120521',
                 'duration': 227,
+                'license': 'all-rights-reserved',
             },
             'params': {
                 # rtmp
@@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # private link (alt format)
@@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # downloadable song
@@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'oddsamples',
                 'upload_date': '20140109',
                 'duration': 17,
+                'license': 'cc-by-sa',
             },
         },
     ]
@@ -119,6 +124,12 @@ class SoundcloudIE(InfoExtractor):
     _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [m.group('url') for m in re.finditer(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+            webpage)]
+
     def report_resolve(self, video_id):
         """Report information extraction."""
         self.to_screen('%s: Resolving id' % video_id)
@@ -132,8 +143,8 @@ class SoundcloudIE(InfoExtractor):
         name = full_title or track_id
         if quiet:
             self.report_extraction(name)
-
         thumbnail = info['artwork_url']
+        track_license = info['license']
         if thumbnail is not None:
             thumbnail = thumbnail.replace('-large', '-t500x500')
         ext = 'mp3'
@@ -146,6 +157,7 @@ class SoundcloudIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': int_or_none(info.get('duration'), 1000),
             'webpage_url': info.get('permalink_url'),
+            'license': track_license,
         }
         formats = []
         if info.get('downloadable', False):
@@ -216,12 +228,13 @@ class SoundcloudIE(InfoExtractor):
 
         track_id = mobj.group('track_id')
         token = None
+
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
             token = mobj.group('secret_token')
             if token:
-                info_json_url += "&secret_token=" + token
+                info_json_url += '&secret_token=' + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
             real_url = query['url'][0]
@@ -259,6 +272,9 @@ class SoundcloudSetIE(SoundcloudIE):
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -383,27 +399,24 @@ class SoundcloudUserIE(SoundcloudIE):
         resource = mobj.group('rsrc') or 'all'
         base_url = self._BASE_URL_MAP[resource] % user['id']
 
-        next_href = None
+        COMMON_QUERY = {
+            'limit': 50,
+            'client_id': self._CLIENT_ID,
+            'linked_partitioning': '1',
+        }
+
+        query = COMMON_QUERY.copy()
+        query['offset'] = 0
+
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
 
         entries = []
         for i in itertools.count():
-            if not next_href:
-                data = compat_urllib_parse.urlencode({
-                    'offset': i * 50,
-                    'limit': 50,
-                    'client_id': self._CLIENT_ID,
-                    'linked_partitioning': '1',
-                    'representation': 'speedy',
-                })
-                next_href = base_url + '?' + data
-
             response = self._download_json(
                 next_href, uploader, 'Downloading track page %s' % (i + 1))
 
             collection = response['collection']
-
             if not collection:
-                self.to_screen('%s: End page received' % uploader)
                 break
 
             def resolve_permalink_url(candidates):
@@ -418,12 +431,15 @@ class SoundcloudUserIE(SoundcloudIE):
                 if permalink_url:
                     entries.append(self.url_result(permalink_url))
 
-            if 'next_href' in response:
-                next_href = response['next_href']
-                if not next_href:
-                    break
-            else:
-                next_href = None
+            next_href = response.get('next_href')
+            if not next_href:
+                break
+
+            parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+            qs = compat_urlparse.parse_qs(parsed_next_href.query)
+            qs.update(COMMON_QUERY)
+            next_href = compat_urlparse.urlunparse(
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
         return {
             '_type': 'playlist',
@@ -459,7 +475,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         if token:
             data_dict['secret_token'] = token
 
-        data = compat_urllib_parse.urlencode(data_dict)
+        data = compat_urllib_parse_urlencode(data_dict)
         data = self._download_json(
             base_url + data, playlist_id, 'Downloading playlist')
 
@@ -492,50 +508,40 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
     _API_V2_BASE = 'https://api-v2.soundcloud.com'
 
     def _get_collection(self, endpoint, collection_id, **query):
-        query['limit'] = results_per_page = min(
+        limit = min(
             query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
             self._MAX_RESULTS_PER_PAGE)
+        query['limit'] = limit
         query['client_id'] = self._CLIENT_ID
         query['linked_partitioning'] = '1'
+        query['offset'] = 0
+        data = compat_urllib_parse_urlencode(query)
+        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
 
-        total_results = None
         collected_results = 0
 
-        next_url = None
-
-        for i in itertools.count():
-            if not next_url:
-                query['offset'] = i * results_per_page
-                data = compat_urllib_parse.urlencode(query)
-                next_url = '{0}{1}?{2}'.format(
-                    self._API_V2_BASE, endpoint, data)
-
+        for i in itertools.count(1):
             response = self._download_json(
-                next_url, collection_id, 'Downloading page {0}'.format(i + 1),
+                next_url, collection_id, 'Downloading page {0}'.format(i),
                 'Unable to download API page')
 
-            total_results = int(response.get(
-                'total_results', total_results))
+            collection = response.get('collection', [])
+            if not collection:
+                break
 
-            collection = response['collection']
+            collection = list(filter(bool, collection))
             collected_results += len(collection)
 
-            for item in filter(bool, collection):
-                yield item
+            for item in collection:
+                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
 
-            if (total_results is not None and collected_results >= total_results) or not collection:
+            if not collection or collected_results >= limit:
                 break
 
             next_url = response.get('next_href')
+            if not next_url:
+                break
 
     def _get_n_results(self, query, n):
-        tracks = self._get_collection(
-            '/search/tracks', collection_id='Query "{0}"'.format(query), limit=n, q=query)
-
-        results = [self.url_result(track['uri']) for track in itertools.islice(tracks, n)]
-
-        if not results:
-            raise ExtractorError(
-                'Soundcloud said: No track results', expected=True)
-
-        return self.playlist_result(results, playlist_title=query)
+        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+        return self.playlist_result(tracks, playlist_title=query)