Merge remote-tracking branch 'rzhxeo/RTL'
[youtube-dl] / youtube_dl / extractor / soundcloud.py
index 7c9f1c6b65998d57515b65dea5e9120772e0b019..29cd5617c7d1919fa95e0b48e7ff35585106b800 100644 (file)
@@ -1,9 +1,12 @@
 import json
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
     compat_str,
+    compat_urlparse,
+    compat_urllib_parse,
 
     ExtractorError,
     unified_strdate,
@@ -22,6 +25,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''^(?:https?://)?
                     (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
+                       |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
                     )
                     '''
     IE_NAME = u'soundcloud'
@@ -51,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
     def _resolv_url(cls, url):
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 
-    def _extract_info_dict(self, info, full_title=None):
+    def _extract_info_dict(self, info, full_title=None, quiet=False):
         video_id = info['id']
         name = full_title or video_id
-        self.report_extraction(name)
+        if quiet == False:
+            self.report_extraction(name)
 
         thumbnail = info['artwork_url']
         if thumbnail is not None:
@@ -79,6 +84,9 @@ class SoundcloudIE(InfoExtractor):
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
+        elif mobj.group('widget'):
+            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+            return self.url_result(query['url'][0], ie='Soundcloud')
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group(1)
@@ -193,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
                 'id': info['id'],
                 'title': info['title'],
                 }
+
+
+class SoundcloudUserIE(SoundcloudIE):
+    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+    IE_NAME = u'soundcloud:user'
+
+    # it's in tests/test_playlists.py
+    _TEST = None
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader = mobj.group('user')
+
+        url = 'http://soundcloud.com/%s/' % uploader
+        resolv_url = self._resolv_url(url)
+        user_json = self._download_webpage(resolv_url, uploader,
+            u'Downloading user info')
+        user = json.loads(user_json)
+
+        tracks = []
+        for i in itertools.count():
+            data = compat_urllib_parse.urlencode({'offset': i*50,
+                                                  'client_id': self._CLIENT_ID,
+                                                  })
+            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
+            response = self._download_webpage(tracks_url, uploader, 
+                u'Downloading tracks page %s' % (i+1))
+            new_tracks = json.loads(response)
+            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
+            if len(new_tracks) < 50:
+                break
+
+        return {
+            '_type': 'playlist',
+            'id': compat_str(user['id']),
+            'title': user['username'],
+            'entries': tracks,
+        }