[soundcloud] Fix tests
[youtube-dl] / youtube_dl / extractor / soundcloud.py
index aeae931a20774ff9bc5c3988273a0d008e293954..3f1a46bb2c2ce9a5e03bd0222e897fbc9248e171 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+                            (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'E.T. ExTerrestrial Music',
                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                 'duration': 143,
+                'license': 'all-rights-reserved',
             }
         },
         # not streamable song
@@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'The Royal Concept',
                 'upload_date': '20120521',
                 'duration': 227,
+                'license': 'all-rights-reserved',
             },
             'params': {
                 # rtmp
@@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # private link (alt format)
@@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor):
                 'description': 'test chars:  \"\'/\\ä↭',
                 'upload_date': '20131209',
                 'duration': 9,
+                'license': 'all-rights-reserved',
             },
         },
         # downloadable song
@@ -112,11 +116,12 @@ class SoundcloudIE(InfoExtractor):
                 'uploader': 'oddsamples',
                 'upload_date': '20140109',
                 'duration': 17,
+                'license': 'cc-by-sa',
             },
         },
     ]
 
-    _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
+    _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
@@ -131,27 +136,27 @@ class SoundcloudIE(InfoExtractor):
 
     @classmethod
     def _resolv_url(cls, url):
-        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
+        return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 
     def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
         track_id = compat_str(info['id'])
         name = full_title or track_id
         if quiet:
             self.report_extraction(name)
-
-        thumbnail = info['artwork_url']
-        if thumbnail is not None:
+        thumbnail = info.get('artwork_url')
+        if isinstance(thumbnail, compat_str):
             thumbnail = thumbnail.replace('-large', '-t500x500')
         ext = 'mp3'
         result = {
             'id': track_id,
-            'uploader': info['user']['username'],
-            'upload_date': unified_strdate(info['created_at']),
+            'uploader': info.get('user', {}).get('username'),
+            'upload_date': unified_strdate(info.get('created_at')),
             'title': info['title'],
-            'description': info['description'],
+            'description': info.get('description'),
             'thumbnail': thumbnail,
             'duration': int_or_none(info.get('duration'), 1000),
             'webpage_url': info.get('permalink_url'),
+            'license': info.get('license'),
         }
         formats = []
         if info.get('downloadable', False):
@@ -168,46 +173,54 @@ class SoundcloudIE(InfoExtractor):
             })
 
         # We have to retrieve the url
-        streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
-                       'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
         format_dict = self._download_json(
-            streams_url,
-            track_id, 'Downloading track url')
+            'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
+            track_id, 'Downloading track url', query={
+                'client_id': self._CLIENT_ID,
+                'secret_token': secret_token,
+            })
 
         for key, stream_url in format_dict.items():
+            abr = int_or_none(self._search_regex(
+                r'_(\d+)_url', key, 'audio bitrate', default=None))
             if key.startswith('http'):
-                formats.append({
+                stream_formats = [{
                     'format_id': key,
                     'ext': ext,
                     'url': stream_url,
-                    'vcodec': 'none',
-                })
+                }]
             elif key.startswith('rtmp'):
                 # The url doesn't have an rtmp app, we have to extract the playpath
                 url, path = stream_url.split('mp3:', 1)
-                formats.append({
+                stream_formats = [{
                     'format_id': key,
                     'url': url,
                     'play_path': 'mp3:' + path,
                     'ext': 'flv',
-                    'vcodec': 'none',
-                })
-
-            if not formats:
-                # We fallback to the stream_url in the original info, this
-                # cannot be always used, sometimes it can give an HTTP 404 error
-                formats.append({
-                    'format_id': 'fallback',
-                    'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
-                    'ext': ext,
-                    'vcodec': 'none',
-                })
+                }]
+            elif key.startswith('hls'):
+                stream_formats = self._extract_m3u8_formats(
+                    stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
+                    m3u8_id=key, fatal=False)
+            else:
+                continue
+
+            for f in stream_formats:
+                f['abr'] = abr
+
+            formats.extend(stream_formats)
+
+        if not formats:
+            # We fallback to the stream_url in the original info, this
+            # cannot be always used, sometimes it can give an HTTP 404 error
+            formats.append({
+                'format_id': 'fallback',
+                'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+                'ext': ext,
+            })
 
-            for f in formats:
-                if f['format_id'].startswith('http'):
-                    f['protocol'] = 'http'
-                if f['format_id'].startswith('rtmp'):
-                    f['protocol'] = 'rtmp'
+        for f in formats:
+            f['vcodec'] = 'none'
 
         self._check_formats(formats, track_id)
         self._sort_formats(formats)
@@ -221,9 +234,9 @@ class SoundcloudIE(InfoExtractor):
             raise ExtractorError('Invalid URL: %s' % url)
 
         track_id = mobj.group('track_id')
-        token = None
+
         if track_id is not None:
-            info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+            info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
             token = mobj.group('secret_token')
             if token:
@@ -248,14 +261,27 @@ class SoundcloudIE(InfoExtractor):
 
             self.report_resolve(full_title)
 
-            url = 'http://soundcloud.com/%s' % resolve_title
+            url = 'https://soundcloud.com/%s' % resolve_title
             info_json_url = self._resolv_url(url)
         info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
 
         return self._extract_info_dict(info, full_title, secret_token=token)
 
 
-class SoundcloudSetIE(SoundcloudIE):
+class SoundcloudPlaylistBaseIE(SoundcloudIE):
+    @staticmethod
+    def _extract_id(e):
+        return compat_str(e['id']) if e.get('id') else None
+
+    def _extract_track_entries(self, tracks):
+        return [
+            self.url_result(
+                track['permalink_url'], SoundcloudIE.ie_key(),
+                video_id=self._extract_id(track))
+            for track in tracks if track.get('permalink_url')]
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
     IE_NAME = 'soundcloud:set'
     _TESTS = [{
@@ -264,7 +290,10 @@ class SoundcloudSetIE(SoundcloudIE):
             'id': '2284613',
             'title': 'The Royal Concept EP',
         },
-        'playlist_mincount': 6,
+        'playlist_mincount': 5,
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -275,7 +304,7 @@ class SoundcloudSetIE(SoundcloudIE):
         # extract simple title (uploader + slug of song title)
         slug_title = mobj.group('slug_title')
         full_title = '%s/sets/%s' % (uploader, slug_title)
-        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
+        url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
 
         token = mobj.group('token')
         if token:
@@ -291,7 +320,7 @@ class SoundcloudSetIE(SoundcloudIE):
             msgs = (compat_str(err['error_message']) for err in info['errors'])
             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 
-        entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
+        entries = self._extract_track_entries(info['tracks'])
 
         return {
             '_type': 'playlist',
@@ -301,7 +330,7 @@ class SoundcloudSetIE(SoundcloudIE):
         }
 
 
-class SoundcloudUserIE(SoundcloudIE):
+class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'''(?x)
                         https?://
                             (?:(?:www|m)\.)?soundcloud\.com/
@@ -318,21 +347,21 @@ class SoundcloudUserIE(SoundcloudIE):
             'id': '114582580',
             'title': 'The Akashic Chronicler (All)',
         },
-        'playlist_mincount': 111,
+        'playlist_mincount': 74,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Tracks)',
         },
-        'playlist_mincount': 50,
+        'playlist_mincount': 37,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
         'info_dict': {
             'id': '114582580',
             'title': 'The Akashic Chronicler (Playlists)',
         },
-        'playlist_mincount': 3,
+        'playlist_mincount': 2,
     }, {
         'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
         'info_dict': {
@@ -381,7 +410,7 @@ class SoundcloudUserIE(SoundcloudIE):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group('user')
 
-        url = 'http://soundcloud.com/%s/' % uploader
+        url = 'https://soundcloud.com/%s/' % uploader
         resolv_url = self._resolv_url(url)
         user = self._download_json(
             resolv_url, uploader, 'Downloading user info')
@@ -413,13 +442,14 @@ class SoundcloudUserIE(SoundcloudIE):
                 for cand in candidates:
                     if isinstance(cand, dict):
                         permalink_url = cand.get('permalink_url')
+                        entry_id = self._extract_id(cand)
                         if permalink_url and permalink_url.startswith('http'):
-                            return permalink_url
+                            return permalink_url, entry_id
 
             for e in collection:
-                permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+                permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
                 if permalink_url:
-                    entries.append(self.url_result(permalink_url))
+                    entries.append(self.url_result(permalink_url, video_id=entry_id))
 
             next_href = response.get('next_href')
             if not next_href:
@@ -439,11 +469,11 @@ class SoundcloudUserIE(SoundcloudIE):
         }
 
 
-class SoundcloudPlaylistIE(SoundcloudIE):
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
     IE_NAME = 'soundcloud:playlist'
     _TESTS = [{
-        'url': 'http://api.soundcloud.com/playlists/4110309',
+        'url': 'https://api.soundcloud.com/playlists/4110309',
         'info_dict': {
             'id': '4110309',
             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
@@ -469,7 +499,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         data = self._download_json(
             base_url + data, playlist_id, 'Downloading playlist')
 
-        entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
+        entries = self._extract_track_entries(data['tracks'])
 
         return {
             '_type': 'playlist',