[soundcloud] recognize more player URLs (fixes #2078)
[youtube-dl] / youtube_dl / extractor / soundcloud.py
index 0571b36ac22204298e0692383be2dba145e91074..951e977bd0ba014340fe3eeb626723bde258e0dd 100644 (file)
@@ -24,11 +24,12 @@ class SoundcloudIE(InfoExtractor):
      """
 
     _VALID_URL = r'''^(?:https?://)?
-                    (?:(?:(?:www\.)?soundcloud\.com/
-                            (?P<uploader>[\w\d-]+)/(?P<title>[\w\d-]+)/?
+                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
+                            (?P<uploader>[\w\d-]+)/
+                            (?!sets/)(?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
-                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
+                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''
     IE_NAME = u'soundcloud'
@@ -72,6 +73,19 @@ class SoundcloudIE(InfoExtractor):
                 u'upload_date': u'20131209',
             },
         },
+        # downloadable song
+        {
+            u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
+            u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
+            u'info_dict': {
+                u'id': u'105614606',
+                u'ext': u'wav',
+                u'title': u'Just Your Problem Baby (Acapella)',
+                u'description': u'Vocals',
+                u'uploader': u'Sim Gretina',
+                u'upload_date': u'20130815',
+            },
+        },
     ]
 
     _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@@ -98,7 +112,7 @@ class SoundcloudIE(InfoExtractor):
         thumbnail = info['artwork_url']
         if thumbnail is not None:
             thumbnail = thumbnail.replace('-large', '-t500x500')
-        ext = info.get('original_format', u'mp3')
+        ext = u'mp3'
         result = {
             'id': track_id,
             'uploader': info['user']['username'],
@@ -114,7 +128,7 @@ class SoundcloudIE(InfoExtractor):
                     track_id, self._CLIENT_ID))
             result['formats'] = [{
                 'format_id': 'download',
-                'ext': ext,
+                'ext': info.get('original_format', u'mp3'),
                 'url': format_url,
                 'vcodec': 'none',
             }]
@@ -179,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
-        elif mobj.group('widget'):
+        elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
             return self.url_result(query['url'][0], ie='Soundcloud')
         else: