[youtube:playlist] Extend _VALID_URL
[youtube-dl] / youtube_dl / extractor / soundcloud.py
index 1efb2b980cc83bc5ffa3b14c912188973b4574b7..9635c2b495e6744398051daaadd134b4707ec815 100644 (file)
@@ -11,10 +11,9 @@ from .common import (
 from ..compat import (
     compat_str,
     compat_urlparse,
-    compat_urllib_parse,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
-    encode_dict,
     ExtractorError,
     int_or_none,
     unified_strdate,
@@ -33,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
+                            (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
@@ -120,6 +119,12 @@ class SoundcloudIE(InfoExtractor):
     _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [m.group('url') for m in re.finditer(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+            webpage)]
+
     def report_resolve(self, video_id):
         """Report information extraction."""
         self.to_screen('%s: Resolving id' % video_id)
@@ -260,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):
             'title': 'The Royal Concept EP',
         },
         'playlist_mincount': 6,
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -393,7 +401,7 @@ class SoundcloudUserIE(SoundcloudIE):
         query = COMMON_QUERY.copy()
         query['offset'] = 0
 
-        next_href = base_url + '?' + compat_urllib_parse.urlencode(query)
+        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
 
         entries = []
         for i in itertools.count():
@@ -424,7 +432,7 @@ class SoundcloudUserIE(SoundcloudIE):
             qs = compat_urlparse.parse_qs(parsed_next_href.query)
             qs.update(COMMON_QUERY)
             next_href = compat_urlparse.urlunparse(
-                parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True)))
+                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
         return {
             '_type': 'playlist',
@@ -460,7 +468,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
         if token:
             data_dict['secret_token'] = token
 
-        data = compat_urllib_parse.urlencode(data_dict)
+        data = compat_urllib_parse_urlencode(data_dict)
         data = self._download_json(
             base_url + data, playlist_id, 'Downloading playlist')
 
@@ -500,7 +508,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         query['client_id'] = self._CLIENT_ID
         query['linked_partitioning'] = '1'
         query['offset'] = 0
-        data = compat_urllib_parse.urlencode(encode_dict(query))
+        data = compat_urllib_parse_urlencode(query)
         next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
 
         collected_results = 0