Document and test categories (#2923)
[youtube-dl] / youtube_dl / extractor / bandcamp.py
index 87932ba1ca3064a015892e3ba3d4de37c380179f..929aafdff3e848af3295eacf1520ec0ec0334966 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'file': '1812978515.mp3',
@@ -82,7 +82,7 @@ class BandcampIE(InfoExtractor):
         #We build the url we will use to get the final track url
         # This url is build in Bandcamp in the script download_bunde_*.js
         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
-        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
+        final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
         # If we could correctly generate the .rand field the url would be
         #in the "download_url" key
         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
@@ -93,14 +93,14 @@ class BandcampIE(InfoExtractor):
             'ext': 'mp3',
             'vcodec': 'none',
             'url': final_url,
-            'thumbnail': info['thumb_url'],
-            'uploader': info['artist'],
+            'thumbnail': info.get('thumb_url'),
+            'uploader': info.get('artist'),
         }
 
 
 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?'
 
     _TEST = {
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -128,8 +128,10 @@ class BandcampAlbumIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('subdomain')
         title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
+        display_id = title or playlist_id
+        webpage = self._download_webpage(url, display_id)
         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
         if not tracks_paths:
             raise ExtractorError('The page doesn\'t contain any tracks')
@@ -139,6 +141,8 @@ class BandcampAlbumIE(InfoExtractor):
         title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
         return {
             '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
             'title': title,
             'entries': entries,
         }