Document and test categories (#2923)

[youtube-dl] / youtube_dl / extractor / bandcamp.py
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py

index 87932ba1ca3064a015892e3ba3d4de37c380179f..929aafdff3e848af3295eacf1520ec0ec0334966 100644 (file)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -12,7 +12,7 @@ from ..utils import (
  
  
  class BandcampIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
      _TESTS = [{
          'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
          'file': '1812978515.mp3',
@@ -82,7 +82,7 @@ class BandcampIE(InfoExtractor):
          #We build the url we will use to get the final track url
          # This url is build in Bandcamp in the script download_bunde_*.js
          request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
-        final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
+        final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
          # If we could correctly generate the .rand field the url would be
          #in the "download_url" key
          final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
@@ -93,14 +93,14 @@ class BandcampIE(InfoExtractor):
              'ext': 'mp3',
              'vcodec': 'none',
              'url': final_url,
-            'thumbnail': info['thumb_url'],
-            'uploader': info['artist'],
+            'thumbnail': info.get('thumb_url'),
+            'uploader': info.get('artist'),
          }
  
  
  class BandcampAlbumIE(InfoExtractor):
      IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?'
  
      _TEST = {
          'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -128,8 +128,10 @@ class BandcampAlbumIE(InfoExtractor):
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('subdomain')
          title = mobj.group('title')
-        webpage = self._download_webpage(url, title)
+        display_id = title or playlist_id
+        webpage = self._download_webpage(url, display_id)
          tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
          if not tracks_paths:
              raise ExtractorError('The page doesn\'t contain any tracks')
@@ -139,6 +141,8 @@ class BandcampAlbumIE(InfoExtractor):
          title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
          return {
              '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
              'title': title,
              'entries': entries,
          }