[bandcamp] add support for albums (reported in #1270)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 22 Nov 2013 15:05:14 +0000 (16:05 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 22 Nov 2013 15:05:14 +0000 (16:05 +0100)
test/test_playlists.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/generic.py

index 706b6bdca1399284263106b755fdf9278c5d17d5..d83b3bf519649839e42641950853018d9592441e 100644 (file)
@@ -22,6 +22,7 @@ from youtube_dl.extractor import (
     LivestreamIE,
     NHLVideocenterIE,
     BambuserChannelIE,
     LivestreamIE,
     NHLVideocenterIE,
     BambuserChannelIE,
+    BandcampAlbumIE
 )
 
 
 )
 
 
@@ -103,5 +104,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'pixelversity')
         self.assertTrue(len(result['entries']) >= 66)
 
         self.assertEqual(result['title'], u'pixelversity')
         self.assertTrue(len(result['entries']) >= 66)
 
+    def test_bandcamp_album(self):
+        dl = FakeYDL()
+        ie = BandcampAlbumIE(dl)
+        result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['title'], u'Nightmare Night EP')
+        self.assertTrue(len(result['entries']) >= 4)
+
 if __name__ == '__main__':
     unittest.main()
 if __name__ == '__main__':
     unittest.main()
index ffb74df9f36a623cd0abd8cf744923e6e63cc4f0..802beef21482440cbc979c877b714befad042560 100644 (file)
@@ -11,7 +11,7 @@ from .arte import (
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE
+from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .breakcom import BreakIE
index 129a20f4497b4cc6fc9f031e8e48dd8eb8980f66..81d5c60e9a6257bbc56997a93ee04342c9c5537b 100644 (file)
@@ -3,11 +3,13 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_urlparse,
     ExtractorError,
 )
 
 
 class BandcampIE(InfoExtractor):
     ExtractorError,
 )
 
 
 class BandcampIE(InfoExtractor):
+    IE_NAME = u'Bandcamp'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _TEST = {
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
     _TEST = {
         u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
@@ -61,3 +63,25 @@ class BandcampIE(InfoExtractor):
                       }
 
         return [track_info]
                       }
 
         return [track_info]
+
+
+class BandcampAlbumIE(InfoExtractor):
+    IE_NAME = u'Bandcamp:album'
+    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
+        if not tracks_paths:
+            raise ExtractorError(u'The page doesn\'t contain any track')
+        entries = [
+            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            for t_path in tracks_paths]
+        title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': entries,
+        }
index e1d6a2a013ea599711245ea5b2382f6f02871700..0b5f2b2bb92907c5489cca3c63643bf8f80dd4a9 100644 (file)
@@ -199,7 +199,8 @@ class GenericIE(InfoExtractor):
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
             burl = unescapeHTML(mobj.group(1))
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None:
             burl = unescapeHTML(mobj.group(1))
-            return self.url_result(burl, 'Bandcamp')
+            # Don't set the extractor because it can be a track url or an album
+            return self.url_result(burl)
 
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)