X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbandcamp.py;h=359d4174bc181abb3ec7b210b9f851c182d3f765;hb=3f8ced5144a76a3f9ab7ee8cd06cc79bb75dc564;hp=dcf6721ee1af7ba56e89bd43f40f95c44826746e;hpb=45aef472817b33194a4b59f345dbaf08701f65d3;p=youtube-dl

diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index dcf6721ee..359d4174b 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -3,12 +3,46 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
+    compat_urlparse,
     ExtractorError,
 )
 
 
 class BandcampIE(InfoExtractor):
+    IE_NAME = u'Bandcamp'
     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _TESTS = [{
+        u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
+        u'file': u'1812978515.mp3',
+        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
+        u'info_dict': {
+            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
+        },
+        u'skip': u'There is a limit of 200 free downloads / month for the test song'
+    }, {
+        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+        u'playlist': [
+            {
+                u'file': u'1353101989.mp3',
+                u'md5': u'39bc1eded3476e927c724321ddf116cf',
+                u'info_dict': {
+                    u'title': u'Intro',
+                }
+            },
+            {
+                u'file': u'38097443.mp3',
+                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
+                u'info_dict': {
+                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
+                }
+            },
+        ],
+        u'params': {
+            u'playlistend': 2
+        },
+        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -17,6 +51,29 @@ class BandcampIE(InfoExtractor):
         # We get the link to the free download page
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if m_download is None:
-            raise ExtractorError(u'No free songs found')
+            # No free download page: fall back to the per-track stream URLs
+            # listed in the page's "trackinfo" JSON, returned as a playlist.
+            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
+            if m_trackinfo:
+                json_code = m_trackinfo.group(1)
+                data = json.loads(json_code)
+
+                entries = []
+                for d in data:
+                    formats = [{
+                        'format_id': format_id,
+                        'url': format_url,
+                        # The text before the first '-' of the id is the extension
+                        'ext': format_id.partition('-')[0]
+                    } for format_id, format_url in sorted(d['file'].items())]
+                    entries.append({
+                        'id': compat_str(d['id']),
+                        'title': d['title'],
+                        'formats': formats,
+                    })
+
+                return self.playlist_result(entries, title, title)
+            else:
+                raise ExtractorError(u'No free songs found')
 
         download_link = m_download.group(1)
@@ -52,3 +109,28 @@ class BandcampIE(InfoExtractor):
         }
 
         return [track_info]
+
+
+class BandcampAlbumIE(InfoExtractor):
+    """Extract a Bandcamp album page as a playlist of its track pages."""
+    IE_NAME = u'Bandcamp:album'
+    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
+
+    def _real_extract(self, url):
+        """Return a playlist dict whose entries defer to BandcampIE."""
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
+        if not tracks_paths:
+            raise ExtractorError(u'The page doesn\'t contain any track')
+        # Resolve each track href against the album URL
+        entries = [
+            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            for t_path in tracks_paths]
+        title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': entries,
+        }