X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbandcamp.py;h=490cc961a204d40d41fbb4e0306a66611f161a09;hb=6b597516c12c7fd81e832f3ec05dd0dca6089823;hp=c13446665d2fb0e202973a26f5c7499c325719d1;hpb=c59c3c84ede823e5c97f695ae904545c615e4ded;p=youtube-dl diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index c13446665..490cc961a 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urlparse, +) +from ..utils import ( ExtractorError, ) @@ -70,26 +72,29 @@ class BandcampIE(InfoExtractor): download_link = m_download.group(1) video_id = self._search_regex( - r'var TralbumData = {.*?id: (?P\d+),?$', - webpage, 'video id', flags=re.MULTILINE | re.DOTALL) + r'(?ms)var TralbumData = {.*?id: (?P\d+),?$', + webpage, 'video id') download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') # We get the dictionary of the track from some javascript code - info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) - info = json.loads(info)[0] + all_info = self._parse_json(self._search_regex( + r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) + info = all_info[0] # We pick mp3-320 for now, until format selection can be easily implemented. mp3_info = info['downloads']['mp3-320'] # If we try to use this url it says the link has expired initial_url = mp3_info['url'] - re_url = r'(?Phttp://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P.*?)&id=(?P.*?)&ts=(?P.*)$' - m_url = re.match(re_url, initial_url) - #We build the url we will use to get the final track url + m_url = re.match( + r'(?Phttp://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P.*?)&id=(?P.*?)&ts=(?P.*)$', + initial_url) + # We build the url we will use to get the final track url # This url is build in Bandcamp in the script download_bunde_*.js request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') # If we could correctly generate the .rand field the url would be - #in the "download_url" key - final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) + # in the "download_url" key + final_url = self._search_regex( + r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') return { 'id': video_id, @@ -104,26 +109,31 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^?#]+))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'playlist': [ { - 'file': '1353101989.mp3', 'md5': '39bc1eded3476e927c724321ddf116cf', 'info_dict': { + 'id': '1353101989', + 'ext': 'mp3', 'title': 'Intro', } }, { - 'file': '38097443.mp3', 'md5': '1a2c32e2691474643e912cc6cd4bffaa', 'info_dict': { + 'id': '38097443', + 'ext': 'mp3', 'title': 'Kero One - Keep It Alive (Blazo remix)', } }, ], + 'info_dict': { + 'title': 'Jazz Format Mixtape vol.1', + }, 'params': { 'playlistend': 2 }, @@ -134,6 +144,12 @@ class BandcampAlbumIE(InfoExtractor): 'title': 'Hierophany of the Open Grave', }, 'playlist_mincount': 9, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'title': 'Loom', + }, + 'playlist_mincount': 7, }] def _real_extract(self, url): @@ -148,7 +164,8 @@ class BandcampAlbumIE(InfoExtractor): entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] - title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') + title = self._search_regex( + r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) return { '_type': 'playlist', 'id': playlist_id,