X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbandcamp.py;h=c1ef8051d3074a6551941bf140f88eee4ed8a124;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=27d8652f6ec8d4540c4214da2690549a6af20b73;hpb=99c2398bc641c478dbea85c013081d883479827e;p=youtube-dl diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 27d8652f6..c1ef8051d 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -10,6 +10,8 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + float_or_none, + int_or_none, ) @@ -52,11 +54,11 @@ class BandcampIE(InfoExtractor): ext, abr_str = format_id.split('-', 1) formats.append({ 'format_id': format_id, - 'url': format_url, + 'url': self._proto_relative_url(format_url, 'http:'), 'ext': ext, 'vcodec': 'none', 'acodec': ext, - 'abr': int(abr_str), + 'abr': int_or_none(abr_str), }) self._sort_formats(formats) @@ -65,21 +67,21 @@ class BandcampIE(InfoExtractor): 'id': compat_str(data['id']), 'title': data['title'], 'formats': formats, - 'duration': float(data['duration']), + 'duration': float_or_none(data.get('duration')), } else: raise ExtractorError('No free songs found') download_link = m_download.group(1) video_id = self._search_regex( - r'(?ms)var TralbumData = {.*?id: (?P\d+),?$', + r'(?ms)var TralbumData = .*?[{,]\s*id: (?P\d+),?$', webpage, 'video id') download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') # We get the dictionary of the track from some javascript code all_info = self._parse_json(self._search_regex( r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) - info = All_info[0] + info = all_info[0] # We pick mp3-320 for now, until format selection can be easily implemented. mp3_info = info['downloads']['mp3-320'] # If we try to use this url it says the link has expired @@ -93,8 +95,8 @@ class BandcampIE(InfoExtractor): final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') # If we could correctly generate the .rand field the url would be # in the "download_url" key - final_url = self._search_regex( - r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') + final_url = self._proto_relative_url(self._search_regex( + r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') return { 'id': video_id, @@ -109,7 +111,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^?#]+)|/?(?:$|[?#]))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -133,31 +135,37 @@ class BandcampAlbumIE(InfoExtractor): ], 'info_dict': { 'title': 'Jazz Format Mixtape vol.1', + 'id': 'jazz-format-mixtape-vol-1', + 'uploader_id': 'blazo', }, 'params': { 'playlistend': 2 }, - 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + 'skip': 'Bandcamp imposes download limits.' }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { 'title': 'Hierophany of the Open Grave', + 'uploader_id': 'nightbringer', + 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, }, { 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'title': 'Loom', + 'id': 'dotscale', + 'uploader_id': 'dotscale', }, 'playlist_mincount': 7, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('subdomain') - title = mobj.group('title') - display_id = title or playlist_id - webpage = self._download_webpage(url, display_id) + uploader_id = mobj.group('subdomain') + album_id = mobj.group('album_id') + playlist_id = album_id or uploader_id + webpage = self._download_webpage(url, playlist_id) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: raise ExtractorError('The page doesn\'t contain any tracks') @@ -168,8 +176,8 @@ class BandcampAlbumIE(InfoExtractor): r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) return { '_type': 'playlist', + 'uploader_id': uploader_id, 'id': playlist_id, - 'display_id': display_id, 'title': title, 'entries': entries, }