projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge pull request #12861 from Tithen-Firion/cbsinteractive-fix
[youtube-dl]
/
youtube_dl
/
extractor
/
bandcamp.py
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 88c590e98388d5f6058dd71ffb97f4f0254f0c5b..489d0ba53f672363213c7f788e83b692eb11894d 100644
(file)
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor):
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
- 'md5': '73d0b3171568232574e45652f8720b5c',
+ 'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
'info_dict': {
'id': '2650410135',
'info_dict': {
'id': '2650410135',
- 'ext': 'mp3',
- 'title': 'Lanius (Battle)',
- 'uploader': 'Ben Prunty Music',
+ 'ext': 'aiff',
+ 'title': 'Ben Prunty - Lanius (Battle)',
+ 'uploader': 'Ben Prunty',
},
}]
},
}]
@@ -47,6 +47,7 @@ class BandcampIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
+ thumbnail = self._html_search_meta('og:image', webpage, default=None)
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if not m_download:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if not m_download:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
@@ -75,6 +76,7 @@ class BandcampIE(InfoExtractor):
return {
'id': track_id,
'title': data['title'],
return {
'id': track_id,
'title': data['title'],
+ 'thumbnail': thumbnail,
'formats': formats,
'duration': float_or_none(data.get('duration')),
}
'formats': formats,
'duration': float_or_none(data.get('duration')),
}
@@ -143,7 +145,7 @@ class BandcampIE(InfoExtractor):
return {
'id': video_id,
'title': title,
return {
'id': video_id,
'title': title,
- 'thumbnail': info.get('thumb_url'),
+ 'thumbnail': info.get('thumb_url') or thumbnail,
'uploader': info.get('artist'),
'artist': artist,
'track': track,
'uploader': info.get('artist'),
'artist': artist,
'track': track,
@@ -209,6 +211,15 @@ class BandcampAlbumIE(InfoExtractor):
'id': 'entropy-ep',
},
'playlist_mincount': 3,
'id': 'entropy-ep',
},
'playlist_mincount': 3,
+ }, {
+ # not all tracks have songs
+ 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+ 'info_dict': {
+ 'id': 'we-are-the-plague',
+ 'title': 'WE ARE THE PLAGUE',
+ 'uploader_id': 'insulters',
+ },
+ 'playlist_count': 2,
}]
def _real_extract(self, url):
}]
def _real_extract(self, url):
@@ -217,12 +228,16 @@ class BandcampAlbumIE(InfoExtractor):
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
- tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
- if not tracks_paths:
+ track_elements = re.findall(
+ r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+ if not track_elements:
raise ExtractorError('The page doesn\'t contain any tracks')
raise ExtractorError('The page doesn\'t contain any tracks')
+ # Only tracks with duration info have songs
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
- for t_path in tracks_paths]
+ for elem_content, t_path in track_elements
+ if self._html_search_meta('duration', elem_content, default=None)]
+
title = self._html_search_regex(
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
webpage, 'title', fatal=False)
title = self._html_search_regex(
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
webpage, 'title', fatal=False)