[bandcamp:album] Fix title extraction (Closes #10455)
author Sergey M․ <dstftw@gmail.com>
Tue, 30 Aug 2016 17:29:49 +0000 (00:29 +0700)
committer Sergey M․ <dstftw@gmail.com>
Tue, 30 Aug 2016 17:29:49 +0000 (00:29 +0700)
youtube_dl/extractor/bandcamp.py

index 991ab0676e6b93a1c64d04f48b3728551bc4ccf0..249c3d9569c440b057af9f6706109545ace32862 100644 (file)
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
             'uploader_id': 'dotscale',
         },
         'playlist_mincount': 7,
+    }, {
+        # with escaped quote in title
+        'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+        'info_dict': {
+            'title': '"Entropy" EP',
+            'uploader_id': 'jstrecords',
+            'id': 'entropy-ep',
+        },
+        'playlist_mincount': 3,
     }]
 
     def _real_extract(self, url):
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(
-            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
+        title = self._html_search_regex(
+            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+            webpage, 'title', fatal=False)
+        if title:
+            title = title.replace(r'\"', '"')
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,