Improve URL extraction
[youtube-dl] / youtube_dl / extractor / bandcamp.py
index 9ddb9af1725467ff67c8407f46ce1f891ed701de..b8514734d5780a380d6997efdc9fa09e96f7042a 100644 (file)
@@ -19,6 +19,7 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     unified_strdate,
+    url_or_none,
 )
 
 
@@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
                 fatal=False)
             if not stat:
                 continue
-            retry_url = stat.get('retry_url')
-            if not isinstance(retry_url, compat_str):
+            retry_url = url_or_none(stat.get('retry_url'))
+            if not retry_url:
                 continue
             formats.append({
                 'url': self._proto_relative_url(retry_url, 'http:'),
@@ -242,7 +243,12 @@ class BandcampAlbumIE(InfoExtractor):
             raise ExtractorError('The page doesn\'t contain any tracks')
         # Only tracks with duration info have songs
         entries = [
-            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+            self.url_result(
+                compat_urlparse.urljoin(url, t_path),
+                ie=BandcampIE.ie_key(),
+                video_title=self._search_regex(
+                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+                    elem_content, 'track title', fatal=False))
             for elem_content, t_path in track_elements
             if self._html_search_meta('duration', elem_content, default=None)]
 
@@ -301,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
 
         formats = []
         for format_id, format_url in show['audio_stream'].items():
-            if not isinstance(format_url, compat_str):
+            if not url_or_none(format_url):
                 continue
             for known_ext in KNOWN_EXTENSIONS:
                 if known_ext in format_id: