[extractor/common] Make _is_valid_url more relaxed
[youtube-dl] / youtube_dl / extractor / common.py
index 937237b3f597669e737f18fa7c96708de56e794b..50d48c40dc61ffc872f97fd1f763d53363a52ec0 100644 (file)
@@ -67,6 +67,7 @@ from ..utils import (
     sanitized_Request,
     sanitize_filename,
     str_or_none,
+    strip_or_none,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -219,7 +220,7 @@ class InfoExtractor(object):
                         * "preference" (optional, int) - quality of the image
                         * "width" (optional, int)
                         * "height" (optional, int)
-                        * "resolution" (optional, string "{width}x{height"},
+                        * "resolution" (optional, string "{width}x{height}",
                                         deprecated)
                         * "filesize" (optional, int)
     thumbnail:      Full URL to a video thumbnail image.
@@ -1423,12 +1424,10 @@ class InfoExtractor(object):
         try:
             self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
             return True
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_urllib_error.URLError):
-                self.to_screen(
-                    '%s: %s URL is invalid, skipping' % (video_id, item))
-                return False
-            raise
+        except ExtractorError:
+            self.to_screen(
+                '%s: %s URL is invalid, skipping' % (video_id, item))
+            return False
 
     def http_scheme(self):
         """ Either "http:" or "https:", depending on the user's preferences """
@@ -2480,7 +2479,7 @@ class InfoExtractor(object):
                 'subtitles': {},
             }
             media_attributes = extract_attributes(media_tag)
-            src = media_attributes.get('src')
+            src = strip_or_none(media_attributes.get('src'))
             if src:
                 _, formats = _media_formats(src, media_type)
                 media_info['formats'].extend(formats)
@@ -2490,7 +2489,7 @@ class InfoExtractor(object):
                     s_attr = extract_attributes(source_tag)
                     # data-video-src and data-src are non standard but seen
                     # several times in the wild
-                    src = dict_get(s_attr, ('src', 'data-video-src', 'data-src'))
+                    src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
                     if not src:
                         continue
                     f = parse_content_type(s_attr.get('type'))
@@ -2533,7 +2532,7 @@ class InfoExtractor(object):
                     track_attributes = extract_attributes(track_tag)
                     kind = track_attributes.get('kind')
                     if not kind or kind in ('subtitles', 'captions'):
-                        src = track_attributes.get('src')
+                        src = strip_or_none(track_attributes.get('src'))
                         if not src:
                             continue
                         lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')