Merge branch 'sohu_fix' of https://github.com/yan12125/youtube-dl into yan12125-sohu_fix
[youtube-dl] / youtube_dl / utils.py
index 0f49d602eab1296dced15449a949c26dc4408d66..e82e3998a7c30d8ae14c9561b51946df0cbebcff 100644 (file)
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
             raise
 
         # In case of error, try to remove win32 forbidden chars
-        alt_filename = os.path.join(
-            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-            for path_part in os.path.split(filename)
-        )
+        alt_filename = sanitize_path(filename)
         if alt_filename == filename:
             raise
         else:
             # An exception here should be caught in the caller
-            stream = open(encodeFilename(filename), open_mode)
+            stream = open(encodeFilename(alt_filename), open_mode)
             return (stream, alt_filename)
 
 
@@ -322,7 +319,7 @@ def sanitize_path(s):
     if unc_or_drive:
         norm_path.pop(0)
     sanitized_path = [
-        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
     if unc_or_drive:
         sanitized_path.insert(0, unc_or_drive + os.path.sep)
@@ -1807,3 +1804,18 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
             return None  # No Proxy
         return compat_urllib_request.ProxyHandler.proxy_open(
             self, req, proxy, type)
+
+
+def url_sanitize_consecutive_slashes(url):
+    """Collapse runs of slashes in the path component of a URL.
+
+    For example, both
+        http://hostname/foo//bar/filename.html
+    and
+        http://hostname//foo/bar/filename.html
+    become
+        http://hostname/foo/bar/filename.html
+    Only the path is rewritten; the '//' after the scheme is left alone.
+    """
+    parts = compat_urlparse.urlparse(url)
+    return compat_urlparse.urlunparse(parts._replace(path=re.sub(r'/{2,}', '/', parts.path)))