try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
raise
# In case of error, try to remove win32 forbidden chars
- alt_filename = os.path.join(
- re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
- for path_part in os.path.split(filename)
- )
+ alt_filename = sanitize_path(filename)
if alt_filename == filename:
raise
else:
# An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
+ stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
return result
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any platform other than win32 the path is returned unchanged.
    On win32 the drive letter or UNC share prefix is preserved, the rest of
    the path is normalized with os.path.normpath, and within every path
    component each of the characters / < > : " | ? * and the backslash,
    as well as a trailing dot, is replaced by '#'. The special components
    '.' and '..' are passed through untouched.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # os.path.splitunc() was deprecated in Python 3.1 and removed in 3.7;
    # splitdrive() reports UNC share prefixes itself on current interpreters.
    # Only consult splitunc() when it still exists and splitdrive() found
    # no prefix (old interpreters whose splitdrive() ignores UNC paths).
    splitunc = getattr(os.path, 'splitunc', None)
    if not drive_or_unc and splitunc is not None:
        drive_or_unc, _ = splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the leading component of the remainder; the prefix itself is
        # re-inserted below, after the other components have been sanitized
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
+
+
def sanitize_url_path_consecutive_slashes(url):
    """Returns url with each run of slashes in its path squeezed to one slash"""
    components = compat_urlparse.urlparse(url)
    # Only the path component is rewritten; the '//' after the scheme and
    # any slashes in the query or fragment belong to other components.
    squeezed_path = re.sub(r'/{2,}', '/', components.path)
    return compat_urlparse.urlunparse(components._replace(path=squeezed_path))
+
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+ date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
]
if day_first:
format_expressions.extend([
+ '%d-%m-%Y',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
])
else:
format_expressions.extend([
+ '%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
- res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
return None # No Proxy
return compat_urllib_request.ProxyHandler.proxy_open(
self, req, proxy, type)
-
-
-def url_sanitize_consecutive_slashes(url):
- """Sanitize URLs with consecutive slashes
-
- For example, transform both
- http://hostname/foo//bar/filename.html
- and
- http://hostname//foo/bar/filename.html
- into
- http://hostname/foo/bar/filename.html
- """
- parsed_url = list(compat_urlparse.urlparse(url))
- parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
- return compat_urlparse.urlunparse(parsed_url)