try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
return os.path.join(*sanitized_path)
+def sanitize_url_path_consecutive_slashes(url):
+ """Collapse runs of consecutive slashes in the path component of a URL.
+
+ The URL is split with compat_urlparse.urlparse, every run of two or
+ more '/' characters in the path element is replaced by a single '/',
+ and the pieces are joined back together with compat_urlparse.urlunparse.
+ Only the path element (index 2 of the parse result) is rewritten; the
+ other elements are passed through to urlunparse unchanged.
+
+ For example, http://hostname/foo//bar/filename.html becomes
+ http://hostname/foo/bar/filename.html
+
+ NOTE(review): assumes compat_urlparse behaves like the standard
+ urllib.parse / urlparse module -- confirm against the compat layer.
+ """
+ # urlparse returns an immutable tuple; copy it into a list so that the
+ # path element can be reassigned in place.
+ parsed_url = list(compat_urlparse.urlparse(url))
+ # Index 2 is the path component of the parse result.
+ parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+ return compat_urlparse.urlunparse(parsed_url)
+
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+ date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
]
if day_first:
format_expressions.extend([
+ '%d-%m-%Y',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
])
else:
format_expressions.extend([
+ '%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
- res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
return res
return None # No Proxy
return compat_urllib_request.ProxyHandler.proxy_open(
self, req, proxy, type)
-
-
-def url_sanitize_consecutive_slashes(url):
- """Sanitize URLs with consecutive slashes
-
- For example, transform both
- http://hostname/foo//bar/filename.html
- and
- http://hostname//foo/bar/filename.html
- into
- http://hostname/foo/bar/filename.html
- """
- parsed_url = list(compat_urlparse.urlparse(url))
- parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
- return compat_urlparse.urlunparse(parsed_url)