X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=90e0ed9ab7eb36d4c9f30efffdd2868e9cfe6d47;hb=70a1165b32acf253905109e9b4f245295d67af1f;hp=e82e3998a7c30d8ae14c9561b51946df0cbebcff;hpb=dc03a42537cba83597ca8acb2bbe03f686f2136c;p=youtube-dl

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e82e3998a..90e0ed9ab 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -75,7 +75,7 @@ def preferredencoding():
     try:
         pref = locale.getpreferredencoding()
         'TEST'.encode(pref)
-    except:
+    except Exception:
         pref = 'UTF-8'
 
     return pref
@@ -127,7 +127,7 @@ def write_json_file(obj, fn):
            except OSError:
                pass
        os.rename(tf.name, fn)
-    except:
+    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
@@ -326,6 +326,13 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
+def sanitize_url_path_consecutive_slashes(url):
+    """Collapses consecutive slashes in URLs' path"""
+    parsed_url = list(compat_urlparse.urlparse(url))
+    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+    return compat_urlparse.urlunparse(parsed_url)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -341,7 +348,7 @@ def _htmlentity_transform(entity):
     if entity in compat_html_entities.name2codepoint:
         return compat_chr(compat_html_entities.name2codepoint[entity])
 
-    mobj = re.match(r'#(x?[0-9]+)', entity)
+    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
     if mobj is not None:
         numstr = mobj.group(1)
         if numstr.startswith('x'):
@@ -1804,18 +1811,3 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
             return None  # No Proxy
         return compat_urllib_request.ProxyHandler.proxy_open(
             self, req, proxy, type)
-
-
-def url_sanitize_consecutive_slashes(url):
-    """Sanitize URLs with consecutive slashes
-
-    For example, transform both
-        http://hostname/foo//bar/filename.html
-    and
-        http://hostname//foo/bar/filename.html
-    into
-        http://hostname/foo/bar/filename.html
-    """
-    parsed_url = list(compat_urlparse.urlparse(url))
-    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
-    return compat_urlparse.urlunparse(parsed_url)
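
Note (not part of the diff above): a minimal, standalone sketch of the relocated helper, using the standard library's urllib.parse as a stand-in for youtube-dl's compat_urlparse shim. It reuses the two example URLs from the removed url_sanitize_consecutive_slashes docstring to show that only the path component has its repeated slashes collapsed; scheme, host, and query are left untouched.

import re
from urllib import parse as compat_urlparse  # stand-in for youtube-dl's compat shim


def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    # urlparse() splits the URL into six components; index 2 is the path.
    parsed_url = list(compat_urlparse.urlparse(url))
    # Replace any run of two or more slashes in the path with a single slash.
    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
    return compat_urlparse.urlunparse(parsed_url)


# Both docstring examples from the removed helper normalize to the same URL.
assert sanitize_url_path_consecutive_slashes(
    'http://hostname/foo//bar/filename.html') == 'http://hostname/foo/bar/filename.html'
assert sanitize_url_path_consecutive_slashes(
    'http://hostname//foo/bar/filename.html') == 'http://hostname/foo/bar/filename.html'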