X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=245d623d86df365a562fb395bc792b42f68634eb;hb=91757b0f373ec3201f95066eeb0e09ebdcc1a067;hp=d5597d514dc502f73fb71ed8456017e4d1a2848b;hpb=dcca5819675df1e9d9a1caf00af0f98bb2ce511c;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d5597d514..245d623d8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -319,13 +319,20 @@ def sanitize_path(s): if unc_or_drive: norm_path.pop(0) sanitized_path = [ - re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) + path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] if unc_or_drive: sanitized_path.insert(0, unc_or_drive + os.path.sep) return os.path.join(*sanitized_path) +def sanitize_url_path_consecutive_slashes(url): + """Collapses consecutive slashes in URLs' path""" + parsed_url = list(compat_urlparse.urlparse(url)) + parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2]) + return compat_urlparse.urlunparse(parsed_url) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -341,7 +348,7 @@ def _htmlentity_transform(entity): if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) - mobj = re.match(r'#(x?[0-9]+)', entity) + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) if numstr.startswith('x'):