X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=245d623d86df365a562fb395bc792b42f68634eb;hb=af140002158c1079b1365392c6a48ea06ad23c82;hp=e511232caae2fd68fb13446df5da79cff9fff8a3;hpb=d55de57b67bceca5d9116ddcc2ada2fa1957d89d;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e511232ca..245d623d8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -319,13 +319,20 @@ def sanitize_path(s): if unc_or_drive: norm_path.pop(0) sanitized_path = [ - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] if unc_or_drive: sanitized_path.insert(0, unc_or_drive + os.path.sep) return os.path.join(*sanitized_path) +def sanitize_url_path_consecutive_slashes(url): + """Collapses consecutive slashes in URLs' path""" + parsed_url = list(compat_urlparse.urlparse(url)) + parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2]) + return compat_urlparse.urlunparse(parsed_url) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -341,7 +348,7 @@ def _htmlentity_transform(entity): if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) - mobj = re.match(r'#(x?[0-9]+)', entity) + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) if numstr.startswith('x'):