X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=40d6823a0f8ddbbf42a54bf4b0aea9e344825600;hb=1a9c655e3b1569f315d4193e877cba0b4a863c63;hp=6f53337d49496561cada56ed149866fa4f949c0e;hpb=51937c086943a3bdbf6f707c75d041ed3b0ba743;p=youtube-dl

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 6f53337d4..40d6823a0 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -26,21 +26,24 @@ std_headers = {
     'Accept-Language': 'en-us,en;q=0.5',
 }
 
+try:
+    compat_str = unicode # Python 2
+except NameError:
+    compat_str = str
+
 def preferredencoding():
     """Get preferred encoding.
 
     Returns the best encoding scheme for the system, based on
     locale.getpreferredencoding() and some further tweaks.
     """
-    def yield_preferredencoding():
-        try:
-            pref = locale.getpreferredencoding()
-            u'TEST'.encode(pref)
-        except:
-            pref = 'UTF-8'
-        while True:
-            yield pref
-    return yield_preferredencoding().next()
+    try:
+        pref = locale.getpreferredencoding()
+        u'TEST'.encode(pref)
+    except:
+        pref = 'UTF-8'
+
+    return pref
 
 
 def htmlentity_transform(matchobj):
@@ -189,24 +192,36 @@ def timeconvert(timestr):
     if timetuple is not None:
         timestamp = email.utils.mktime_tz(timetuple)
     return timestamp
-    
-def sanitize_filename(s):
-    """Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+    """Sanitizes a string so it could be used as part of a filename.
+    If restricted is set, use a stricter subset of allowed characters.
+    """
     def replace_insane(char):
         if char == '?' or ord(char) < 32 or ord(char) == 127:
             return ''
         elif char == '"':
-            return '\''
+            return '' if restricted else '\''
         elif char == ':':
-            return ' -'
+            return '_-' if restricted else ' -'
         elif char in '\\/|*<>':
-            return '-'
+            return '_'
+        if restricted and (char in '!&\'' or char.isspace()):
+            return '_'
+        if restricted and ord(char) > 127:
+            return '_'
         return char
 
     result = u''.join(map(replace_insane, s))
-    while '--' in result:
-        result = result.replace('--', '-')
-    return result.strip('-')
+    while '__' in result:
+        result = result.replace('__', '_')
+    result = result.strip('_')
+    # Common case of "Foreign band name - English song title"
+    if restricted and result.startswith('-_'):
+        result = result[2:]
+    if not result:
+        result = '_'
+    return result
 
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
@@ -299,7 +314,7 @@ class ContentTooShortError(Exception):
 
 class Trouble(Exception):
     """Trouble helper exception
-    
+
     This is an exception to be handled with FileDownloader.trouble
     """
 