Merge remote-tracking branch 'Asido/master'
[youtube-dl] / youtube_dl / utils.py
index 658fd2686b8534e834c34872883fd26d3d820cde..40d6823a0f8ddbbf42a54bf4b0aea9e344825600 100644 (file)
@@ -37,15 +37,13 @@ def preferredencoding():
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
-       def yield_preferredencoding():
-               try:
-                       pref = locale.getpreferredencoding()
-                       u'TEST'.encode(pref)
-               except:
-                       pref = 'UTF-8'
-               while True:
-                       yield pref
-       return yield_preferredencoding().next()
+       try:
+               pref = locale.getpreferredencoding()
+               u'TEST'.encode(pref)
+       except:
+               pref = 'UTF-8'
+
+       return pref
 
 
 def htmlentity_transform(matchobj):
@@ -194,24 +192,36 @@ def timeconvert(timestr):
        if timetuple is not None:
                timestamp = email.utils.mktime_tz(timetuple)
        return timestamp
-       
-def sanitize_filename(s):
-       """Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+       """Sanitizes a string so it could be used as part of a filename.
+       If restricted is set, use a stricter subset of allowed characters.
+       """
        def replace_insane(char):
                if char == '?' or ord(char) < 32 or ord(char) == 127:
                        return ''
                elif char == '"':
-                       return '\''
+                       return '' if restricted else '\''
                elif char == ':':
-                       return ' -'
+                       return '_-' if restricted else ' -'
                elif char in '\\/|*<>':
-                       return '-'
+                       return '_'
+               if restricted and (char in '!&\'' or char.isspace()):
+                       return '_'
+               if restricted and ord(char) > 127:
+                       return '_'
                return char
 
        result = u''.join(map(replace_insane, s))
-       while '--' in result:
-               result = result.replace('--', '-')
-       return result.strip('-')
+       while '__' in result:
+               result = result.replace('__', '_')
+       result = result.strip('_')
+       # Common case of "Foreign band name - English song title"
+       if restricted and result.startswith('-_'):
+               result = result[2:]
+       if not result:
+               result = '_'
+       return result
 
 def orderedSet(iterable):
        """ Remove all duplicates from the input iterable """
@@ -304,7 +314,7 @@ class ContentTooShortError(Exception):
 
 class Trouble(Exception):
        """Trouble helper exception
-       
+
        This is an exception to be handled with
        FileDownloader.trouble
        """