Merge remote-tracking branch 'Asido/master'
[youtube-dl] / youtube_dl / utils.py
index a64937b4c12077fef92c95d0ddd36e30de0e8917..40d6823a0f8ddbbf42a54bf4b0aea9e344825600 100644 (file)
@@ -26,21 +26,24 @@ std_headers = {
        'Accept-Language': 'en-us,en;q=0.5',
 }
 
+try:
+    compat_str = unicode # Python 2
+except NameError:
+    compat_str = str
+
 def preferredencoding():
        """Get preferred encoding.
 
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
-       def yield_preferredencoding():
-               try:
-                       pref = locale.getpreferredencoding()
-                       u'TEST'.encode(pref)
-               except:
-                       pref = 'UTF-8'
-               while True:
-                       yield pref
-       return yield_preferredencoding().next()
+       try:
+               pref = locale.getpreferredencoding()
+               u'TEST'.encode(pref)
+       except:
+               pref = 'UTF-8'
+
+       return pref
 
 
 def htmlentity_transform(matchobj):
@@ -83,7 +86,6 @@ class IDParser(HTMLParser.HTMLParser):
                HTMLParser.HTMLParser.__init__(self)
 
        def error(self, message):
-               print >> sys.stderr, self.getpos()
                if self.error_count > 10 or self.started:
                        raise HTMLParser.HTMLParseError(message, self.getpos())
                self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -190,24 +192,36 @@ def timeconvert(timestr):
        if timetuple is not None:
                timestamp = email.utils.mktime_tz(timetuple)
        return timestamp
-       
-def sanitize_filename(s):
-       """Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+       """Sanitizes a string so it could be used as part of a filename.
+       If restricted is set, use a stricter subset of allowed characters.
+       """
        def replace_insane(char):
                if char == '?' or ord(char) < 32 or ord(char) == 127:
                        return ''
                elif char == '"':
-                       return '\''
+                       return '' if restricted else '\''
                elif char == ':':
-                       return ' -'
+                       return '_-' if restricted else ' -'
                elif char in '\\/|*<>':
-                       return '-'
+                       return '_'
+               if restricted and (char in '!&\'' or char.isspace()):
+                       return '_'
+               if restricted and ord(char) > 127:
+                       return '_'
                return char
 
        result = u''.join(map(replace_insane, s))
-       while '--' in result:
-               result = result.replace('--', '-')
-       return result.strip('-')
+       while '__' in result:
+               result = result.replace('__', '_')
+       result = result.strip('_')
+       # Common case of "Foreign band name - English song title"
+       if restricted and result.startswith('-_'):
+               result = result[2:]
+       if not result:
+               result = '_'
+       return result
 
 def orderedSet(iterable):
        """ Remove all duplicates from the input iterable """
@@ -300,7 +314,7 @@ class ContentTooShortError(Exception):
 
 class Trouble(Exception):
        """Trouble helper exception
-       
+
        This is an exception to be handled with
        FileDownloader.trouble
        """