Use character strings instead of byte strings
[youtube-dl] / youtube_dl / utils.py
index 839da17d0763c6f41373856762d28f198ae94821..658fd2686b8534e834c34872883fd26d3d820cde 100644 (file)
@@ -26,6 +26,11 @@ std_headers = {
        'Accept-Language': 'en-us,en;q=0.5',
 }
 
+try:
+    compat_str = unicode # Python 2
+except NameError:
+    compat_str = str
+
 def preferredencoding():
        """Get preferred encoding.
 
@@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):
                HTMLParser.HTMLParser.__init__(self)
 
        def error(self, message):
-               print >> sys.stderr, self.getpos()
                if self.error_count > 10 or self.started:
                        raise HTMLParser.HTMLParseError(message, self.getpos())
                self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -194,10 +198,20 @@ def timeconvert(timestr):
 def sanitize_filename(s):
        """Sanitizes a string so it could be used as part of a filename."""
        def replace_insane(char):
-               if char in u' .\\/|?*<>:"' or ord(char) < 32:
-                       return '_'
+               if char == '?' or ord(char) < 32 or ord(char) == 127:
+                       return ''
+               elif char == '"':
+                       return '\''
+               elif char == ':':
+                       return ' -'
+               elif char in '\\/|*<>':
+                       return '-'
                return char
-       return u''.join(map(replace_insane, s)).strip('_')
+
+       result = u''.join(map(replace_insane, s))
+       while '--' in result:
+               result = result.replace('--', '-')
+       return result.strip('-')
 
 def orderedSet(iterable):
        """ Remove all duplicates from the input iterable """