X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=40d6823a0f8ddbbf42a54bf4b0aea9e344825600;hb=1a9c655e3b1569f315d4193e877cba0b4a863c63;hp=839da17d0763c6f41373856762d28f198ae94821;hpb=5c961d89df83006aede903547ec8b270ed42b785;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 839da17d0..40d6823a0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,21 +26,24 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + def preferredencoding(): """Get preferred encoding. Returns the best encoding scheme for the system, based on locale.getpreferredencoding() and some further tweaks. """ - def yield_preferredencoding(): - try: - pref = locale.getpreferredencoding() - u'TEST'.encode(pref) - except: - pref = 'UTF-8' - while True: - yield pref - return yield_preferredencoding().next() + try: + pref = locale.getpreferredencoding() + u'TEST'.encode(pref) + except: + pref = 'UTF-8' + + return pref def htmlentity_transform(matchobj): @@ -83,7 +86,6 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +192,36 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): - if char in u' .\\/|?*<>:"' or ord(char) < 32: + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '_' + if restricted and (char in '!&\'' or char.isspace()): + return '_' + if restricted and ord(char) > 127: return '_' return char - return u''.join(map(replace_insane, s)).strip('_') + + result = u''.join(map(replace_insane, s)) + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if not result: + result = '_' + return result def orderedSet(iterable): """ Remove all duplicates from the input iterable """ @@ -290,7 +314,7 @@ class ContentTooShortError(Exception): class Trouble(Exception): """Trouble helper exception - + This is an exception to be handled with FileDownloader.trouble """