X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=922e17eccfac611a1d90bf83e913383c9afce30d;hb=795cc5059a6f349f861e246269d6cced39b3a753;hp=d18073d72894c905b92233d0493cf83d525a6607;hpb=0b8c922da91fb7238ea15434d6a4792da84015bf;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d18073d72..922e17ecc 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -83,7 +83,7 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print self.getpos() + print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -156,12 +156,6 @@ def clean_html(html): return html -def sanitize_title(utitle): - """Sanitizes a video title so it could be used as part of a filename.""" - utitle = unescapeHTML(utitle) - return utitle.replace(unicode(os.sep), u'%') - - def sanitize_open(filename, open_mode): """Try to open the given filename, and slightly tweak it if this fails. @@ -196,10 +190,14 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def simplify_title(title): - expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) - return expr.sub(u'_', title).strip(u'_') + +def sanitize_filename(s): + """Sanitizes a string so it could be used as part of a filename.""" + def replace_insane(char): + if char in u' .\\/|?*<>:"' or ord(char) < 32: + return '_' + return char + return u''.join(map(replace_insane, s)).strip('_') def orderedSet(iterable): """ Remove all duplicates from the input iterable """ @@ -225,7 +223,7 @@ def encodeFilename(s): assert type(s) == type(u'') - if sys.platform == 'win32' and sys.getwindowsversion().major >= 5: + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: # Pass u'' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match Windows 9x series as well. Besides, NT 4 is obsolete.)