X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=1f60d34ae23ad2f36b54072e0b133d3cd2a22149;hb=240089e5df640a12b1d300da05932c2f74ff8c69;hp=6b78f51cd6971f9ba6906f31cb4d1deda5288a30;hpb=3187e42a23a0bb68799935ed7acc7667e7dfe5c3;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6b78f51cd..1f60d34ae 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -19,13 +19,18 @@ except ImportError: import StringIO std_headers = { - 'User-Agent': 'iTunes/10.6.1', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-us,en;q=0.5', } +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + def preferredencoding(): """Get preferred encoding. @@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +194,28 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): - if char in u' .\\/|?*<>:"' or ord(char) < 32: + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '-' + if restricted and (char in '&\'' or char.isspace()): return '_' return char - return u''.join(map(replace_insane, s)).strip('_') + + result = u''.join(map(replace_insane, s)) + while '--' in result: + result = result.replace('--', '-') + return result.strip('-') def orderedSet(iterable): """ Remove all duplicates from the input iterable """ @@ -223,7 +241,7 @@ def encodeFilename(s): assert type(s) == type(u'') - if sys.platform == 'win32' and sys.getwindowsversion().major >= 5: + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: # Pass u'' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match Windows 9x series as well. Besides, NT 4 is obsolete.)