X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=bde446bcbcbeb68bc7042c58e64e32893704c273;hb=1a84d8675b4417ca9adf40bb12325c728767896a;hp=1f60d34ae23ad2f36b54072e0b133d3cd2a22149;hpb=240089e5df640a12b1d300da05932c2f74ff8c69;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1f60d34ae..bde446bcb 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -27,9 +27,9 @@ std_headers = { } try: - compat_str = unicode # Python 2 + u = unicode # Python 2 except NameError: - compat_str = str + u = str def preferredencoding(): """Get preferred encoding. @@ -37,19 +37,17 @@ def preferredencoding(): Returns the best encoding scheme for the system, based on locale.getpreferredencoding() and some further tweaks. """ - def yield_preferredencoding(): - try: - pref = locale.getpreferredencoding() - u'TEST'.encode(pref) - except: - pref = 'UTF-8' - while True: - yield pref - return yield_preferredencoding().next() + try: + pref = locale.getpreferredencoding() + u'TEST'.encode(pref) + except: + pref = 'UTF-8' + + return pref def htmlentity_transform(matchobj): - """Transforms an HTML entity to a Unicode character. + """Transforms an HTML entity to a character. This function receives a match object and is intended to be used with the re.sub() function. @@ -60,7 +58,6 @@ def htmlentity_transform(matchobj): if entity in htmlentitydefs.name2codepoint: return unichr(htmlentitydefs.name2codepoint[entity]) - # Unicode character mobj = re.match(ur'(?u)#(x?\d+)', entity) if mobj is not None: numstr = mobj.group(1) @@ -69,7 +66,7 @@ def htmlentity_transform(matchobj): numstr = u'0%s' % numstr else: base = 10 - return unichr(long(numstr, base)) + return unichr(int(numstr, base)) # Unknown entity in name, return its literal representation return (u'&%s;' % entity) @@ -128,8 +125,10 @@ class IDParser(HTMLParser.HTMLParser): handle_decl = handle_pi = unknown_decl = find_startpos def get_result(self): - if self.result == None: return None - if len(self.result) != 3: return None + if self.result is None: + return None + if len(self.result) != 3: + return None lines = self.html.split('\n') lines = lines[self.result[1][0]-1:self.result[2][0]] lines[0] = lines[0][self.result[1][1]:] @@ -207,15 +206,23 @@ def sanitize_filename(s, restricted=False): elif char == ':': return '_-' if restricted else ' -' elif char in '\\/|*<>': - return '-' - if restricted and (char in '&\'' or char.isspace()): + return '_' + if restricted and (char in '!&\'' or char.isspace()): + return '_' + if restricted and ord(char) > 127: return '_' return char result = u''.join(map(replace_insane, s)) - while '--' in result: - result = result.replace('--', '-') - return result.strip('-') + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if not result: + result = '_' + return result def orderedSet(iterable): """ Remove all duplicates from the input iterable """ @@ -227,7 +234,7 @@ def orderedSet(iterable): def unescapeHTML(s): """ - @param s a string (of type unicode) + @param s a string """ assert type(s) == type(u'') @@ -236,7 +243,7 @@ def unescapeHTML(s): def encodeFilename(s): """ - @param s The name of the file (of type unicode) + @param s The name of the file """ assert type(s) == type(u'') @@ -308,7 +315,7 @@ class ContentTooShortError(Exception): class Trouble(Exception): """Trouble helper exception - + This is an exception to be handled with FileDownloader.trouble """