X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Futils.py;h=1f3bfef7d562e3fb0e63db16e644b86819eaaa5c;hb=9724e5d33661b6d6c84b6da64b78c0b96221ab24;hp=54fa17c388aa326cbbc83b1f2a7829a00deddb0a;hpb=08b38d54015706d90d05371455ba86b7887cabcc;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 54fa17c38..1f3bfef7d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -54,7 +54,7 @@ from .compat import ( compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', @@ -62,6 +62,11 @@ std_headers = { } +ENGLISH_MONTH_NAMES = [ + 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', 'December'] + + def preferredencoding(): """Get preferred encoding. @@ -299,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False): # Common case of "Foreign band name - English song title" if restricted and result.startswith('-_'): result = result[2:] + if result.startswith('-'): + result = '_' + result[len('-'):] if not result: result = '_' return result @@ -895,8 +902,8 @@ def _windows_write_string(s, out): def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) + return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or + GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) if not_a_console(h): return False @@ -1185,11 +1192,18 @@ def get_term_width(): def month_by_name(name): """ Return the number of a month by (locale-independently) English name """ - ENGLISH_NAMES = [ - 'January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', 'November', 'December'] try: - return ENGLISH_NAMES.index(name) + 1 + return ENGLISH_MONTH_NAMES.index(name) + 1 + except ValueError: + return None + + +def month_by_abbreviation(abbrev): + """ Return the number of a month by (locale-independently) English + abbreviations """ + + try: + return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1 except ValueError: return None @@ -1276,6 +1290,7 @@ def parse_duration(s): (?P[0-9.]+)\s*(?:mins?|minutes?)\s*| (?P[0-9.]+)\s*(?:hours?)| + \s*(?P[0-9]+)\s*(?:[:h]|hours?)\s*(?P[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*| (?: (?: (?:(?P[0-9]+)\s*(?:[:d]|days?)\s*)? @@ -1294,10 +1309,14 @@ def parse_duration(s): return float_or_none(m.group('only_hours'), invscale=60 * 60) if m.group('secs'): res += int(m.group('secs')) + if m.group('mins_reversed'): + res += int(m.group('mins_reversed')) * 60 if m.group('mins'): res += int(m.group('mins')) * 60 if m.group('hours'): res += int(m.group('hours')) * 60 * 60 + if m.group('hours_reversed'): + res += int(m.group('hours_reversed')) * 60 * 60 if m.group('days'): res += int(m.group('days')) * 24 * 60 * 60 if m.group('ms'): @@ -1548,8 +1567,8 @@ def js_to_json(code): return '"%s"' % v res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\")?)*"| - '(?:[^'\\]*(?:\\\\|\\')?)*'| + "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) @@ -1604,6 +1623,15 @@ def args_to_str(args): return ' '.join(shlex_quote(a) for a in args) +def mimetype2ext(mt): + _, _, res = mt.rpartition('/') + + return { + 'x-ms-wmv': 'wmv', + 'x-mp4-fragmented': 'mp4', + }.get(res, res) + + def urlhandle_detect_ext(url_handle): try: url_handle.headers @@ -1619,7 +1647,7 @@ def urlhandle_detect_ext(url_handle): if e: return e - return getheader('Content-Type').split("/")[1] + return mimetype2ext(getheader('Content-Type')) def age_restricted(content_limit, age_limit):