X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=ef44b99a55115c6ff0da334d4bb9e62477ba388a;hb=ea69624992c711b40781fa573a71fabbbf488ae1;hp=16bf49408c5228202cd43d9f7265d77c1c5122dc;hpb=2ae2ffda5eae9c64d40d2fec839ba5deb07717f2;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 16bf49408..ef44b99a5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -22,7 +22,6 @@ import locale import math import operator import os -import pipes import platform import random import re @@ -36,9 +35,11 @@ import xml.etree.ElementTree import zlib from .compat import ( + compat_HTMLParseError, compat_HTMLParser, compat_basestring, compat_chr, + compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, compat_expanduser, compat_html_entities, @@ -159,6 +160,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M', '%b %d %Y at %H:%M', '%b %d %Y at %H:%M:%S', + '%B %d %Y at %H:%M', + '%B %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) @@ -365,9 +368,9 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True): retlist = [] for m in re.finditer(r'''(?xs) <([a-zA-Z0-9:._-]+) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? \s+%s=['"]?%s['"]? - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? \s*> (?P.*?) @@ -409,8 +412,12 @@ def extract_attributes(html_element): but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ parser = HTMLAttributeParser() - parser.feed(html_element) - parser.close() + try: + parser.feed(html_element) + parser.close() + # Older Python may throw HTMLParseError in case of malformed HTML + except compat_HTMLParseError: + pass return parser.attrs @@ -592,7 +599,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): @@ -859,8 +866,8 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # expected HTTP responses to meet HTTP/1.0 or later (see also # https://github.com/rg3/youtube-dl/issues/6727) if sys.version_info < (3, 0): - kwargs[b'strict'] = True - hc = http_class(*args, **kwargs) + kwargs['strict'] = True + hc = http_class(*args, **compat_kwargs(kwargs)) source_address = ydl_handler._params.get('source_address') if source_address is not None: sa = (source_address, 0) @@ -1324,24 +1331,24 @@ def _windows_write_string(s, out): if fileno not in WIN_OUTPUT_IDS: return False - GetStdHandle = ctypes.WINFUNCTYPE( + GetStdHandle = compat_ctypes_WINFUNCTYPE( ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( - (b'GetStdHandle', ctypes.windll.kernel32)) + ('GetStdHandle', ctypes.windll.kernel32)) h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) - WriteConsoleW = ctypes.WINFUNCTYPE( + WriteConsoleW = compat_ctypes_WINFUNCTYPE( ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), - ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32)) + ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) written = ctypes.wintypes.DWORD(0) - GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32)) + GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32)) FILE_TYPE_CHAR = 0x0002 FILE_TYPE_REMOTE = 0x8000 - GetConsoleMode = ctypes.WINFUNCTYPE( + GetConsoleMode = compat_ctypes_WINFUNCTYPE( ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.POINTER(ctypes.wintypes.DWORD))( - (b'GetConsoleMode', ctypes.windll.kernel32)) + ('GetConsoleMode', ctypes.windll.kernel32)) INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value def not_a_console(handle): @@ -1530,7 +1537,7 @@ def shell_quote(args): if isinstance(a, bytes): # We may get a filename encoded with 'encodeFilename' a = a.decode(encoding) - quoted_args.append(pipes.quote(a)) + quoted_args.append(compat_shlex_quote(a)) return ' '.join(quoted_args) @@ -1811,6 +1818,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): return default +def bool_or_none(v, default=None): + return v if isinstance(v, bool) else default + + def strip_or_none(v): return None if v is None else v.strip() @@ -1827,10 +1838,20 @@ def parse_duration(s): days, hours, mins, secs, ms = m.groups() else: m = re.match( - r'''(?ix)(?:P?T)? + r'''(?ix)(?:P? + (?: + [0-9]+\s*y(?:ears?)?\s* + )? + (?: + [0-9]+\s*m(?:onths?)?\s* + )? + (?: + [0-9]+\s*w(?:eeks?)?\s* + )? (?: (?P[0-9]+)\s*d(?:ays?)?\s* )? + T)? (?: (?P[0-9]+)\s*h(?:ours?)?\s* )? @@ -1925,7 +1946,7 @@ class PagedList(object): class OnDemandPagedList(PagedList): - def __init__(self, pagefunc, pagesize, use_cache=False): + def __init__(self, pagefunc, pagesize, use_cache=True): self._pagefunc = pagefunc self._pagesize = pagesize self._use_cache = use_cache @@ -2246,7 +2267,7 @@ def js_to_json(code): "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| {comment}|,(?={skip}[\]}}])| - [a-zA-Z_][.a-zA-Z_0-9]*| + (?:(?