X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=d7ae5a90a8480f6c825303039b357ff53e7f8d56;hb=5a000b45b339f6516f2a5a3bdfd2869713e8438a;hp=24778807813ec0b6303b6daf849c047b950b98e3;hpb=d74bebd50263a2f744595b9a54825914bc07657b;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 247788078..d7ae5a90a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -673,6 +673,8 @@ class ExtractorError(Exception): expected = True if video_id is not None: msg = video_id + ': ' + msg + if cause: + msg += u' (caused by %r)' % cause if not expected: msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' super(ExtractorError, self).__init__(msg) @@ -799,6 +801,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['User-agent'] req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] del req.headers['Youtubedl-user-agent'] + + if sys.version_info < (2, 7) and '#' in req.get_full_url(): + # Python 2.6 is brain-dead when it comes to fragments + req._Request__original = req._Request__original.partition('#')[0] + req._Request__r_type = req._Request__r_type.partition('#')[0] + return req def http_response(self, req, resp): @@ -884,7 +892,9 @@ def unified_strdate(date_str): '%d/%m/%Y', '%d/%m/%y', '%Y/%m/%d %H:%M:%S', + '%d/%m/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', + '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', '%d.%m.%Y %H.%M', '%Y-%m-%dT%H:%M:%SZ', @@ -1384,14 +1394,16 @@ def check_executable(exe, args=[]): class PagedList(object): - def __init__(self, pagefunc, pagesize): - self._pagefunc = pagefunc - self._pagesize = pagesize - def __len__(self): # This is only useful for tests return len(self.getslice()) + +class OnDemandPagedList(PagedList): + def __init__(self, pagefunc, pagesize): + self._pagefunc = pagefunc + self._pagesize = pagesize + def getslice(self, start=0, end=None): res = [] for pagenum in itertools.count(start // self._pagesize): @@ -1430,6 +1442,35 @@ class PagedList(object): return res +class InAdvancePagedList(PagedList): + def __init__(self, pagefunc, pagecount, pagesize): + self._pagefunc = pagefunc + self._pagecount = pagecount + self._pagesize = pagesize + + def getslice(self, start=0, end=None): + res = [] + start_page = start // self._pagesize + end_page = ( + self._pagecount if end is None else (end // self._pagesize + 1)) + skip_elems = start - start_page * self._pagesize + only_more = None if end is None else end - start + for pagenum in range(start_page, end_page): + page = list(self._pagefunc(pagenum)) + if skip_elems: + page = page[skip_elems:] + skip_elems = None + if only_more is not None: + if len(page) < only_more: + only_more -= len(page) + else: + page = page[:only_more] + res.extend(page) + break + res.extend(page) + return res + + def uppercase_escape(s): unicode_escape = codecs.getdecoder('unicode_escape') return re.sub( @@ -1437,6 +1478,24 @@ def uppercase_escape(s): lambda m: unicode_escape(m.group(0))[0], s) + +def escape_rfc3986(s): + """Escape non-ASCII characters as suggested by RFC 3986""" + if sys.version_info < (3, 0) and isinstance(s, unicode): + s = s.encode('utf-8') + return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") + + +def escape_url(url): + """Escape URL as suggested by RFC 3986""" + url_parsed = compat_urllib_parse_urlparse(url) + return url_parsed._replace( + path=escape_rfc3986(url_parsed.path), + params=escape_rfc3986(url_parsed.params), + query=escape_rfc3986(url_parsed.query), + fragment=escape_rfc3986(url_parsed.fragment) + ).geturl() + try: struct.pack(u'!I', 0) except TypeError: @@ -1516,33 +1575,37 @@ US_RATINGS = { } +def parse_age_limit(s): + if s is None: + return None + m = re.match(r'^(?P\d{1,2})\+?$', s) + return int(m.group('age')) if m else US_RATINGS.get(s, None) + + def strip_jsonp(code): return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) def js_to_json(code): def fix_kv(m): - key = m.group(2) - if key.startswith("'"): - assert key.endswith("'") - assert '"' not in key - key = '"%s"' % key[1:-1] - elif not key.startswith('"'): - key = '"%s"' % key - - value = m.group(4) - if value.startswith("'"): - assert value.endswith("'") - assert '"' not in value - value = '"%s"' % value[1:-1] - - return m.group(1) + key + m.group(3) + value + v = m.group(0) + if v in ('true', 'false', 'null'): + return v + if v.startswith('"'): + return v + if v.startswith("'"): + v = v[1:-1] + v = re.sub(r"\\\\|\\'|\"", lambda m: { + '\\\\': '\\\\', + "\\'": "'", + '"': '\\"', + }[m.group(0)], v) + return '"%s"' % v res = re.sub(r'''(?x) - ([{,]\s*) - ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+) - (:\s*) - ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{) + "(?:[^"\\]*(?:\\\\|\\")?)*"| + '(?:[^'\\]*(?:\\\\|\\')?)*'| + [a-zA-Z_][a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) return res @@ -1571,3 +1634,13 @@ except AttributeError: if ret: raise subprocess.CalledProcessError(ret, p.args, output=output) return output + + +def limit_length(s, length): + """ Add ellipses to overly long strings """ + if s is None: + return None + ELLIPSES = '...' + if len(s) > length: + return s[:length - len(ELLIPSES)] + ELLIPSES + return s