def update_url_query(url, query):
    """Return `url` with its query string updated from the `query` mapping.

    The existing query string is parsed, `query` is merged over it (keys in
    `query` replace same-named existing parameters) and the URL is put back
    together with the re-encoded query.
    """
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    # The second positional argument is presumably urlencode's `doseq` flag,
    # so that list values coming out of compat_parse_qs are expanded into
    # repeated parameters -- confirm against the compat module.
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse.urlencode(qs, True)))


def encode_base_n(num, n, table=None):
    """Return the non-negative integer `num` written in base `n`.

    Digits are taken from `table`; when it is empty or omitted, the first
    `n` characters of 0-9a-zA-Z are used (so at most base 62).

    Raises ValueError if `n` is smaller than 2 or larger than the digit
    table, or if `num` is negative.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    # Base 0 cannot be divided by and base 1 never shrinks num, so the
    # conversion loop below would crash or spin forever.
    if n < 2:
        raise ValueError('base %d is too small, must be at least 2' % n)
    # Floor division keeps a negative number negative forever
    # (-1 // n == -1), so reject it instead of looping endlessly.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse once.
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))


def decode_packed_codes(code):
    """Expand JavaScript compressed by a P.A.C.K.E.R.-style packer.

    `code` must contain the packer's call tail
    `}('<payload>',<base>,<count>,'<sym0>|<sym1>|...'.split('|')`.
    Each word of the payload that is the base-`base` spelling of an index
    below `count` is replaced with the symbol at that index; words whose
    symbol is empty, or that have no symbol at all, are kept as they are.

    Returns the expanded payload string.
    Raises ValueError if no packed call is found in `code`, or if the
    declared base cannot be handled by encode_base_n.
    """
    mobj = re.search(
        r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
        code)
    if mobj is None:
        raise ValueError('no packed code found')
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Only indices that actually have a (non-empty) symbol need an entry;
    # a declared count larger than the symbol list is tolerated.
    symbol_table = {}
    for index in range(min(count, len(symbols))):
        if symbols[index]:
            symbol_table[encode_base_n(index, base)] = symbols[index]

    # Words without a table entry stand for themselves.
    return re.sub(
        r'\b(\w+)\b',
        lambda word: symbol_table.get(word.group(0), word.group(0)),
        obfuscated_code)