- def assign(val):
- local_vars[ass_m.group('out')] = val
- return val
- expr = ass_m.group('expr')
- elif stmt.startswith(u'return '):
- assign = lambda v: v
- expr = stmt[len(u'return '):]
- else:
- raise ExtractorError(
- u'Cannot determine left side of statement in %r' % stmt)
-
- v = interpret_expression(expr, local_vars, allow_recursion)
- return assign(v)
-
def interpret_expression(expr, local_vars, allow_recursion):
    """Evaluate a single expression of the supported JavaScript subset.

    Recognised forms, tried in this order:

    * a decimal integer literal,
    * a purely alphabetic variable name (looked up in ``local_vars``),
    * ``name.member`` where member is ``split("")``, ``join("")``,
      ``length``, ``reverse()`` or ``slice(<expr>)``,
    * ``name[<expr>]`` indexing,
    * ``<expr>%<expr>`` modulo,
    * ``func(arg,...)`` where every argument is an integer literal or a
      variable name; the function is extracted on first use and cached
      in ``functions``.

    ``allow_recursion`` is passed on (decremented for slice and index
    sub-expressions); it is not checked in this function.

    Raises ExtractorError when ``expr`` matches none of the forms above,
    and KeyError when a referenced variable is not in ``local_vars``.
    """
    # Literals and bare variable names need no further parsing.
    if expr.isdigit():
        return int(expr)
    if expr.isalpha():
        return local_vars[expr]

    member_m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
    if member_m:
        member = member_m.group('member')
        # The variable is resolved before the member is inspected, so an
        # unknown variable raises KeyError even for unsupported members.
        target = local_vars[member_m.group('in')]
        simple_members = {
            'split("")': list,
            'join("")': u''.join,
            'length': len,
            'reverse()': lambda seq: seq[::-1],
        }
        if member in simple_members:
            return simple_members[member](target)
        slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
        if slice_m:
            start = interpret_expression(
                slice_m.group('idx'), local_vars, allow_recursion - 1)
            return target[start:]
        # Any other member falls through to the remaining patterns.

    index_m = re.match(r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
    if index_m:
        container = local_vars[index_m.group('in')]
        position = interpret_expression(
            index_m.group('idx'), local_vars, allow_recursion - 1)
        return container[position]

    modulo_m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
    if modulo_m:
        left = interpret_expression(
            modulo_m.group('a'), local_vars, allow_recursion)
        right = interpret_expression(
            modulo_m.group('b'), local_vars, allow_recursion)
        return left % right

    call_m = re.match(
        r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
    if call_m:
        fname = call_m.group('func')
        # Extract each JS function at most once and reuse it afterwards.
        if fname not in functions:
            functions[fname] = extract_function(fname)
        argvals = []
        for token in call_m.group('args').split(','):
            if token.isdigit():
                argvals.append(int(token))
            else:
                argvals.append(local_vars[token])
        return functions[fname](argvals)

    raise ExtractorError(u'Unsupported JS expression %r' % expr)
-
def extract_function(funcname):
    """Build a Python callable that interprets the JS function ``funcname``.

    The definition is searched for in ``jscode`` using the pattern
    ``function <name>(<args>){<code>}``, where the argument list consists
    of lowercase letters and commas and the body contains no ``}``.

    Returns a callable taking one list of argument values. On each call
    the values are bound to the JS parameter names, every ``;``-separated
    statement of the body is run through ``interpret_statement``, and the
    result of the last statement is returned.

    Raises ExtractorError if no matching definition exists in ``jscode``.
    """
    func_m = re.search(
        r'function ' + re.escape(funcname) +
        r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        jscode)
    # re.search returns None when nothing matches; report that explicitly
    # instead of failing with an AttributeError on the .group() call below.
    if func_m is None:
        raise ExtractorError(u'Could not find JS function %r' % funcname)
    argnames = func_m.group('args').split(',')
    # The statement list never changes, so split it once rather than on
    # every invocation of the returned callable.
    statements = func_m.group('code').split(';')

    def resf(args):
        local_vars = dict(zip(argnames, args))
        # The pattern requires a non-empty body, so the loop runs at least
        # once and ``res`` is always bound.
        for stmt in statements:
            res = interpret_statement(stmt, local_vars)
        return res
    return resf
-
- initial_function = extract_function(funcname)
- return lambda s: initial_function([s])
-
- def _parse_sig_swf(self, file_contents):
- if file_contents[1:3] != b'WS':
- raise ExtractorError(
- u'Not an SWF file; header is %r' % file_contents[:3])
- if file_contents[:1] == b'C':
- content = zlib.decompress(file_contents[8:])
- else:
- raise NotImplementedError(u'Unsupported compression format %r' %
- file_contents[:1])
-
def extract_tags(content):
    """Yield ``(tag_code, tag_body)`` for each tag record in ``content``.

    Every record starts with a little-endian 16-bit header: the upper
    ten bits are the tag code, the lower six bits the body length. A
    length field of 0x3f means the real length follows as a
    little-endian 32-bit integer.

    Raises AssertionError if a record claims to extend past the end of
    ``content``.
    """
    offset = 0
    while offset < len(content):
        (record_header,) = struct.unpack('<H', content[offset:offset + 2])
        offset += 2
        code = record_header >> 6
        length = record_header & 0x3f
        if length == 0x3f:
            # Long form: the six-bit field is only a marker.
            (length,) = struct.unpack('<I', content[offset:offset + 4])
            offset += 4
        body_end = offset + length
        assert body_end <= len(content)
        yield (code, content[offset:body_end])
        offset = body_end
-
- code_tag = next(tag
- for tag_code, tag in extract_tags(content)
- if tag_code == 82)
- p = code_tag.index(b'\0', 4) + 1
-
- # Parse ABC (AVM2 ByteCode)
def read_int(data=None, pos=None):
    """Decode one variable-length unsigned integer.

    The encoding stores seven payload bits per byte, least-significant
    group first; a set high bit means another byte follows. At most five
    bytes are consumed.

    Two calling modes:

    * ``data`` has a ``read`` method: bytes are pulled from it one at a
      time, ``pos`` must be None, and only the integer is returned.
    * otherwise ``data`` is indexed as a byte string starting at ``pos``
      and ``(value, next_pos)`` is returned. A missing ``data`` defaults
      to ``code_tag`` and a missing ``pos`` to ``p`` from the enclosing
      scope.
    """
    if hasattr(data, 'read'):
        assert pos is None

        value = 0
        # Shifts 0, 7, 14, 21, 28: one per byte, five bytes at most.
        for shift in range(0, 35, 7):
            chunk = data.read(1)
            assert len(chunk) == 1
            (octet,) = struct.unpack('<B', chunk)
            value |= (octet & 0x7f) << shift
            if not octet & 0x80:
                break
        return value

    if data is None:
        data = code_tag
    if pos is None:
        pos = p
    value = 0
    for shift in range(0, 35, 7):
        (octet,) = struct.unpack('<B', data[pos:pos + 1])
        pos += 1
        value |= (octet & 0x7f) << shift
        if not octet & 0x80:
            break
    return (value, pos)
- assert read_int(b'\x00', 0) == (0, 1)
- assert read_int(b'\x10', 0) == (16, 1)
- assert read_int(b'\x34', 0) == (0x34, 1)
- assert read_int(b'\xb4\x12', 0) == (0x12 * 0x80 + 0x34, 2)
- assert read_int(b'\xff\xff\xff\x00', 0) == (0x1fffff, 4)
-
def u30(*args, **kwargs):
    """Read an integer via ``read_int`` and check its upper bits are clear.

    All arguments are forwarded to ``read_int`` and its result is
    returned unchanged, either a bare integer or a ``(value, pos)``
    tuple. Raises AssertionError if any of the bits selected by
    0xf0000000 is set in the decoded value.
    """
    result = read_int(*args, **kwargs)
    # read_int returns a tuple in buffer mode and a plain int in stream mode.
    value = result[0] if isinstance(result, tuple) else result
    assert value & 0xf0000000 == 0
    return result
- u32 = read_int
-
def s32(data=None, pos=None):
    """Read a variable-length integer and interpret it as signed 32-bit.

    Decodes with ``read_int(data, pos)`` and, when bit 31 of the result
    is set, converts the value to its negative two's-complement
    counterpart. Returns ``(value, next_pos)``.
    """
    raw, next_pos = read_int(data, pos)
    if raw & 0x80000000:
        # Flip the low 32 bits, add one, negate: two's-complement decode.
        raw = -((raw ^ 0xffffffff) + 1)
    return (raw, next_pos)
- assert s32(b'\xff\xff\xff\xff\x0f', 0) == (-1, 5)
-
def string():
    """Read a length-prefixed UTF-8 string at the current position.

    The length is read with ``u30()`` using its default data and
    position; that many bytes of ``code_tag`` are then decoded as UTF-8.
    Returns ``(text, next_pos)``, where ``next_pos`` is the offset just
    past the string.
    """
    length, start = u30()
    end = start + length
    return (code_tag[start:end].decode('utf-8'), end)
-
def read_byte(data=None, pos=None):
    """Read one unsigned byte and return ``(value, pos + 1)``.

    A missing ``data`` defaults to ``code_tag`` and a missing ``pos`` to
    ``p`` from the enclosing scope.
    """
    source = code_tag if data is None else data
    offset = p if pos is None else pos
    (value,) = struct.unpack('<B', source[offset:offset + 1])
    return (value, offset + 1)
-
- # minor_version + major_version
- p += 2 + 2
-
- # Constant pool
- int_count, p = u30()
- for _c in range(1, int_count):
- _, p = s32()
- uint_count, p = u30()
- for _c in range(1, uint_count):
- _, p = u32()
- double_count, p = u30()
- p += (double_count-1) * 8
- string_count, p = u30()
- constant_strings = [u'']
- for _c in range(1, string_count):
- s, p = string()
- constant_strings.append(s)
- namespace_count, p = u30()
- for _c in range(1, namespace_count):
- p += 1 # kind
- _, p = u30() # name
- ns_set_count, p = u30()
- for _c in range(1, ns_set_count):
- count, p = u30()
- for _c2 in range(count):
- _, p = u30()
- multiname_count, p = u30()
- MULTINAME_SIZES = {
- 0x07: 2, # QName
- 0x0d: 2, # QNameA
- 0x0f: 1, # RTQName
- 0x10: 1, # RTQNameA
- 0x11: 0, # RTQNameL
- 0x12: 0, # RTQNameLA
- 0x09: 2, # Multiname
- 0x0e: 2, # MultinameA
- 0x1b: 1, # MultinameL
- 0x1c: 1, # MultinameLA
- }
- multinames = [u'']
- for _c in range(1, multiname_count):
- kind, p = u30()
- assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
- if kind == 0x07:
- namespace_idx, p = u30()
- name_idx, p = u30()
- multinames.append(constant_strings[name_idx])