1 from __future__ import unicode_literals
16 def _extract_tags(file_contents):
17 if file_contents[1:3] != b'WS':
19 'Not an SWF file; header is %r' % file_contents[:3])
20 if file_contents[:1] == b'C':
21 content = zlib.decompress(file_contents[8:])
23 raise NotImplementedError(
24 'Unsupported compression format %r' %
27 # Determine number of bits in framesize rectangle
28 framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
29 framesize_len = (5 + 4 * framesize_nbits + 7) // 8
31 pos = framesize_len + 2 + 2
32 while pos < len(content):
33 header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
35 tag_code = header16 >> 6
36 tag_len = header16 & 0x3f
38 tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
40 assert pos + tag_len <= len(content), \
41 ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
42 % (tag_code, pos, tag_len, len(content)))
43 yield (tag_code, content[pos:pos + tag_len])
47 class _AVMClass_Object(object):
48 def __init__(self, avm_class):
49 self.avm_class = avm_class
52 return '%s#%x' % (self.avm_class.name, id(self))
55 class _ScopeDict(dict):
56 def __init__(self, avm_class):
57 super(_ScopeDict, self).__init__()
58 self.avm_class = avm_class
61 return '%s__Scope(%s)' % (
63 super(_ScopeDict, self).__repr__())
66 class _AVMClass(object):
67 def __init__(self, name_idx, name, static_properties=None):
68 self.name_idx = name_idx
70 self.method_names = {}
73 self.method_pyfunctions = {}
74 self.static_properties = static_properties if static_properties else {}
76 self.variables = _ScopeDict(self)
def make_object(self):
    """Return a new ``_AVMClass_Object`` whose ``avm_class`` is this class."""
    instance = _AVMClass_Object(self)
    return instance
83 return '_AVMClass(%s)' % (self.name)
85 def register_methods(self, methods):
86 self.method_names.update(methods.items())
87 self.method_idxs.update(dict(
89 for name, idx in methods.items()))
92 class _Multiname(object):
93 def __init__(self, kind):
97 return '[MULTINAME kind: 0x%x]' % self.kind
100 def _read_int(reader):
106 b = compat_struct_unpack('<B', buf)[0]
107 res = res | ((b & 0x7f) << shift)
115 res = _read_int(reader)
116 assert res & 0xf0000000 == 0
122 v = _read_int(reader)
123 if v & 0x80000000 != 0:
124 v = - ((v ^ 0xffffffff) + 1)
131 last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
132 return compat_struct_unpack('<i', bs + last_byte)[0]
135 def _read_string(reader):
137 resb = reader.read(slen)
138 assert len(resb) == slen
139 return resb.decode('utf-8')
142 def _read_bytes(count, reader):
144 resb = reader.read(count)
145 assert len(resb) == count
149 def _read_byte(reader):
150 resb = _read_bytes(1, reader=reader)
151 res = compat_struct_unpack('<B', resb)[0]
# Module-level class objects for the built-ins the interpreter knows about.
# They carry a placeholder string in the name_idx slot, where classes parsed
# from the file carry an index into the multiname table.
StringClass = _AVMClass(name_idx='(no name idx)', name='String')
ByteArrayClass = _AVMClass(name_idx='(no name idx)', name='ByteArray')
TimerClass = _AVMClass(name_idx='(no name idx)', name='Timer')
# TimerEvent additionally exposes one static property, TIMER.
TimerEventClass = _AVMClass(
    name_idx='(no name idx)',
    name='TimerEvent',
    static_properties={'TIMER': 'timer'})
160 StringClass.name: StringClass,
161 ByteArrayClass.name: ByteArrayClass,
162 TimerClass.name: TimerClass,
163 TimerEventClass.name: TimerEventClass,
167 class _Undefined(object):
170 __nonzero__ = __bool__
179 undefined = _Undefined()
182 class SWFInterpreter(object):
183 def __init__(self, file_contents):
184 self._patched_functions = {
185 (TimerClass, 'addEventListener'): lambda params: undefined,
188 for tag_code, tag in _extract_tags(file_contents)
190 p = code_tag.index(b'\0', 4) + 1
191 code_reader = io.BytesIO(code_tag[p:])
193 # Parse ABC (AVM2 ByteCode)
195 # Define a couple convenience methods
196 u30 = lambda *args: _u30(*args, reader=code_reader)
197 s32 = lambda *args: _s32(*args, reader=code_reader)
198 u32 = lambda *args: _u32(*args, reader=code_reader)
199 read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
200 read_byte = lambda *args: _read_byte(*args, reader=code_reader)
202 # minor_version + major_version
207 self.constant_ints = [0]
208 for _c in range(1, int_count):
209 self.constant_ints.append(s32())
210 self.constant_uints = [0]
212 for _c in range(1, uint_count):
213 self.constant_uints.append(u32())
215 read_bytes(max(0, (double_count - 1)) * 8)
217 self.constant_strings = ['']
218 for _c in range(1, string_count):
219 s = _read_string(code_reader)
220 self.constant_strings.append(s)
221 namespace_count = u30()
222 for _c in range(1, namespace_count):
226 for _c in range(1, ns_set_count):
228 for _c2 in range(count):
230 multiname_count = u30()
239 0x0e: 2, # MultinameA
240 0x1b: 1, # MultinameL
241 0x1c: 1, # MultinameLA
243 self.multinames = ['']
244 for _c in range(1, multiname_count):
246 assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
248 u30() # namespace_idx
250 self.multinames.append(self.constant_strings[name_idx])
254 self.multinames.append(self.constant_strings[name_idx])
256 self.multinames.append(_Multiname(kind))
257 for _c2 in range(MULTINAME_SIZES[kind]):
262 MethodInfo = collections.namedtuple(
264 ['NEED_ARGUMENTS', 'NEED_REST'])
266 for method_id in range(method_count):
269 for _ in range(param_count):
271 u30() # name index (always 0 for youtube)
273 if flags & 0x08 != 0:
276 for c in range(option_count):
279 if flags & 0x80 != 0:
280 # Param names present
281 for _ in range(param_count):
283 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
284 method_infos.append(mi)
287 metadata_count = u30()
288 for _c in range(metadata_count):
291 for _c2 in range(item_count):
295 def parse_traits_info():
296 trait_name_idx = u30()
297 kind_full = read_byte()
298 kind = kind_full & 0x0f
299 attrs = kind_full >> 4
302 if kind == 0x00: # Slot
304 u30() # type_name_idx
308 elif kind == 0x06: # Const
310 u30() # type_name_idx
315 if vkind == 0x03: # Constant_Int
316 value = self.constant_ints[vindex]
317 elif vkind == 0x04: # Constant_UInt
318 value = self.constant_uints[vindex]
320 return {}, None # Ignore silently for now
321 constants = {self.multinames[trait_name_idx]: value}
322 elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter
325 methods[self.multinames[trait_name_idx]] = method_idx
326 elif kind == 0x04: # Class
329 elif kind == 0x05: # Function
332 methods[function_idx] = self.multinames[trait_name_idx]
334 raise ExtractorError('Unsupported trait kind %d' % kind)
336 if attrs & 0x4 != 0: # Metadata present
337 metadata_count = u30()
338 for _c3 in range(metadata_count):
339 u30() # metadata index
341 return methods, constants
346 for class_id in range(class_count):
349 cname = self.multinames[name_idx]
350 avm_class = _AVMClass(name_idx, cname)
351 classes.append(avm_class)
353 u30() # super_name idx
355 if flags & 0x08 != 0: # Protected namespace is present
356 u30() # protected_ns_idx
358 for _c2 in range(intrf_count):
362 for _c2 in range(trait_count):
363 trait_methods, trait_constants = parse_traits_info()
364 avm_class.register_methods(trait_methods)
366 avm_class.constants.update(trait_constants)
368 assert len(classes) == class_count
369 self._classes_by_name = dict((c.name, c) for c in classes)
371 for avm_class in classes:
372 avm_class.cinit_idx = u30()
374 for _c2 in range(trait_count):
375 trait_methods, trait_constants = parse_traits_info()
376 avm_class.register_methods(trait_methods)
378 avm_class.constants.update(trait_constants)
382 for _c in range(script_count):
385 for _c2 in range(trait_count):
389 method_body_count = u30()
390 Method = collections.namedtuple('Method', ['code', 'local_count'])
391 self._all_methods = []
392 for _c in range(method_body_count):
396 u30() # init_scope_depth
397 u30() # max_scope_depth
399 code = read_bytes(code_length)
400 m = Method(code, local_count)
401 self._all_methods.append(m)
402 for avm_class in classes:
403 if method_idx in avm_class.method_idxs:
404 avm_class.methods[avm_class.method_idxs[method_idx]] = m
405 exception_count = u30()
406 for _c2 in range(exception_count):
413 for _c2 in range(trait_count):
416 assert p + code_reader.tell() == len(code_tag)
def patch_function(self, avm_class, func_name, f):
    """Register ``f`` as the override for ``func_name`` on ``avm_class``.

    The override is stored in ``self._patched_functions`` under the
    ``(avm_class, func_name)`` pair, which is the same key that
    ``extract_function`` looks up.  An entry already stored for that pair
    is replaced.
    """
    override_key = (avm_class, func_name)
    self._patched_functions[override_key] = f
421 def extract_class(self, class_name, call_cinit=True):
423 res = self._classes_by_name[class_name]
425 raise ExtractorError('Class %r not found' % class_name)
427 if call_cinit and hasattr(res, 'cinit_idx'):
428 res.register_methods({'$cinit': res.cinit_idx})
429 res.methods['$cinit'] = self._all_methods[res.cinit_idx]
430 cinit = self.extract_function(res, '$cinit')
435 def extract_function(self, avm_class, func_name):
436 p = self._patched_functions.get((avm_class, func_name))
439 if func_name in avm_class.method_pyfunctions:
440 return avm_class.method_pyfunctions[func_name]
441 if func_name in self._classes_by_name:
442 return self._classes_by_name[func_name].make_object()
443 if func_name not in avm_class.methods:
444 raise ExtractorError('Cannot find function %s.%s' % (
445 avm_class.name, func_name))
446 m = avm_class.methods[func_name]
450 coder = io.BytesIO(m.code)
451 s24 = lambda: _s24(coder)
452 u30 = lambda: _u30(coder)
454 registers = [avm_class.variables] + list(args) + [None] * m.local_count
456 scopes = collections.deque([
457 self._classes_by_name, avm_class.constants, avm_class.variables])
459 opcode = _read_byte(coder)
460 if opcode == 9: # label
461 pass # Spec says: "Do nothing."
462 elif opcode == 16: # jump
464 coder.seek(coder.tell() + offset)
465 elif opcode == 17: # iftrue
469 coder.seek(coder.tell() + offset)
470 elif opcode == 18: # iffalse
474 coder.seek(coder.tell() + offset)
475 elif opcode == 19: # ifeq
480 coder.seek(coder.tell() + offset)
481 elif opcode == 20: # ifne
486 coder.seek(coder.tell() + offset)
487 elif opcode == 21: # iflt
492 coder.seek(coder.tell() + offset)
493 elif opcode == 32: # pushnull
495 elif opcode == 33: # pushundefined
496 stack.append(undefined)
497 elif opcode == 36: # pushbyte
498 v = _read_byte(coder)
500 elif opcode == 37: # pushshort
503 elif opcode == 38: # pushtrue
505 elif opcode == 39: # pushfalse
507 elif opcode == 40: # pushnan
508 stack.append(float('NaN'))
509 elif opcode == 42: # dup
512 elif opcode == 44: # pushstring
514 stack.append(self.constant_strings[idx])
515 elif opcode == 48: # pushscope
516 new_scope = stack.pop()
517 scopes.append(new_scope)
518 elif opcode == 66: # construct
520 args = list(reversed(
521 [stack.pop() for _ in range(arg_count)]))
523 res = obj.avm_class.make_object()
525 elif opcode == 70: # callproperty
527 mname = self.multinames[index]
529 args = list(reversed(
530 [stack.pop() for _ in range(arg_count)]))
533 if obj == StringClass:
534 if mname == 'String':
535 assert len(args) == 1
536 assert isinstance(args[0], (
537 int, compat_str, _Undefined))
538 if args[0] == undefined:
541 res = compat_str(args[0])
545 raise NotImplementedError(
546 'Function String.%s is not yet implemented'
548 elif isinstance(obj, _AVMClass_Object):
549 func = self.extract_function(obj.avm_class, mname)
553 elif isinstance(obj, _AVMClass):
554 func = self.extract_function(obj, mname)
558 elif isinstance(obj, _ScopeDict):
559 if mname in obj.avm_class.method_names:
560 func = self.extract_function(obj.avm_class, mname)
566 elif isinstance(obj, compat_str):
568 assert len(args) == 1
569 assert isinstance(args[0], compat_str)
573 res = obj.split(args[0])
576 elif mname == 'charCodeAt':
577 assert len(args) <= 1
578 idx = 0 if len(args) == 0 else args[0]
579 assert isinstance(idx, int)
583 elif isinstance(obj, list):
585 assert len(args) == 1
586 assert isinstance(args[0], int)
590 elif mname == 'join':
591 assert len(args) == 1
592 assert isinstance(args[0], compat_str)
593 res = args[0].join(obj)
596 raise NotImplementedError(
597 'Unsupported property %r on %r'
599 elif opcode == 71: # returnvoid
602 elif opcode == 72: # returnvalue
605 elif opcode == 73: # constructsuper
606 # Not yet implemented, just hope it works without it
608 args = list(reversed(
609 [stack.pop() for _ in range(arg_count)]))
611 elif opcode == 74: # constructproperty
614 args = list(reversed(
615 [stack.pop() for _ in range(arg_count)]))
618 mname = self.multinames[index]
619 assert isinstance(obj, _AVMClass)
621 # We do not actually call the constructor for now;
622 # we just pretend it does nothing
623 stack.append(obj.make_object())
624 elif opcode == 79: # callpropvoid
626 mname = self.multinames[index]
628 args = list(reversed(
629 [stack.pop() for _ in range(arg_count)]))
631 if isinstance(obj, _AVMClass_Object):
632 func = self.extract_function(obj.avm_class, mname)
634 assert res is undefined
636 if isinstance(obj, _ScopeDict):
637 assert mname in obj.avm_class.method_names
638 func = self.extract_function(obj.avm_class, mname)
640 assert res is undefined
642 if mname == 'reverse':
643 assert isinstance(obj, list)
646 raise NotImplementedError(
647 'Unsupported (void) property %r on %r'
649 elif opcode == 86: # newarray
652 for i in range(arg_count):
653 arr.append(stack.pop())
656 elif opcode == 93: # findpropstrict
658 mname = self.multinames[index]
659 for s in reversed(scopes):
665 if mname not in res and mname in _builtin_classes:
666 stack.append(_builtin_classes[mname])
668 stack.append(res[mname])
669 elif opcode == 94: # findproperty
671 mname = self.multinames[index]
672 for s in reversed(scopes):
677 res = avm_class.variables
679 elif opcode == 96: # getlex
681 mname = self.multinames[index]
682 for s in reversed(scopes):
687 scope = avm_class.variables
691 elif mname in _builtin_classes:
692 res = _builtin_classes[mname]
694 # Assume uninitialized
698 elif opcode == 97: # setproperty
701 idx = self.multinames[index]
702 if isinstance(idx, _Multiname):
706 elif opcode == 98: # getlocal
708 stack.append(registers[index])
709 elif opcode == 99: # setlocal
712 registers[index] = value
713 elif opcode == 102: # getproperty
715 pname = self.multinames[index]
716 if pname == 'length':
718 assert isinstance(obj, (compat_str, list))
719 stack.append(len(obj))
720 elif isinstance(pname, compat_str): # Member access
722 if isinstance(obj, _AVMClass):
723 res = obj.static_properties[pname]
727 assert isinstance(obj, (dict, _ScopeDict)),\
728 'Accessing member %r on %r' % (pname, obj)
729 res = obj.get(pname, undefined)
731 else: # Assume attribute access
733 assert isinstance(idx, int)
735 assert isinstance(obj, list)
736 stack.append(obj[idx])
737 elif opcode == 104: # initproperty
740 idx = self.multinames[index]
741 if isinstance(idx, _Multiname):
745 elif opcode == 115: # convert_
747 intvalue = int(value)
748 stack.append(intvalue)
749 elif opcode == 128: # coerce
751 elif opcode == 130: # coerce_a
753 # um, yes, it's any value
755 elif opcode == 133: # coerce_s
756 assert isinstance(stack[-1], (type(None), compat_str))
757 elif opcode == 147: # decrement
759 assert isinstance(value, int)
760 stack.append(value - 1)
761 elif opcode == 149: # typeof
764 _Undefined: 'undefined',
765 compat_str: 'String',
769 elif opcode == 160: # add
772 res = value1 + value2
774 elif opcode == 161: # subtract
777 res = value1 - value2
779 elif opcode == 162: # multiply
782 res = value1 * value2
784 elif opcode == 164: # modulo
787 res = value1 % value2
789 elif opcode == 168: # bitand
792 assert isinstance(value1, int)
793 assert isinstance(value2, int)
794 res = value1 & value2
796 elif opcode == 171: # equals
799 result = value1 == value2
801 elif opcode == 175: # greaterequals
804 result = value1 >= value2
806 elif opcode == 192: # increment_i
808 assert isinstance(value, int)
809 stack.append(value + 1)
810 elif opcode == 208: # getlocal_0
811 stack.append(registers[0])
812 elif opcode == 209: # getlocal_1
813 stack.append(registers[1])
814 elif opcode == 210: # getlocal_2
815 stack.append(registers[2])
816 elif opcode == 211: # getlocal_3
817 stack.append(registers[3])
818 elif opcode == 212: # setlocal_0
819 registers[0] = stack.pop()
820 elif opcode == 213: # setlocal_1
821 registers[1] = stack.pop()
822 elif opcode == 214: # setlocal_2
823 registers[2] = stack.pop()
824 elif opcode == 215: # setlocal_3
825 registers[3] = stack.pop()
827 raise NotImplementedError(
828 'Unsupported opcode %d' % opcode)
830 avm_class.method_pyfunctions[func_name] = resfunc