X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fswfinterp.py;h=8ccb64c9d4a2b232c87450fe9648751aeb555d5b;hb=01b4b745749bb92b4a56b4201d699740cbf450ab;hp=1cd2921386f3a86cc12a95545478311e571a268a;hpb=5425626790a46f9b5bdecf4e33bb254c4c2423ea;p=youtube-dl diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 1cd292138..8ccb64c9d 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -5,11 +5,28 @@ import io import struct import zlib -from .utils import ExtractorError - - -def _extract_tags(content): - pos = 0 +from .utils import ( + compat_str, + ExtractorError, +) + + +def _extract_tags(file_contents): + if file_contents[1:3] != b'WS': + raise ExtractorError( + 'Not an SWF file; header is %r' % file_contents[:3]) + if file_contents[:1] == b'C': + content = zlib.decompress(file_contents[8:]) + else: + raise NotImplementedError( + 'Unsupported compression format %r' % + file_contents[:1]) + + # Determine number of bits in framesize rectangle + framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3 + framesize_len = (5 + 4 * framesize_nbits + 7) // 8 + + pos = framesize_len + 2 + 2 while pos < len(content): header16 = struct.unpack('= 0x80) else b'\x00' - return struct.unpack('!i', first_byte + bs) + last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' + return struct.unpack('= 0 resb = reader.read(count) assert len(resb) == count return resb @@ -103,18 +141,8 @@ def _read_byte(reader): class SWFInterpreter(object): def __init__(self, file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - 'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError( - 'Unsupported compression format %r' % - file_contents[:1]) - code_tag = next(tag - for tag_code, tag in _extract_tags(content) + for tag_code, tag in _extract_tags(file_contents) if tag_code == 82) p = code_tag.index(b'\0', 4) + 1 code_reader = io.BytesIO(code_tag[p:]) @@ -139,12 +167,12 @@ class SWFInterpreter(object): for _c in range(1, uint_count): u32() double_count = u30() - read_bytes((double_count - 1) * 8) + read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() - constant_strings = [''] + self.constant_strings = [''] for _c in range(1, string_count): s = _read_string(code_reader) - constant_strings.append(s) + self.constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): read_bytes(1) # kind @@ -174,7 +202,7 @@ class SWFInterpreter(object): if kind == 0x07: u30() # namespace_idx name_idx = u30() - self.multinames.append(constant_strings[name_idx]) + self.multinames.append(self.constant_strings[name_idx]) else: self.multinames.append('[MULTINAME kind: %d]' % kind) for _c2 in range(MULTINAME_SIZES[kind]): @@ -253,7 +281,11 @@ class SWFInterpreter(object): classes = [] for class_id in range(class_count): name_idx = u30() - classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + + cname = self.multinames[name_idx] + avm_class = _AVMClass(name_idx, cname) + classes.append(avm_class) + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present @@ -264,7 +296,9 @@ class SWFInterpreter(object): u30() # iinit trait_count = u30() for _c2 in range(trait_count): - parse_traits_info() + trait_methods = parse_traits_info() + avm_class.register_methods(trait_methods) + assert len(classes) == class_count self._classes_by_name = dict((c.name, c) for c in classes) @@ -273,10 +307,7 @@ class SWFInterpreter(object): trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() - avm_class.method_names.update(trait_methods.items()) - avm_class.method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) + avm_class.register_methods(trait_methods) # Scripts script_count = u30() @@ -321,12 +352,14 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): + print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: return self._classes_by_name[func_name].make_object() if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %r' % func_name) + raise ExtractorError('Cannot find function %s.%s' % ( + avm_class.name, func_name)) m = avm_class.methods[func_name] def resfunc(args): @@ -336,8 +369,10 @@ class SWFInterpreter(object): u30 = lambda: _u30(coder) print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) - registers = ['(this)'] + list(args) + [None] * m.local_count + registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] + scopes = collections.deque([ + self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) @@ -346,16 +381,23 @@ class SWFInterpreter(object): value = stack.pop() if value: coder.seek(coder.tell() + offset) + elif opcode == 18: # iffalse + offset = s24() + value = stack.pop() + if not value: + coder.seek(coder.tell() + offset) elif opcode == 36: # pushbyte v = _read_byte(coder) stack.append(v) + elif opcode == 42: # dup + value = stack[-1] + stack.append(value) elif opcode == 44: # pushstring idx = u30() - stack.append(constant_strings[idx]) + stack.append(self.constant_strings[idx]) elif opcode == 48: # pushscope - # We don't implement the scope register, so we'll just - # ignore the popped value new_scope = stack.pop() + scopes.append(new_scope) elif opcode == 70: # callproperty index = u30() mname = self.multinames[index] @@ -363,33 +405,38 @@ class SWFInterpreter(object): args = list(reversed( [stack.pop() for _ in range(arg_count)])) obj = stack.pop() - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - elif mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - assert isinstance(obj, list) - res = obj[args[0]:] - stack.append(res) - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, list) - res = args[0].join(obj) + + if isinstance(obj, _AVMClass_Object): + func = self.extract_function(obj.avm_class, mname) + res = func(args) stack.append(res) - elif mname in avm_class.method_pyfunctions: - stack.append(avm_class.method_pyfunctions[mname](args)) - else: - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) + continue + elif isinstance(obj, compat_str): + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + continue + elif isinstance(obj, list): + if mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + res = obj[args[0]:] + stack.append(res) + continue + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + res = args[0].join(obj) + stack.append(res) + continue + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) elif opcode == 72: # returnvalue res = stack.pop() return res @@ -401,11 +448,12 @@ class SWFInterpreter(object): obj = stack.pop() mname = self.multinames[index] + assert isinstance(obj, _AVMClass) construct_method = self.extract_function( - obj.avm_class, mname) + obj, mname) # We do not actually call the constructor for now; # we just pretend it does nothing - stack.append(obj) + stack.append(obj.make_object()) elif opcode == 79: # callpropvoid index = u30() mname = self.multinames[index] @@ -430,17 +478,35 @@ class SWFInterpreter(object): elif opcode == 93: # findpropstrict index = u30() mname = self.multinames[index] - res = self.extract_function(avm_class, mname) - stack.append(res) + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = scopes[0] + stack.append(res[mname]) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] - res = avm_class.variables.get(mname) + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = avm_class.variables stack.append(res) elif opcode == 96: # getlex index = u30() mname = self.multinames[index] - res = avm_class.variables.get(mname, None) + for s in reversed(scopes): + if mname in s: + scope = s + break + else: + scope = avm_class.variables + # I cannot find where static variables are initialized + # so let's just return None + res = scope.get(mname) stack.append(res) elif opcode == 97: # setproperty index = u30() @@ -468,10 +534,24 @@ class SWFInterpreter(object): obj = stack.pop() assert isinstance(obj, list) stack.append(obj[idx]) + elif opcode == 115: # convert_ + value = stack.pop() + intvalue = int(value) + stack.append(intvalue) elif opcode == 128: # coerce u30() elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 160: # add + value2 = stack.pop() + value1 = stack.pop() + res = value1 + value2 + stack.append(res) + elif opcode == 161: # subtract + value2 = stack.pop() + value1 = stack.pop() + res = value1 - value2 + stack.append(res) elif opcode == 164: # modulo value2 = stack.pop() value1 = stack.pop() @@ -490,6 +570,10 @@ class SWFInterpreter(object): stack.append(registers[2]) elif opcode == 211: # getlocal_3 stack.append(registers[3]) + elif opcode == 212: # setlocal_0 + registers[0] = stack.pop() + elif opcode == 213: # setlocal_1 + registers[1] = stack.pop() elif opcode == 214: # setlocal_2 registers[2] = stack.pop() elif opcode == 215: # setlocal_3