X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fswfinterp.py;h=812ee7e8c54f97b2c6ef1e033f0975f22e5a6b6e;hb=decf2ae400d52e98bcd073a69b24b3dbf3d38d53;hp=64a518fc61e420578a061d83231f87913f8d2960;hpb=e75c24e88907f329c57cf05d729dbf599349bb50;p=youtube-dl diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 64a518fc6..812ee7e8c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -5,7 +5,10 @@ import io import struct import zlib -from .utils import ExtractorError +from .utils import ( + compat_str, + ExtractorError, +) def _extract_tags(file_contents): @@ -39,22 +42,23 @@ def _extract_tags(file_contents): pos += tag_len -class _AVM_Object(object): - def __init__(self, value=None, name_hint=None): - self.value = value - self.name_hint = name_hint +class _AVMClass_Object(object): + def __init__(self, avm_class): + self.avm_class = avm_class def __repr__(self): - nh = '' if self.name_hint is None else (' %s' % self.name_hint) - return 'AVMObject%s(%r)' % (nh, self.value) + return '%s#%x' % (self.avm_class.name, id(self)) -class _AVMClass_Object(object): +class _ScopeDict(dict): def __init__(self, avm_class): + super(_ScopeDict, self).__init__() self.avm_class = avm_class def __repr__(self): - return '%s#%x' % (self.avm_class.name, id(self)) + return '%s__Scope(%s)' % ( + self.avm_class.name, + super(_ScopeDict, self).__repr__()) class _AVMClass(object): @@ -65,11 +69,29 @@ class _AVMClass(object): self.method_idxs = {} self.methods = {} self.method_pyfunctions = {} - self.variables = {} + + self.variables = _ScopeDict(self) def make_object(self): return _AVMClass_Object(self) + def __repr__(self): + return '_AVMClass(%s)' % (self.name) + + def register_methods(self, methods): + self.method_names.update(methods.items()) + self.method_idxs.update(dict( + (idx, name) + for name, idx in methods.items())) + + +class _Multiname(object): + def __init__(self, kind): + self.kind = kind + + def __repr__(self): + return '[MULTINAME kind: 0x%x]' % self.kind + def _read_int(reader): res = 0 @@ -156,10 +178,10 @@ class SWFInterpreter(object): double_count = u30() read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() - constant_strings = [''] + self.constant_strings = [''] for _c in range(1, string_count): s = _read_string(code_reader) - constant_strings.append(s) + self.constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): read_bytes(1) # kind @@ -189,9 +211,9 @@ class SWFInterpreter(object): if kind == 0x07: u30() # namespace_idx name_idx = u30() - self.multinames.append(constant_strings[name_idx]) + self.multinames.append(self.constant_strings[name_idx]) else: - self.multinames.append('[MULTINAME kind: %d]' % kind) + self.multinames.append(_Multiname(kind)) for _c2 in range(MULTINAME_SIZES[kind]): u30() @@ -268,7 +290,11 @@ class SWFInterpreter(object): classes = [] for class_id in range(class_count): name_idx = u30() - classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + + cname = self.multinames[name_idx] + avm_class = _AVMClass(name_idx, cname) + classes.append(avm_class) + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present @@ -279,7 +305,9 @@ class SWFInterpreter(object): u30() # iinit trait_count = u30() for _c2 in range(trait_count): - parse_traits_info() + trait_methods = parse_traits_info() + avm_class.register_methods(trait_methods) + assert len(classes) == class_count self._classes_by_name = dict((c.name, c) for c in classes) @@ -288,10 +316,7 @@ class SWFInterpreter(object): trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() - avm_class.method_names.update(trait_methods.items()) - avm_class.method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) + avm_class.register_methods(trait_methods) # Scripts script_count = u30() @@ -336,12 +361,14 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): + print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: return self._classes_by_name[func_name].make_object() if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %r' % func_name) + raise ExtractorError('Cannot find function %s.%s' % ( + avm_class.name, func_name)) m = avm_class.methods[func_name] def resfunc(args): @@ -353,7 +380,8 @@ class SWFInterpreter(object): print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] - scopes = collections.deque([avm_class.variables]) + scopes = collections.deque([ + self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) @@ -375,10 +403,17 @@ class SWFInterpreter(object): stack.append(value) elif opcode == 44: # pushstring idx = u30() - stack.append(constant_strings[idx]) + stack.append(self.constant_strings[idx]) elif opcode == 48: # pushscope new_scope = stack.pop() scopes.append(new_scope) + elif opcode == 66: # construct + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + res = obj.avm_class.make_object() + stack.append(res) elif opcode == 70: # callproperty index = u30() mname = self.multinames[index] @@ -386,33 +421,46 @@ class SWFInterpreter(object): args = list(reversed( [stack.pop() for _ in range(arg_count)])) obj = stack.pop() - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - elif mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - assert isinstance(obj, list) - res = obj[args[0]:] + + if isinstance(obj, _AVMClass_Object): + func = self.extract_function(obj.avm_class, mname) + res = func(args) stack.append(res) - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, list) - res = args[0].join(obj) + continue + elif isinstance(obj, _ScopeDict): + if mname in obj.avm_class.method_names: + func = self.extract_function(obj.avm_class, mname) + res = func(args) + else: + res = obj[mname] stack.append(res) - elif mname in avm_class.method_pyfunctions: - stack.append(avm_class.method_pyfunctions[mname](args)) - else: - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) + continue + elif isinstance(obj, compat_str): + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + continue + elif isinstance(obj, list): + if mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + res = obj[args[0]:] + stack.append(res) + continue + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + res = args[0].join(obj) + stack.append(res) + continue + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) elif opcode == 72: # returnvalue res = stack.pop() return res @@ -424,11 +472,12 @@ class SWFInterpreter(object): obj = stack.pop() mname = self.multinames[index] + assert isinstance(obj, _AVMClass) construct_method = self.extract_function( - obj.avm_class, mname) + obj, mname) # We do not actually call the constructor for now; # we just pretend it does nothing - stack.append(obj) + stack.append(obj.make_object()) elif opcode == 79: # callpropvoid index = u30() mname = self.multinames[index] @@ -450,7 +499,7 @@ class SWFInterpreter(object): arr.append(stack.pop()) arr = arr[::-1] stack.append(arr) - elif opcode == 94: # findproperty + elif opcode == 93: # findpropstrict index = u30() mname = self.multinames[index] for s in reversed(scopes): @@ -459,6 +508,16 @@ class SWFInterpreter(object): break else: res = scopes[0] + stack.append(res[mname]) + elif opcode == 94: # findproperty + index = u30() + mname = self.multinames[index] + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = avm_class.variables stack.append(res) elif opcode == 96: # getlex index = u30() @@ -468,7 +527,7 @@ class SWFInterpreter(object): scope = s break else: - scope = scopes[0] + scope = avm_class.variables # I cannot find where static variables are initialized # so let's just return None res = scope.get(mname) @@ -477,7 +536,10 @@ class SWFInterpreter(object): index = u30() value = stack.pop() idx = self.multinames[index] + if isinstance(idx, _Multiname): + idx = stack.pop() obj = stack.pop() + print('Setting %r.%r = %r' % (obj, idx, value)) obj[idx] = value elif opcode == 98: # getlocal index = u30() @@ -535,6 +597,10 @@ class SWFInterpreter(object): stack.append(registers[2]) elif opcode == 211: # getlocal_3 stack.append(registers[3]) + elif opcode == 212: # setlocal_0 + registers[0] = stack.pop() + elif opcode == 213: # setlocal_1 + registers[1] = stack.pop() elif opcode == 214: # setlocal_2 registers[2] = stack.pop() elif opcode == 215: # setlocal_3