[youtube] Move swfinterp into its own file
author Philipp Hagemeister <phihag@phihag.de>
Fri, 18 Jul 2014 08:24:28 +0000 (10:24 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 18 Jul 2014 08:24:28 +0000 (10:24 +0200)
test/test_youtube_signature.py
youtube_dl/extractor/youtube.py
youtube_dl/swfinterp.py [new file with mode: 0644]

diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index d95533959481df9b458f56c14d4857d3c5230252..e443e0be886fd49daaa63d091f20f5abb7f988d9 100644 (file)
@@ -45,6 +45,12 @@ _TESTS = [
         u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
         u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
     ),
+    (
+        u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
+        u'swf',
+        86,
+        u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"'
+    ),
 ]
 
 
@@ -57,12 +63,12 @@ class TestSignature(unittest.TestCase):
 
 
 def make_tfunc(url, stype, sig_input, expected_sig):
-    basename = url.rpartition('/')[2]
-    m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
-    assert m, '%r should follow URL format' % basename
+    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
+    assert m, '%r should follow URL format' % url
     test_id = m.group(1)
 
     def test_func(self):
+        basename = 'player-%s.%s' % (test_id, stype)
         fn = os.path.join(self.TESTDATA_DIR, basename)
 
         if not os.path.exists(fn):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 16f4a047d6f736026ac63ec55fb76215c8638224..623056bd962782fddfc2ed3841e2c5e826a35507 100644 (file)
@@ -14,6 +14,7 @@ import zlib
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
 from ..utils import (
     compat_chr,
     compat_parse_qs,
@@ -450,457 +451,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         return lambda s: initial_function([s])
 
     def _parse_sig_swf(self, file_contents):
-        if file_contents[1:3] != b'WS':
-            raise ExtractorError(
-                u'Not an SWF file; header is %r' % file_contents[:3])
-        if file_contents[:1] == b'C':
-            content = zlib.decompress(file_contents[8:])
-        else:
-            raise NotImplementedError(u'Unsupported compression format %r' %
-                                      file_contents[:1])
-
-        def extract_tags(content):
-            pos = 0
-            while pos < len(content):
-                header16 = struct.unpack('<H', content[pos:pos+2])[0]
-                pos += 2
-                tag_code = header16 >> 6
-                tag_len = header16 & 0x3f
-                if tag_len == 0x3f:
-                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
-                    pos += 4
-                assert pos+tag_len <= len(content)
-                yield (tag_code, content[pos:pos+tag_len])
-                pos += tag_len
-
-        code_tag = next(tag
-                        for tag_code, tag in extract_tags(content)
-                        if tag_code == 82)
-        p = code_tag.index(b'\0', 4) + 1
-        code_reader = io.BytesIO(code_tag[p:])
-
-        # Parse ABC (AVM2 ByteCode)
-        def read_int(reader=None):
-            if reader is None:
-                reader = code_reader
-            res = 0
-            shift = 0
-            for _ in range(5):
-                buf = reader.read(1)
-                assert len(buf) == 1
-                b = struct.unpack('<B', buf)[0]
-                res = res | ((b & 0x7f) << shift)
-                if b & 0x80 == 0:
-                    break
-                shift += 7
-            return res
-
-        def u30(reader=None):
-            res = read_int(reader)
-            assert res & 0xf0000000 == 0
-            return res
-        u32 = read_int
-
-        def s32(reader=None):
-            v = read_int(reader)
-            if v & 0x80000000 != 0:
-                v = - ((v ^ 0xffffffff) + 1)
-            return v
-
-        def s24(reader):
-            bs = reader.read(3)
-            assert len(bs) == 3
-            first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00'
-            return struct.unpack('!i', first_byte + bs)
-
-        def read_string(reader=None):
-            if reader is None:
-                reader = code_reader
-            slen = u30(reader)
-            resb = reader.read(slen)
-            assert len(resb) == slen
-            return resb.decode('utf-8')
-
-        def read_bytes(count, reader=None):
-            if reader is None:
-                reader = code_reader
-            resb = reader.read(count)
-            assert len(resb) == count
-            return resb
-
-        def read_byte(reader=None):
-            resb = read_bytes(1, reader=reader)
-            res = struct.unpack('<B', resb)[0]
-            return res
-
-        # minor_version + major_version
-        read_bytes(2 + 2)
-
-        # Constant pool
-        int_count = u30()
-        for _c in range(1, int_count):
-            s32()
-        uint_count = u30()
-        for _c in range(1, uint_count):
-            u32()
-        double_count = u30()
-        read_bytes((double_count-1) * 8)
-        string_count = u30()
-        constant_strings = [u'']
-        for _c in range(1, string_count):
-            s = read_string()
-            constant_strings.append(s)
-        namespace_count = u30()
-        for _c in range(1, namespace_count):
-            read_bytes(1)  # kind
-            u30()  # name
-        ns_set_count = u30()
-        for _c in range(1, ns_set_count):
-            count = u30()
-            for _c2 in range(count):
-                u30()
-        multiname_count = u30()
-        MULTINAME_SIZES = {
-            0x07: 2,  # QName
-            0x0d: 2,  # QNameA
-            0x0f: 1,  # RTQName
-            0x10: 1,  # RTQNameA
-            0x11: 0,  # RTQNameL
-            0x12: 0,  # RTQNameLA
-            0x09: 2,  # Multiname
-            0x0e: 2,  # MultinameA
-            0x1b: 1,  # MultinameL
-            0x1c: 1,  # MultinameLA
-        }
-        multinames = [u'']
-        for _c in range(1, multiname_count):
-            kind = u30()
-            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
-            if kind == 0x07:
-                u30()  # namespace_idx
-                name_idx = u30()
-                multinames.append(constant_strings[name_idx])
-            else:
-                multinames.append('[MULTINAME kind: %d]' % kind)
-                for _c2 in range(MULTINAME_SIZES[kind]):
-                    u30()
-
-        # Methods
-        method_count = u30()
-        MethodInfo = collections.namedtuple(
-            'MethodInfo',
-            ['NEED_ARGUMENTS', 'NEED_REST'])
-        method_infos = []
-        for method_id in range(method_count):
-            param_count = u30()
-            u30()  # return type
-            for _ in range(param_count):
-                u30()  # param type
-            u30()  # name index (always 0 for youtube)
-            flags = read_byte()
-            if flags & 0x08 != 0:
-                # Options present
-                option_count = u30()
-                for c in range(option_count):
-                    u30()  # val
-                    read_bytes(1)  # kind
-            if flags & 0x80 != 0:
-                # Param names present
-                for _ in range(param_count):
-                    u30()  # param name
-            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
-            method_infos.append(mi)
-
-        # Metadata
-        metadata_count = u30()
-        for _c in range(metadata_count):
-            u30()  # name
-            item_count = u30()
-            for _c2 in range(item_count):
-                u30()  # key
-                u30()  # value
-
-        def parse_traits_info():
-            trait_name_idx = u30()
-            kind_full = read_byte()
-            kind = kind_full & 0x0f
-            attrs = kind_full >> 4
-            methods = {}
-            if kind in [0x00, 0x06]:  # Slot or Const
-                u30()  # Slot id
-                u30()  # type_name_idx
-                vindex = u30()
-                if vindex != 0:
-                    read_byte()  # vkind
-            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
-                u30()  # disp_id
-                method_idx = u30()
-                methods[multinames[trait_name_idx]] = method_idx
-            elif kind == 0x04:  # Class
-                u30()  # slot_id
-                u30()  # classi
-            elif kind == 0x05:  # Function
-                u30()  # slot_id
-                function_idx = u30()
-                methods[function_idx] = multinames[trait_name_idx]
-            else:
-                raise ExtractorError(u'Unsupported trait kind %d' % kind)
-
-            if attrs & 0x4 != 0:  # Metadata present
-                metadata_count = u30()
-                for _c3 in range(metadata_count):
-                    u30()  # metadata index
-
-            return methods
-
-        class AVMClass(object):
-            def __init__(self, name_idx):
-                self.name_idx = name_idx
-                self.method_names = {}
-                self.method_idxs = {}
-                self.methods = {}
-                self.method_pyfunctions = {}
-                self.variables = {}
-
-            @property
-            def name(self):
-                return multinames[self.name_idx]
-
-        # Classes
-        class_count = u30()
-        classes = []
-        for class_id in range(class_count):
-            name_idx = u30()
-            classes.append(AVMClass(name_idx))
-            u30()  # super_name idx
-            flags = read_byte()
-            if flags & 0x08 != 0:  # Protected namespace is present
-                u30()  # protected_ns_idx
-            intrf_count = u30()
-            for _c2 in range(intrf_count):
-                u30()
-            u30()  # iinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-        assert len(classes) == class_count
-
+        swfi = SWFInterpreter(file_contents)
         TARGET_CLASSNAME = u'SignatureDecipher'
-        searched_class = next(
-            c for c in classes if c.name == TARGET_CLASSNAME)
-        if searched_class is None:
-            raise ExtractorError(u'Target class %r not found' %
-                                 TARGET_CLASSNAME)
-
-        for avm_class in classes:
-            u30()  # cinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                trait_methods = parse_traits_info()
-                avm_class.method_names.update(trait_methods.items())
-                avm_class.method_idxs.update(dict(
-                    (idx, name)
-                    for name, idx in trait_methods.items()))
-
-        # Scripts
-        script_count = u30()
-        for _c in range(script_count):
-            u30()  # init
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        # Method bodies
-        method_body_count = u30()
-        Method = collections.namedtuple('Method', ['code', 'local_count'])
-        for _c in range(method_body_count):
-            method_idx = u30()
-            u30()  # max_stack
-            local_count = u30()
-            u30()  # init_scope_depth
-            u30()  # max_scope_depth
-            code_length = u30()
-            code = read_bytes(code_length)
-            for avm_class in classes:
-                if method_idx in avm_class.method_idxs:
-                    m = Method(code, local_count)
-                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
-            exception_count = u30()
-            for _c2 in range(exception_count):
-                u30()  # from
-                u30()  # to
-                u30()  # target
-                u30()  # exc_type
-                u30()  # var_name
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        assert p + code_reader.tell() == len(code_tag)
-
-        def extract_function(avm_class, func_name):
-            if func_name in avm_class.method_pyfunctions:
-                return avm_class.method_pyfunctions[func_name]
-            if func_name not in avm_class.methods:
-                raise ExtractorError(u'Cannot find function %r' % func_name)
-            m = avm_class.methods[func_name]
-
-            def resfunc(args):
-                registers = ['(this)'] + list(args) + [None] * m.local_count
-                stack = []
-                coder = io.BytesIO(m.code)
-                while True:
-                    opcode = struct.unpack('!B', coder.read(1))[0]
-                    if opcode == 17:  # iftrue
-                        offset = s24(coder)
-                        value = stack.pop()
-                        if value:
-                            coder.seek(coder.tell() + offset)
-                    elif opcode == 36:  # pushbyte
-                        v = struct.unpack('!B', coder.read(1))[0]
-                        stack.append(v)
-                    elif opcode == 44:  # pushstring
-                        idx = u30(coder)
-                        stack.append(constant_strings[idx])
-                    elif opcode == 48:  # pushscope
-                        # We don't implement the scope register, so we'll just
-                        # ignore the popped value
-                        stack.pop()
-                    elif opcode == 70:  # callproperty
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'split':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, compat_str)
-                            if args[0] == u'':
-                                res = list(obj)
-                            else:
-                                res = obj.split(args[0])
-                            stack.append(res)
-                        elif mname == u'slice':
-                            assert len(args) == 1
-                            assert isinstance(args[0], int)
-                            assert isinstance(obj, list)
-                            res = obj[args[0]:]
-                            stack.append(res)
-                        elif mname == u'join':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, list)
-                            res = args[0].join(obj)
-                            stack.append(res)
-                        elif mname in avm_class.method_pyfunctions:
-                            stack.append(avm_class.method_pyfunctions[mname](args))
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 72:  # returnvalue
-                        res = stack.pop()
-                        return res
-                    elif opcode == 79:  # callpropvoid
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'reverse':
-                            assert isinstance(obj, list)
-                            obj.reverse()
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported (void) property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 86:  # newarray
-                        arg_count = u30(coder)
-                        arr = []
-                        for i in range(arg_count):
-                            arr.append(stack.pop())
-                        arr = arr[::-1]
-                        stack.append(arr)
-                    elif opcode == 93:  # findpropstrict
-                        index = u30(coder)
-                        mname = multinames[index]
-                        res = extract_function(avm_class, mname)
-                        stack.append(res)
-                    elif opcode == 94:  # findproperty
-                        index = u30(coder)
-                        mname = multinames[index]
-                        res = avm_class.variables.get(mname)
-                        stack.append(res)
-                    elif opcode == 96:  # getlex
-                        index = u30(coder)
-                        mname = multinames[index]
-                        res = avm_class.variables.get(mname)
-                        stack.append(res)
-                    elif opcode == 97:  # setproperty
-                        index = u30(coder)
-                        value = stack.pop()
-                        idx = stack.pop()
-                        obj = stack.pop()
-                        assert isinstance(obj, list)
-                        assert isinstance(idx, int)
-                        obj[idx] = value
-                    elif opcode == 98:  # getlocal
-                        index = u30(coder)
-                        stack.append(registers[index])
-                    elif opcode == 99:  # setlocal
-                        index = u30(coder)
-                        value = stack.pop()
-                        registers[index] = value
-                    elif opcode == 102:  # getproperty
-                        index = u30(coder)
-                        pname = multinames[index]
-                        if pname == u'length':
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(len(obj))
-                        else:  # Assume attribute access
-                            idx = stack.pop()
-                            assert isinstance(idx, int)
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(obj[idx])
-                    elif opcode == 128:  # coerce
-                        u30(coder)
-                    elif opcode == 133:  # coerce_s
-                        assert isinstance(stack[-1], (type(None), compat_str))
-                    elif opcode == 164:  # modulo
-                        value2 = stack.pop()
-                        value1 = stack.pop()
-                        res = value1 % value2
-                        stack.append(res)
-                    elif opcode == 175:  # greaterequals
-                        value2 = stack.pop()
-                        value1 = stack.pop()
-                        result = value1 >= value2
-                        stack.append(result)
-                    elif opcode == 208:  # getlocal_0
-                        stack.append(registers[0])
-                    elif opcode == 209:  # getlocal_1
-                        stack.append(registers[1])
-                    elif opcode == 210:  # getlocal_2
-                        stack.append(registers[2])
-                    elif opcode == 211:  # getlocal_3
-                        stack.append(registers[3])
-                    elif opcode == 214:  # setlocal_2
-                        registers[2] = stack.pop()
-                    elif opcode == 215:  # setlocal_3
-                        registers[3] = stack.pop()
-                    else:
-                        raise NotImplementedError(
-                            u'Unsupported opcode %d' % opcode)
-
-            avm_class.method_pyfunctions[func_name] = resfunc
-            return resfunc
-
-        initial_function = extract_function(searched_class, u'decipher')
+        searched_class = swfi.extract_class(TARGET_CLASSNAME)
+        initial_function = swfi.extract_function(searched_class, u'decipher')
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
new file mode 100644 (file)
index 0000000..1cd2921
--- /dev/null
@@ -0,0 +1,503 @@
+from __future__ import unicode_literals
+
+import collections
+import io
+import struct
+import zlib
+
+from .utils import ExtractorError
+
+
+def _extract_tags(content):
+    pos = 0
+    while pos < len(content):
+        header16 = struct.unpack('<H', content[pos:pos + 2])[0]
+        pos += 2
+        tag_code = header16 >> 6
+        tag_len = header16 & 0x3f
+        if tag_len == 0x3f:
+            tag_len = struct.unpack('<I', content[pos:pos + 4])[0]
+            pos += 4
+        assert pos + tag_len <= len(content)
+        yield (tag_code, content[pos:pos + tag_len])
+        pos += tag_len
+
+
+class _AVMClass_Object(object):
+    def __init__(self, avm_class):
+        self.avm_class = avm_class
+
+    def __repr__(self):
+        return '%s#%x' % (self.avm_class.name, id(self))
+
+
+class _AVMClass(object):
+    def __init__(self, name_idx, name):
+        self.name_idx = name_idx
+        self.name = name
+        self.method_names = {}
+        self.method_idxs = {}
+        self.methods = {}
+        self.method_pyfunctions = {}
+        self.variables = {}
+
+    def make_object(self):
+        return _AVMClass_Object(self)
+
+
+def _read_int(reader):
+    res = 0
+    shift = 0
+    for _ in range(5):
+        buf = reader.read(1)
+        assert len(buf) == 1
+        b = struct.unpack('<B', buf)[0]
+        res = res | ((b & 0x7f) << shift)
+        if b & 0x80 == 0:
+            break
+        shift += 7
+    return res
+
+
+def _u30(reader):
+    res = _read_int(reader)
+    assert res & 0xf0000000 == 0
+    return res
+u32 = _read_int
+
+
+def _s32(reader):
+    v = _read_int(reader)
+    if v & 0x80000000 != 0:
+        v = - ((v ^ 0xffffffff) + 1)
+    return v
+
+
+def _s24(reader):
+    bs = reader.read(3)
+    assert len(bs) == 3
+    first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00'
+    return struct.unpack('!i', first_byte + bs)
+
+
+def _read_string(reader):
+    slen = _u30(reader)
+    resb = reader.read(slen)
+    assert len(resb) == slen
+    return resb.decode('utf-8')
+
+
+def _read_bytes(count, reader):
+    if reader is None:
+        reader = code_reader
+    resb = reader.read(count)
+    assert len(resb) == count
+    return resb
+
+
+def _read_byte(reader):
+    resb = _read_bytes(1, reader=reader)
+    res = struct.unpack('<B', resb)[0]
+    return res
+
+
+class SWFInterpreter(object):
+    def __init__(self, file_contents):
+        if file_contents[1:3] != b'WS':
+            raise ExtractorError(
+                'Not an SWF file; header is %r' % file_contents[:3])
+        if file_contents[:1] == b'C':
+            content = zlib.decompress(file_contents[8:])
+        else:
+            raise NotImplementedError(
+                'Unsupported compression format %r' %
+                file_contents[:1])
+
+        code_tag = next(tag
+                        for tag_code, tag in _extract_tags(content)
+                        if tag_code == 82)
+        p = code_tag.index(b'\0', 4) + 1
+        code_reader = io.BytesIO(code_tag[p:])
+
+        # Parse ABC (AVM2 ByteCode)
+
+        # Define a couple convenience methods
+        u30 = lambda *args: _u30(*args, reader=code_reader)
+        s32 = lambda *args: _s32(*args, reader=code_reader)
+        u32 = lambda *args: _u32(*args, reader=code_reader)
+        read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
+        read_byte = lambda *args: _read_byte(*args, reader=code_reader)
+
+        # minor_version + major_version
+        read_bytes(2 + 2)
+
+        # Constant pool
+        int_count = u30()
+        for _c in range(1, int_count):
+            s32()
+        uint_count = u30()
+        for _c in range(1, uint_count):
+            u32()
+        double_count = u30()
+        read_bytes((double_count - 1) * 8)
+        string_count = u30()
+        constant_strings = ['']
+        for _c in range(1, string_count):
+            s = _read_string(code_reader)
+            constant_strings.append(s)
+        namespace_count = u30()
+        for _c in range(1, namespace_count):
+            read_bytes(1)  # kind
+            u30()  # name
+        ns_set_count = u30()
+        for _c in range(1, ns_set_count):
+            count = u30()
+            for _c2 in range(count):
+                u30()
+        multiname_count = u30()
+        MULTINAME_SIZES = {
+            0x07: 2,  # QName
+            0x0d: 2,  # QNameA
+            0x0f: 1,  # RTQName
+            0x10: 1,  # RTQNameA
+            0x11: 0,  # RTQNameL
+            0x12: 0,  # RTQNameLA
+            0x09: 2,  # Multiname
+            0x0e: 2,  # MultinameA
+            0x1b: 1,  # MultinameL
+            0x1c: 1,  # MultinameLA
+        }
+        self.multinames = ['']
+        for _c in range(1, multiname_count):
+            kind = u30()
+            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
+            if kind == 0x07:
+                u30()  # namespace_idx
+                name_idx = u30()
+                self.multinames.append(constant_strings[name_idx])
+            else:
+                self.multinames.append('[MULTINAME kind: %d]' % kind)
+                for _c2 in range(MULTINAME_SIZES[kind]):
+                    u30()
+
+        # Methods
+        method_count = u30()
+        MethodInfo = collections.namedtuple(
+            'MethodInfo',
+            ['NEED_ARGUMENTS', 'NEED_REST'])
+        method_infos = []
+        for method_id in range(method_count):
+            param_count = u30()
+            u30()  # return type
+            for _ in range(param_count):
+                u30()  # param type
+            u30()  # name index (always 0 for youtube)
+            flags = read_byte()
+            if flags & 0x08 != 0:
+                # Options present
+                option_count = u30()
+                for c in range(option_count):
+                    u30()  # val
+                    read_bytes(1)  # kind
+            if flags & 0x80 != 0:
+                # Param names present
+                for _ in range(param_count):
+                    u30()  # param name
+            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
+            method_infos.append(mi)
+
+        # Metadata
+        metadata_count = u30()
+        for _c in range(metadata_count):
+            u30()  # name
+            item_count = u30()
+            for _c2 in range(item_count):
+                u30()  # key
+                u30()  # value
+
+        def parse_traits_info():
+            trait_name_idx = u30()
+            kind_full = read_byte()
+            kind = kind_full & 0x0f
+            attrs = kind_full >> 4
+            methods = {}
+            if kind in [0x00, 0x06]:  # Slot or Const
+                u30()  # Slot id
+                u30()  # type_name_idx
+                vindex = u30()
+                if vindex != 0:
+                    read_byte()  # vkind
+            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
+                u30()  # disp_id
+                method_idx = u30()
+                methods[self.multinames[trait_name_idx]] = method_idx
+            elif kind == 0x04:  # Class
+                u30()  # slot_id
+                u30()  # classi
+            elif kind == 0x05:  # Function
+                u30()  # slot_id
+                function_idx = u30()
+                methods[function_idx] = self.multinames[trait_name_idx]
+            else:
+                raise ExtractorError('Unsupported trait kind %d' % kind)
+
+            if attrs & 0x4 != 0:  # Metadata present
+                metadata_count = u30()
+                for _c3 in range(metadata_count):
+                    u30()  # metadata index
+
+            return methods
+
+        # Classes
+        class_count = u30()
+        classes = []
+        for class_id in range(class_count):
+            name_idx = u30()
+            classes.append(_AVMClass(name_idx, self.multinames[name_idx]))
+            u30()  # super_name idx
+            flags = read_byte()
+            if flags & 0x08 != 0:  # Protected namespace is present
+                u30()  # protected_ns_idx
+            intrf_count = u30()
+            for _c2 in range(intrf_count):
+                u30()
+            u30()  # iinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+        assert len(classes) == class_count
+        self._classes_by_name = dict((c.name, c) for c in classes)
+
+        for avm_class in classes:
+            u30()  # cinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                trait_methods = parse_traits_info()
+                avm_class.method_names.update(trait_methods.items())
+                avm_class.method_idxs.update(dict(
+                    (idx, name)
+                    for name, idx in trait_methods.items()))
+
+        # Scripts
+        script_count = u30()
+        for _c in range(script_count):
+            u30()  # init
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+
+        # Method bodies
+        method_body_count = u30()
+        Method = collections.namedtuple('Method', ['code', 'local_count'])
+        for _c in range(method_body_count):
+            method_idx = u30()
+            u30()  # max_stack
+            local_count = u30()
+            u30()  # init_scope_depth
+            u30()  # max_scope_depth
+            code_length = u30()
+            code = read_bytes(code_length)
+            for avm_class in classes:
+                if method_idx in avm_class.method_idxs:
+                    m = Method(code, local_count)
+                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
+            exception_count = u30()
+            for _c2 in range(exception_count):
+                u30()  # from
+                u30()  # to
+                u30()  # target
+                u30()  # exc_type
+                u30()  # var_name
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()
+
+        assert p + code_reader.tell() == len(code_tag)
+
+    def extract_class(self, class_name):
+        try:
+            return self._classes_by_name[class_name]
+        except KeyError:
+            raise ExtractorError('Class %r not found' % class_name)
+
+    def extract_function(self, avm_class, func_name):
+        """Return a Python callable emulating ``func_name`` of ``avm_class``.
+
+        Lookup order:
+
+        1. a callable already cached in ``avm_class.method_pyfunctions``;
+        2. if ``func_name`` is the name of a known class, the result of that
+           class's ``make_object()`` (an object, not a callable);
+        3. otherwise a closure interpreting the bytecode stored in
+           ``avm_class.methods[func_name]``; it is cached in
+           ``avm_class.method_pyfunctions`` before being returned.
+
+        The closure takes a single sequence of call arguments and returns
+        the value popped by the ``returnvalue`` opcode.  It raises
+        NotImplementedError for opcodes or properties that are not emulated.
+
+        Raises ExtractorError if ``func_name`` cannot be resolved.
+        """
+        if func_name in avm_class.method_pyfunctions:
+            return avm_class.method_pyfunctions[func_name]
+        if func_name in self._classes_by_name:
+            return self._classes_by_name[func_name].make_object()
+        if func_name not in avm_class.methods:
+            raise ExtractorError('Cannot find function %r' % func_name)
+        m = avm_class.methods[func_name]
+
+        def resfunc(args):
+            # Each invocation reads the bytecode through its own stream
+            coder = io.BytesIO(m.code)
+            s24 = lambda: _s24(coder)
+            u30 = lambda: _u30(coder)
+
+            # Register 0 is a placeholder for "this", followed by the call
+            # arguments and then the method's local slots.
+            registers = ['(this)'] + list(args) + [None] * m.local_count
+            stack = []
+
+            def pop_args(count):
+                # Operands were pushed first-to-last, so popping yields them
+                # last-to-first; restore the original order.
+                popped = [stack.pop() for _ in range(count)]
+                popped.reverse()
+                return popped
+
+            while True:
+                opcode = _read_byte(coder)
+                if opcode == 17:  # iftrue
+                    offset = s24()
+                    value = stack.pop()
+                    if value:
+                        # Jump is relative to the position after the operand
+                        coder.seek(coder.tell() + offset)
+                elif opcode == 36:  # pushbyte
+                    stack.append(_read_byte(coder))
+                elif opcode == 44:  # pushstring
+                    idx = u30()
+                    # NOTE(review): ``constant_strings`` is not bound anywhere
+                    # in this method; unless it exists at module level this
+                    # raises NameError -- confirm where the string pool lives.
+                    stack.append(constant_strings[idx])
+                elif opcode == 48:  # pushscope
+                    # We don't implement the scope register, so we'll just
+                    # discard the popped value
+                    stack.pop()
+                elif opcode == 70:  # callproperty
+                    index = u30()
+                    mname = self.multinames[index]
+                    arg_count = u30()
+                    call_args = pop_args(arg_count)
+                    obj = stack.pop()
+                    if mname == 'split':
+                        assert len(call_args) == 1
+                        assert isinstance(call_args[0], compat_str)
+                        assert isinstance(obj, compat_str)
+                        if call_args[0] == '':
+                            # Empty separator: split into single characters
+                            res = list(obj)
+                        else:
+                            res = obj.split(call_args[0])
+                        stack.append(res)
+                    elif mname == 'slice':
+                        assert len(call_args) == 1
+                        assert isinstance(call_args[0], int)
+                        assert isinstance(obj, list)
+                        stack.append(obj[call_args[0]:])
+                    elif mname == 'join':
+                        assert len(call_args) == 1
+                        assert isinstance(call_args[0], compat_str)
+                        assert isinstance(obj, list)
+                        stack.append(call_args[0].join(obj))
+                    elif mname in avm_class.method_pyfunctions:
+                        stack.append(
+                            avm_class.method_pyfunctions[mname](call_args))
+                    else:
+                        raise NotImplementedError(
+                            'Unsupported property %r on %r'
+                            % (mname, obj))
+                elif opcode == 72:  # returnvalue
+                    return stack.pop()
+                elif opcode == 74:  # constructproperty
+                    index = u30()
+                    arg_count = u30()
+                    pop_args(arg_count)  # constructor arguments are unused
+                    obj = stack.pop()
+
+                    mname = self.multinames[index]
+                    # Resolve the constructor (raises if it is unknown), but
+                    # do not actually call it for now; we just pretend it
+                    # does nothing.
+                    self.extract_function(obj.avm_class, mname)
+                    stack.append(obj)
+                elif opcode == 79:  # callpropvoid
+                    index = u30()
+                    mname = self.multinames[index]
+                    arg_count = u30()
+                    pop_args(arg_count)  # arguments are unused
+                    obj = stack.pop()
+                    if mname == 'reverse':
+                        assert isinstance(obj, list)
+                        obj.reverse()
+                    else:
+                        raise NotImplementedError(
+                            'Unsupported (void) property %r on %r'
+                            % (mname, obj))
+                elif opcode == 86:  # newarray
+                    arg_count = u30()
+                    stack.append(pop_args(arg_count))
+                elif opcode == 93:  # findpropstrict
+                    index = u30()
+                    mname = self.multinames[index]
+                    stack.append(self.extract_function(avm_class, mname))
+                elif opcode == 94:  # findproperty
+                    index = u30()
+                    mname = self.multinames[index]
+                    stack.append(avm_class.variables.get(mname))
+                elif opcode == 96:  # getlex
+                    index = u30()
+                    mname = self.multinames[index]
+                    stack.append(avm_class.variables.get(mname, None))
+                elif opcode == 97:  # setproperty
+                    index = u30()
+                    value = stack.pop()
+                    idx = self.multinames[index]
+                    obj = stack.pop()
+                    obj[idx] = value
+                elif opcode == 98:  # getlocal
+                    index = u30()
+                    stack.append(registers[index])
+                elif opcode == 99:  # setlocal
+                    index = u30()
+                    value = stack.pop()
+                    registers[index] = value
+                elif opcode == 102:  # getproperty
+                    index = u30()
+                    pname = self.multinames[index]
+                    if pname == 'length':
+                        obj = stack.pop()
+                        assert isinstance(obj, list)
+                        stack.append(len(obj))
+                    else:  # Assume attribute access
+                        idx = stack.pop()
+                        assert isinstance(idx, int)
+                        obj = stack.pop()
+                        assert isinstance(obj, list)
+                        stack.append(obj[idx])
+                elif opcode == 128:  # coerce
+                    u30()  # operand is read and ignored
+                elif opcode == 133:  # coerce_s
+                    assert isinstance(stack[-1], (type(None), compat_str))
+                elif opcode == 164:  # modulo
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    stack.append(value1 % value2)
+                elif opcode == 175:  # greaterequals
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    stack.append(value1 >= value2)
+                elif opcode == 208:  # getlocal_0
+                    stack.append(registers[0])
+                elif opcode == 209:  # getlocal_1
+                    stack.append(registers[1])
+                elif opcode == 210:  # getlocal_2
+                    stack.append(registers[2])
+                elif opcode == 211:  # getlocal_3
+                    stack.append(registers[3])
+                elif opcode == 214:  # setlocal_2
+                    registers[2] = stack.pop()
+                elif opcode == 215:  # setlocal_3
+                    registers[3] = stack.pop()
+                else:
+                    raise NotImplementedError(
+                        'Unsupported opcode %d' % opcode)
+
+        # Cache the closure so repeated lookups reuse it
+        avm_class.method_pyfunctions[func_name] = resfunc
+        return resfunc
+