X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=ldk-java;a=blobdiff_plain;f=java_strings.py;h=2fcf836d23b900bcf87bd3b7e5875c5ee0c52523;hp=f5df004d80d3dd709c54366165a712ceb461337e;hb=HEAD;hpb=40764e2a87c8cc70c5749a4d681f68842f975f59 diff --git a/java_strings.py b/java_strings.py index f5df004d..7e35d175 100644 --- a/java_strings.py +++ b/java_strings.py @@ -17,6 +17,8 @@ class Consts: uint16_t = ['short'], uint32_t = ['int'], uint64_t = ['long'], + int64_t = ['long'], + double = ['double'], ) self.java_type_map = dict( String = "String" @@ -70,6 +72,7 @@ public class bindings { // Fetching the LDK versions from C also checks that the header and binaries match System.err.println("Loaded LDK-Java Bindings " + version.get_ldk_java_bindings_version() + " with LDK " + get_ldk_version() + " and LDK-C-Bindings " + get_ldk_c_bindings_version()); } + public static void run_statics() { /* Useful to force the statics to run */ } static native void init(java.lang.Class c); static native void init_class_cache(); static native String get_lib_version_string(); @@ -130,6 +133,38 @@ class CommonBase { } }""" + self.txin_defn = """public class TxIn extends CommonBase { + /** The witness in this input, in serialized form */ + public final byte[] witness; + /** The script_sig in this input */ + public final byte[] script_sig; + /** The transaction output's sequence number */ + public final int sequence; + /** The txid this input is spending */ + public final byte[] previous_txid; + /** The output index within the spent transaction of the output this input is spending */ + public final int previous_vout; + + TxIn(java.lang.Object _dummy, long ptr) { + super(ptr); + this.witness = bindings.TxIn_get_witness(ptr); + this.script_sig = bindings.TxIn_get_script_sig(ptr); + this.sequence = bindings.TxIn_get_sequence(ptr); + this.previous_txid = bindings.TxIn_get_previous_txid(ptr); + this.previous_vout = bindings.TxIn_get_previous_vout(ptr); + } + /** Constructs a new TxIn, note that 
previous_txid must be exactly 32 bytes */ + public TxIn(byte[] witness, byte[] script_sig, int sequence, byte[] previous_txid, int previous_vout) { + this(null, bindings.TxIn_new(witness, script_sig, sequence, previous_txid, previous_vout)); + } + + @Override @SuppressWarnings(\"deprecation\") + protected void finalize() throws Throwable { + super.finalize(); + if (ptr != 0) { bindings.TxIn_free(ptr); } + } +}""" + self.scalar_defn = """public class BigEndianScalar extends CommonBase { /** The bytes of the scalar value, in big endian */ public final byte[] scalar_bytes; @@ -150,6 +185,34 @@ class CommonBase { } }""" + self.witness_program_defn = """public class WitnessProgram extends CommonBase { + /** The witness program bytes themselves */ + public final byte[] program; + /** The witness version */ + public final WitnessVersion version; + + WitnessProgram(java.lang.Object _dummy, long ptr) { + super(ptr); + this.program = bindings.WitnessProgram_get_program(ptr); + this.version = new WitnessVersion(bindings.WitnessProgram_get_version(ptr)); + } + static byte check_args(byte[] program, WitnessVersion version) { + if (program.length < 2 || program.length > 40) throw new IllegalArgumentException(); + if (version.getVal() == 0 && program.length != 20 && program.length != 32) throw new IllegalArgumentException(); + return version.getVal(); + } + public WitnessProgram(byte[] program, WitnessVersion version) { + super(bindings.WitnessProgram_new(check_args(program, version), program)); + this.program = bindings.WitnessProgram_get_program(ptr); + this.version = new WitnessVersion(bindings.WitnessProgram_get_version(ptr)); + } + + @Override @SuppressWarnings(\"deprecation\") + protected void finalize() throws Throwable { + super.finalize(); + if (ptr != 0) { bindings.WitnessProgram_free(ptr); } + } +}""" self.c_file_pfx = """#include <jni.h> // On OSX jlong (ie long long) is not equivalent to int64_t, so we override here @@ -429,25 +492,133 @@ typedef jlongArray int64_tArray; 
typedef jbyteArray int8_tArray; typedef jshortArray int16_tArray; -static inline jstring str_ref_to_java(JNIEnv *env, const char* chars, size_t len) { - // Sadly we need to create a temporary because Java can't accept a char* without a 0-terminator - char* conv_buf = MALLOC(len + 1, "str conv buf"); - memcpy(conv_buf, chars, len); - conv_buf[len] = 0; - jstring ret = (*env)->NewStringUTF(env, conv_buf); - FREE(conv_buf); +static inline jstring str_ref_to_java(JNIEnv *env, const unsigned char* chars, size_t len) { + // Java uses "Modified UTF-8" rather than UTF-8. This requires special + // handling for codepoints above 0xFFFF, which get converted from four + // bytes to six. We don't know upfront how many codepoints in the string + // are above 0xFFFF, so we just allocate an extra 50% up front and waste a + // bit of space. + unsigned char* java_chars = MALLOC(len * 3 / 2 + 1, "str conv buf"); + unsigned char* next_java_char = java_chars; + const unsigned char* next_in_char = chars; + const unsigned char* end = chars + len; + #define COPY_CHAR_TO_JAVA do { *next_java_char = *next_in_char; next_java_char++; next_in_char++; } while (0) + + while (next_in_char < end) { + if (!*next_in_char) break; + if (!(*next_in_char & 0b10000000)) { + COPY_CHAR_TO_JAVA; + } else if ((*next_in_char & 0b11100000) == 0b11000000) { + if (next_in_char + 2 > end) { CHECK(false); break; } // bad string + COPY_CHAR_TO_JAVA; + COPY_CHAR_TO_JAVA; + } else if ((*next_in_char & 0b11110000) == 0b11100000) { + if (next_in_char + 3 > end) { CHECK(false); break; } // bad string + COPY_CHAR_TO_JAVA; + COPY_CHAR_TO_JAVA; + COPY_CHAR_TO_JAVA; + } else if ((*next_in_char & 0b11111000) == 0b11110000) { + if (next_in_char + 4 > end) { CHECK(false); break; } // bad string + uint32_t codepoint = 0; + codepoint |= (((uint32_t)*(next_in_char )) & 0b00000111) << 18; + codepoint |= (((uint32_t)*(next_in_char + 1)) & 0b00111111) << 12; + codepoint |= (((uint32_t)*(next_in_char + 2)) & 0b00111111) << 6; + 
codepoint |= (((uint32_t)*(next_in_char + 3)) & 0b00111111) << 0; + codepoint -= 0x10000; + *next_java_char = 0b11101101; + next_java_char++; + *next_java_char = 0b10100000 | ((codepoint >> 16) & 0b00001111); + next_java_char++; + *next_java_char = 0b10000000 | ((codepoint >> 10) & 0b00111111); + next_java_char++; + *next_java_char = 0b11101101; + next_java_char++; + *next_java_char = 0b10110000 | ((codepoint >> 6) & 0b00001111); + next_java_char++; + *next_java_char = 0b10000000 | ((codepoint >> 0) & 0b00111111); + next_java_char++; + next_in_char += 4; + } else { + // Bad string + CHECK(false); + break; + } + } + *next_java_char = 0; + jstring ret = (*env)->NewStringUTF(env, java_chars); + FREE(java_chars); return ret; } static inline LDKStr java_to_owned_str(JNIEnv *env, jstring str) { uint64_t str_len = (*env)->GetStringUTFLength(env, str); - char* newchars = MALLOC(str_len + 1, "String chars"); - const char* jchars = (*env)->GetStringUTFChars(env, str, NULL); - memcpy(newchars, jchars, str_len); - newchars[str_len] = 0; + // Java uses "Modified UTF-8" rather than UTF-8. This requires special + // handling for codepoints above 0xFFFF, which we implement below. 
+ unsigned char* newchars = MALLOC(str_len, "String chars"); + unsigned char* next_newchar = newchars; + uint64_t utf8_len = 0; + + const unsigned char* jchars = (*env)->GetStringUTFChars(env, str, NULL); + const unsigned char* next_char = jchars; + const unsigned char* end = jchars + str_len; + + #define COPY_CHAR_FROM_JAVA do { *next_newchar = *next_char; next_newchar++; next_char++; utf8_len++; } while (0) + + while (next_char < end) { + if (!(*next_char & 0b10000000)) { + CHECK(*next_char != 0); // Bad Modified UTF-8 string, but we'll just cut here + COPY_CHAR_FROM_JAVA; + } else if ((*next_char & 0b11100000) == 0b11000000) { + if (next_char + 2 > end) { CHECK(false); break; } // bad string + uint16_t codepoint = 0; + codepoint |= (((uint16_t)(*next_char & 0x1f)) << 6); + codepoint |= *(next_char + 1) & 0x3f; + if (codepoint == 0) { + // We should really never get null codepoints, but java allows them. + // Just skip it. + next_char += 2; + } else { + COPY_CHAR_FROM_JAVA; + COPY_CHAR_FROM_JAVA; + } + } else if ((*next_char & 0b11110000) == 0b11100000) { + if (next_char + 3 > end) { CHECK(false); break; } // bad string + if (*next_char == 0b11101101 && (*(next_char + 1) & 0b11110000) == 0b10100000) { + // Surrogate code unit should indicate we have a codepoint above + // 0xFFFF, which is where Modified UTF-8 and UTF-8 diverge. 
+ if (next_char + 6 > end) { CHECK(false); break; } // bad string + CHECK(*(next_char + 3) == 0b11101101); + CHECK((*(next_char + 4) & 0b11110000) == 0b10110000); + // Calculate the codepoint per https://docs.oracle.com/javase/1.5.0/docs/guide/jni/spec/types.html#wp16542 + uint32_t codepoint = 0x10000; + codepoint += ((((uint32_t)*(next_char + 1)) & 0x0f) << 16); + codepoint += ((((uint32_t)*(next_char + 2)) & 0x3f) << 10); + codepoint += ((((uint32_t)*(next_char + 4)) & 0x0f) << 6); + codepoint += (((uint32_t)*(next_char + 5)) & 0x3f); + *next_newchar = 0b11110000 | ((codepoint >> 18) & 0b111); + next_newchar++; + *next_newchar = 0b10000000 | ((codepoint >> 12) & 0b111111); + next_newchar++; + *next_newchar = 0b10000000 | ((codepoint >> 6) & 0b111111); + next_newchar++; + *next_newchar = 0b10000000 | ( codepoint & 0b111111); + next_newchar++; + next_char += 6; + utf8_len += 4; + } else { + COPY_CHAR_FROM_JAVA; + COPY_CHAR_FROM_JAVA; + COPY_CHAR_FROM_JAVA; + } + } else { + // Bad string + CHECK(false); + break; + } + } (*env)->ReleaseStringUTFChars(env, str, jchars); LDKStr res = { .chars = newchars, - .len = str_len, + .len = utf8_len, .chars_is_owned = true }; return res; @@ -485,7 +656,7 @@ import javax.annotation.Nullable; self.usize_c_ty = "int64_t" self.usize_native_ty = "long" self.native_zero_ptr = "0" - self.result_c_ty = "jclass" + self.unitary_enum_c_ty = "jclass" self.ptr_arr = "jobjectArray" self.is_arr_some_check = ("", " != NULL") self.get_native_arr_len_call = ("(*env)->GetArrayLength(env, ", ")") @@ -565,7 +736,7 @@ import javax.annotation.Nullable; else: return "(*env)->Release" + ty_info.java_ty.strip("[]").title() + "ArrayElements(env, " + arr_name + ", " + dest_name + ", 0)" - def map_hu_array_elems(self, arr_name, conv_name, arr_ty, elem_ty): + def map_hu_array_elems(self, arr_name, conv_name, arr_ty, elem_ty, is_nullable): if elem_ty.java_ty == "long" and elem_ty.java_hu_ty != "long": return arr_name + " != null ? 
Arrays.stream(" + arr_name + ").mapToLong(" + conv_name + " -> " + elem_ty.from_hu_conv[0] + ").toArray() : null" elif elem_ty.java_ty == "long": @@ -691,7 +862,7 @@ import javax.annotation.Nullable; out_c = out_c + "\t\tcase %d: return %s;\n" % (ord_v, var) ord_v = ord_v + 1 out_java_enum = out_java_enum + "\t; static native void init();\n" - out_java_enum = out_java_enum + "\tstatic { init(); }\n" + out_java_enum = out_java_enum + "\tstatic { org.ldk.impl.bindings.run_statics(); init(); }\n" out_java_enum = out_java_enum + "}" out_java = out_java + "\tstatic { " + struct_name + ".values(); /* Force enum statics to run */ }\n" out_c += "\t}\n" @@ -1219,7 +1390,7 @@ import javax.annotation.Nullable; out_opaque_struct_human += self.hu_struct_file_prefix out_opaque_struct_human += "\n/**\n * " + struct_doc_comment.replace("\n", "\n * ") + "\n */\n" out_opaque_struct_human += "@SuppressWarnings(\"unchecked\") // We correctly assign various generic arrays\n" - hu_name = struct_name.replace("LDKC2Tuple", "TwoTuple").replace("LDKC3Tuple", "ThreeTuple").replace("LDK", "") + hu_name = struct_name.replace("LDKC2Tuple", "TwoTuple").replace("LDKC4Tuple", "FourTuple").replace("LDKC3Tuple", "ThreeTuple").replace("LDK", "") out_opaque_struct_human += ("public class " + hu_name + " extends CommonBase") if struct_name.startswith("LDKLocked") or struct_name.startswith("LDKReadOnly"): out_opaque_struct_human += (" implements AutoCloseable") @@ -1247,6 +1418,9 @@ import javax.annotation.Nullable; java_hu_struct += "\tprotected void finalize() throws Throwable {\n" java_hu_struct += "\t\tif (ptr != 0) { bindings." + struct_name.replace("LDK","") + "_free(ptr); } super.finalize();\n" java_hu_struct += "\t}\n\n" + java_hu_struct += "\tprotected void force_free() {\n" # Used by NioPeerHandler + java_hu_struct += "\t\tif (ptr != 0) { bindings." 
+ struct_name.replace("LDK","") + "_free(ptr); ptr = 0; }\n" + java_hu_struct += "\t}\n\n" java_hu_struct += "\tstatic " + human_ty + " constr_from_ptr(long ptr) {\n" java_hu_struct += "\t\tif (bindings." + struct_name.replace("LDK", "") + "_is_ok(ptr)) {\n" java_hu_struct += "\t\t\treturn new " + human_ty + "_OK(null, ptr);\n"