From: Matt Corallo Date: Wed, 18 Aug 2021 21:51:28 +0000 (+0000) Subject: Differentiate `inner` pointers representing `None` and `Some(ZST)` X-Git-Tag: v0.0.101.1~2^2~4 X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=ldk-c-bindings;a=commitdiff_plain;h=9a960fb8c9ab355628b08a4c43fe1241caa2fa77 Differentiate `inner` pointers representing `None` and `Some(ZST)` For zero-sized types (ZSTs), Rust's `Box::into_raw(Box::new(ZST {}))` returns `1usize as *mut ZST`, which confuses our Java bindings, which check for `None` by checking if `inner < 1024`. While we could convert the Java bindings to check for `inner == 0`, the magic value for ZST pointers is not, to my knowledge, an ABI guarantee Rust provides. Instead, we add an offset to the `inner` pointers to push them past the zero page for ZSTs, taking this opportunity to clean up some of our pointer conversion and push them through a common set of utility functions. We also add testing infrastructure to add similar offsets to non-ZSTs to get good test coverage of the offset addition and removal, though Rust should largely be ignoring pointer values for ZSTs anyway so there should be little risk in anything going wrong here.
--- diff --git a/c-bindings-gen/src/blocks.rs b/c-bindings-gen/src/blocks.rs index 9c40441..2bbe298 100644 --- a/c-bindings-gen/src/blocks.rs +++ b/c-bindings-gen/src/blocks.rs @@ -697,7 +697,7 @@ pub fn write_method_call_params(w: &mut W, sig: &syn::Signatu write!(w, "ret").unwrap(); } else if !to_c && self_segs_iter.is_some() && self_segs_iter.unwrap().next().is_none() { // If we're returning "Self" (and not "Self::X"), just do it manually - write!(w, "{} {{ inner: Box::into_raw(Box::new(ret)), is_owned: true }}", this_type).unwrap(); + write!(w, "{} {{ inner: ObjOps::heap_alloc(ret), is_owned: true }}", this_type).unwrap(); } else if to_c { let new_var = types.write_from_c_conversion_new_var(w, &format_ident!("ret"), rtype, generics); if new_var { diff --git a/c-bindings-gen/src/main.rs b/c-bindings-gen/src/main.rs index 2f94e43..ce69294 100644 --- a/c-bindings-gen/src/main.rs +++ b/c-bindings-gen/src/main.rs @@ -563,7 +563,7 @@ fn writeln_opaque(w: &mut W, ident: &syn::Ident, struct_name: writeln!(w, "}}\n").unwrap(); writeln!(w, "impl Drop for {} {{\n\tfn drop(&mut self) {{", struct_name).unwrap(); writeln!(w, "\t\tif self.is_owned && !<*mut native{}>::is_null(self.inner) {{", ident).unwrap(); - writeln!(w, "\t\t\tlet _ = unsafe {{ Box::from_raw(self.inner) }};\n\t\t}}\n\t}}\n}}").unwrap(); + writeln!(w, "\t\t\tlet _ = unsafe {{ Box::from_raw(ObjOps::untweak_ptr(self.inner)) }};\n\t\t}}\n\t}}\n}}").unwrap(); writeln!(w, "/// Frees any resources used by the {}, if is_owned is set and inner is non-NULL.", struct_name).unwrap(); writeln!(w, "#[no_mangle]\npub extern \"C\" fn {}_free(this_obj: {}) {{ }}", struct_name, struct_name).unwrap(); writeln!(w, "#[allow(unused)]").unwrap(); @@ -571,11 +571,17 @@ fn writeln_opaque(w: &mut W, ident: &syn::Ident, struct_name: writeln!(w, "extern \"C\" fn {}_free_void(this_ptr: *mut c_void) {{", struct_name).unwrap(); writeln!(w, "\tunsafe {{ let _ = Box::from_raw(this_ptr as *mut native{}); }}\n}}", struct_name).unwrap(); 
writeln!(w, "#[allow(unused)]").unwrap(); - writeln!(w, "/// When moving out of the pointer, we have to ensure we aren't a reference, this makes that easy").unwrap(); writeln!(w, "impl {} {{", struct_name).unwrap(); + writeln!(w, "\tpub(crate) fn get_native_ref(&self) -> &'static native{} {{", struct_name).unwrap(); + writeln!(w, "\t\tunsafe {{ &*ObjOps::untweak_ptr(self.inner) }}").unwrap(); + writeln!(w, "\t}}").unwrap(); + writeln!(w, "\tpub(crate) fn get_native_mut_ref(&self) -> &'static mut native{} {{", struct_name).unwrap(); + writeln!(w, "\t\tunsafe {{ &mut *ObjOps::untweak_ptr(self.inner) }}").unwrap(); + writeln!(w, "\t}}").unwrap(); + writeln!(w, "\t/// When moving out of the pointer, we have to ensure we aren't a reference, this makes that easy").unwrap(); writeln!(w, "\tpub(crate) fn take_inner(mut self) -> *mut native{} {{", struct_name).unwrap(); writeln!(w, "\t\tassert!(self.is_owned);").unwrap(); - writeln!(w, "\t\tlet ret = self.inner;").unwrap(); + writeln!(w, "\t\tlet ret = ObjOps::untweak_ptr(self.inner);").unwrap(); writeln!(w, "\t\tself.inner = std::ptr::null_mut();").unwrap(); writeln!(w, "\t\tret").unwrap(); writeln!(w, "\t}}\n}}").unwrap(); @@ -620,7 +626,7 @@ fn writeln_struct<'a, 'b, W: std::io::Write>(w: &mut W, s: &'a syn::ItemStruct, writeln_arg_docs(w, &field.attrs, "", types, Some(&gen_types), vec![].drain(..), Some(&ref_type)); write!(w, "#[no_mangle]\npub extern \"C\" fn {}_get_{}(this_ptr: &{}) -> ", struct_name, ident, struct_name).unwrap(); types.write_c_type(w, &ref_type, Some(&gen_types), true); - write!(w, " {{\n\tlet mut inner_val = &mut unsafe {{ &mut *this_ptr.inner }}.{};\n\t", ident).unwrap(); + write!(w, " {{\n\tlet mut inner_val = &mut this_ptr.get_native_mut_ref().{};\n\t", ident).unwrap(); let local_var = types.write_to_c_conversion_new_var(w, &format_ident!("inner_val"), &ref_type, Some(&gen_types), true); if local_var { write!(w, "\n\t").unwrap(); } types.write_to_c_conversion_inline_prefix(w, &ref_type, 
Some(&gen_types), true); @@ -636,7 +642,7 @@ fn writeln_struct<'a, 'b, W: std::io::Write>(w: &mut W, s: &'a syn::ItemStruct, write!(w, ") {{\n\t").unwrap(); let local_var = types.write_from_c_conversion_new_var(w, &format_ident!("val"), &field.ty, Some(&gen_types)); if local_var { write!(w, "\n\t").unwrap(); } - write!(w, "unsafe {{ &mut *this_ptr.inner }}.{} = ", ident).unwrap(); + write!(w, "unsafe {{ &mut *ObjOps::untweak_ptr(this_ptr.inner) }}.{} = ", ident).unwrap(); types.write_from_c_conversion_prefix(w, &field.ty, Some(&gen_types)); write!(w, "val").unwrap(); types.write_from_c_conversion_suffix(w, &field.ty, Some(&gen_types)); @@ -662,7 +668,7 @@ fn writeln_struct<'a, 'b, W: std::io::Write>(w: &mut W, s: &'a syn::ItemStruct, write!(w, "\n\t").unwrap(); } } - writeln!(w, "{} {{ inner: Box::into_raw(Box::new(native{} {{", struct_name, s.ident).unwrap(); + writeln!(w, "{} {{ inner: ObjOps::heap_alloc(native{} {{", struct_name, s.ident).unwrap(); for field in fields.named.iter() { write!(w, "\t\t{}: ", field.ident.as_ref().unwrap()).unwrap(); types.write_from_c_conversion_prefix(w, &field.ty, Some(&gen_types)); @@ -670,7 +676,7 @@ fn writeln_struct<'a, 'b, W: std::io::Write>(w: &mut W, s: &'a syn::ItemStruct, types.write_from_c_conversion_suffix(w, &field.ty, Some(&gen_types)); writeln!(w, ",").unwrap(); } - writeln!(w, "\t}})), is_owned: true }}\n}}").unwrap(); + writeln!(w, "\t}}), is_owned: true }}\n}}").unwrap(); } } } @@ -764,7 +770,7 @@ fn writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ // type-conversion logic without actually knowing the concrete native type. 
writeln!(w, "impl From for crate::{} {{", ident, full_trait_path).unwrap(); writeln!(w, "\tfn from(obj: native{}) -> Self {{", ident).unwrap(); - writeln!(w, "\t\tlet mut rust_obj = {} {{ inner: Box::into_raw(Box::new(obj)), is_owned: true }};", ident).unwrap(); + writeln!(w, "\t\tlet mut rust_obj = {} {{ inner: ObjOps::heap_alloc(obj), is_owned: true }};", ident).unwrap(); writeln!(w, "\t\tlet mut ret = {}_as_{}(&rust_obj);", ident, trait_obj.ident).unwrap(); writeln!(w, "\t\t// We want to free rust_obj when ret gets drop()'d, not rust_obj, so wipe rust_obj's pointer and set ret's free() fn").unwrap(); writeln!(w, "\t\trust_obj.inner = std::ptr::null_mut();").unwrap(); @@ -775,7 +781,7 @@ fn writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ writeln!(w, "/// This copies the `inner` pointer in this_arg and thus the returned {} must be freed before this_arg is", trait_obj.ident).unwrap(); write!(w, "#[no_mangle]\npub extern \"C\" fn {}_as_{}(this_arg: &{}) -> crate::{} {{\n", ident, trait_obj.ident, ident, full_trait_path).unwrap(); writeln!(w, "\tcrate::{} {{", full_trait_path).unwrap(); - writeln!(w, "\t\tthis_arg: unsafe {{ (*this_arg).inner as *mut c_void }},").unwrap(); + writeln!(w, "\t\tthis_arg: unsafe {{ ObjOps::untweak_ptr((*this_arg).inner) as *mut c_void }},").unwrap(); writeln!(w, "\t\tfree: None,").unwrap(); macro_rules! 
write_meth { @@ -826,7 +832,7 @@ fn writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ (s, t) => { if let Some(supertrait_obj) = types.crate_types.traits.get(s) { writeln!(w, "\t\t{}: crate::{} {{", t, s).unwrap(); - writeln!(w, "\t\t\tthis_arg: unsafe {{ (*this_arg).inner as *mut c_void }},").unwrap(); + writeln!(w, "\t\t\tthis_arg: unsafe {{ ObjOps::untweak_ptr((*this_arg).inner) as *mut c_void }},").unwrap(); writeln!(w, "\t\t\tfree: None,").unwrap(); for item in supertrait_obj.items.iter() { match item { @@ -941,7 +947,7 @@ fn writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ } else if path_matches_nongeneric(&trait_path.1, &["Default"]) { writeln!(w, "/// Creates a \"default\" {}. See struct and individual field documentaiton for details on which values are used.", ident).unwrap(); write!(w, "#[must_use]\n#[no_mangle]\npub extern \"C\" fn {}_default() -> {} {{\n", ident, ident).unwrap(); - write!(w, "\t{} {{ inner: Box::into_raw(Box::new(Default::default())), is_owned: true }}\n", ident).unwrap(); + write!(w, "\t{} {{ inner: ObjOps::heap_alloc(Default::default()), is_owned: true }}\n", ident).unwrap(); write!(w, "}}\n").unwrap(); } else if path_matches_nongeneric(&trait_path.1, &["core", "cmp", "PartialEq"]) { } else if path_matches_nongeneric(&trait_path.1, &["core", "cmp", "Eq"]) { @@ -996,7 +1002,7 @@ fn writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ writeln!(w, "\tfn clone(&self) -> Self {{").unwrap(); writeln!(w, "\t\tSelf {{").unwrap(); writeln!(w, "\t\t\tinner: if <*mut native{}>::is_null(self.inner) {{ std::ptr::null_mut() }} else {{", ident).unwrap(); - writeln!(w, "\t\t\t\tBox::into_raw(Box::new(unsafe {{ &*self.inner }}.clone())) }},").unwrap(); + writeln!(w, "\t\t\t\tObjOps::heap_alloc(unsafe {{ &*ObjOps::untweak_ptr(self.inner) }}.clone()) }},").unwrap(); writeln!(w, "\t\t\tis_owned: true,").unwrap(); writeln!(w, "\t\t}}\n\t}}\n}}").unwrap(); writeln!(w, "#[allow(unused)]").unwrap(); @@ -1093,9 +1099,9 @@ fn 
writeln_impl(w: &mut W, i: &syn::ItemImpl, types: &mut Typ if takes_owned_self { write!(w, "(*unsafe {{ Box::from_raw(this_arg.take_inner()) }}).{}(", m.sig.ident).unwrap(); } else if takes_mut_self { - write!(w, "unsafe {{ &mut (*(this_arg.inner as *mut native{})) }}.{}(", ident, m.sig.ident).unwrap(); + write!(w, "unsafe {{ &mut (*ObjOps::untweak_ptr(this_arg.inner as *mut native{})) }}.{}(", ident, m.sig.ident).unwrap(); } else { - write!(w, "unsafe {{ &*this_arg.inner }}.{}(", m.sig.ident).unwrap(); + write!(w, "unsafe {{ &*ObjOps::untweak_ptr(this_arg.inner) }}.{}(", m.sig.ident).unwrap(); } }, _ => unimplemented!(), diff --git a/c-bindings-gen/src/types.rs b/c-bindings-gen/src/types.rs index d19897d..d17148f 100644 --- a/c-bindings-gen/src/types.rs +++ b/c-bindings-gen/src/types.rs @@ -1339,9 +1339,9 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { let is_inner_ref = if let Some(syn::Type::Reference(_)) = single_contained { true } else { false }; if is_ref { return Some(("if ", vec![ - (".is_none() { std::ptr::null() } else { ".to_owned(), + (".is_none() { std::ptr::null() } else { ObjOps::nonnull_ptr_to_inner(".to_owned(), format!("({}{}.unwrap())", var_access, if is_inner_ref { "" } else { ".as_ref()" })) - ], " }", ContainerPrefixLocation::OutsideConv)); + ], ") }", ContainerPrefixLocation::OutsideConv)); } else { return Some(("if ", vec![ (".is_none() { std::ptr::null_mut() } else { ".to_owned(), format!("({}.unwrap())", var_access)) @@ -1866,14 +1866,14 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { DeclType::MirroredEnum => write!(w, "crate::{}::native_into(", decl_path).unwrap(), DeclType::EnumIgnored|DeclType::StructImported if is_ref && ptr_for_ref && from_ptr => write!(w, "crate::{} {{ inner: unsafe {{ (", decl_path).unwrap(), - DeclType::EnumIgnored|DeclType::StructImported if is_ref && ptr_for_ref => - write!(w, "crate::{} {{ inner: unsafe {{ ( (&(*", decl_path).unwrap(), - DeclType::EnumIgnored|DeclType::StructImported if is_ref => - write!(w, "&crate::{} 
{{ inner: unsafe {{ (", decl_path).unwrap(), + DeclType::EnumIgnored|DeclType::StructImported if is_ref => { + if !ptr_for_ref { write!(w, "&").unwrap(); } + write!(w, "crate::{} {{ inner: unsafe {{ ObjOps::nonnull_ptr_to_inner((", decl_path).unwrap() + }, DeclType::EnumIgnored|DeclType::StructImported if !is_ref && from_ptr => write!(w, "crate::{} {{ inner: ", decl_path).unwrap(), DeclType::EnumIgnored|DeclType::StructImported if !is_ref => - write!(w, "crate::{} {{ inner: Box::into_raw(Box::new(", decl_path).unwrap(), + write!(w, "crate::{} {{ inner: ObjOps::heap_alloc(", decl_path).unwrap(), DeclType::Trait(_) if is_ref => write!(w, "").unwrap(), DeclType::Trait(_) if !is_ref => {}, _ => panic!("{:?}", decl_path), @@ -1890,13 +1890,11 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { DeclType::MirroredEnum => write!(w, ")").unwrap(), DeclType::EnumIgnored|DeclType::StructImported if is_ref && ptr_for_ref && from_ptr => write!(w, " as *const _) as *mut _ }}, is_owned: false }}").unwrap(), - DeclType::EnumIgnored|DeclType::StructImported if is_ref && ptr_for_ref => - write!(w, ") as *const _) as *mut _) }}, is_owned: false }}").unwrap(), DeclType::EnumIgnored|DeclType::StructImported if is_ref => - write!(w, " as *const _) as *mut _ }}, is_owned: false }}").unwrap(), + write!(w, " as *const _) as *mut _) }}, is_owned: false }}").unwrap(), DeclType::EnumIgnored|DeclType::StructImported if !is_ref && from_ptr => write!(w, ", is_owned: true }}").unwrap(), - DeclType::EnumIgnored|DeclType::StructImported if !is_ref => write!(w, ")), is_owned: true }}").unwrap(), + DeclType::EnumIgnored|DeclType::StructImported if !is_ref => write!(w, "), is_owned: true }}").unwrap(), DeclType::Trait(_) if is_ref => {}, DeclType::Trait(_) => { // This is used when we're converting a concrete Rust type into a C trait @@ -1912,13 +1910,11 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { self.write_to_c_conversion_inline_suffix_inner(w, t, generics, false, ptr_for_ref, false); } - fn 
write_from_c_conversion_prefix_inner(&self, w: &mut W, t: &syn::Type, generics: Option<&GenericTypes>, is_ref: bool, ptr_for_ref: bool) { + fn write_from_c_conversion_prefix_inner(&self, w: &mut W, t: &syn::Type, generics: Option<&GenericTypes>, is_ref: bool, _ptr_for_ref: bool) { self.write_conversion_inline_intern(w, t, generics, is_ref, false, false, "() /*", true, |_, _| "&local_".to_owned(), |a, b, _c| self.from_c_conversion_prefix_from_path(a, b), - |w, decl_type, _full_path, is_ref, is_mut| match decl_type { - DeclType::StructImported if is_ref && ptr_for_ref => write!(w, "unsafe {{ &*(*").unwrap(), - DeclType::StructImported if is_mut && is_ref => write!(w, "unsafe {{ &mut *").unwrap(), - DeclType::StructImported if is_ref => write!(w, "unsafe {{ &*").unwrap(), + |w, decl_type, _full_path, is_ref, _is_mut| match decl_type { + DeclType::StructImported if is_ref => write!(w, "").unwrap(), DeclType::StructImported if !is_ref => write!(w, "*unsafe {{ Box::from_raw(").unwrap(), DeclType::MirroredEnum if is_ref => write!(w, "&").unwrap(), DeclType::MirroredEnum => {}, @@ -1938,9 +1934,10 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { (true, Some(_)) => unreachable!(), }, |a, b, _c| self.from_c_conversion_suffix_from_path(a, b), - |w, decl_type, _full_path, is_ref, _is_mut| match decl_type { - DeclType::StructImported if is_ref && ptr_for_ref => write!(w, ").inner }}").unwrap(), - DeclType::StructImported if is_ref => write!(w, ".inner }}").unwrap(), + |w, decl_type, _full_path, is_ref, is_mut| match decl_type { + DeclType::StructImported if is_ref && ptr_for_ref => write!(w, "XXX unimplemented").unwrap(), + DeclType::StructImported if is_mut && is_ref => write!(w, ".get_native_mut_ref()").unwrap(), + DeclType::StructImported if is_ref => write!(w, ".get_native_ref()").unwrap(), DeclType::StructImported if !is_ref => write!(w, ".take_inner()) }}").unwrap(), DeclType::MirroredEnum if is_ref => write!(w, ".to_native()").unwrap(), DeclType::MirroredEnum => write!(w, 
".into_native()").unwrap(), @@ -1961,7 +1958,7 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { } else { None } }, |w, decl_type, _full_path, is_ref, _is_mut| match decl_type { - DeclType::StructImported if !is_ref => write!(w, "unsafe {{ &*").unwrap(), + DeclType::StructImported if !is_ref => write!(w, "").unwrap(), _ => unimplemented!(), }); } @@ -1975,7 +1972,7 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { }, |a, b, _c| self.from_c_conversion_suffix_from_path(a, b), |w, decl_type, _full_path, is_ref, _is_mut| match decl_type { - DeclType::StructImported if !is_ref => write!(w, ".inner }}").unwrap(), + DeclType::StructImported if !is_ref => write!(w, ".get_native_ref()").unwrap(), _ => unimplemented!(), }); } @@ -2064,14 +2061,14 @@ impl<'a, 'c: 'a> TypeResolver<'a, 'c> { if prefix_location == ContainerPrefixLocation::PerConv { var_prefix(w, conv_ty, generics, is_ref && ty_has_inner, ptr_for_ref, false); } else if !is_ref && !needs_ref_map && to_c && only_contained_has_inner { - write!(w, "Box::into_raw(Box::new(").unwrap(); + write!(w, "ObjOps::heap_alloc(").unwrap(); } write!(w, "{}{}", if contains_slice { "local_" } else { "" }, if new_var { new_var_name } else { var_access }).unwrap(); if prefix_location == ContainerPrefixLocation::PerConv { var_suffix(w, conv_ty, generics, is_ref && ty_has_inner, ptr_for_ref, false); } else if !is_ref && !needs_ref_map && to_c && only_contained_has_inner { - write!(w, "))").unwrap(); + write!(w, ")").unwrap(); } write!(w, " }}").unwrap(); } diff --git a/lightning-c-bindings/src/c_types/mod.rs b/lightning-c-bindings/src/c_types/mod.rs index 2648a9b..09c8a51 100644 --- a/lightning-c-bindings/src/c_types/mod.rs +++ b/lightning-c-bindings/src/c_types/mod.rs @@ -527,3 +527,51 @@ impl TakePointer<*mut T> for *mut T { ret } } + + +pub(crate) mod ObjOps { + #[inline] + #[must_use = "returns new dangling pointer"] + pub(crate) fn heap_alloc(obj: T) -> *mut T { + let ptr = Box::into_raw(Box::new(obj)); + nonnull_ptr_to_inner(ptr) + } + 
#[inline] + pub(crate) fn nonnull_ptr_to_inner(ptr: *const T) -> *mut T { + if core::mem::size_of::() == 0 { + // We map `None::` as `T { inner: null, .. }` which works great for all + // non-Zero-Sized-Types `T`. + // For ZSTs, we need to differentiate between null implying `None` and null implying + // `Some` with no allocation. + // Thus, for ZSTs, we add one (usually) page here, which should always be aligned. + // Note that this relies on undefined behavior! A pointer to NULL may be valid, but a + // pointer to NULL + 4096 is almost certainly not. That said, Rust's existing use of + // `(*mut T)1` for the pointer we're adding to is also not defined, so we should be + // fine. + // Note that we add 4095 here as at least the Java client assumes that the low bit on + // any heap pointer is 0, which is generally provided by malloc, but which is not true + // for ZSTs "allocated" by `Box::new`. + debug_assert_eq!(ptr as usize, 1); + unsafe { (ptr as *mut T).cast::().add(4096 - 1).cast::() } + } else { + // In order to get better test coverage, also increment non-ZST pointers with + // --cfg=test_mod_pointers, which is set in genbindings.sh for debug builds. + #[cfg(test_mod_pointers)] + unsafe { (ptr as *mut T).cast::().add(4096).cast::() } + #[cfg(not(test_mod_pointers))] + unsafe { ptr as *mut T } + } + } + #[inline] + /// Invert nonnull_ptr_to_inner + pub(crate) fn untweak_ptr(ptr: *mut T) -> *mut T { + if core::mem::size_of::() == 0 { + unsafe { ptr.cast::().sub(4096 - 1).cast::() } + } else { + #[cfg(test_mod_pointers)] + unsafe { ptr.cast::().sub(4096).cast::() } + #[cfg(not(test_mod_pointers))] + ptr + } + } +}