// While RouteGraphNode can be laid out with fewer bytes, performance appears to be improved
// substantially when it is laid out at exactly 64 bytes.
-//
-// Thus, we use `#[repr(C)]` on the struct to force a suboptimal layout and check that it stays 64
-// bytes here.
-#[cfg(any(ldk_bench, not(any(test, fuzzing))))]
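+// The two constants below enforce this at compile time: the first fails to evaluate (via
+// underflow) if `RouteGraphNode` grows beyond 64 bytes, the second if it shrinks below 64.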
const _GRAPH_NODE_SMALL: usize = 64 - core::mem::size_of::<RouteGraphNode>();
-#[cfg(any(ldk_bench, not(any(test, fuzzing))))]
const _GRAPH_NODE_FIXED_SIZE: usize = core::mem::size_of::<RouteGraphNode>() - 64;
/// A [`CandidateRouteHop::FirstHop`] entry.
}
}
- #[inline]
+ #[inline(always)]
fn src_node_counter(&self) -> u32 {
match self {
CandidateRouteHop::FirstHop { payer_node_counter, .. } => *payer_node_counter,
}
}
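+// On x86 targets built with SSE, hint the CPU (via `prefetcht0`) to pull the cache line
+// containing `ptr` into all cache levels ahead of an upcoming read.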
+#[cfg(target_feature = "sse")]
+#[inline(always)]
+unsafe fn do_prefetch<T>(ptr: *const T) {
+ #[cfg(target_arch = "x86_64")]
+ use core::arch::x86_64::*;
+ #[cfg(target_arch = "x86")]
+ use core::arch::x86::*;
+ _mm_prefetch(ptr as *const i8, _MM_HINT_T0);
+}
+
+#[cfg(not(target_feature = "sse"))]
+#[inline(always)]
+unsafe fn do_prefetch<T>(_: *const T) {}
+
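+/// Best-effort hint to the CPU that the first cache line of `*t` is about to be read. This is a
+/// no-op unless the target enables SSE.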
+#[inline(always)]
+fn prefetch_first_byte<T>(t: &T) {
+ // While x86's prefetch should be safe even on an invalid memory address (the ISA says the
+ // "PREFETCHh instruction is merely a hint and does not affect program behavior"), we take an
+ // extra step towards safety here by requiring a reference rather than a raw pointer, as
+ // references to non-zero-sized types always point at valid memory.
+ //
+ // A reference to a zero-sized type, however, is never dereferenced and may be a dangling,
+ // made-up address rather than one backed by real memory, so we explicitly skip the prefetch
+ // in that case.
+ if ::core::mem::size_of::<T>() != 0 {
+ unsafe { do_prefetch(t) }
+ }
+}
+
/// It's useful to keep track of the hops associated with the fees required to use them,
/// so that we can choose cheaper paths (as per Dijkstra's algorithm).
/// Fee values should be updated only in the context of the whole path, see update_value_and_recompute_fees.
/// These fee values are useful to choose hops as we traverse the graph "payee-to-payer".
#[derive(Clone)]
-#[repr(C)] // Force fields to appear in the order we define them.
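+// Force 128-byte alignment so that each `dist` entry (an `Option<PathBuildingHop>`) sits on
+// exactly two cache lines rather than potentially straddling three; the size checks below the
+// struct pin it at exactly 128 bytes.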
+#[repr(align(128))]
struct PathBuildingHop<'a> {
candidate: CandidateRouteHop<'a>,
target_node_counter: Option<u32>,
/// channel scoring.
path_penalty_msat: u64,
- // The last 16 bytes are on the next cache line by default in glibc's malloc. Thus, we should
- // only place fields which are not hot there. Luckily, the next three fields are only read if
- // we end up on the selected path, and only in the final path layout phase, so we don't care
- // too much if reading them is slow.
-
fee_msat: u64,
/// All the fees paid *after* this channel on the way to the destination
value_contribution_msat: u64,
}
-// Checks that the entries in the `find_route` `dist` map fit in (exactly) two standard x86-64
-// cache lines. Sadly, they're not guaranteed to actually lie on a cache line (and in fact,
-// generally won't, because at least glibc's malloc will align to a nice, big, round
-// boundary...plus 16), but at least it will reduce the amount of data we'll need to load.
-//
-// Note that these assertions only pass on somewhat recent rustc, and thus are gated on the
-// ldk_bench flag.
-#[cfg(ldk_bench)]
-const _NODE_MAP_SIZE_TWO_CACHE_LINES: usize = 128 - core::mem::size_of::<(NodeId, PathBuildingHop)>();
-#[cfg(ldk_bench)]
-const _NODE_MAP_SIZE_EXACTLY_CACHE_LINES: usize = core::mem::size_of::<(NodeId, PathBuildingHop)>() - 128;
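+// Check that an `Option<PathBuildingHop>` (the entry type in the `find_route` `dist` map) fits in
+// exactly two standard 64-byte x86-64 cache lines: the first constant fails to compile (via
+// underflow) if it is any larger, the second if it is any smaller.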
+const _NODE_MAP_SIZE_TWO_CACHE_LINES: usize = 128 - core::mem::size_of::<Option<PathBuildingHop>>();
+const _NODE_MAP_SIZE_EXACTLY_TWO_CACHE_LINES: usize = core::mem::size_of::<Option<PathBuildingHop>>() - 128;
impl<'a> core::fmt::Debug for PathBuildingHop<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> Result<(), core::fmt::Error> {
// if the amount being transferred over this path is lower.
// We do this for now, but this is a subject for removal.
if let Some(mut available_value_contribution_msat) = htlc_maximum_msat.checked_sub($next_hops_fee_msat) {
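+ // Evaluate the candidate's CLTV delta and HTLC minimum once, up front, rather than
+ // re-calling the accessors at each use below.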
+ let cltv_expiry_delta = $candidate.cltv_expiry_delta();
+ let htlc_minimum_msat = $candidate.htlc_minimum_msat();
let used_liquidity_msat = used_liquidities
.get(&$candidate.id())
.map_or(0, |used_liquidity_msat| {
.checked_sub(2*MEDIAN_HOP_CLTV_EXPIRY_DELTA)
.unwrap_or(payment_params.max_total_cltv_expiry_delta - final_cltv_expiry_delta);
let hop_total_cltv_delta = ($next_hops_cltv_delta as u32)
- .saturating_add($candidate.cltv_expiry_delta());
+ .saturating_add(cltv_expiry_delta);
let exceeds_cltv_delta_limit = hop_total_cltv_delta > max_total_cltv_expiry_delta;
let value_contribution_msat = cmp::min(available_value_contribution_msat, $next_hops_value_contribution);
None => unreachable!(),
};
#[allow(unused_comparisons)] // $next_hops_path_htlc_minimum_msat is 0 in some calls so rustc complains
- let over_path_minimum_msat = amount_to_transfer_over_msat >= $candidate.htlc_minimum_msat() &&
+ let over_path_minimum_msat = amount_to_transfer_over_msat >= htlc_minimum_msat &&
amount_to_transfer_over_msat >= $next_hops_path_htlc_minimum_msat;
#[allow(unused_comparisons)] // $next_hops_path_htlc_minimum_msat is 0 in some calls so rustc complains
let may_overpay_to_meet_path_minimum_msat =
- ((amount_to_transfer_over_msat < $candidate.htlc_minimum_msat() &&
- recommended_value_msat >= $candidate.htlc_minimum_msat()) ||
+ ((amount_to_transfer_over_msat < htlc_minimum_msat &&
+ recommended_value_msat >= htlc_minimum_msat) ||
(amount_to_transfer_over_msat < $next_hops_path_htlc_minimum_msat &&
recommended_value_msat >= $next_hops_path_htlc_minimum_msat));
// payment path (upstream to the payee). To avoid that, we recompute
// path fees knowing the final path contribution after constructing it.
let curr_min = cmp::max(
- $next_hops_path_htlc_minimum_msat, $candidate.htlc_minimum_msat()
+ $next_hops_path_htlc_minimum_msat, htlc_minimum_msat
);
- let path_htlc_minimum_msat = compute_fees_saturating(curr_min, $candidate.fees())
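+ // Likewise, read the candidate's fees and source node counter once ahead of their uses
+ // below.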
+ let candidate_fees = $candidate.fees();
+ let src_node_counter = $candidate.src_node_counter();
+ let path_htlc_minimum_msat = compute_fees_saturating(curr_min, candidate_fees)
.saturating_add(curr_min);
- let dist_entry = &mut dist[$candidate.src_node_counter() as usize];
+ let dist_entry = &mut dist[src_node_counter as usize];
let old_entry = if let Some(hop) = dist_entry {
hop
} else {
if src_node_id != our_node_id {
// Note that `u64::max_value` means we'll always fail the
// `old_entry.total_fee_msat > total_fee_msat` check below
- hop_use_fee_msat = compute_fees_saturating(amount_to_transfer_over_msat, $candidate.fees());
+ hop_use_fee_msat = compute_fees_saturating(amount_to_transfer_over_msat, candidate_fees);
total_fee_msat = total_fee_msat.saturating_add(hop_use_fee_msat);
}
if !features.requires_unknown_bits() {
for chan_id in $node.channels.iter() {
let chan = network_channels.get(chan_id).unwrap();
+ // Calling chan.as_directed_to, below, will require access to memory two
+ // cache lines away from chan.features (in the form of `one_to_two` or
+ // `two_to_one`, depending on our direction). Thus, while we're looking at
+ // feature flags, go ahead and prefetch that memory, reducing the price we
+ // pay for it later.
+ prefetch_first_byte(&chan.one_to_two);
+ prefetch_first_byte(&chan.two_to_one);
if !chan.features.requires_unknown_bits() {
if let Some((directed_channel, source)) = chan.as_directed_to(&$node_id) {
if first_hops.is_none() || *source != our_node_id {