From 31476ac74b68b8314367313a5a686de5a9417fae Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 16 Dec 2023 17:57:10 +0000 Subject: [PATCH] float unroll non-prob bucket math --- lightning/src/routing/scoring.rs | 39 ++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/lightning/src/routing/scoring.rs b/lightning/src/routing/scoring.rs index 0274aed21..8b35c2e98 100644 --- a/lightning/src/routing/scoring.rs +++ b/lightning/src/routing/scoring.rs @@ -1911,7 +1911,7 @@ mod bucketed_history { let mut cumulative_success_prob_times_billion = 0; let mut cumulative_success_prob_float = 0.0; - let mut cumulative_success_points = 0; + let mut cumulative_success_points = 0.0; macro_rules! calculate_probability { ($success_probability: ident, $accumulate_prob: ident, $payment_pos: ident, $BUCKET_START_POS: ident, $MATH_TY: ty @@ -1950,13 +1950,36 @@ mod bucketed_history { .map(|idx| BUCKET_START_POS[idx] + 1).unwrap_or(0); if payment_pos < int_min_bucket_start_pos { - for (max_idx, max_bucket) in max_liquidity_offset_history_buckets.iter().enumerate().take(32 - min_idx) { - let max_bucket_end_pos = BUCKET_START_POS[31 - max_idx]; - if payment_pos >= max_bucket_end_pos { + let min_bucket_float = *min_bucket as f32; + let min_bucket_simd = FourF32::new( + min_bucket_float, min_bucket_float, min_bucket_float, min_bucket_float + ); + let max_max_idx = 31 - min_idx; + for (idx, chunk) in max_liquidity_offset_history_buckets.chunks(4).enumerate() { + let (max_idx_a, max_idx_b, max_idx_c, max_idx_d) = + (idx * 4, idx * 4 + 1, idx * 4 + 2, idx * 4 + 3); + + let max_bucket_a = chunk[0]; + let mut max_bucket_b = chunk[1]; + let mut max_bucket_c = chunk[2]; + let mut max_bucket_d = chunk[3]; + + let max_bucket_end_pos_a = $BUCKET_START_POS[31 - max_idx_a]; + if $payment_pos >= max_bucket_end_pos_a || max_idx_a > max_max_idx { // Success probability 0, the payment amount may be above the max liquidity break; } - cumulative_success_points += ((*min_bucket as u32) * (*max_bucket as u32)) as u64; + let max_bucket_end_pos_b = $BUCKET_START_POS[31 - max_idx_b]; + if max_idx_b > max_max_idx || $payment_pos >= max_bucket_end_pos_b { max_bucket_b = 0; } + let max_bucket_end_pos_c = $BUCKET_START_POS[31 - max_idx_c]; + if max_idx_c > max_max_idx || $payment_pos >= max_bucket_end_pos_c { max_bucket_c = 0; } + let max_bucket_end_pos_d = $BUCKET_START_POS[31 - max_idx_d]; + if max_idx_d > max_max_idx || $payment_pos >= max_bucket_end_pos_d { max_bucket_d = 0; } + + let buckets = FourF32::from_ints(max_bucket_a, max_bucket_b, max_bucket_c, max_bucket_d); + + let points = min_bucket_simd * buckets; + cumulative_success_points += points.consuming_sum(); } } else { for (max_idx, max_bucket) in max_liquidity_offset_history_buckets.iter().enumerate().take(32 - min_idx) { @@ -2004,12 +2027,8 @@ mod bucketed_history { ); } - // Once we've added all 32*32/2 32-bit success points together, we may have up to 42 - // bits. Thus, we still have > 20 bits left, which we multiply before dividing by - // total_valid_points_tracked. We finally normalize back to billions. - debug_assert!(cumulative_success_points < u64::max_value() / 1024 / 1024); cumulative_success_prob_times_billion += - cumulative_success_points * 1024 * 1024 / total_valid_points_tracked * 1024; + (cumulative_success_points / total_points_tracked_float * (1024.0 * 1024.0 * 1024.0)) as u64; cumulative_success_prob_times_billion += (cumulative_success_prob_float * 1024.0 * 1024.0 * 1024.0) as u64; -- 2.39.5