From: Matt Corallo Date: Sat, 16 Dec 2023 04:39:36 +0000 (+0000) Subject: vectorize X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=4f98fd113f8614578a3ac8a56bfae8759f887424;p=rust-lightning vectorize --- diff --git a/lightning/src/routing/scoring.rs b/lightning/src/routing/scoring.rs index 9f0d903ba..9a9e37b53 100644 --- a/lightning/src/routing/scoring.rs +++ b/lightning/src/routing/scoring.rs @@ -74,6 +74,7 @@ use { core::cell::{RefCell, RefMut, Ref}, crate::sync::{Mutex, MutexGuard}, }; +use crate::util::simd_f32::*; /// We define Score ever-so-slightly differently based on whether we are being built for C bindings /// or not. For users, `LockableScore` must somehow be writeable to disk. For Rust users, this is @@ -1102,37 +1103,81 @@ fn linear_success_probability( fn nonlinear_success_probability( amount_msat: u64, min_liquidity_msat: u64, max_liquidity_msat: u64, capacity_msat: u64, min_zero_implies_no_successes: bool, -) -> (f64, f64) { - debug_assert!(min_liquidity_msat <= amount_msat); - debug_assert!(amount_msat < max_liquidity_msat); - debug_assert!(max_liquidity_msat <= capacity_msat); +) -> (f32, f32) { + let min_max_amt_max_msat = FourF32::new( + min_liquidity_msat as f32, + max_liquidity_msat as f32, + amount_msat as f32, + max_liquidity_msat as f32, + ); + + let capacity = capacity_msat as f32; + let cap_cap_cap_cap = FourF32::new(capacity, capacity, capacity, capacity); + + let zero_zero_den_num = nonlinear_success_probability_finish( + min_max_amt_max_msat, cap_cap_cap_cap, + min_zero_implies_no_successes && min_liquidity_msat == 0, + ); + + let res = zero_zero_den_num.dump(); + (res.3, res.2) +} - let capacity = capacity_msat as f64; - let min = (min_liquidity_msat as f64) / capacity; - let max = (max_liquidity_msat as f64) / capacity; - let amount = (amount_msat as f64) / capacity; +#[inline(always)] +fn bucket_nonlinear_success_probability( + amount_msat: u16, min_liquidity_msat: u16, max_liquidity_msat: u16, capacity_msat: u16, + min_zero_implies_no_successes: bool, +) -> FourF32 { + let min_max_amt_max_msat = FourF32::from_ints( + min_liquidity_msat, + max_liquidity_msat, + amount_msat, + max_liquidity_msat, + ); + + let capacity = capacity_msat as f32; + let cap_cap_cap_cap = FourF32::new(capacity, capacity, capacity, capacity); + + nonlinear_success_probability_finish( + min_max_amt_max_msat, cap_cap_cap_cap, + min_zero_implies_no_successes && min_liquidity_msat == 0, + ) +} - // Assume the channel has a probability density function of (x - 0.5)^2 for values from - // 0 to 1 (where 1 is the channel's full capacity). The success probability given some - // liquidity bounds is thus the integral under the curve from the amount to maximum - // estimated liquidity, divided by the same integral from the minimum to the maximum - // estimated liquidity bounds. - // - // Because the integral from x to y is simply (y - 0.5)^3 - (x - 0.5)^3, we can - // calculate the cumulative density function between the min/max bounds trivially. Note - // that we don't bother to normalize the CDF to total to 1, as it will come out in the - // division of num / den. - let (max_pow, amt_pow, min_pow) = three_f64_pow_3(max - 0.5, amount - 0.5, min - 0.5); - let mut num = max_pow - amt_pow; - let mut den = max_pow - min_pow; - - if min_zero_implies_no_successes && min_liquidity_msat == 0 { - // If we have no knowledge of the channel, scale probability down by ~75% - den *= 21.0; - num *= 16.0; +#[inline(always)] +fn nonlinear_success_probability_finish( + min_max_amt_max_msat: FourF32, cap_cap_cap_cap: FourF32, times_16_on_21: bool, +) -> FourF32 { + #[cfg(debug_assertions)] { + let args = min_max_amt_max_msat.dump(); + debug_assert_eq!(args.1, args.3); + + let cap = cap_cap_cap_cap.dump(); + debug_assert_eq!(cap.0, cap.1); + debug_assert_eq!(cap.0, cap.2); + debug_assert_eq!(cap.0, cap.3); + + debug_assert!(args.0 <= args.2); + debug_assert!(args.2 < args.3); + debug_assert!(args.3 <= cap.0); } - (num, den) + let min_max_amt_max = min_max_amt_max_msat / cap_cap_cap_cap; + + let mhalf_mhalf_mhalf_mhalf = FourF32::new(-0.5f32, -0.5f32, -0.5f32, -0.5f32); + let min_max_amt_max_offset = min_max_amt_max + mhalf_mhalf_mhalf_mhalf; + + let min_max_amt_max_sq = min_max_amt_max_offset * min_max_amt_max_offset; + let min_max_amt_max_pow = min_max_amt_max_sq * min_max_amt_max_offset; + + let mut zero_zero_den_num = min_max_amt_max_pow.hsub(); + + if times_16_on_21 { + let zero_zero_twentyone_sixteen = FourF32::new(0.0f32, 0.0f32, 21.0f32, 16.0f32); + zero_zero_den_num = zero_zero_den_num * zero_zero_twentyone_sixteen; + } + + zero_zero_den_num } @@ -1162,7 +1207,7 @@ fn success_probability( ); // Because our numerator and denominator max out at 0.5^3 we need to multiply them by // quite a large factor to get something useful (ideally in the 2^30 range). - const MILLIONISH: f64 = 1024.0 * 1024.0 * 32.0; + const MILLIONISH: f32 = 1024.0 * 1024.0 * 32.0; let numerator = (num * MILLIONISH) as u64 + 1; let mut denominator = (den * MILLIONISH) as u64 + 1; debug_assert!(numerator <= 1 << 30, "Got large numerator ({}) from float {}.", numerator, num); @@ -1838,7 +1883,7 @@ mod bucketed_history { if total_valid_points_tracked < FULLY_DECAYED.into() { return None; } - let total_points_tracked_float = total_valid_points_tracked as f64; + let total_points_tracked_float = total_valid_points_tracked as f32; let mut cumulative_success_prob_times_billion = 0; let mut cumulative_success_prob_float = 0.0; @@ -1863,11 +1908,11 @@ mod bucketed_history { } let max_bucket_end_pos = BUCKET_START_POS[32 - highest_max_bucket_with_points] - 1; if payment_pos < max_bucket_end_pos { - let (numerator, denominator) = $success_probability(payment_pos as u64, 0, - max_bucket_end_pos as u64, POSITION_TICKS as u64 - 1, true); + let success_probability = $success_probability(payment_pos, 0, + max_bucket_end_pos , POSITION_TICKS - 1, true); let bucket_points = (min_liquidity_offset_history_buckets[0] as u64) * total_max_points; - $accumulate_prob(numerator, denominator, bucket_points); + $accumulate_prob(success_probability, bucket_points); } } @@ -1889,13 +1934,13 @@ mod bucketed_history { // Success probability 0, the payment amount may be above the max liquidity break; } - let (numerator, denominator) = $success_probability(payment_pos as u64, - min_bucket_start_pos as u64, max_bucket_end_pos as u64, - POSITION_TICKS as u64 - 1, true); + let success_probability = $success_probability(payment_pos, + min_bucket_start_pos, max_bucket_end_pos, + POSITION_TICKS - 1, true); // Note that this multiply can only barely not overflow - two 16 bit ints plus // 30 bits is 62 bits. let bucket_points = (*min_bucket as u32) * (*max_bucket as u32); - $accumulate_prob(numerator, denominator, bucket_points as u64); + $accumulate_prob(success_probability, bucket_points as u64); } } } @@ -1903,18 +1948,24 @@ mod bucketed_history { } if params.linear_success_probability { - let mut int_success_prob = |numerator: u64, denominator: u64, bucket_points: u64| { + let success_prob_u64s = |a, b, c, d, e: bool| { + linear_success_probability(a as u64, b as u64, c as u64, d as u64, e) + }; + let mut int_success_prob = |(numerator, denominator), bucket_points: u64| { cumulative_success_prob_times_billion += bucket_points * 1024 * 1024 * 1024 / total_valid_points_tracked * numerator / denominator; +0.0 }; - calculate_probability!(linear_success_probability, int_success_prob); + calculate_probability!(success_prob_u64s, int_success_prob); } else { - let mut float_success_prob = |numerator: f64, denominator: f64, bucket_points: u64| { - cumulative_success_prob_float += (bucket_points as f64) - / total_points_tracked_float * numerator / denominator; + let mut float_success_prob = |zero_zero_den_num: FourF32, bucket_points: u64| { + let zero_zero_total_points = FourF32::new(0.0f32, 0.0f32, total_points_tracked_float, bucket_points as f32); + let zero_zero_rden_rnum = zero_zero_den_num * zero_zero_total_points; + let res = zero_zero_rden_rnum.dump(); + cumulative_success_prob_float += res.3 / res.2; }; - calculate_probability!(nonlinear_success_probability, float_success_prob); + calculate_probability!(bucket_nonlinear_success_probability, float_success_prob); } // Once we've added all 32*32/2 32-bit success points together, we may have up to 42