From 9566c272c8f2109aeee6628d74dc5b3eecb427e1 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Fri, 8 Dec 2023 05:44:32 +0000
Subject: [PATCH] Somewhat optimize the generic `Features::requires_unknown_bits`

It turns out we spend several percent of our routefinding time just
checking if nodes and channels require unknown features
byte-by-byte. While the cost is almost certainly dominated by the
memory read latency, avoiding doing the checks byte-by-byte should
reduce the branch count slightly, which may reduce the overhead.
---
 lightning/src/ln/features.rs | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/lightning/src/ln/features.rs b/lightning/src/ln/features.rs
index 51c608c1a..79bf871de 100644
--- a/lightning/src/ln/features.rs
+++ b/lightning/src/ln/features.rs
@@ -801,15 +801,23 @@ impl Features {
 	pub fn requires_unknown_bits(&self) -> bool {
 		// Bitwise AND-ing with all even bits set except for known features will select required
 		// unknown features.
-		let byte_count = T::KNOWN_FEATURE_MASK.len();
-		self.flags.iter().enumerate().any(|(i, &byte)| {
-			let unknown_features = if i < byte_count {
-				!T::KNOWN_FEATURE_MASK[i]
-			} else {
-				0b11_11_11_11
-			};
-			(byte & (ANY_REQUIRED_FEATURES_MASK & unknown_features)) != 0
-		})
+		let mut known_chunks = T::KNOWN_FEATURE_MASK.chunks(8);
+		for chunk in self.flags.chunks(8) {
+			let mut flag_bytes = [0; 8];
+			flag_bytes[..chunk.len()].copy_from_slice(&chunk);
+			let flag_int = u64::from_le_bytes(flag_bytes);
+
+			let known_chunk = known_chunks.next().unwrap_or(&[0; 0]);
+			let mut known_bytes = [0; 8];
+			known_bytes[..known_chunk.len()].copy_from_slice(&known_chunk);
+			let known_int = u64::from_le_bytes(known_bytes);
+
+			const REQ_MASK: u64 = u64::from_le_bytes([ANY_REQUIRED_FEATURES_MASK; 8]);
+			if flag_int & (REQ_MASK & !known_int) != 0 {
+				return true;
+			}
+		}
+		false
 	}
 
 	pub(crate) fn supports_unknown_bits(&self) -> bool {
-- 
2.39.5