From 1bd35367d8495d9a8b90f5de0f02b68014523e3b Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Tue, 25 Oct 2022 03:50:07 +0000 Subject: [PATCH] Add a new `IndexedMap` type and use it in network graph storage Our network graph has to be iterable in a deterministic order and with the ability to iterate over a specific range. Thus, historically, we've used a `BTreeMap` to do the iteration. This is fine, except our map needs to also provide high performance lookups in order to make route-finding fast. Sadly, `BTreeMap`s are quite slow due to the branching penalty. Here we replace the `BTreeMap`s in the scorer with a dummy wrapper. In the next commit the internals thereof will be replaced with a `HashMap`-based implementation. --- lightning/src/routing/gossip.rs | 56 +++++------ lightning/src/routing/router.rs | 12 +-- lightning/src/util/indexed_map.rs | 159 ++++++++++++++++++++++++++++++ lightning/src/util/mod.rs | 2 + 4 files changed, 195 insertions(+), 34 deletions(-) create mode 100644 lightning/src/util/indexed_map.rs diff --git a/lightning/src/routing/gossip.rs b/lightning/src/routing/gossip.rs index a12b3d563..24a21d795 100644 --- a/lightning/src/routing/gossip.rs +++ b/lightning/src/routing/gossip.rs @@ -32,11 +32,11 @@ use crate::util::logger::{Logger, Level}; use crate::util::events::{MessageSendEvent, MessageSendEventsProvider}; use crate::util::scid_utils::{block_from_scid, scid_from_parts, MAX_SCID_BLOCK}; use crate::util::string::PrintableString; +use crate::util::indexed_map::{IndexedMap, Entry as IndexedMapEntry}; use crate::io; use crate::io_extras::{copy, sink}; use crate::prelude::*; -use alloc::collections::{BTreeMap, btree_map::Entry as BtreeEntry}; use core::{cmp, fmt}; use crate::sync::{RwLock, RwLockReadGuard}; #[cfg(feature = "std")] @@ -133,8 +133,8 @@ pub struct NetworkGraph where L::Target: Logger { genesis_hash: BlockHash, logger: L, // Lock order: channels -> nodes - channels: RwLock>, - nodes: RwLock>, + channels: RwLock>, + nodes: RwLock>, // Lock order: removed_channels -> removed_nodes // // NOTE: In the following `removed_*` maps, we use seconds since UNIX epoch to track time instead @@ -158,8 +158,8 @@ pub struct NetworkGraph where L::Target: Logger { /// A read-only view of [`NetworkGraph`]. pub struct ReadOnlyNetworkGraph<'a> { - channels: RwLockReadGuard<'a, BTreeMap>, - nodes: RwLockReadGuard<'a, BTreeMap>, + channels: RwLockReadGuard<'a, IndexedMap>, + nodes: RwLockReadGuard<'a, IndexedMap>, } /// Update to the [`NetworkGraph`] based on payment failure information conveyed via the Onion @@ -1131,13 +1131,13 @@ impl Writeable for NetworkGraph where L::Target: Logger { self.genesis_hash.write(writer)?; let channels = self.channels.read().unwrap(); (channels.len() as u64).write(writer)?; - for (ref chan_id, ref chan_info) in channels.iter() { + for (ref chan_id, ref chan_info) in channels.unordered_iter() { (*chan_id).write(writer)?; chan_info.write(writer)?; } let nodes = self.nodes.read().unwrap(); (nodes.len() as u64).write(writer)?; - for (ref node_id, ref node_info) in nodes.iter() { + for (ref node_id, ref node_info) in nodes.unordered_iter() { node_id.write(writer)?; node_info.write(writer)?; } @@ -1156,14 +1156,14 @@ impl ReadableArgs for NetworkGraph where L::Target: Logger { let genesis_hash: BlockHash = Readable::read(reader)?; let channels_count: u64 = Readable::read(reader)?; - let mut channels: BTreeMap = BTreeMap::new(); + let mut channels = IndexedMap::new(); for _ in 0..channels_count { let chan_id: u64 = Readable::read(reader)?; let chan_info = Readable::read(reader)?; channels.insert(chan_id, chan_info); } let nodes_count: u64 = Readable::read(reader)?; - let mut nodes: BTreeMap = BTreeMap::new(); + let mut nodes = IndexedMap::new(); for _ in 0..nodes_count { let node_id = Readable::read(reader)?; let node_info = Readable::read(reader)?; @@ -1191,11 +1191,11 @@ impl ReadableArgs for NetworkGraph where L::Target: Logger { impl fmt::Display for NetworkGraph where L::Target: Logger { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { writeln!(f, "Network map\n[Channels]")?; - for (key, val) in self.channels.read().unwrap().iter() { + for (key, val) in self.channels.read().unwrap().unordered_iter() { writeln!(f, " {}: {}", key, val)?; } writeln!(f, "[Nodes]")?; - for (&node_id, val) in self.nodes.read().unwrap().iter() { + for (&node_id, val) in self.nodes.read().unwrap().unordered_iter() { writeln!(f, " {}: {}", log_bytes!(node_id.as_slice()), val)?; } Ok(()) @@ -1218,8 +1218,8 @@ impl NetworkGraph where L::Target: Logger { secp_ctx: Secp256k1::verification_only(), genesis_hash, logger, - channels: RwLock::new(BTreeMap::new()), - nodes: RwLock::new(BTreeMap::new()), + channels: RwLock::new(IndexedMap::new()), + nodes: RwLock::new(IndexedMap::new()), last_rapid_gossip_sync_timestamp: Mutex::new(None), removed_channels: Mutex::new(HashMap::new()), removed_nodes: Mutex::new(HashMap::new()), @@ -1252,7 +1252,7 @@ impl NetworkGraph where L::Target: Logger { /// purposes. #[cfg(test)] pub fn clear_nodes_announcement_info(&self) { - for node in self.nodes.write().unwrap().iter_mut() { + for node in self.nodes.write().unwrap().unordered_iter_mut() { node.1.announcement_info = None; } } @@ -1382,7 +1382,7 @@ impl NetworkGraph where L::Target: Logger { let node_id_b = channel_info.node_two.clone(); match channels.entry(short_channel_id) { - BtreeEntry::Occupied(mut entry) => { + IndexedMapEntry::Occupied(mut entry) => { //TODO: because asking the blockchain if short_channel_id is valid is only optional //in the blockchain API, we need to handle it smartly here, though it's unclear //exactly how... @@ -1401,17 +1401,17 @@ impl NetworkGraph where L::Target: Logger { return Err(LightningError{err: "Already have knowledge of channel".to_owned(), action: ErrorAction::IgnoreDuplicateGossip}); } }, - BtreeEntry::Vacant(entry) => { + IndexedMapEntry::Vacant(entry) => { entry.insert(channel_info); } }; for current_node_id in [node_id_a, node_id_b].iter() { match nodes.entry(current_node_id.clone()) { - BtreeEntry::Occupied(node_entry) => { + IndexedMapEntry::Occupied(node_entry) => { node_entry.into_mut().channels.push(short_channel_id); }, - BtreeEntry::Vacant(node_entry) => { + IndexedMapEntry::Vacant(node_entry) => { node_entry.insert(NodeInfo { channels: vec!(short_channel_id), announcement_info: None, @@ -1585,7 +1585,7 @@ impl NetworkGraph where L::Target: Logger { for scid in node.channels.iter() { if let Some(chan_info) = channels.remove(scid) { let other_node_id = if node_id == chan_info.node_one { chan_info.node_two } else { chan_info.node_one }; - if let BtreeEntry::Occupied(mut other_node_entry) = nodes.entry(other_node_id) { + if let IndexedMapEntry::Occupied(mut other_node_entry) = nodes.entry(other_node_id) { other_node_entry.get_mut().channels.retain(|chan_id| { *scid != *chan_id }); @@ -1644,7 +1644,7 @@ impl NetworkGraph where L::Target: Logger { // Sadly BTreeMap::retain was only stabilized in 1.53 so we can't switch to it for some // time. let mut scids_to_remove = Vec::new(); - for (scid, info) in channels.iter_mut() { + for (scid, info) in channels.unordered_iter_mut() { if info.one_to_two.is_some() && info.one_to_two.as_ref().unwrap().last_update < min_time_unix { info.one_to_two = None; } @@ -1813,10 +1813,10 @@ impl NetworkGraph where L::Target: Logger { Ok(()) } - fn remove_channel_in_nodes(nodes: &mut BTreeMap, chan: &ChannelInfo, short_channel_id: u64) { + fn remove_channel_in_nodes(nodes: &mut IndexedMap, chan: &ChannelInfo, short_channel_id: u64) { macro_rules! remove_from_node { ($node_id: expr) => { - if let BtreeEntry::Occupied(mut entry) = nodes.entry($node_id) { + if let IndexedMapEntry::Occupied(mut entry) = nodes.entry($node_id) { entry.get_mut().channels.retain(|chan_id| { short_channel_id != *chan_id }); @@ -1837,8 +1837,8 @@ impl NetworkGraph where L::Target: Logger { impl ReadOnlyNetworkGraph<'_> { /// Returns all known valid channels' short ids along with announced channel info. /// - /// (C-not exported) because we have no mapping for `BTreeMap`s - pub fn channels(&self) -> &BTreeMap { + /// (C-not exported) because we don't want to return lifetime'd references + pub fn channels(&self) -> &IndexedMap { &*self.channels } @@ -1850,13 +1850,13 @@ impl ReadOnlyNetworkGraph<'_> { #[cfg(c_bindings)] // Non-bindings users should use `channels` /// Returns the list of channels in the graph pub fn list_channels(&self) -> Vec { - self.channels.keys().map(|c| *c).collect() + self.channels.unordered_keys().map(|c| *c).collect() } /// Returns all known nodes' public keys along with announced node info. /// - /// (C-not exported) because we have no mapping for `BTreeMap`s - pub fn nodes(&self) -> &BTreeMap { + /// (C-not exported) because we don't want to return lifetime'd references + pub fn nodes(&self) -> &IndexedMap { &*self.nodes } @@ -1868,7 +1868,7 @@ impl ReadOnlyNetworkGraph<'_> { #[cfg(c_bindings)] // Non-bindings users should use `nodes` /// Returns the list of nodes in the graph pub fn list_nodes(&self) -> Vec { - self.nodes.keys().map(|n| *n).collect() + self.nodes.unordered_keys().map(|n| *n).collect() } /// Get network addresses by node id. diff --git a/lightning/src/routing/router.rs b/lightning/src/routing/router.rs index e4b95a90d..eb6eede0e 100644 --- a/lightning/src/routing/router.rs +++ b/lightning/src/routing/router.rs @@ -5507,9 +5507,9 @@ mod tests { 'load_endpoints: for _ in 0..10 { loop { seed = seed.overflowing_mul(0xdeadbeef).0; - let src = &PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let src = &PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); seed = seed.overflowing_mul(0xdeadbeef).0; - let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); let payment_params = PaymentParameters::from_node_id(dst); let amt = seed as u64 % 200_000_000; let params = ProbabilisticScoringParameters::default(); @@ -5545,9 +5545,9 @@ mod tests { 'load_endpoints: for _ in 0..10 { loop { seed = seed.overflowing_mul(0xdeadbeef).0; - let src = &PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let src = &PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); seed = seed.overflowing_mul(0xdeadbeef).0; - let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); let payment_params = PaymentParameters::from_node_id(dst).with_features(channelmanager::provided_invoice_features(&config)); let amt = seed as u64 % 200_000_000; let params = ProbabilisticScoringParameters::default(); @@ -5745,9 +5745,9 @@ mod benches { 'load_endpoints: for _ in 0..150 { loop { seed *= 0xdeadbeef; - let src = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let src = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); seed *= 0xdeadbeef; - let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); + let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap(); let params = PaymentParameters::from_node_id(dst).with_features(features.clone()); let first_hop = first_hop(src); let amt = seed as u64 % 1_000_000; diff --git a/lightning/src/util/indexed_map.rs b/lightning/src/util/indexed_map.rs new file mode 100644 index 000000000..841659714 --- /dev/null +++ b/lightning/src/util/indexed_map.rs @@ -0,0 +1,159 @@ +//! This module has a map which can be iterated in a deterministic order. See the [`IndexedMap`]. + +use crate::prelude::HashMap; +use alloc::collections::{BTreeMap, btree_map}; +use core::cmp::Ord; +use core::ops::RangeBounds; + +/// A map which can be iterated in a deterministic order. +/// +/// This would traditionally be accomplished by simply using a [`BTreeMap`], however B-Trees +/// generally have very slow lookups. Because we use a nodes+channels map while finding routes +/// across the network graph, our network graph backing map must be as performant as possible. +/// However, because peers expect to sync the network graph from us (and we need to support that +/// without holding a lock on the graph for the duration of the sync or dumping the entire graph +/// into our outbound message queue), we need an iterable map with a consistent iteration order we +/// can jump to a starting point on. +/// +/// Thus, we have a custom data structure here - its API mimics that of Rust's [`BTreeMap`], but is +/// actually backed by a [`HashMap`], with some additional tracking to ensure we can iterate over +/// keys in the order defined by [`Ord`]. +/// +/// [`BTreeMap`]: alloc::collections::BTreeMap +#[derive(Clone, PartialEq, Eq)] +pub struct IndexedMap { + map: BTreeMap, +} + +impl IndexedMap { + /// Constructs a new, empty map + pub fn new() -> Self { + Self { + map: BTreeMap::new(), + } + } + + #[inline(always)] + /// Fetches the element with the given `key`, if one exists. + pub fn get(&self, key: &K) -> Option<&V> { + self.map.get(key) + } + + /// Fetches a mutable reference to the element with the given `key`, if one exists. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { + self.map.get_mut(key) + } + + #[inline] + /// Returns true if an element with the given `key` exists in the map. + pub fn contains_key(&self, key: &K) -> bool { + self.map.contains_key(key) + } + + /// Removes the element with the given `key`, returning it, if one exists. + pub fn remove(&mut self, key: &K) -> Option { + self.map.remove(key) + } + + /// Inserts the given `key`/`value` pair into the map, returning the element that was + /// previously stored at the given `key`, if one exists. + pub fn insert(&mut self, key: K, value: V) -> Option { + self.map.insert(key, value) + } + + /// Returns an [`Entry`] for the given `key` in the map, allowing access to the value. + pub fn entry(&mut self, key: K) -> Entry<'_, K, V> { + match self.map.entry(key) { + btree_map::Entry::Vacant(entry) => { + Entry::Vacant(VacantEntry { + underlying_entry: entry + }) + }, + btree_map::Entry::Occupied(entry) => { + Entry::Occupied(OccupiedEntry { + underlying_entry: entry + }) + } + } + } + + /// Returns an iterator which iterates over the keys in the map, in a random order. + pub fn unordered_keys(&self) -> impl Iterator { + self.map.keys() + } + + /// Returns an iterator which iterates over the `key`/`value` pairs in a random order. + pub fn unordered_iter(&self) -> impl Iterator { + self.map.iter() + } + + /// Returns an iterator which iterates over the `key`s and mutable references to `value`s in a + /// random order. + pub fn unordered_iter_mut(&mut self) -> impl Iterator { + self.map.iter_mut() + } + + /// Returns an iterator which iterates over the `key`/`value` pairs in a given range. + pub fn range>(&self, range: R) -> btree_map::Range { + self.map.range(range) + } + + /// Returns the number of `key`/`value` pairs in the map + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns true if there are no elements in the map + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } +} + +/// An [`Entry`] for a key which currently has no value +pub struct VacantEntry<'a, K: Ord, V> { + underlying_entry: btree_map::VacantEntry<'a, K, V>, +} + +/// An [`Entry`] for an existing key-value pair +pub struct OccupiedEntry<'a, K: Ord, V> { + underlying_entry: btree_map::OccupiedEntry<'a, K, V>, +} + +/// A mutable reference to a position in the map. This can be used to reference, add, or update the +/// value at a fixed key. +pub enum Entry<'a, K: Ord, V> { + /// A mutable reference to a position within the map where there is no value. + Vacant(VacantEntry<'a, K, V>), + /// A mutable reference to a position within the map where there is currently a value. + Occupied(OccupiedEntry<'a, K, V>), +} + +impl<'a, K: Ord, V> VacantEntry<'a, K, V> { + /// Insert a value into the position described by this entry. + pub fn insert(self, value: V) -> &'a mut V { + self.underlying_entry.insert(value) + } +} + +impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> { + /// Remove the value at the position described by this entry. + pub fn remove_entry(self) -> (K, V) { + self.underlying_entry.remove_entry() + } + + /// Get a reference to the value at the position described by this entry. + pub fn get(&self) -> &V { + self.underlying_entry.get() + } + + /// Get a mutable reference to the value at the position described by this entry. + pub fn get_mut(&mut self) -> &mut V { + self.underlying_entry.get_mut() + } + + /// Consume this entry, returning a mutable reference to the value at the position described by + /// this entry. + pub fn into_mut(self) -> &'a mut V { + self.underlying_entry.into_mut() + } +} diff --git a/lightning/src/util/mod.rs b/lightning/src/util/mod.rs index 1d46865b6..1673bd07f 100644 --- a/lightning/src/util/mod.rs +++ b/lightning/src/util/mod.rs @@ -40,6 +40,8 @@ pub(crate) mod transaction_utils; pub(crate) mod scid_utils; pub(crate) mod time; +pub mod indexed_map; + /// Logging macro utilities. #[macro_use] pub(crate) mod macro_logger; -- 2.39.5