Add a new `IndexedMap` type and use it in network graph storage
authorMatt Corallo <git@bluematt.me>
Tue, 25 Oct 2022 03:50:07 +0000 (03:50 +0000)
committerMatt Corallo <git@bluematt.me>
Wed, 25 Jan 2023 18:58:51 +0000 (18:58 +0000)
Our network graph has to be iterable in a deterministic order and
with the ability to iterate over a specific range. Thus,
historically, we've used a `BTreeMap` to do the iteration. This is
fine, except our map needs to also provide high performance lookups
in order to make route-finding fast. Sadly, `BTreeMap`s are quite
slow due to the branching penalty.

Here we replace the `BTreeMap`s in the scorer with a dummy wrapper.
In the next commit the internals thereof will be replaced with a
`HashMap`-based implementation.

lightning/src/routing/gossip.rs
lightning/src/routing/router.rs
lightning/src/util/indexed_map.rs [new file with mode: 0644]
lightning/src/util/mod.rs

index a12b3d563daaa6353f010224b345314ce8081dc3..24a21d795b2c1fc9bef6084efddabb901046d2c3 100644 (file)
@@ -32,11 +32,11 @@ use crate::util::logger::{Logger, Level};
 use crate::util::events::{MessageSendEvent, MessageSendEventsProvider};
 use crate::util::scid_utils::{block_from_scid, scid_from_parts, MAX_SCID_BLOCK};
 use crate::util::string::PrintableString;
+use crate::util::indexed_map::{IndexedMap, Entry as IndexedMapEntry};
 
 use crate::io;
 use crate::io_extras::{copy, sink};
 use crate::prelude::*;
-use alloc::collections::{BTreeMap, btree_map::Entry as BtreeEntry};
 use core::{cmp, fmt};
 use crate::sync::{RwLock, RwLockReadGuard};
 #[cfg(feature = "std")]
@@ -133,8 +133,8 @@ pub struct NetworkGraph<L: Deref> where L::Target: Logger {
        genesis_hash: BlockHash,
        logger: L,
        // Lock order: channels -> nodes
-       channels: RwLock<BTreeMap<u64, ChannelInfo>>,
-       nodes: RwLock<BTreeMap<NodeId, NodeInfo>>,
+       channels: RwLock<IndexedMap<u64, ChannelInfo>>,
+       nodes: RwLock<IndexedMap<NodeId, NodeInfo>>,
        // Lock order: removed_channels -> removed_nodes
        //
        // NOTE: In the following `removed_*` maps, we use seconds since UNIX epoch to track time instead
@@ -158,8 +158,8 @@ pub struct NetworkGraph<L: Deref> where L::Target: Logger {
 
 /// A read-only view of [`NetworkGraph`].
 pub struct ReadOnlyNetworkGraph<'a> {
-       channels: RwLockReadGuard<'a, BTreeMap<u64, ChannelInfo>>,
-       nodes: RwLockReadGuard<'a, BTreeMap<NodeId, NodeInfo>>,
+       channels: RwLockReadGuard<'a, IndexedMap<u64, ChannelInfo>>,
+       nodes: RwLockReadGuard<'a, IndexedMap<NodeId, NodeInfo>>,
 }
 
 /// Update to the [`NetworkGraph`] based on payment failure information conveyed via the Onion
@@ -1131,13 +1131,13 @@ impl<L: Deref> Writeable for NetworkGraph<L> where L::Target: Logger {
                self.genesis_hash.write(writer)?;
                let channels = self.channels.read().unwrap();
                (channels.len() as u64).write(writer)?;
-               for (ref chan_id, ref chan_info) in channels.iter() {
+               for (ref chan_id, ref chan_info) in channels.unordered_iter() {
                        (*chan_id).write(writer)?;
                        chan_info.write(writer)?;
                }
                let nodes = self.nodes.read().unwrap();
                (nodes.len() as u64).write(writer)?;
-               for (ref node_id, ref node_info) in nodes.iter() {
+               for (ref node_id, ref node_info) in nodes.unordered_iter() {
                        node_id.write(writer)?;
                        node_info.write(writer)?;
                }
@@ -1156,14 +1156,14 @@ impl<L: Deref> ReadableArgs<L> for NetworkGraph<L> where L::Target: Logger {
 
                let genesis_hash: BlockHash = Readable::read(reader)?;
                let channels_count: u64 = Readable::read(reader)?;
-               let mut channels: BTreeMap<u64, ChannelInfo> = BTreeMap::new();
+               let mut channels = IndexedMap::new();
                for _ in 0..channels_count {
                        let chan_id: u64 = Readable::read(reader)?;
                        let chan_info = Readable::read(reader)?;
                        channels.insert(chan_id, chan_info);
                }
                let nodes_count: u64 = Readable::read(reader)?;
-               let mut nodes: BTreeMap<NodeId, NodeInfo> = BTreeMap::new();
+               let mut nodes = IndexedMap::new();
                for _ in 0..nodes_count {
                        let node_id = Readable::read(reader)?;
                        let node_info = Readable::read(reader)?;
@@ -1191,11 +1191,11 @@ impl<L: Deref> ReadableArgs<L> for NetworkGraph<L> where L::Target: Logger {
 impl<L: Deref> fmt::Display for NetworkGraph<L> where L::Target: Logger {
        fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
                writeln!(f, "Network map\n[Channels]")?;
-               for (key, val) in self.channels.read().unwrap().iter() {
+               for (key, val) in self.channels.read().unwrap().unordered_iter() {
                        writeln!(f, " {}: {}", key, val)?;
                }
                writeln!(f, "[Nodes]")?;
-               for (&node_id, val) in self.nodes.read().unwrap().iter() {
+               for (&node_id, val) in self.nodes.read().unwrap().unordered_iter() {
                        writeln!(f, " {}: {}", log_bytes!(node_id.as_slice()), val)?;
                }
                Ok(())
@@ -1218,8 +1218,8 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                        secp_ctx: Secp256k1::verification_only(),
                        genesis_hash,
                        logger,
-                       channels: RwLock::new(BTreeMap::new()),
-                       nodes: RwLock::new(BTreeMap::new()),
+                       channels: RwLock::new(IndexedMap::new()),
+                       nodes: RwLock::new(IndexedMap::new()),
                        last_rapid_gossip_sync_timestamp: Mutex::new(None),
                        removed_channels: Mutex::new(HashMap::new()),
                        removed_nodes: Mutex::new(HashMap::new()),
@@ -1252,7 +1252,7 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
        /// purposes.
        #[cfg(test)]
        pub fn clear_nodes_announcement_info(&self) {
-               for node in self.nodes.write().unwrap().iter_mut() {
+               for node in self.nodes.write().unwrap().unordered_iter_mut() {
                        node.1.announcement_info = None;
                }
        }
@@ -1382,7 +1382,7 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                let node_id_b = channel_info.node_two.clone();
 
                match channels.entry(short_channel_id) {
-                       BtreeEntry::Occupied(mut entry) => {
+                       IndexedMapEntry::Occupied(mut entry) => {
                                //TODO: because asking the blockchain if short_channel_id is valid is only optional
                                //in the blockchain API, we need to handle it smartly here, though it's unclear
                                //exactly how...
@@ -1401,17 +1401,17 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                                        return Err(LightningError{err: "Already have knowledge of channel".to_owned(), action: ErrorAction::IgnoreDuplicateGossip});
                                }
                        },
-                       BtreeEntry::Vacant(entry) => {
+                       IndexedMapEntry::Vacant(entry) => {
                                entry.insert(channel_info);
                        }
                };
 
                for current_node_id in [node_id_a, node_id_b].iter() {
                        match nodes.entry(current_node_id.clone()) {
-                               BtreeEntry::Occupied(node_entry) => {
+                               IndexedMapEntry::Occupied(node_entry) => {
                                        node_entry.into_mut().channels.push(short_channel_id);
                                },
-                               BtreeEntry::Vacant(node_entry) => {
+                               IndexedMapEntry::Vacant(node_entry) => {
                                        node_entry.insert(NodeInfo {
                                                channels: vec!(short_channel_id),
                                                announcement_info: None,
@@ -1585,7 +1585,7 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                        for scid in node.channels.iter() {
                                if let Some(chan_info) = channels.remove(scid) {
                                        let other_node_id = if node_id == chan_info.node_one { chan_info.node_two } else { chan_info.node_one };
-                                       if let BtreeEntry::Occupied(mut other_node_entry) = nodes.entry(other_node_id) {
+                                       if let IndexedMapEntry::Occupied(mut other_node_entry) = nodes.entry(other_node_id) {
                                                other_node_entry.get_mut().channels.retain(|chan_id| {
                                                        *scid != *chan_id
                                                });
@@ -1644,7 +1644,7 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                // Sadly BTreeMap::retain was only stabilized in 1.53 so we can't switch to it for some
                // time.
                let mut scids_to_remove = Vec::new();
-               for (scid, info) in channels.iter_mut() {
+               for (scid, info) in channels.unordered_iter_mut() {
                        if info.one_to_two.is_some() && info.one_to_two.as_ref().unwrap().last_update < min_time_unix {
                                info.one_to_two = None;
                        }
@@ -1813,10 +1813,10 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
                Ok(())
        }
 
-       fn remove_channel_in_nodes(nodes: &mut BTreeMap<NodeId, NodeInfo>, chan: &ChannelInfo, short_channel_id: u64) {
+       fn remove_channel_in_nodes(nodes: &mut IndexedMap<NodeId, NodeInfo>, chan: &ChannelInfo, short_channel_id: u64) {
                macro_rules! remove_from_node {
                        ($node_id: expr) => {
-                               if let BtreeEntry::Occupied(mut entry) = nodes.entry($node_id) {
+                               if let IndexedMapEntry::Occupied(mut entry) = nodes.entry($node_id) {
                                        entry.get_mut().channels.retain(|chan_id| {
                                                short_channel_id != *chan_id
                                        });
@@ -1837,8 +1837,8 @@ impl<L: Deref> NetworkGraph<L> where L::Target: Logger {
 impl ReadOnlyNetworkGraph<'_> {
        /// Returns all known valid channels' short ids along with announced channel info.
        ///
-       /// (C-not exported) because we have no mapping for `BTreeMap`s
-       pub fn channels(&self) -> &BTreeMap<u64, ChannelInfo> {
+       /// (C-not exported) because we don't want to return lifetime'd references
+       pub fn channels(&self) -> &IndexedMap<u64, ChannelInfo> {
                &*self.channels
        }
 
@@ -1850,13 +1850,13 @@ impl ReadOnlyNetworkGraph<'_> {
        #[cfg(c_bindings)] // Non-bindings users should use `channels`
        /// Returns the list of channels in the graph
        pub fn list_channels(&self) -> Vec<u64> {
-               self.channels.keys().map(|c| *c).collect()
+               self.channels.unordered_keys().map(|c| *c).collect()
        }
 
        /// Returns all known nodes' public keys along with announced node info.
        ///
-       /// (C-not exported) because we have no mapping for `BTreeMap`s
-       pub fn nodes(&self) -> &BTreeMap<NodeId, NodeInfo> {
+       /// (C-not exported) because we don't want to return lifetime'd references
+       pub fn nodes(&self) -> &IndexedMap<NodeId, NodeInfo> {
                &*self.nodes
        }
 
@@ -1868,7 +1868,7 @@ impl ReadOnlyNetworkGraph<'_> {
        #[cfg(c_bindings)] // Non-bindings users should use `nodes`
        /// Returns the list of nodes in the graph
        pub fn list_nodes(&self) -> Vec<NodeId> {
-               self.nodes.keys().map(|n| *n).collect()
+               self.nodes.unordered_keys().map(|n| *n).collect()
        }
 
        /// Get network addresses by node id.
index e4b95a90d3b44ecb18a9da72821f12d2f8e68316..eb6eede0e8216f87d4165d23bd879967697e27f4 100644 (file)
@@ -5507,9 +5507,9 @@ mod tests {
                'load_endpoints: for _ in 0..10 {
                        loop {
                                seed = seed.overflowing_mul(0xdeadbeef).0;
-                               let src = &PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let src = &PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                seed = seed.overflowing_mul(0xdeadbeef).0;
-                               let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                let payment_params = PaymentParameters::from_node_id(dst);
                                let amt = seed as u64 % 200_000_000;
                                let params = ProbabilisticScoringParameters::default();
@@ -5545,9 +5545,9 @@ mod tests {
                'load_endpoints: for _ in 0..10 {
                        loop {
                                seed = seed.overflowing_mul(0xdeadbeef).0;
-                               let src = &PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let src = &PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                seed = seed.overflowing_mul(0xdeadbeef).0;
-                               let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                let payment_params = PaymentParameters::from_node_id(dst).with_features(channelmanager::provided_invoice_features(&config));
                                let amt = seed as u64 % 200_000_000;
                                let params = ProbabilisticScoringParameters::default();
@@ -5745,9 +5745,9 @@ mod benches {
                'load_endpoints: for _ in 0..150 {
                        loop {
                                seed *= 0xdeadbeef;
-                               let src = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let src = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                seed *= 0xdeadbeef;
-                               let dst = PublicKey::from_slice(nodes.keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
+                               let dst = PublicKey::from_slice(nodes.unordered_keys().skip(seed % nodes.len()).next().unwrap().as_slice()).unwrap();
                                let params = PaymentParameters::from_node_id(dst).with_features(features.clone());
                                let first_hop = first_hop(src);
                                let amt = seed as u64 % 1_000_000;
diff --git a/lightning/src/util/indexed_map.rs b/lightning/src/util/indexed_map.rs
new file mode 100644 (file)
index 0000000..8416597
--- /dev/null
@@ -0,0 +1,159 @@
+//! This module has a map which can be iterated in a deterministic order. See the [`IndexedMap`].
+
+use crate::prelude::HashMap;
+use alloc::collections::{BTreeMap, btree_map};
+use core::cmp::Ord;
+use core::ops::RangeBounds;
+
+/// A map which can be iterated in a deterministic order.
+///
+/// This would traditionally be accomplished by simply using a [`BTreeMap`], however B-Trees
+/// generally have very slow lookups. Because we use a nodes+channels map while finding routes
+/// across the network graph, our network graph backing map must be as performant as possible.
+/// However, because peers expect to sync the network graph from us (and we need to support that
+/// without holding a lock on the graph for the duration of the sync or dumping the entire graph
+/// into our outbound message queue), we need an iterable map with a consistent iteration order we
+/// can jump to a starting point on.
+///
+/// Thus, we have a custom data structure here - its API mimics that of Rust's [`BTreeMap`], but is
+/// actually backed by a [`HashMap`], with some additional tracking to ensure we can iterate over
+/// keys in the order defined by [`Ord`].
+///
+/// [`BTreeMap`]: alloc::collections::BTreeMap
+#[derive(Clone, PartialEq, Eq)]
+pub struct IndexedMap<K: Ord, V> {
+       map: BTreeMap<K, V>,
+}
+
+impl<K: Ord, V> IndexedMap<K, V> {
+       /// Constructs a new, empty map
+       pub fn new() -> Self {
+               Self {
+                       map: BTreeMap::new(),
+               }
+       }
+
+       #[inline(always)]
+       /// Fetches the element with the given `key`, if one exists.
+       pub fn get(&self, key: &K) -> Option<&V> {
+               self.map.get(key)
+       }
+
+       /// Fetches a mutable reference to the element with the given `key`, if one exists.
+       pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
+               self.map.get_mut(key)
+       }
+
+       #[inline]
+       /// Returns true if an element with the given `key` exists in the map.
+       pub fn contains_key(&self, key: &K) -> bool {
+               self.map.contains_key(key)
+       }
+
+       /// Removes the element with the given `key`, returning it, if one exists.
+       pub fn remove(&mut self, key: &K) -> Option<V> {
+               self.map.remove(key)
+       }
+
+       /// Inserts the given `key`/`value` pair into the map, returning the element that was
+       /// previously stored at the given `key`, if one exists.
+       pub fn insert(&mut self, key: K, value: V) -> Option<V> {
+               self.map.insert(key, value)
+       }
+
+       /// Returns an [`Entry`] for the given `key` in the map, allowing access to the value.
+       pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+               match self.map.entry(key) {
+                       btree_map::Entry::Vacant(entry) => {
+                               Entry::Vacant(VacantEntry {
+                                       underlying_entry: entry
+                               })
+                       },
+                       btree_map::Entry::Occupied(entry) => {
+                               Entry::Occupied(OccupiedEntry {
+                                       underlying_entry: entry
+                               })
+                       }
+               }
+       }
+
+       /// Returns an iterator which iterates over the keys in the map, in a random order.
+       pub fn unordered_keys(&self) -> impl Iterator<Item = &K> {
+               self.map.keys()
+       }
+
+       /// Returns an iterator which iterates over the `key`/`value` pairs in a random order.
+       pub fn unordered_iter(&self) -> impl Iterator<Item = (&K, &V)> {
+               self.map.iter()
+       }
+
+       /// Returns an iterator which iterates over the `key`s and mutable references to `value`s in a
+       /// random order.
+       pub fn unordered_iter_mut(&mut self) -> impl Iterator<Item = (&K, &mut V)> {
+               self.map.iter_mut()
+       }
+
+       /// Returns an iterator which iterates over the `key`/`value` pairs in a given range.
+       pub fn range<R: RangeBounds<K>>(&self, range: R) -> btree_map::Range<K, V> {
+               self.map.range(range)
+       }
+
+       /// Returns the number of `key`/`value` pairs in the map
+       pub fn len(&self) -> usize {
+               self.map.len()
+       }
+
+       /// Returns true if there are no elements in the map
+       pub fn is_empty(&self) -> bool {
+               self.map.is_empty()
+       }
+}
+
+/// An [`Entry`] for a key which currently has no value
+pub struct VacantEntry<'a, K: Ord, V> {
+       underlying_entry: btree_map::VacantEntry<'a, K, V>,
+}
+
+/// An [`Entry`] for an existing key-value pair
+pub struct OccupiedEntry<'a, K: Ord, V> {
+       underlying_entry: btree_map::OccupiedEntry<'a, K, V>,
+}
+
+/// A mutable reference to a position in the map. This can be used to reference, add, or update the
+/// value at a fixed key.
+pub enum Entry<'a, K: Ord, V> {
+       /// A mutable reference to a position within the map where there is no value.
+       Vacant(VacantEntry<'a, K, V>),
+       /// A mutable reference to a position within the map where there is currently a value.
+       Occupied(OccupiedEntry<'a, K, V>),
+}
+
+impl<'a, K: Ord, V> VacantEntry<'a, K, V> {
+       /// Insert a value into the position described by this entry.
+       pub fn insert(self, value: V) -> &'a mut V {
+               self.underlying_entry.insert(value)
+       }
+}
+
+impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> {
+       /// Remove the value at the position described by this entry.
+       pub fn remove_entry(self) -> (K, V) {
+               self.underlying_entry.remove_entry()
+       }
+
+       /// Get a reference to the value at the position described by this entry.
+       pub fn get(&self) -> &V {
+               self.underlying_entry.get()
+       }
+
+       /// Get a mutable reference to the value at the position described by this entry.
+       pub fn get_mut(&mut self) -> &mut V {
+               self.underlying_entry.get_mut()
+       }
+
+       /// Consume this entry, returning a mutable reference to the value at the position described by
+       /// this entry.
+       pub fn into_mut(self) -> &'a mut V {
+               self.underlying_entry.into_mut()
+       }
+}
index 1d46865b6019b0158659ccab2c590c419416e36c..1673bd07f69b24d7af49f7d9c3604ea33f000422 100644 (file)
@@ -40,6 +40,8 @@ pub(crate) mod transaction_utils;
 pub(crate) mod scid_utils;
 pub(crate) mod time;
 
+pub mod indexed_map;
+
 /// Logging macro utilities.
 #[macro_use]
 pub(crate) mod macro_logger;