Merge pull request #1436 from TheBlueMatt/2022-04-event-process-try-lock
[rust-lightning] / lightning-background-processor / src / lib.rs
index fd1ad6fe2881f6505d12d83d0050aedc102c4aa0..e23737c0adcf16fec641c0d48aac940645c54716 100644 (file)
@@ -6,20 +6,21 @@
 #![deny(missing_docs)]
 #![deny(unsafe_code)]
 
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
 #[macro_use] extern crate lightning;
 
 use lightning::chain;
 use lightning::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
-use lightning::chain::chainmonitor::ChainMonitor;
-use lightning::chain::channelmonitor;
+use lightning::chain::chainmonitor::{ChainMonitor, Persist};
 use lightning::chain::keysinterface::{Sign, KeysInterface};
 use lightning::ln::channelmanager::ChannelManager;
 use lightning::ln::msgs::{ChannelMessageHandler, RoutingMessageHandler};
-use lightning::ln::peer_handler::{PeerManager, SocketDescriptor};
-use lightning::ln::peer_handler::CustomMessageHandler;
-use lightning::routing::network_graph::NetGraphMsgHandler;
+use lightning::ln::peer_handler::{CustomMessageHandler, PeerManager, SocketDescriptor};
+use lightning::routing::network_graph::{NetworkGraph, NetGraphMsgHandler};
 use lightning::util::events::{Event, EventHandler, EventsProvider};
 use lightning::util::logger::Logger;
+use lightning::util::persist::Persister;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::thread;
@@ -36,6 +37,8 @@ use std::ops::Deref;
 ///   [`ChannelManager`] persistence should be done in the background.
 /// * Calling [`ChannelManager::timer_tick_occurred`] and [`PeerManager::timer_tick_occurred`]
 ///   at the appropriate intervals.
+/// * Calling [`NetworkGraph::remove_stale_channels`] (if a [`NetGraphMsgHandler`] is provided to
+///   [`BackgroundProcessor::start`]).
 ///
 /// It will also call [`PeerManager::process_events`] periodically though this shouldn't be relied
 /// upon as doing so may result in high latency.
@@ -61,7 +64,7 @@ const FRESHNESS_TIMER: u64 = 60;
 const FRESHNESS_TIMER: u64 = 1;
 
 #[cfg(all(not(test), not(debug_assertions)))]
-const PING_TIMER: u64 = 5;
+const PING_TIMER: u64 = 10;
 /// Signature operations take a lot longer without compiler optimisations.
 /// Increasing the ping timer allows for this but slower devices will be disconnected if the
 /// timeout is reached.
@@ -70,42 +73,20 @@ const PING_TIMER: u64 = 30;
 #[cfg(test)]
 const PING_TIMER: u64 = 1;
 
-/// Trait which handles persisting a [`ChannelManager`] to disk.
-///
-/// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
-pub trait ChannelManagerPersister<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
-where
-       M::Target: 'static + chain::Watch<Signer>,
-       T::Target: 'static + BroadcasterInterface,
-       K::Target: 'static + KeysInterface<Signer = Signer>,
-       F::Target: 'static + FeeEstimator,
-       L::Target: 'static + Logger,
-{
-       /// Persist the given [`ChannelManager`] to disk, returning an error if persistence failed
-       /// (which will cause the [`BackgroundProcessor`] which called this method to exit.
-       ///
-       /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
-       fn persist_manager(&self, channel_manager: &ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error>;
-}
+/// Prune the network graph of stale entries hourly.
+const NETWORK_PRUNE_TIMER: u64 = 60 * 60;
+
+#[cfg(not(test))]
+const FIRST_NETWORK_PRUNE_TIMER: u64 = 60;
+#[cfg(test)]
+const FIRST_NETWORK_PRUNE_TIMER: u64 = 1;
 
-impl<Fun, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
-ChannelManagerPersister<Signer, M, T, K, F, L> for Fun where
-       M::Target: 'static + chain::Watch<Signer>,
-       T::Target: 'static + BroadcasterInterface,
-       K::Target: 'static + KeysInterface<Signer = Signer>,
-       F::Target: 'static + FeeEstimator,
-       L::Target: 'static + Logger,
-       Fun: Fn(&ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error>,
-{
-       fn persist_manager(&self, channel_manager: &ChannelManager<Signer, M, T, K, F, L>) -> Result<(), std::io::Error> {
-               self(channel_manager)
-       }
-}
 
 /// Decorates an [`EventHandler`] with common functionality provided by standard [`EventHandler`]s.
 struct DecoratingEventHandler<
        E: EventHandler,
-       N: Deref<Target = NetGraphMsgHandler<A, L>>,
+       N: Deref<Target = NetGraphMsgHandler<G, A, L>>,
+       G: Deref<Target = NetworkGraph>,
        A: Deref,
        L: Deref,
 >
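With the closure-based `ChannelManagerPersister` removed, persistence now goes through `lightning::util::persist::Persister`. The simplest route, and the one the tests below take, is to implement `KVStorePersister`, which receives a key and a `Writeable`; a blanket impl then provides `Persister`. A hedged sketch with the storage backend left abstract (`write_durably` is a hypothetical helper):

```rust
use lightning::util::persist::KVStorePersister;
use lightning::util::ser::Writeable;

struct MyStore; // stand-in for a handle to your datastore

impl KVStorePersister for MyStore {
    fn persist<W: Writeable>(&self, key: &str, object: &W) -> std::io::Result<()> {
        // The background processor persists under the keys "manager"
        // (ChannelManager) and "network_graph" (NetworkGraph).
        let bytes = object.encode();
        write_durably(key, &bytes)
    }
}

// Hypothetical: a real implementation must write atomically (e.g. write to a
// temp file, fsync, rename) so a crash never leaves a torn serialization.
fn write_durably(_key: &str, _bytes: &[u8]) -> std::io::Result<()> { Ok(()) }
```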
@@ -116,10 +97,11 @@ where A::Target: chain::Access, L::Target: Logger {
 
 impl<
        E: EventHandler,
-       N: Deref<Target = NetGraphMsgHandler<A, L>>,
+       N: Deref<Target = NetGraphMsgHandler<G, A, L>>,
+       G: Deref<Target = NetworkGraph>,
        A: Deref,
        L: Deref,
-> EventHandler for DecoratingEventHandler<E, N, A, L>
+> EventHandler for DecoratingEventHandler<E, N, G, A, L>
 where A::Target: chain::Access, L::Target: Logger {
        fn handle_event(&self, event: &Event) {
                if let Some(event_handler) = &self.net_graph_msg_handler {
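The decorator fans each event out to the `NetGraphMsgHandler` first, so payment failures update the `NetworkGraph` before the user's handler runs. A simplified free-function rendering of the same logic:

```rust
use lightning::util::events::{Event, EventHandler};

// Simplified sketch of DecoratingEventHandler::handle_event above.
fn decorated_handle_event<N: EventHandler, E: EventHandler>(
    net_graph_msg_handler: Option<&N>, user_handler: &E, event: &Event,
) {
    if let Some(graph_handler) = net_graph_msg_handler {
        graph_handler.handle_event(event); // e.g. marks failed channels in the graph
    }
    user_handler.handle_event(event);
}
```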
@@ -134,17 +116,21 @@ impl BackgroundProcessor {
        /// documentation].
        ///
        /// The thread runs indefinitely unless the object is dropped, [`stop`] is called, or
-       /// `persist_manager` returns an error. In case of an error, the error is retrieved by calling
+       /// [`Persister::persist_manager`] returns an error. In case of an error, the error is retrieved by calling
        /// either [`join`] or [`stop`].
        ///
        /// # Data Persistence
        ///
-       /// `persist_manager` is responsible for writing out the [`ChannelManager`] to disk, and/or
+       /// [`Persister::persist_manager`] is responsible for writing out the [`ChannelManager`] to disk, and/or
        /// uploading to one or more backup services. See [`ChannelManager::write`] for writing out a
-       /// [`ChannelManager`]. See [`FilesystemPersister::persist_manager`] for Rust-Lightning's
+       /// [`ChannelManager`]. See the `lightning-persister` crate for LDK's
        /// provided implementation.
        ///
-       /// Typically, users should either implement [`ChannelManagerPersister`] to never return an
+       /// [`Persister::persist_graph`] is responsible for writing out the [`NetworkGraph`] to disk. See
+       /// [`NetworkGraph::write`] for writing out a [`NetworkGraph`]. See the `lightning-persister` crate
+       /// for LDK's provided implementation.
+       ///
+       /// Typically, users should either implement [`Persister::persist_manager`] to never return an
        /// error or call [`join`] and handle any error that may arise. For the latter case,
        /// `BackgroundProcessor` must be restarted by calling `start` again after handling the error.
        ///
@@ -155,13 +141,15 @@ impl BackgroundProcessor {
        /// functionality implemented by other handlers.
        /// * [`NetGraphMsgHandler`] if given will update the [`NetworkGraph`] based on payment failures.
        ///
-       /// [top-level documentation]: Self
+       /// [top-level documentation]: BackgroundProcessor
        /// [`join`]: Self::join
        /// [`stop`]: Self::stop
        /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
        /// [`ChannelManager::write`]: lightning::ln::channelmanager::ChannelManager#impl-Writeable
-       /// [`FilesystemPersister::persist_manager`]: lightning_persister::FilesystemPersister::persist_manager
+       /// [`Persister::persist_manager`]: lightning::util::persist::Persister::persist_manager
+       /// [`Persister::persist_graph`]: lightning::util::persist::Persister::persist_graph
        /// [`NetworkGraph`]: lightning::routing::network_graph::NetworkGraph
+       /// [`NetworkGraph::write`]: lightning::routing::network_graph::NetworkGraph#impl-Writeable
        pub fn start<
                Signer: 'static + Sign,
                CA: 'static + Deref + Send + Sync,
@@ -170,20 +158,21 @@ impl BackgroundProcessor {
                T: 'static + Deref + Send + Sync,
                K: 'static + Deref + Send + Sync,
                F: 'static + Deref + Send + Sync,
+               G: 'static + Deref<Target = NetworkGraph> + Send + Sync,
                L: 'static + Deref + Send + Sync,
                P: 'static + Deref + Send + Sync,
                Descriptor: 'static + SocketDescriptor + Send + Sync,
                CMH: 'static + Deref + Send + Sync,
                RMH: 'static + Deref + Send + Sync,
-               EH: 'static + EventHandler + Send + Sync,
-               CMP: 'static + Send + ChannelManagerPersister<Signer, CW, T, K, F, L>,
+               EH: 'static + EventHandler + Send,
+               PS: 'static + Deref + Send,
                M: 'static + Deref<Target = ChainMonitor<Signer, CF, T, F, L, P>> + Send + Sync,
                CM: 'static + Deref<Target = ChannelManager<Signer, CW, T, K, F, L>> + Send + Sync,
-               NG: 'static + Deref<Target = NetGraphMsgHandler<CA, L>> + Send + Sync,
+               NG: 'static + Deref<Target = NetGraphMsgHandler<G, CA, L>> + Send + Sync,
                UMH: 'static + Deref + Send + Sync,
                PM: 'static + Deref<Target = PeerManager<Descriptor, CMH, RMH, L, UMH>> + Send + Sync,
        >(
-               persister: CMP, event_handler: EH, chain_monitor: M, channel_manager: CM,
+               persister: PS, event_handler: EH, chain_monitor: M, channel_manager: CM,
                net_graph_msg_handler: Option<NG>, peer_manager: PM, logger: L
        ) -> Self
        where
@@ -194,58 +183,113 @@ impl BackgroundProcessor {
                K::Target: 'static + KeysInterface<Signer = Signer>,
                F::Target: 'static + FeeEstimator,
                L::Target: 'static + Logger,
-               P::Target: 'static + channelmonitor::Persist<Signer>,
+               P::Target: 'static + Persist<Signer>,
                CMH::Target: 'static + ChannelMessageHandler,
                RMH::Target: 'static + RoutingMessageHandler,
                UMH::Target: 'static + CustomMessageHandler,
+               PS::Target: 'static + Persister<Signer, CW, T, K, F, L>
        {
                let stop_thread = Arc::new(AtomicBool::new(false));
                let stop_thread_clone = stop_thread.clone();
                let handle = thread::spawn(move || -> Result<(), std::io::Error> {
-                       let event_handler = DecoratingEventHandler { event_handler, net_graph_msg_handler };
+                       let event_handler = DecoratingEventHandler { event_handler, net_graph_msg_handler: net_graph_msg_handler.as_ref().map(|t| t.deref()) };
 
                        log_trace!(logger, "Calling ChannelManager's timer_tick_occurred on startup");
                        channel_manager.timer_tick_occurred();
 
                        let mut last_freshness_call = Instant::now();
                        let mut last_ping_call = Instant::now();
+                       let mut last_prune_call = Instant::now();
+                       let mut have_pruned = false;
+
                        loop {
-                               peer_manager.process_events();
                                channel_manager.process_pending_events(&event_handler);
                                chain_monitor.process_pending_events(&event_handler);
+
+                               // Note that PeerManager::process_events may block on ChannelManager's locks,
+                               // hence it comes last here. When the ChannelManager finishes whatever it's doing,
+                               // we want to ensure we get into `persist_manager` as quickly as we can, especially
+                               // without running the normal event processing above and handing events to users.
+                               //
+                               // Specifically, on an *extremely* slow machine, we may see ChannelManager start
+                               // processing a message effectively at any point during this loop. In order to
+                               // minimize the time between such processing completing and persisting the updated
+                               // ChannelManager, we want to minimize methods blocking on a ChannelManager
+                               // generally, and as a fallback place such blocking only immediately before
+                               // persistence.
+                               peer_manager.process_events();
+
+                               // We wait up to 100ms, but track how long it takes to detect being put to sleep,
+                               // see `await_start`'s use below.
+                               let await_start = Instant::now();
                                let updates_available =
                                        channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
+                               let await_time = await_start.elapsed();
+
                                if updates_available {
+                                       log_trace!(logger, "Persisting ChannelManager...");
                                        persister.persist_manager(&*channel_manager)?;
+                                       log_trace!(logger, "Done persisting ChannelManager.");
                                }
                                // Exit the loop if the background processor was requested to stop.
                                if stop_thread.load(Ordering::Acquire) == true {
                                        log_trace!(logger, "Terminating background processor.");
-                                       return Ok(());
+                                       break;
                                }
                                if last_freshness_call.elapsed().as_secs() > FRESHNESS_TIMER {
                                        log_trace!(logger, "Calling ChannelManager's timer_tick_occurred");
                                        channel_manager.timer_tick_occurred();
                                        last_freshness_call = Instant::now();
                                }
-                               if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
+                               if await_time > Duration::from_secs(1) {
                                        // On various platforms, we may be starved of CPU cycles for several reasons.
                                        // E.g. on iOS, if we've been in the background, we will be entirely paused.
                                        // Similarly, if we're on a desktop platform and the device has been asleep, we
                                        // may not get any cycles.
-                                       // In any case, if we've been entirely paused for more than double our ping
-                                       // timer, we should have disconnected all sockets by now (and they're probably
-                                       // dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
-                                       log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
-                                       peer_manager.timer_tick_occurred();
-                                       peer_manager.timer_tick_occurred();
+                                       // We detect this by checking if our max-100ms-sleep, above, ran longer than a
+                                       // full second, at which point we assume sockets may have been killed (they
+                                       // appear to be at least on some platforms, even if it has only been a second).
+                                       // Note that we have to take care to not get here just because user event
+                                       // processing was slow at the top of the loop. For example, the sample client
+                                       // may call Bitcoin Core RPCs during event handling, which very often takes
+                                       // more than a handful of seconds to complete, and shouldn't disconnect all our
+                                       // peers.
+                                       log_trace!(logger, "100ms sleep took more than a second, disconnecting peers.");
+                                       peer_manager.disconnect_all_peers();
                                        last_ping_call = Instant::now();
                                } else if last_ping_call.elapsed().as_secs() > PING_TIMER {
                                        log_trace!(logger, "Calling PeerManager's timer_tick_occurred");
                                        peer_manager.timer_tick_occurred();
                                        last_ping_call = Instant::now();
                                }
+
+                               // Note that we want to run a graph prune once not long after startup before
+                               // falling back to our usual hourly prunes. This avoids short-lived clients never
+                               // pruning their network graph. We run once 60 seconds after startup before
+                               // continuing our normal cadence.
+                               if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER } {
+                                       if let Some(ref handler) = net_graph_msg_handler {
+                                               log_trace!(logger, "Pruning network graph of stale entries");
+                                               handler.network_graph().remove_stale_channels();
+                                               if let Err(e) = persister.persist_graph(handler.network_graph()) {
+                                                       log_error!(logger, "Error: Failed to persist network graph, check your disk and permissions: {}", e)
+                                               }
+                                               last_prune_call = Instant::now();
+                                               have_pruned = true;
+                                       }
+                               }
                        }
+
+                       // After we exit, ensure we persist the ChannelManager one final time - this avoids
+                       // some races where users quit while channel updates were in-flight, with
+                       // ChannelMonitor update(s) persisted without a corresponding ChannelManager update.
+                       persister.persist_manager(&*channel_manager)?;
+
+                       // Persist NetworkGraph on exit
+                       if let Some(ref handler) = net_graph_msg_handler {
+                               persister.persist_graph(handler.network_graph())?;
+                       }
+                       Ok(())
                });
                Self { stop_thread: stop_thread_clone, thread_handle: Some(handle) }
        }
@@ -305,7 +349,7 @@ mod tests {
        use bitcoin::network::constants::Network;
        use lightning::chain::{BestBlock, Confirm, chainmonitor};
        use lightning::chain::channelmonitor::ANTI_REORG_DELAY;
-       use lightning::chain::keysinterface::{InMemorySigner, KeysInterface, KeysManager};
+       use lightning::chain::keysinterface::{InMemorySigner, Recipient, KeysInterface, KeysManager};
        use lightning::chain::transaction::OutPoint;
        use lightning::get_event_msg;
        use lightning::ln::channelmanager::{BREAKDOWN_TIMEOUT, ChainParameters, ChannelManager, SimpleArcChannelManager};
@@ -317,6 +361,9 @@ mod tests {
        use lightning::util::events::{Event, MessageSendEventsProvider, MessageSendEvent};
        use lightning::util::ser::Writeable;
        use lightning::util::test_utils;
+       use lightning::util::persist::KVStorePersister;
+       use lightning_invoice::payment::{InvoicePayer, RetryAttempts};
+       use lightning_invoice::utils::DefaultRouter;
        use lightning_persister::FilesystemPersister;
        use std::fs;
        use std::path::PathBuf;
@@ -340,11 +387,12 @@ mod tests {
 
        struct Node {
                node: Arc<SimpleArcChannelManager<ChainMonitor, test_utils::TestBroadcaster, test_utils::TestFeeEstimator, test_utils::TestLogger>>,
-               net_graph_msg_handler: Option<Arc<NetGraphMsgHandler<Arc<test_utils::TestChainSource>, Arc<test_utils::TestLogger>>>>,
+               net_graph_msg_handler: Option<Arc<NetGraphMsgHandler<Arc<NetworkGraph>, Arc<test_utils::TestChainSource>, Arc<test_utils::TestLogger>>>>,
                peer_manager: Arc<PeerManager<TestDescriptor, Arc<test_utils::TestChannelMessageHandler>, Arc<test_utils::TestRoutingMessageHandler>, Arc<test_utils::TestLogger>, IgnoringMessageHandler>>,
                chain_monitor: Arc<ChainMonitor>,
                persister: Arc<FilesystemPersister>,
                tx_broadcaster: Arc<test_utils::TestBroadcaster>,
+               network_graph: Arc<NetworkGraph>,
                logger: Arc<test_utils::TestLogger>,
                best_block: BestBlock,
        }
@@ -359,6 +407,45 @@ mod tests {
                }
        }
 
+       struct Persister {
+               graph_error: Option<(std::io::ErrorKind, &'static str)>,
+               manager_error: Option<(std::io::ErrorKind, &'static str)>,
+               filesystem_persister: FilesystemPersister,
+       }
+
+       impl Persister {
+               fn new(data_dir: String) -> Self {
+                       let filesystem_persister = FilesystemPersister::new(data_dir.clone());
+                       Self { graph_error: None, manager_error: None, filesystem_persister }
+               }
+
+               fn with_graph_error(self, error: std::io::ErrorKind, message: &'static str) -> Self {
+                       Self { graph_error: Some((error, message)), ..self }
+               }
+
+               fn with_manager_error(self, error: std::io::ErrorKind, message: &'static str) -> Self {
+                       Self { manager_error: Some((error, message)), ..self }
+               }
+       }
+
+       impl KVStorePersister for Persister {
+               fn persist<W: Writeable>(&self, key: &str, object: &W) -> std::io::Result<()> {
+                       if key == "manager" {
+                               if let Some((error, message)) = self.manager_error {
+                                       return Err(std::io::Error::new(error, message))
+                               }
+                       }
+
+                       if key == "network_graph" {
+                               if let Some((error, message)) = self.graph_error {
+                                       return Err(std::io::Error::new(error, message))
+                               }
+                       }
+
+                       self.filesystem_persister.persist(key, object)
+               }
+       }
+
        fn get_full_filepath(filepath: String, filename: String) -> String {
                let mut path = PathBuf::from(filepath);
                path.push(filename);
@@ -382,18 +469,18 @@ mod tests {
                        let best_block = BestBlock::from_genesis(network);
                        let params = ChainParameters { network, best_block };
                        let manager = Arc::new(ChannelManager::new(fee_estimator.clone(), chain_monitor.clone(), tx_broadcaster.clone(), logger.clone(), keys_manager.clone(), UserConfig::default(), params));
-                       let network_graph = NetworkGraph::new(genesis_block.header.block_hash());
-                       let net_graph_msg_handler = Some(Arc::new(NetGraphMsgHandler::new(network_graph, Some(chain_source.clone()), logger.clone())));
+                       let network_graph = Arc::new(NetworkGraph::new(genesis_block.header.block_hash()));
+                       let net_graph_msg_handler = Some(Arc::new(NetGraphMsgHandler::new(network_graph.clone(), Some(chain_source.clone()), logger.clone())));
                        let msg_handler = MessageHandler { chan_handler: Arc::new(test_utils::TestChannelMessageHandler::new()), route_handler: Arc::new(test_utils::TestRoutingMessageHandler::new() )};
-                       let peer_manager = Arc::new(PeerManager::new(msg_handler, keys_manager.get_node_secret(), &seed, logger.clone(), IgnoringMessageHandler{}));
-                       let node = Node { node: manager, net_graph_msg_handler, peer_manager, chain_monitor, persister, tx_broadcaster, logger, best_block };
+                       let peer_manager = Arc::new(PeerManager::new(msg_handler, keys_manager.get_node_secret(Recipient::Node).unwrap(), &seed, logger.clone(), IgnoringMessageHandler{}));
+                       let node = Node { node: manager, net_graph_msg_handler, peer_manager, chain_monitor, persister, tx_broadcaster, network_graph, logger, best_block };
                        nodes.push(node);
                }
 
                for i in 0..num_nodes {
                        for j in (i+1)..num_nodes {
-                               nodes[i].node.peer_connected(&nodes[j].node.get_our_node_id(), &Init { features: InitFeatures::known() });
-                               nodes[j].node.peer_connected(&nodes[i].node.get_our_node_id(), &Init { features: InitFeatures::known() });
+                               nodes[i].node.peer_connected(&nodes[j].node.get_our_node_id(), &Init { features: InitFeatures::known(), remote_network_address: None });
+                               nodes[j].node.peer_connected(&nodes[i].node.get_our_node_id(), &Init { features: InitFeatures::known(), remote_network_address: None });
                        }
                }
 
@@ -482,18 +569,20 @@ mod tests {
 
                // Initiate the background processors to watch each node.
                let data_dir = nodes[0].persister.get_data_dir();
-               let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
+               let persister = Arc::new(Persister::new(data_dir));
                let event_handler = |_: &_| {};
                let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
 
                macro_rules! check_persisted_data {
-                       ($node: expr, $filepath: expr, $expected_bytes: expr) => {
-                               match $node.write(&mut $expected_bytes) {
-                                       Ok(()) => {
-                                               loop {
+                       ($node: expr, $filepath: expr) => {
+                               let mut expected_bytes = Vec::new();
+                               loop {
+                                       expected_bytes.clear();
+                                       match $node.write(&mut expected_bytes) {
+                                               Ok(()) => {
                                                        match std::fs::read($filepath) {
                                                                Ok(bytes) => {
-                                                                       if bytes == $expected_bytes {
+                                                                       if bytes == expected_bytes {
                                                                                break
                                                                        } else {
                                                                                continue
@@ -501,17 +590,17 @@ mod tests {
                                                                },
                                                                Err(_) => continue
                                                        }
-                                               }
-                                       },
-                                       Err(e) => panic!("Unexpected error: {}", e)
+                                               },
+                                               Err(e) => panic!("Unexpected error: {}", e)
+                                       }
                                }
                        }
                }
 
                // Check that the initial channel manager data is persisted as expected.
                let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "manager".to_string());
-               let mut expected_bytes = Vec::new();
-               check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes);
+               check_persisted_data!(nodes[0].node, filepath.clone());
+
                loop {
                        if !nodes[0].node.get_persistence_condvar_value() { break }
                }
@@ -520,12 +609,18 @@ mod tests {
                nodes[0].node.force_close_channel(&OutPoint { txid: tx.txid(), index: 0 }.to_channel_id()).unwrap();
 
                // Check that the force-close updates are persisted.
-               let mut expected_bytes = Vec::new();
-               check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes);
+               check_persisted_data!(nodes[0].node, filepath.clone());
                loop {
                        if !nodes[0].node.get_persistence_condvar_value() { break }
                }
 
+               // Check network graph is persisted
+               let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "network_graph".to_string());
+               if let Some(ref handler) = nodes[0].net_graph_msg_handler {
+                       let network_graph = handler.network_graph();
+                       check_persisted_data!(network_graph, filepath.clone());
+               }
+
                assert!(bg_processor.stop().is_ok());
        }
 
@@ -535,7 +630,7 @@ mod tests {
                // `FRESHNESS_TIMER`.
                let nodes = create_nodes(1, "test_timer_tick_called".to_string());
                let data_dir = nodes[0].persister.get_data_dir();
-               let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
+               let persister = Arc::new(Persister::new(data_dir));
                let event_handler = |_: &_| {};
                let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
                loop {
@@ -552,12 +647,13 @@ mod tests {
        }
 
        #[test]
-       fn test_persist_error() {
+       fn test_channel_manager_persist_error() {
                // Test that if we encounter an error during manager persistence, the error is returned when `join`ing the background processor.
                let nodes = create_nodes(2, "test_persist_error".to_string());
                open_channel!(nodes[0], nodes[1], 100000);
 
-               let persister = |_: &_| Err(std::io::Error::new(std::io::ErrorKind::Other, "test"));
+               let data_dir = nodes[0].persister.get_data_dir();
+               let persister = Arc::new(Persister::new(data_dir).with_manager_error(std::io::ErrorKind::Other, "test"));
                let event_handler = |_: &_| {};
                let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
                match bg_processor.join() {
@@ -569,19 +665,37 @@ mod tests {
                }
        }
 
+       #[test]
+       fn test_network_graph_persist_error() {
+               // Test that if we encounter an error during network graph persistence, an error gets returned.
+               let nodes = create_nodes(2, "test_persist_network_graph_error".to_string());
+               let data_dir = nodes[0].persister.get_data_dir();
+               let persister = Arc::new(Persister::new(data_dir).with_graph_error(std::io::ErrorKind::Other, "test"));
+               let event_handler = |_: &_| {};
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+
+               match bg_processor.stop() {
+                       Ok(_) => panic!("Expected error persisting network graph"),
+                       Err(e) => {
+                               assert_eq!(e.kind(), std::io::ErrorKind::Other);
+                               assert_eq!(e.get_ref().unwrap().to_string(), "test");
+                       },
+               }
+       }
+
        #[test]
        fn test_background_event_handling() {
                let mut nodes = create_nodes(2, "test_background_event_handling".to_string());
                let channel_value = 100000;
                let data_dir = nodes[0].persister.get_data_dir();
-               let persister = move |node: &_| FilesystemPersister::persist_manager(data_dir.clone(), node);
+               let persister = Arc::new(Persister::new(data_dir.clone()));
 
                // Set up a background event handler for FundingGenerationReady events.
                let (sender, receiver) = std::sync::mpsc::sync_channel(1);
                let event_handler = move |event: &Event| {
                        sender.send(handle_funding_generation_ready!(event, channel_value)).unwrap();
                };
-               let bg_processor = BackgroundProcessor::start(persister.clone(), event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
 
                // Open a channel and check that the FundingGenerationReady event was handled.
                begin_open_channel!(nodes[0], nodes[1], channel_value);
@@ -605,6 +719,7 @@ mod tests {
                // Set up a background event handler for SpendableOutputs events.
                let (sender, receiver) = std::sync::mpsc::sync_channel(1);
                let event_handler = move |event: &Event| sender.send(event.clone()).unwrap();
+               let persister = Arc::new(Persister::new(data_dir));
                let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
 
                // Force close the channel and check that the SpendableOutputs event was handled.
@@ -622,4 +737,21 @@ mod tests {
 
                assert!(bg_processor.stop().is_ok());
        }
+
+       #[test]
+       fn test_invoice_payer() {
+               let keys_manager = test_utils::TestKeysInterface::new(&[0u8; 32], Network::Testnet);
+               let random_seed_bytes = keys_manager.get_secure_random_bytes();
+               let nodes = create_nodes(2, "test_invoice_payer".to_string());
+
+               // Initiate the background processors to watch each node.
+               let data_dir = nodes[0].persister.get_data_dir();
+               let persister = Arc::new(Persister::new(data_dir));
+               let scorer = Arc::new(Mutex::new(test_utils::TestScorer::with_penalty(0)));
+               let router = DefaultRouter::new(Arc::clone(&nodes[0].network_graph), Arc::clone(&nodes[0].logger), random_seed_bytes);
+               let invoice_payer = Arc::new(InvoicePayer::new(Arc::clone(&nodes[0].node), router, scorer, Arc::clone(&nodes[0].logger), |_: &_| {}, RetryAttempts(2)));
+               let event_handler = Arc::clone(&invoice_payer);
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               assert!(bg_processor.stop().is_ok());
+       }
 }