X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning-background-processor%2Fsrc%2Flib.rs;h=e23737c0adcf16fec641c0d48aac940645c54716;hb=72069bfc9d082d3d142cfa09dca2bf6e1f085710;hp=22fef2661fa661d82884c047bb09697959841a15;hpb=f6fa8e9c5baf220da9945ab6ec114404486446f0;p=rust-lightning diff --git a/lightning-background-processor/src/lib.rs b/lightning-background-processor/src/lib.rs index 22fef266..e23737c0 100644 --- a/lightning-background-processor/src/lib.rs +++ b/lightning-background-processor/src/lib.rs @@ -20,6 +20,7 @@ use lightning::ln::peer_handler::{CustomMessageHandler, PeerManager, SocketDescr use lightning::routing::network_graph::{NetworkGraph, NetGraphMsgHandler}; use lightning::util::events::{Event, EventHandler, EventsProvider}; use lightning::util::logger::Logger; +use lightning::util::persist::Persister; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; use std::thread; @@ -75,37 +76,11 @@ const PING_TIMER: u64 = 1; /// Prune the network graph of stale entries hourly. const NETWORK_PRUNE_TIMER: u64 = 60 * 60; -/// Trait which handles persisting a [`ChannelManager`] to disk. -/// -/// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager -pub trait ChannelManagerPersister -where - M::Target: 'static + chain::Watch, - T::Target: 'static + BroadcasterInterface, - K::Target: 'static + KeysInterface, - F::Target: 'static + FeeEstimator, - L::Target: 'static + Logger, -{ - /// Persist the given [`ChannelManager`] to disk, returning an error if persistence failed - /// (which will cause the [`BackgroundProcessor`] which called this method to exit. - /// - /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager - fn persist_manager(&self, channel_manager: &ChannelManager) -> Result<(), std::io::Error>; -} +#[cfg(not(test))] +const FIRST_NETWORK_PRUNE_TIMER: u64 = 60; +#[cfg(test)] +const FIRST_NETWORK_PRUNE_TIMER: u64 = 1; -impl -ChannelManagerPersister for Fun where - M::Target: 'static + chain::Watch, - T::Target: 'static + BroadcasterInterface, - K::Target: 'static + KeysInterface, - F::Target: 'static + FeeEstimator, - L::Target: 'static + Logger, - Fun: Fn(&ChannelManager) -> Result<(), std::io::Error>, -{ - fn persist_manager(&self, channel_manager: &ChannelManager) -> Result<(), std::io::Error> { - self(channel_manager) - } -} /// Decorates an [`EventHandler`] with common functionality provided by standard [`EventHandler`]s. struct DecoratingEventHandler< @@ -141,17 +116,21 @@ impl BackgroundProcessor { /// documentation]. /// /// The thread runs indefinitely unless the object is dropped, [`stop`] is called, or - /// `persist_manager` returns an error. In case of an error, the error is retrieved by calling + /// [`Persister::persist_manager`] returns an error. In case of an error, the error is retrieved by calling /// either [`join`] or [`stop`]. /// /// # Data Persistence /// - /// `persist_manager` is responsible for writing out the [`ChannelManager`] to disk, and/or + /// [`Persister::persist_manager`] is responsible for writing out the [`ChannelManager`] to disk, and/or /// uploading to one or more backup services. See [`ChannelManager::write`] for writing out a - /// [`ChannelManager`]. See [`FilesystemPersister::persist_manager`] for Rust-Lightning's + /// [`ChannelManager`]. See the `lightning-persister` crate for LDK's /// provided implementation. /// - /// Typically, users should either implement [`ChannelManagerPersister`] to never return an + /// [`Persister::persist_graph`] is responsible for writing out the [`NetworkGraph`] to disk. See + /// [`NetworkGraph::write`] for writing out a [`NetworkGraph`]. See the `lightning-persister` crate + /// for LDK's provided implementation. + /// + /// Typically, users should either implement [`Persister::persist_manager`] to never return an /// error or call [`join`] and handle any error that may arise. For the latter case, /// `BackgroundProcessor` must be restarted by calling `start` again after handling the error. /// @@ -167,8 +146,10 @@ impl BackgroundProcessor { /// [`stop`]: Self::stop /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager /// [`ChannelManager::write`]: lightning::ln::channelmanager::ChannelManager#impl-Writeable - /// [`FilesystemPersister::persist_manager`]: lightning_persister::FilesystemPersister::persist_manager + /// [`Persister::persist_manager`]: lightning::util::persist::Persister::persist_manager + /// [`Persister::persist_graph`]: lightning::util::persist::Persister::persist_graph /// [`NetworkGraph`]: lightning::routing::network_graph::NetworkGraph + /// [`NetworkGraph::write`]: lightning::routing::network_graph::NetworkGraph#impl-Writeable pub fn start< Signer: 'static + Sign, CA: 'static + Deref + Send + Sync, @@ -184,14 +165,14 @@ impl BackgroundProcessor { CMH: 'static + Deref + Send + Sync, RMH: 'static + Deref + Send + Sync, EH: 'static + EventHandler + Send, - CMP: 'static + Send + ChannelManagerPersister, + PS: 'static + Deref + Send, M: 'static + Deref> + Send + Sync, CM: 'static + Deref> + Send + Sync, NG: 'static + Deref> + Send + Sync, UMH: 'static + Deref + Send + Sync, PM: 'static + Deref> + Send + Sync, >( - persister: CMP, event_handler: EH, chain_monitor: M, channel_manager: CM, + persister: PS, event_handler: EH, chain_monitor: M, channel_manager: CM, net_graph_msg_handler: Option, peer_manager: PM, logger: L ) -> Self where @@ -206,6 +187,7 @@ impl BackgroundProcessor { CMH::Target: 'static + ChannelMessageHandler, RMH::Target: 'static + RoutingMessageHandler, UMH::Target: 'static + CustomMessageHandler, + PS::Target: 'static + Persister { let stop_thread = Arc::new(AtomicBool::new(false)); let stop_thread_clone = stop_thread.clone(); @@ -221,10 +203,22 @@ impl BackgroundProcessor { let mut have_pruned = false; loop { - peer_manager.process_events(); // Note that this may block on ChannelManager's locking channel_manager.process_pending_events(&event_handler); chain_monitor.process_pending_events(&event_handler); + // Note that the PeerManager::process_events may block on ChannelManager's locks, + // hence it comes last here. When the ChannelManager finishes whatever it's doing, + // we want to ensure we get into `persist_manager` as quickly as we can, especially + // without running the normal event processing above and handing events to users. + // + // Specifically, on an *extremely* slow machine, we may see ChannelManager start + // processing a message effectively at any point during this loop. In order to + // minimize the time between such processing completing and persisting the updated + // ChannelManager, we want to minimize methods blocking on a ChannelManager + // generally, and as a fallback place such blocking only immediately before + // persistence. + peer_manager.process_events(); + // We wait up to 100ms, but track how long it takes to detect being put to sleep, // see `await_start`'s use below. let await_start = Instant::now(); @@ -273,19 +267,29 @@ impl BackgroundProcessor { // falling back to our usual hourly prunes. This avoids short-lived clients never // pruning their network graph. We run once 60 seconds after startup before // continuing our normal cadence. - if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { 60 } { + if last_prune_call.elapsed().as_secs() > if have_pruned { NETWORK_PRUNE_TIMER } else { FIRST_NETWORK_PRUNE_TIMER } { if let Some(ref handler) = net_graph_msg_handler { log_trace!(logger, "Pruning network graph of stale entries"); handler.network_graph().remove_stale_channels(); + if let Err(e) = persister.persist_graph(handler.network_graph()) { + log_error!(logger, "Error: Failed to persist network graph, check your disk and permissions {}", e) + } last_prune_call = Instant::now(); have_pruned = true; } } } + // After we exit, ensure we persist the ChannelManager one final time - this avoids // some races where users quit while channel updates were in-flight, with // ChannelMonitor update(s) persisted without a corresponding ChannelManager update. - persister.persist_manager(&*channel_manager) + persister.persist_manager(&*channel_manager)?; + + // Persist NetworkGraph on exit + if let Some(ref handler) = net_graph_msg_handler { + persister.persist_graph(handler.network_graph())?; + } + Ok(()) }); Self { stop_thread: stop_thread_clone, thread_handle: Some(handle) } } @@ -357,6 +361,7 @@ mod tests { use lightning::util::events::{Event, MessageSendEventsProvider, MessageSendEvent}; use lightning::util::ser::Writeable; use lightning::util::test_utils; + use lightning::util::persist::KVStorePersister; use lightning_invoice::payment::{InvoicePayer, RetryAttempts}; use lightning_invoice::utils::DefaultRouter; use lightning_persister::FilesystemPersister; @@ -402,6 +407,45 @@ mod tests { } } + struct Persister { + graph_error: Option<(std::io::ErrorKind, &'static str)>, + manager_error: Option<(std::io::ErrorKind, &'static str)>, + filesystem_persister: FilesystemPersister, + } + + impl Persister { + fn new(data_dir: String) -> Self { + let filesystem_persister = FilesystemPersister::new(data_dir.clone()); + Self { graph_error: None, manager_error: None, filesystem_persister } + } + + fn with_graph_error(self, error: std::io::ErrorKind, message: &'static str) -> Self { + Self { graph_error: Some((error, message)), ..self } + } + + fn with_manager_error(self, error: std::io::ErrorKind, message: &'static str) -> Self { + Self { manager_error: Some((error, message)), ..self } + } + } + + impl KVStorePersister for Persister { + fn persist(&self, key: &str, object: &W) -> std::io::Result<()> { + if key == "manager" { + if let Some((error, message)) = self.manager_error { + return Err(std::io::Error::new(error, message)) + } + } + + if key == "network_graph" { + if let Some((error, message)) = self.graph_error { + return Err(std::io::Error::new(error, message)) + } + } + + self.filesystem_persister.persist(key, object) + } + } + fn get_full_filepath(filepath: String, filename: String) -> String { let mut path = PathBuf::from(filepath); path.push(filename); @@ -525,19 +569,20 @@ mod tests { // Initiate the background processors to watch each node. let data_dir = nodes[0].persister.get_data_dir(); - let persister = move |node: &ChannelManager, Arc, Arc, Arc, Arc>| FilesystemPersister::persist_manager(data_dir.clone(), node); + let persister = Arc::new(Persister::new(data_dir)); let event_handler = |_: &_| {}; let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); macro_rules! check_persisted_data { - ($node: expr, $filepath: expr, $expected_bytes: expr) => { + ($node: expr, $filepath: expr) => { + let mut expected_bytes = Vec::new(); loop { - $expected_bytes.clear(); - match $node.write(&mut $expected_bytes) { + expected_bytes.clear(); + match $node.write(&mut expected_bytes) { Ok(()) => { match std::fs::read($filepath) { Ok(bytes) => { - if bytes == $expected_bytes { + if bytes == expected_bytes { break } else { continue @@ -554,8 +599,8 @@ mod tests { // Check that the initial channel manager data is persisted as expected. let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "manager".to_string()); - let mut expected_bytes = Vec::new(); - check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes); + check_persisted_data!(nodes[0].node, filepath.clone()); + loop { if !nodes[0].node.get_persistence_condvar_value() { break } } @@ -564,12 +609,18 @@ mod tests { nodes[0].node.force_close_channel(&OutPoint { txid: tx.txid(), index: 0 }.to_channel_id()).unwrap(); // Check that the force-close updates are persisted. - let mut expected_bytes = Vec::new(); - check_persisted_data!(nodes[0].node, filepath.clone(), expected_bytes); + check_persisted_data!(nodes[0].node, filepath.clone()); loop { if !nodes[0].node.get_persistence_condvar_value() { break } } + // Check network graph is persisted + let filepath = get_full_filepath("test_background_processor_persister_0".to_string(), "network_graph".to_string()); + if let Some(ref handler) = nodes[0].net_graph_msg_handler { + let network_graph = handler.network_graph(); + check_persisted_data!(network_graph, filepath.clone()); + } + assert!(bg_processor.stop().is_ok()); } @@ -579,7 +630,7 @@ mod tests { // `FRESHNESS_TIMER`. let nodes = create_nodes(1, "test_timer_tick_called".to_string()); let data_dir = nodes[0].persister.get_data_dir(); - let persister = move |node: &ChannelManager, Arc, Arc, Arc, Arc>| FilesystemPersister::persist_manager(data_dir.clone(), node); + let persister = Arc::new(Persister::new(data_dir)); let event_handler = |_: &_| {}; let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); loop { @@ -596,12 +647,13 @@ mod tests { } #[test] - fn test_persist_error() { + fn test_channel_manager_persist_error() { // Test that if we encounter an error during manager persistence, the thread panics. let nodes = create_nodes(2, "test_persist_error".to_string()); open_channel!(nodes[0], nodes[1], 100000); - let persister = |_: &_| Err(std::io::Error::new(std::io::ErrorKind::Other, "test")); + let data_dir = nodes[0].persister.get_data_dir(); + let persister = Arc::new(Persister::new(data_dir).with_manager_error(std::io::ErrorKind::Other, "test")); let event_handler = |_: &_| {}; let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); match bg_processor.join() { @@ -613,19 +665,37 @@ mod tests { } } + #[test] + fn test_network_graph_persist_error() { + // Test that if we encounter an error during network graph persistence, an error gets returned. + let nodes = create_nodes(2, "test_persist_network_graph_error".to_string()); + let data_dir = nodes[0].persister.get_data_dir(); + let persister = Arc::new(Persister::new(data_dir).with_graph_error(std::io::ErrorKind::Other, "test")); + let event_handler = |_: &_| {}; + let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); + + match bg_processor.stop() { + Ok(_) => panic!("Expected error persisting network graph"), + Err(e) => { + assert_eq!(e.kind(), std::io::ErrorKind::Other); + assert_eq!(e.get_ref().unwrap().to_string(), "test"); + }, + } + } + #[test] fn test_background_event_handling() { let mut nodes = create_nodes(2, "test_background_event_handling".to_string()); let channel_value = 100000; let data_dir = nodes[0].persister.get_data_dir(); - let persister = move |node: &_| FilesystemPersister::persist_manager(data_dir.clone(), node); + let persister = Arc::new(Persister::new(data_dir.clone())); // Set up a background event handler for FundingGenerationReady events. let (sender, receiver) = std::sync::mpsc::sync_channel(1); let event_handler = move |event: &Event| { sender.send(handle_funding_generation_ready!(event, channel_value)).unwrap(); }; - let bg_processor = BackgroundProcessor::start(persister.clone(), event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); + let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); // Open a channel and check that the FundingGenerationReady event was handled. begin_open_channel!(nodes[0], nodes[1], channel_value); @@ -649,6 +719,7 @@ mod tests { // Set up a background event handler for SpendableOutputs events. let (sender, receiver) = std::sync::mpsc::sync_channel(1); let event_handler = move |event: &Event| sender.send(event.clone()).unwrap(); + let persister = Arc::new(Persister::new(data_dir)); let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].net_graph_msg_handler.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone()); // Force close the channel and check that the SpendableOutputs event was handled. @@ -675,7 +746,7 @@ mod tests { // Initiate the background processors to watch each node. let data_dir = nodes[0].persister.get_data_dir(); - let persister = move |node: &ChannelManager, Arc, Arc, Arc, Arc>| FilesystemPersister::persist_manager(data_dir.clone(), node); + let persister = Arc::new(Persister::new(data_dir)); let scorer = Arc::new(Mutex::new(test_utils::TestScorer::with_penalty(0))); let router = DefaultRouter::new(Arc::clone(&nodes[0].network_graph), Arc::clone(&nodes[0].logger), random_seed_bytes); let invoice_payer = Arc::new(InvoicePayer::new(Arc::clone(&nodes[0].node), router, scorer, Arc::clone(&nodes[0].logger), |_: &_| {}, RetryAttempts(2)));