Handle being asleep for more than double our ping time gracefully
[rust-lightning] / lightning-background-processor / src / lib.rs
index 3cd01a5e4c78f293b86acfe94d87e7750ec69227..c75c47262e51d2873a335acc37518bccd6ed2984 100644 (file)
@@ -10,6 +10,8 @@
 
 use lightning::chain;
 use lightning::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
+use lightning::chain::chainmonitor::ChainMonitor;
+use lightning::chain::channelmonitor;
 use lightning::chain::keysinterface::{Sign, KeysInterface};
 use lightning::ln::channelmanager::ChannelManager;
 use lightning::ln::msgs::{ChannelMessageHandler, RoutingMessageHandler};
@@ -39,9 +41,7 @@ use std::ops::Deref;
 /// for unilateral chain closure fees are at risk.
 pub struct BackgroundProcessor {
        stop_thread: Arc<AtomicBool>,
-       /// May be used to retrieve and handle the error if `BackgroundProcessor`'s thread
-       /// exits due to an error while persisting.
-       pub thread_handle: JoinHandle<Result<(), std::io::Error>>,
+       thread_handle: Option<JoinHandle<Result<(), std::io::Error>>>,
 }
 
 #[cfg(not(test))]
@@ -49,6 +49,8 @@ const FRESHNESS_TIMER: u64 = 60;
 #[cfg(test)]
 const FRESHNESS_TIMER: u64 = 1;
 
+const PING_TIMER: u64 = 5;
+
 /// Trait which handles persisting a [`ChannelManager`] to disk.
 ///
 /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
@@ -82,56 +84,67 @@ ChannelManagerPersister<Signer, M, T, K, F, L> for Fun where
 }
 
 impl BackgroundProcessor {
-       /// Start a background thread that takes care of responsibilities enumerated in the top-level
-       /// documentation.
+       /// Start a background thread that takes care of responsibilities enumerated in the [top-level
+       /// documentation].
+       ///
+       /// The thread runs indefinitely unless the object is dropped, [`stop`] is called, or
+       /// `persist_manager` returns an error. In case of an error, the error is retrieved by calling
+       /// either [`join`] or [`stop`].
        ///
-       /// If `persist_manager` returns an error, then this thread will return said error (and
-       /// `start()` will need to be called again to restart the `BackgroundProcessor`). Users should
-       /// wait on [`thread_handle`]'s `join()` method to be able to tell if and when an error is
-       /// returned, or implement `persist_manager` such that an error is never returned to the
-       /// `BackgroundProcessor`
+       /// Typically, users should either implement [`ChannelManagerPersister`] to never return an
+       /// error or call [`join`] and handle any error that may arise. For the latter case, the
+       /// `BackgroundProcessor` must be restarted by calling `start` again after handling the error.
        ///
        /// `persist_manager` is responsible for writing out the [`ChannelManager`] to disk, and/or
        /// uploading to one or more backup services. See [`ChannelManager::write`] for writing out a
        /// [`ChannelManager`]. See [`FilesystemPersister::persist_manager`] for Rust-Lightning's
        /// provided implementation.
        ///
-       /// [`thread_handle`]: BackgroundProcessor::thread_handle
+       /// [top-level documentation]: Self
+       /// [`join`]: Self::join
+       /// [`stop`]: Self::stop
        /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
        /// [`ChannelManager::write`]: lightning::ln::channelmanager::ChannelManager#impl-Writeable
        /// [`FilesystemPersister::persist_manager`]: lightning_persister::FilesystemPersister::persist_manager
        pub fn start<
                Signer: 'static + Sign,
-               M: 'static + Deref + Send + Sync,
+               CF: 'static + Deref + Send + Sync,
+               CW: 'static + Deref + Send + Sync,
                T: 'static + Deref + Send + Sync,
                K: 'static + Deref + Send + Sync,
                F: 'static + Deref + Send + Sync,
                L: 'static + Deref + Send + Sync,
+               P: 'static + Deref + Send + Sync,
                Descriptor: 'static + SocketDescriptor + Send + Sync,
                CMH: 'static + Deref + Send + Sync,
                RMH: 'static + Deref + Send + Sync,
                EH: 'static + EventHandler + Send + Sync,
-               CMP: 'static + Send + ChannelManagerPersister<Signer, M, T, K, F, L>,
-               CM: 'static + Deref<Target = ChannelManager<Signer, M, T, K, F, L>> + Send + Sync,
+               CMP: 'static + Send + ChannelManagerPersister<Signer, CW, T, K, F, L>,
+               M: 'static + Deref<Target = ChainMonitor<Signer, CF, T, F, L, P>> + Send + Sync,
+               CM: 'static + Deref<Target = ChannelManager<Signer, CW, T, K, F, L>> + Send + Sync,
                PM: 'static + Deref<Target = PeerManager<Descriptor, CMH, RMH, L>> + Send + Sync,
        >
-       (persister: CMP, event_handler: EH, channel_manager: CM, peer_manager: PM, logger: L) -> Self
+       (persister: CMP, event_handler: EH, chain_monitor: M, channel_manager: CM, peer_manager: PM, logger: L) -> Self
        where
-               M::Target: 'static + chain::Watch<Signer>,
+               CF::Target: 'static + chain::Filter,
+               CW::Target: 'static + chain::Watch<Signer>,
                T::Target: 'static + BroadcasterInterface,
                K::Target: 'static + KeysInterface<Signer = Signer>,
                F::Target: 'static + FeeEstimator,
                L::Target: 'static + Logger,
+               P::Target: 'static + channelmonitor::Persist<Signer>,
                CMH::Target: 'static + ChannelMessageHandler,
                RMH::Target: 'static + RoutingMessageHandler,
        {
                let stop_thread = Arc::new(AtomicBool::new(false));
                let stop_thread_clone = stop_thread.clone();
                let handle = thread::spawn(move || -> Result<(), std::io::Error> {
-                       let mut current_time = Instant::now();
+                       let mut last_freshness_call = Instant::now();
+                       let mut last_ping_call = Instant::now();
                        loop {
                                peer_manager.process_events();
                                channel_manager.process_pending_events(&event_handler);
+                               chain_monitor.process_pending_events(&event_handler);
                                let updates_available =
                                        channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
                                if updates_available {
@@ -142,34 +155,92 @@ impl BackgroundProcessor {
                                        log_trace!(logger, "Terminating background processor.");
                                        return Ok(());
                                }
-                               if current_time.elapsed().as_secs() > FRESHNESS_TIMER {
-                                       log_trace!(logger, "Calling ChannelManager's and PeerManager's timer_tick_occurred");
+                               if last_freshness_call.elapsed().as_secs() > FRESHNESS_TIMER {
+                                       log_trace!(logger, "Calling ChannelManager's timer_tick_occurred");
                                        channel_manager.timer_tick_occurred();
+                                       last_freshness_call = Instant::now();
+                               }
+                               if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
+                                       // On various platforms, we may be starved of CPU cycles for several reasons.
+                                       // E.g. on iOS, if we've been in the background, we will be entirely paused.
+                                       // Similarly, if we're on a desktop platform and the device has been asleep, we
+                                       // may not get any cycles.
+                                       // In any case, if we've been entirely paused for more than double our ping
+                                       // timer, we should have disconnected all sockets by now (and they're probably
+                                       // dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
+                                       log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
+                                       peer_manager.timer_tick_occurred();
+                                       peer_manager.timer_tick_occurred();
+                                       last_ping_call = Instant::now();
+                               } else if last_ping_call.elapsed().as_secs() > PING_TIMER {
+                                       log_trace!(logger, "Calling PeerManager's timer_tick_occurred");
                                        peer_manager.timer_tick_occurred();
-                                       current_time = Instant::now();
+                                       last_ping_call = Instant::now();
                                }
                        }
                });
-               Self { stop_thread: stop_thread_clone, thread_handle: handle }
+               Self { stop_thread: stop_thread_clone, thread_handle: Some(handle) }
        }
 
-       /// Stop `BackgroundProcessor`'s thread.
-       pub fn stop(self) -> Result<(), std::io::Error> {
+       /// Join `BackgroundProcessor`'s thread, returning any error that occurred while persisting
+       /// [`ChannelManager`].
+       ///
+       /// # Panics
+       ///
+       /// This function panics if the background thread has panicked such as while persisting or
+       /// handling events.
+       ///
+       /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
+       pub fn join(mut self) -> Result<(), std::io::Error> {
+               assert!(self.thread_handle.is_some());
+               self.join_thread()
+       }
+
+       /// Stop `BackgroundProcessor`'s thread, returning any error that occurred while persisting
+       /// [`ChannelManager`].
+       ///
+       /// # Panics
+       ///
+       /// This function panics if the background thread has panicked such as while persisting or
+       /// handling events.
+       ///
+       /// [`ChannelManager`]: lightning::ln::channelmanager::ChannelManager
+       pub fn stop(mut self) -> Result<(), std::io::Error> {
+               assert!(self.thread_handle.is_some());
+               self.stop_and_join_thread()
+       }
+
+       fn stop_and_join_thread(&mut self) -> Result<(), std::io::Error> {
                self.stop_thread.store(true, Ordering::Release);
-               self.thread_handle.join().unwrap()
+               self.join_thread()
+       }
+
+       fn join_thread(&mut self) -> Result<(), std::io::Error> {
+               match self.thread_handle.take() {
+                       Some(handle) => handle.join().unwrap(),
+                       None => Ok(()),
+               }
+       }
+}
+
+impl Drop for BackgroundProcessor {
+       fn drop(&mut self) {
+               self.stop_and_join_thread().unwrap();
        }
 }
 
 #[cfg(test)]
 mod tests {
+       use bitcoin::blockdata::block::BlockHeader;
        use bitcoin::blockdata::constants::genesis_block;
        use bitcoin::blockdata::transaction::{Transaction, TxOut};
        use bitcoin::network::constants::Network;
-       use lightning::chain::chainmonitor;
+       use lightning::chain::{BestBlock, Confirm, chainmonitor};
+       use lightning::chain::channelmonitor::ANTI_REORG_DELAY;
        use lightning::chain::keysinterface::{InMemorySigner, KeysInterface, KeysManager};
        use lightning::chain::transaction::OutPoint;
        use lightning::get_event_msg;
-       use lightning::ln::channelmanager::{BestBlock, ChainParameters, ChannelManager, SimpleArcChannelManager};
+       use lightning::ln::channelmanager::{BREAKDOWN_TIMEOUT, ChainParameters, ChannelManager, SimpleArcChannelManager};
        use lightning::ln::features::InitFeatures;
        use lightning::ln::msgs::ChannelMessageHandler;
        use lightning::ln::peer_handler::{PeerManager, MessageHandler, SocketDescriptor};
@@ -184,6 +255,8 @@ mod tests {
        use std::time::Duration;
        use super::{BackgroundProcessor, FRESHNESS_TIMER};
 
+       const EVENT_DEADLINE: u64 = 5 * FRESHNESS_TIMER;
+
        #[derive(Clone, Eq, Hash, PartialEq)]
        struct TestDescriptor{}
        impl SocketDescriptor for TestDescriptor {
@@ -199,8 +272,11 @@ mod tests {
        struct Node {
                node: Arc<SimpleArcChannelManager<ChainMonitor, test_utils::TestBroadcaster, test_utils::TestFeeEstimator, test_utils::TestLogger>>,
                peer_manager: Arc<PeerManager<TestDescriptor, Arc<test_utils::TestChannelMessageHandler>, Arc<test_utils::TestRoutingMessageHandler>, Arc<test_utils::TestLogger>>>,
+               chain_monitor: Arc<ChainMonitor>,
                persister: Arc<FilesystemPersister>,
+               tx_broadcaster: Arc<test_utils::TestBroadcaster>,
                logger: Arc<test_utils::TestLogger>,
+               best_block: BestBlock,
        }
 
        impl Drop for Node {
@@ -222,8 +298,8 @@ mod tests {
        fn create_nodes(num_nodes: usize, persist_dir: String) -> Vec<Node> {
                let mut nodes = Vec::new();
                for i in 0..num_nodes {
-                       let tx_broadcaster = Arc::new(test_utils::TestBroadcaster{txn_broadcasted: Mutex::new(Vec::new())});
-                       let fee_estimator = Arc::new(test_utils::TestFeeEstimator { sat_per_kw: 253 });
+                       let tx_broadcaster = Arc::new(test_utils::TestBroadcaster{txn_broadcasted: Mutex::new(Vec::new()), blocks: Arc::new(Mutex::new(Vec::new()))});
+                       let fee_estimator = Arc::new(test_utils::TestFeeEstimator { sat_per_kw: Mutex::new(253) });
                        let chain_source = Arc::new(test_utils::TestChainSource::new(Network::Testnet));
                        let logger = Arc::new(test_utils::TestLogger::with_id(format!("node {}", i)));
                        let persister = Arc::new(FilesystemPersister::new(format!("{}_persister_{}", persist_dir, i)));
@@ -232,14 +308,12 @@ mod tests {
                        let now = Duration::from_secs(genesis_block(network).header.time as u64);
                        let keys_manager = Arc::new(KeysManager::new(&seed, now.as_secs(), now.subsec_nanos()));
                        let chain_monitor = Arc::new(chainmonitor::ChainMonitor::new(Some(chain_source.clone()), tx_broadcaster.clone(), logger.clone(), fee_estimator.clone(), persister.clone()));
-                       let params = ChainParameters {
-                               network,
-                               best_block: BestBlock::from_genesis(network),
-                       };
-                       let manager = Arc::new(ChannelManager::new(fee_estimator.clone(), chain_monitor.clone(), tx_broadcaster, logger.clone(), keys_manager.clone(), UserConfig::default(), params));
+                       let best_block = BestBlock::from_genesis(network);
+                       let params = ChainParameters { network, best_block };
+                       let manager = Arc::new(ChannelManager::new(fee_estimator.clone(), chain_monitor.clone(), tx_broadcaster.clone(), logger.clone(), keys_manager.clone(), UserConfig::default(), params));
                        let msg_handler = MessageHandler { chan_handler: Arc::new(test_utils::TestChannelMessageHandler::new()), route_handler: Arc::new(test_utils::TestRoutingMessageHandler::new() )};
                        let peer_manager = Arc::new(PeerManager::new(msg_handler, keys_manager.get_node_secret(), &seed, logger.clone()));
-                       let node = Node { node: manager, peer_manager, persister, logger };
+                       let node = Node { node: manager, peer_manager, chain_monitor, persister, tx_broadcaster, logger, best_block };
                        nodes.push(node);
                }
                nodes
@@ -289,6 +363,30 @@ mod tests {
                }}
        }
 
+       fn confirm_transaction_depth(node: &mut Node, tx: &Transaction, depth: u32) {
+               for i in 1..=depth {
+                       let prev_blockhash = node.best_block.block_hash();
+                       let height = node.best_block.height() + 1;
+                       let header = BlockHeader { version: 0x20000000, prev_blockhash, merkle_root: Default::default(), time: height, bits: 42, nonce: 42 };
+                       let txdata = vec![(0, tx)];
+                       node.best_block = BestBlock::new(header.block_hash(), height);
+                       match i {
+                               1 => {
+                                       node.node.transactions_confirmed(&header, &txdata, height);
+                                       node.chain_monitor.transactions_confirmed(&header, &txdata, height);
+                               },
+                               x if x == depth => {
+                                       node.node.best_block_updated(&header, height);
+                                       node.chain_monitor.best_block_updated(&header, height);
+                               },
+                               _ => {},
+                       }
+               }
+       }
+       fn confirm_transaction(node: &mut Node, tx: &Transaction) {
+               confirm_transaction_depth(node, tx, ANTI_REORG_DELAY);
+       }
+
        #[test]
        fn test_background_processor() {
                // Test that when a new channel is created, the ChannelManager needs to be re-persisted with
@@ -305,7 +403,7 @@ mod tests {
                let data_dir = nodes[0].persister.get_data_dir();
                let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
                let event_handler = |_| {};
-               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
 
                macro_rules! check_persisted_data {
                        ($node: expr, $filepath: expr, $expected_bytes: expr) => {
@@ -358,11 +456,13 @@ mod tests {
                let data_dir = nodes[0].persister.get_data_dir();
                let persister = move |node: &ChannelManager<InMemorySigner, Arc<ChainMonitor>, Arc<test_utils::TestBroadcaster>, Arc<KeysManager>, Arc<test_utils::TestFeeEstimator>, Arc<test_utils::TestLogger>>| FilesystemPersister::persist_manager(data_dir.clone(), node);
                let event_handler = |_| {};
-               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
                loop {
                        let log_entries = nodes[0].logger.lines.lock().unwrap();
-                       let desired_log = "Calling ChannelManager's and PeerManager's timer_tick_occurred".to_string();
-                       if log_entries.get(&("lightning_background_processor".to_string(), desired_log)).is_some() {
+                       let desired_log = "Calling ChannelManager's timer_tick_occurred".to_string();
+                       let second_desired_log = "Calling PeerManager's timer_tick_occurred".to_string();
+                       if log_entries.get(&("lightning_background_processor".to_string(), desired_log)).is_some() &&
+                                       log_entries.get(&("lightning_background_processor".to_string(), second_desired_log)).is_some() {
                                break
                        }
                }
@@ -378,13 +478,19 @@ mod tests {
 
                let persister = |_: &_| Err(std::io::Error::new(std::io::ErrorKind::Other, "test"));
                let event_handler = |_| {};
-               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
-               let _ = bg_processor.thread_handle.join().unwrap().expect_err("Errored persisting manager: test");
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               match bg_processor.join() {
+                       Ok(_) => panic!("Expected error persisting manager"),
+                       Err(e) => {
+                               assert_eq!(e.kind(), std::io::ErrorKind::Other);
+                               assert_eq!(e.get_ref().unwrap().to_string(), "test");
+                       },
+               }
        }
 
        #[test]
        fn test_background_event_handling() {
-               let nodes = create_nodes(2, "test_background_event_handling".to_string());
+               let mut nodes = create_nodes(2, "test_background_event_handling".to_string());
                let channel_value = 100000;
                let data_dir = nodes[0].persister.get_data_dir();
                let persister = move |node: &_| FilesystemPersister::persist_manager(data_dir.clone(), node);
@@ -394,15 +500,43 @@ mod tests {
                let event_handler = move |event| {
                        sender.send(handle_funding_generation_ready!(event, channel_value)).unwrap();
                };
-               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+               let bg_processor = BackgroundProcessor::start(persister.clone(), event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
 
                // Open a channel and check that the FundingGenerationReady event was handled.
                begin_open_channel!(nodes[0], nodes[1], channel_value);
-               let timeout = Duration::from_secs(5 * FRESHNESS_TIMER);
-               let (temporary_channel_id, tx) = receiver
-                       .recv_timeout(timeout)
+               let (temporary_channel_id, funding_tx) = receiver
+                       .recv_timeout(Duration::from_secs(EVENT_DEADLINE))
                        .expect("FundingGenerationReady not handled within deadline");
-               end_open_channel!(nodes[0], nodes[1], temporary_channel_id, tx);
+               end_open_channel!(nodes[0], nodes[1], temporary_channel_id, funding_tx);
+
+               // Confirm the funding transaction.
+               confirm_transaction(&mut nodes[0], &funding_tx);
+               let as_funding = get_event_msg!(nodes[0], MessageSendEvent::SendFundingLocked, nodes[1].node.get_our_node_id());
+               confirm_transaction(&mut nodes[1], &funding_tx);
+               let bs_funding = get_event_msg!(nodes[1], MessageSendEvent::SendFundingLocked, nodes[0].node.get_our_node_id());
+               nodes[0].node.handle_funding_locked(&nodes[1].node.get_our_node_id(), &bs_funding);
+               let _as_channel_update = get_event_msg!(nodes[0], MessageSendEvent::SendChannelUpdate, nodes[1].node.get_our_node_id());
+               nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &as_funding);
+               let _bs_channel_update = get_event_msg!(nodes[1], MessageSendEvent::SendChannelUpdate, nodes[0].node.get_our_node_id());
+
+               assert!(bg_processor.stop().is_ok());
+
+               // Set up a background event handler for SpendableOutputs events.
+               let (sender, receiver) = std::sync::mpsc::sync_channel(1);
+               let event_handler = move |event| sender.send(event).unwrap();
+               let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].peer_manager.clone(), nodes[0].logger.clone());
+
+               // Force close the channel and check that the SpendableOutputs event was handled.
+               nodes[0].node.force_close_channel(&nodes[0].node.list_channels()[0].channel_id).unwrap();
+               let commitment_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().pop().unwrap();
+               confirm_transaction_depth(&mut nodes[0], &commitment_tx, BREAKDOWN_TIMEOUT as u32);
+               let event = receiver
+                       .recv_timeout(Duration::from_secs(EVENT_DEADLINE))
+                       .expect("SpendableOutputs not handled within deadline");
+               match event {
+                       Event::SpendableOutputs { .. } => {},
+                       _ => panic!("Unexpected event: {:?}", event),
+               }
 
                assert!(bg_processor.stop().is_ok());
        }