Merge pull request #2966 from G8XSU/2647-distribute
diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index bbbd60d064be1cf8070f3ae633a76192e6ff3a39..e6bb9d90778ce46b4cd7ba5f25eafb52daba455d 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -31,19 +31,20 @@ use crate::chain::{ChannelMonitorUpdateStatus, Filter, WatchedOutput};
 use crate::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
 use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, WithChannelMonitor};
 use crate::chain::transaction::{OutPoint, TransactionData};
-use crate::ln::ChannelId;
-use crate::sign::ecdsa::WriteableEcdsaChannelSigner;
+use crate::ln::types::ChannelId;
+use crate::sign::ecdsa::EcdsaChannelSigner;
 use crate::events;
 use crate::events::{Event, EventHandler};
 use crate::util::logger::{Logger, WithContext};
 use crate::util::errors::APIError;
 use crate::util::wakers::{Future, Notifier};
-use crate::ln::channelmanager::ChannelDetails;
+use crate::ln::channel_state::ChannelDetails;
 
 use crate::prelude::*;
 use crate::sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard};
 use core::ops::Deref;
 use core::sync::atomic::{AtomicUsize, Ordering};
+use bitcoin::hashes::Hash;
 use bitcoin::secp256k1::PublicKey;
 
 /// `Persist` defines behavior for persisting channel monitors: this could mean
@@ -101,7 +102,7 @@ use bitcoin::secp256k1::PublicKey;
 ///
 /// [`TrustedCommitmentTransaction::revokeable_output_index`]: crate::ln::chan_utils::TrustedCommitmentTransaction::revokeable_output_index
 /// [`TrustedCommitmentTransaction::build_to_local_justice_tx`]: crate::ln::chan_utils::TrustedCommitmentTransaction::build_to_local_justice_tx
-pub trait Persist<ChannelSigner: WriteableEcdsaChannelSigner> {
+pub trait Persist<ChannelSigner: EcdsaChannelSigner> {
        /// Persist a new channel's data in response to a [`chain::Watch::watch_channel`] call. This is
        /// called by [`ChannelManager`] for new channels, or may be called directly, e.g. on startup.
        ///
@@ -163,7 +164,7 @@ pub trait Persist<ChannelSigner: WriteableEcdsaChannelSigner> {
        fn archive_persisted_channel(&self, channel_funding_outpoint: OutPoint);
 }
 
-struct MonitorHolder<ChannelSigner: WriteableEcdsaChannelSigner> {
+struct MonitorHolder<ChannelSigner: EcdsaChannelSigner> {
        monitor: ChannelMonitor<ChannelSigner>,
        /// The full set of pending monitor updates for this Channel.
        ///
@@ -174,7 +175,7 @@ struct MonitorHolder<ChannelSigner: WriteableEcdsaChannelSigner> {
        pending_monitor_updates: Mutex<Vec<u64>>,
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner> MonitorHolder<ChannelSigner> {
+impl<ChannelSigner: EcdsaChannelSigner> MonitorHolder<ChannelSigner> {
        fn has_pending_updates(&self, pending_monitor_updates_lock: &MutexGuard<Vec<u64>>) -> bool {
                !pending_monitor_updates_lock.is_empty()
        }
@@ -184,12 +185,12 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner> MonitorHolder<ChannelSigner> {
 ///
 /// Note that this holds a mutex in [`ChainMonitor`] and may block other events until it is
 /// released.
-pub struct LockedChannelMonitor<'a, ChannelSigner: WriteableEcdsaChannelSigner> {
+pub struct LockedChannelMonitor<'a, ChannelSigner: EcdsaChannelSigner> {
        lock: RwLockReadGuard<'a, HashMap<OutPoint, MonitorHolder<ChannelSigner>>>,
        funding_txo: OutPoint,
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner> Deref for LockedChannelMonitor<'_, ChannelSigner> {
+impl<ChannelSigner: EcdsaChannelSigner> Deref for LockedChannelMonitor<'_, ChannelSigner> {
        type Target = ChannelMonitor<ChannelSigner>;
        fn deref(&self) -> &ChannelMonitor<ChannelSigner> {
                &self.lock.get(&self.funding_txo).expect("Checked at construction").monitor
@@ -212,7 +213,7 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner> Deref for LockedChannelMonitor<
 /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
 /// [module-level documentation]: crate::chain::chainmonitor
 /// [`rebroadcast_pending_claims`]: Self::rebroadcast_pending_claims
-pub struct ChainMonitor<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
+pub struct ChainMonitor<ChannelSigner: EcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
        where C::Target: chain::Filter,
         T::Target: BroadcasterInterface,
         F::Target: FeeEstimator,
@@ -231,10 +232,12 @@ pub struct ChainMonitor<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T:
        /// The best block height seen, used as a proxy for the passage of time.
        highest_chain_height: AtomicUsize,
 
+       /// A [`Notifier`] used to wake up the background processor in case we have any [`Event`]s for
+       /// it to give to users (or [`MonitorEvent`]s for `ChannelManager` to process).
        event_notifier: Notifier,
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> ChainMonitor<ChannelSigner, C, T, F, L, P>
+impl<ChannelSigner: EcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> ChainMonitor<ChannelSigner, C, T, F, L, P>
 where C::Target: chain::Filter,
            T::Target: BroadcasterInterface,
            F::Target: FeeEstimator,
@@ -258,10 +261,11 @@ where C::Target: chain::Filter,
        {
                let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
                let funding_outpoints = hash_set_from_iter(self.monitors.read().unwrap().keys().cloned());
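+               // Snapshot the channel count; it is used below to pick how widely to spread
+               // per-monitor persistence across blocks.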
+               let channel_count = funding_outpoints.len();
                for funding_outpoint in funding_outpoints.iter() {
                        let monitor_lock = self.monitors.read().unwrap();
                        if let Some(monitor_state) = monitor_lock.get(funding_outpoint) {
-                               if self.update_monitor_with_chain_data(header, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+                               if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state, channel_count).is_err() {
                                        // Take the monitors lock for writing so that we poison it and any future
                                        // operations going forward fail immediately.
                                        core::mem::drop(monitor_lock);
@@ -276,7 +280,7 @@ where C::Target: chain::Filter,
                let monitor_states = self.monitors.write().unwrap();
                for (funding_outpoint, monitor_state) in monitor_states.iter() {
                        if !funding_outpoints.contains(funding_outpoint) {
-                               if self.update_monitor_with_chain_data(header, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+                               if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state, channel_count).is_err() {
                                        log_error!(self.logger, "{}", err_str);
                                        panic!("{}", err_str);
                                }
@@ -295,14 +299,29 @@ where C::Target: chain::Filter,
        }
 
        fn update_monitor_with_chain_data<FN>(
-               &self, header: &Header, txdata: &TransactionData, process: FN, funding_outpoint: &OutPoint,
-               monitor_state: &MonitorHolder<ChannelSigner>
+               &self, header: &Header, best_height: Option<u32>, txdata: &TransactionData, process: FN, funding_outpoint: &OutPoint,
+               monitor_state: &MonitorHolder<ChannelSigner>, channel_count: usize,
        ) -> Result<(), ()> where FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs> {
                let monitor = &monitor_state.monitor;
-               let logger = WithChannelMonitor::from(&self.logger, &monitor);
-               let mut txn_outputs;
-               {
-                       txn_outputs = process(monitor, txdata);
+               let logger = WithChannelMonitor::from(&self.logger, &monitor, None);
+
+               let mut txn_outputs = process(monitor, txdata);
+
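+               // Derive a deterministic partition key by interpreting the first four bytes of the
+               // funding txid as a big-endian u32 and offsetting it by the current best height, so
+               // each monitor lands in a stable persistence slot of its own.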
+               let get_partition_key = |funding_outpoint: &OutPoint| {
+                       let funding_txid_hash = funding_outpoint.txid.to_raw_hash();
+                       let funding_txid_hash_bytes = funding_txid_hash.as_byte_array();
+                       let funding_txid_u32 = u32::from_be_bytes([funding_txid_hash_bytes[0], funding_txid_hash_bytes[1], funding_txid_hash_bytes[2], funding_txid_hash_bytes[3]]);
+                       funding_txid_u32.wrapping_add(best_height.unwrap_or_default())
+               };
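+               // For example (hypothetical txid): if the first four bytes of the funding txid are
+               // 0x00000007 and best_height is 43, the key is 50; with a partition_factor of 50 the
+               // monitor is persisted at that height and then once every 50 blocks thereafter.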
+
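+               // Nodes with few channels can afford frequent writes; larger nodes spread the same
+               // work across a longer window to cap the per-block persistence load.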
+               let partition_factor = if channel_count < 15 {
+                       5
+               } else {
+                       50 // ~8 hours at an average of one block every 10 minutes
+               };
+
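+               // Monitors with pending claims are persisted on every chain update regardless of
+               // their partition slot, so in-flight claim state is never stale after a restart.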
+               let has_pending_claims = monitor_state.monitor.has_pending_claims();
+               if has_pending_claims || get_partition_key(funding_outpoint) % partition_factor == 0 {
                        log_trace!(logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
                        match self.persister.update_persisted_channel(*funding_outpoint, None, monitor) {
                                ChannelMonitorUpdateStatus::Completed =>
@@ -311,10 +330,10 @@ where C::Target: chain::Filter,
                                        ),
                                ChannelMonitorUpdateStatus::InProgress => {
                                        log_trace!(logger, "Channel Monitor sync for channel {} in progress.", log_funding_info!(monitor));
-                               },
+                               }
                                ChannelMonitorUpdateStatus::UnrecoverableError => {
                                        return Err(());
-                               },
+                               }
                        }
                }
 
@@ -597,7 +616,7 @@ where C::Target: chain::Filter,
        pub fn archive_fully_resolved_channel_monitors(&self) {
                let mut have_monitors_to_prune = false;
                for (_, monitor_holder) in self.monitors.read().unwrap().iter() {
-                       let logger = WithChannelMonitor::from(&self.logger, &monitor_holder.monitor);
+                       let logger = WithChannelMonitor::from(&self.logger, &monitor_holder.monitor, None);
                        if monitor_holder.monitor.is_fully_resolved(&logger) {
                                have_monitors_to_prune = true;
                        }
@@ -605,7 +624,7 @@ where C::Target: chain::Filter,
                if have_monitors_to_prune {
                        let mut monitors = self.monitors.write().unwrap();
                        monitors.retain(|funding_txo, monitor_holder| {
-                               let logger = WithChannelMonitor::from(&self.logger, &monitor_holder.monitor);
+                               let logger = WithChannelMonitor::from(&self.logger, &monitor_holder.monitor, None);
                                if monitor_holder.monitor.is_fully_resolved(&logger) {
                                        log_info!(logger,
                                                "Archiving fully resolved ChannelMonitor for funding txo {}",
@@ -621,7 +640,7 @@ where C::Target: chain::Filter,
        }
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
+impl<ChannelSigner: EcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
 chain::Listen for ChainMonitor<ChannelSigner, C, T, F, L, P>
 where
        C::Target: chain::Filter,
@@ -636,6 +655,8 @@ where
                        monitor.block_connected(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &self.logger)
                });
+               // Assume we may have some new events and wake the event processor
+               self.event_notifier.notify();
        }
 
        fn block_disconnected(&self, header: &Header, height: u32) {
@@ -648,7 +669,7 @@ where
        }
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
+impl<ChannelSigner: EcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref>
 chain::Confirm for ChainMonitor<ChannelSigner, C, T, F, L, P>
 where
        C::Target: chain::Filter,
@@ -663,6 +684,8 @@ where
                        monitor.transactions_confirmed(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &self.logger)
                });
+               // Assume we may have some new events and wake the event processor
+               self.event_notifier.notify();
        }
 
        fn transaction_unconfirmed(&self, txid: &Txid) {
@@ -683,6 +706,8 @@ where
                                header, height, &*self.broadcaster, &*self.fee_estimator, &self.logger
                        )
                });
+               // Assume we may have some new events and wake the event processor
+               self.event_notifier.notify();
        }
 
        fn get_relevant_txids(&self) -> Vec<(Txid, u32, Option<BlockHash>)> {
@@ -698,7 +723,7 @@ where
        }
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref , T: Deref , F: Deref , L: Deref , P: Deref >
+impl<ChannelSigner: EcdsaChannelSigner, C: Deref , T: Deref , F: Deref , L: Deref , P: Deref >
 chain::Watch<ChannelSigner> for ChainMonitor<ChannelSigner, C, T, F, L, P>
 where C::Target: chain::Filter,
            T::Target: BroadcasterInterface,
@@ -707,7 +732,7 @@ where C::Target: chain::Filter,
            P::Target: Persist<ChannelSigner>,
 {
        fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> Result<ChannelMonitorUpdateStatus, ()> {
-               let logger = WithChannelMonitor::from(&self.logger, &monitor);
+               let logger = WithChannelMonitor::from(&self.logger, &monitor, None);
                let mut monitors = self.monitors.write().unwrap();
                let entry = match monitors.entry(funding_outpoint) {
                        hash_map::Entry::Occupied(_) => {
@@ -752,7 +777,7 @@ where C::Target: chain::Filter,
                let monitors = self.monitors.read().unwrap();
                match monitors.get(&funding_txo) {
                        None => {
-                               let logger = WithContext::from(&self.logger, update.counterparty_node_id, Some(channel_id));
+                               let logger = WithContext::from(&self.logger, update.counterparty_node_id, Some(channel_id), None);
                                log_error!(logger, "Failed to update channel monitor: no such monitor registered");
 
                                // We should never ever trigger this from within ChannelManager. Technically a
@@ -765,7 +790,7 @@ where C::Target: chain::Filter,
                        },
                        Some(monitor_state) => {
                                let monitor = &monitor_state.monitor;
-                               let logger = WithChannelMonitor::from(&self.logger, &monitor);
+                               let logger = WithChannelMonitor::from(&self.logger, &monitor, None);
                                log_trace!(logger, "Updating ChannelMonitor to id {} for channel {}", update.update_id, log_funding_info!(monitor));
                                let update_res = monitor.update_monitor(update, &self.broadcaster, &self.fee_estimator, &self.logger);
 
@@ -833,7 +858,7 @@ where C::Target: chain::Filter,
        }
 }
 
-impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> events::EventsProvider for ChainMonitor<ChannelSigner, C, T, F, L, P>
+impl<ChannelSigner: EcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> events::EventsProvider for ChainMonitor<ChannelSigner, C, T, F, L, P>
        where C::Target: chain::Filter,
              T::Target: BroadcasterInterface,
              F::Target: FeeEstimator,
@@ -862,14 +887,17 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L
 
 #[cfg(test)]
 mod tests {
-       use crate::check_added_monitors;
+       use crate::{check_added_monitors, check_closed_event};
        use crate::{expect_payment_path_successful, get_event_msg};
        use crate::{get_htlc_update_msgs, get_revoke_commit_msgs};
        use crate::chain::{ChannelMonitorUpdateStatus, Watch};
-       use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider};
+       use crate::chain::channelmonitor::ANTI_REORG_DELAY;
+       use crate::events::{ClosureReason, Event, MessageSendEvent, MessageSendEventsProvider};
        use crate::ln::functional_test_utils::*;
        use crate::ln::msgs::ChannelMessageHandler;
 
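+       // Mirrors the `partition_factor` chosen in `update_monitor_with_chain_data` for nodes
+       // monitoring fewer than 15 channels.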
+       const CHAINSYNC_MONITOR_PARTITION_FACTOR: u32 = 5;
+
        #[test]
        fn test_async_ooo_offchain_updates() {
                // Test that if we have multiple offchain updates being persisted and they complete
@@ -975,6 +1003,79 @@ mod tests {
                check_added_monitors!(nodes[0], 1);
        }
 
+       #[test]
+       fn test_chainsync_triggers_distributed_monitor_persistence() {
+               let chanmon_cfgs = create_chanmon_cfgs(3);
+               let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+               let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+               let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+               // Use FullBlockViaListen to avoid the duplicate process_chain_data calls and the
+               // block-skipping (skips_blocks()) behavior of other connect_styles.
+               *nodes[0].connect_style.borrow_mut() = ConnectStyle::FullBlockViaListen;
+               *nodes[1].connect_style.borrow_mut() = ConnectStyle::FullBlockViaListen;
+               *nodes[2].connect_style.borrow_mut() = ConnectStyle::FullBlockViaListen;
+
+               let _channel_1 = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+               let channel_2 = create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 1_000_000, 0).2;
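+               // nodes[0] now monitors two channels (one with each peer); nodes[1] and nodes[2]
+               // monitor one each.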
+
+               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+               chanmon_cfgs[1].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+               chanmon_cfgs[2].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+
+               connect_blocks(&nodes[0], CHAINSYNC_MONITOR_PARTITION_FACTOR * 2);
+               connect_blocks(&nodes[1], CHAINSYNC_MONITOR_PARTITION_FACTOR * 2);
+               connect_blocks(&nodes[2], CHAINSYNC_MONITOR_PARTITION_FACTOR * 2);
+
+               // Connecting CHAINSYNC_MONITOR_PARTITION_FACTOR * 2 blocks should trigger only 2 writes
+               // per monitor/channel; nodes[0] has two channels, hence the 2 * 2 below.
+               assert_eq!(2 * 2, chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+               assert_eq!(2, chanmon_cfgs[1].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+               assert_eq!(2, chanmon_cfgs[2].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+
+               // Test that monitors with pending claims are persisted on every block.
+               // Close channel_2 (between nodes[0] and nodes[2]) to create a pending claim on nodes[0].
+               nodes[0].node.force_close_broadcasting_latest_txn(&channel_2, &nodes[2].node.get_our_node_id(), "Channel force-closed".to_string()).unwrap();
+               check_closed_event!(&nodes[0], 1, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }, false,
+                       [nodes[2].node.get_our_node_id()], 1000000);
+               check_closed_broadcast(&nodes[0], 1, true);
+               let close_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+               assert_eq!(close_tx.len(), 1);
+
+               mine_transaction(&nodes[2], &close_tx[0]);
+               check_added_monitors(&nodes[2], 1);
+               check_closed_broadcast(&nodes[2], 1, true);
+               check_closed_event!(&nodes[2], 1, ClosureReason::CommitmentTxConfirmed, false,
+                       [nodes[0].node.get_our_node_id()], 1000000);
+
+               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+               chanmon_cfgs[2].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+
+               // For channel_2, there should be a monitor write on every block connection.
+               // We connect CHAINSYNC_MONITOR_PARTITION_FACTOR blocks since we don't know at which
+               // height channel_1's partition slot falls; over one full cycle its monitor is
+               // persisted exactly once.
+               connect_blocks(&nodes[0], CHAINSYNC_MONITOR_PARTITION_FACTOR);
+               connect_blocks(&nodes[2], CHAINSYNC_MONITOR_PARTITION_FACTOR);
+
+               // CHAINSYNC_MONITOR_PARTITION_FACTOR writes for channel_2 due to its pending claim,
+               // plus 1 for channel_1.
+               assert_eq!((CHAINSYNC_MONITOR_PARTITION_FACTOR + 1) as usize, chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+               // nodes[2] has no pending claim, so only its single partition-slot write occurs.
+               assert_eq!(1, chanmon_cfgs[2].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+
+               // Confirm the claim for nodes[0] by connecting ANTI_REORG_DELAY blocks, then reset
+               // the monitor write counter.
+               mine_transaction(&nodes[0], &close_tx[0]);
+               connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1);
+               check_added_monitors(&nodes[0], 1);
+               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+
+               // Again connect one full cycle of CHAINSYNC_MONITOR_PARTITION_FACTOR blocks; it should
+               // now result in only 1 write per monitor/channel.
+               connect_blocks(&nodes[0], CHAINSYNC_MONITOR_PARTITION_FACTOR);
+               assert_eq!(2, chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().len());
+       }
+
        #[test]
        #[cfg(feature = "std")]
        fn update_during_chainsync_poisons_channel() {
@@ -983,13 +1084,15 @@ mod tests {
                let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
                let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
                create_announced_chan_between_nodes(&nodes, 0, 1);
+               *nodes[0].connect_style.borrow_mut() = ConnectStyle::FullBlockViaListen;
 
-               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
                chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::UnrecoverableError);
 
                assert!(std::panic::catch_unwind(|| {
                        // Returning an UnrecoverableError should always panic immediately
-                       connect_blocks(&nodes[0], 1);
+                       // Connecting CHAINSYNC_MONITOR_PARTITION_FACTOR blocks guarantees we trigger some
+                       // persistence after accounting for block-height-based partitioning/distribution.
+                       connect_blocks(&nodes[0], CHAINSYNC_MONITOR_PARTITION_FACTOR);
                }).is_err());
                assert!(std::panic::catch_unwind(|| {
                        // ...and also poison our locks causing later use to panic as well