From cb86399f2e61afb21e935d4084bc7fc0a6bf5afa Mon Sep 17 00:00:00 2001
From: Gursharan Singh <3442979+G8XSU@users.noreply.github.com>
Date: Thu, 7 Mar 2024 11:00:41 -0800
Subject: [PATCH] Don't pause events for chainsync persistence

We used to wait on ChannelMonitor persistence to avoid duplicate payment
events. But duplicates can still happen in cases where the ChannelMonitor
handed the event to the ChannelManager and we did not persist the
ChannelManager after event handling. Receiving duplicate payment events is
expected, and clients should handle these events in an idempotent manner.
Removing this hold-up of events simplifies the logic and makes it easier
to avoid persisting ChannelMonitors on every block connect.
---
 lightning/src/chain/chainmonitor.rs | 118 ++++------------------------
 lightning/src/ln/payment_tests.rs   |  49 +++++-------
 2 files changed, 32 insertions(+), 135 deletions(-)

diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index 81ba30ffb..278d882dd 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -29,7 +29,7 @@ use bitcoin::hash_types::{Txid, BlockHash};
 use crate::chain;
 use crate::chain::{ChannelMonitorUpdateStatus, Filter, WatchedOutput};
 use crate::chain::chaininterface::{BroadcasterInterface, FeeEstimator};
-use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, WithChannelMonitor, LATENCY_GRACE_PERIOD_BLOCKS};
+use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, WithChannelMonitor};
 use crate::chain::transaction::{OutPoint, TransactionData};
 use crate::ln::ChannelId;
 use crate::sign::ecdsa::WriteableEcdsaChannelSigner;
@@ -209,17 +209,6 @@ struct MonitorHolder {
 	/// update_persisted_channel, the user returns a
 	/// [`ChannelMonitorUpdateStatus::InProgress`], and then calls channel_monitor_updated
 	/// immediately, racing our insertion of the pending update into the contained Vec.
-	///
-	/// Beyond the synchronization of updates themselves, we cannot handle user events until after
-	/// any chain updates have been stored on disk. Thus, we scan this list when returning updates
-	/// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still
-	/// being persisted fully to disk after a chain update.
-	///
-	/// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
-	/// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
-	/// the pending payment entry, and then reloading before the monitor is persisted, resulting in
-	/// the ChannelManager re-adding the same payment entry, before the same block is replayed,
-	/// resulting in a duplicate PaymentSent event.
 	pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
 	/// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
 	/// in `pending_monitor_updates`.
@@ -227,6 +216,8 @@ struct MonitorHolder {
 	/// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up
 	/// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel
 	/// forever, risking funds loss.
+	///
+	/// [`LATENCY_GRACE_PERIOD_BLOCKS`]: crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS
 	last_chain_persist_height: AtomicUsize,
 }
@@ -393,7 +384,7 @@ where C::Target: chain::Filter,
 						chain_sync_update_id
 					),
 				ChannelMonitorUpdateStatus::InProgress => {
-					log_debug!(logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+					log_debug!(logger, "Channel Monitor sync for channel {} in progress.", log_funding_info!(monitor));
 					pending_monitor_updates.push(update_id);
 				},
 				ChannelMonitorUpdateStatus::UnrecoverableError => {
@@ -924,21 +915,12 @@ where C::Target: chain::Filter,
 	fn release_pending_monitor_events(&self) -> Vec<(OutPoint, ChannelId, Vec<MonitorEvent>, Option<PublicKey>)> {
 		let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
 		for monitor_state in self.monitors.read().unwrap().values() {
-			let logger = WithChannelMonitor::from(&self.logger, &monitor_state.monitor);
-			let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap());
-			if !is_pending_monitor_update || monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize <= self.highest_chain_height.load(Ordering::Acquire) {
-				if is_pending_monitor_update {
-					log_error!(logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
-					log_error!(logger, " To avoid funds-loss, we are allowing monitor updates to be released.");
-					log_error!(logger, " This may cause duplicate payment events to be generated.");
-				}
-				let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
-				if monitor_events.len() > 0 {
-					let monitor_outpoint = monitor_state.monitor.get_funding_txo().0;
-					let monitor_channel_id = monitor_state.monitor.channel_id();
-					let counterparty_node_id = monitor_state.monitor.get_counterparty_node_id();
-					pending_monitor_events.push((monitor_outpoint, monitor_channel_id, monitor_events, counterparty_node_id));
-				}
+			let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
+			if monitor_events.len() > 0 {
+				let monitor_outpoint = monitor_state.monitor.get_funding_txo().0;
+				let monitor_channel_id = monitor_state.monitor.channel_id();
+				let counterparty_node_id = monitor_state.monitor.get_counterparty_node_id();
+				pending_monitor_events.push((monitor_outpoint, monitor_channel_id, monitor_events, counterparty_node_id));
 			}
 		}
 		pending_monitor_events
@@ -975,15 +957,12 @@ impl
-	let mon_updates: Vec<MonitorUpdateId> = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap()
-		.get_mut(&funding_txo).unwrap().drain().collect();
-	// If we are using chain::Confirm instead of chain::Listen, we will get the same update twice.
-	// If we're testing connection idempotency we may get substantially more.
-	assert!(mon_updates.len() >= 1);
-	assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
-	assert!(nodes[0].node.get_and_clear_pending_events().is_empty());
+	// Note that we skip persisting ChannelMonitors. We should still be generating the payment sent
+	// event without ChannelMonitor persistence. If we reset to a previous state on reload, the block
+	// should be replayed and we'll regenerate the event.
 	// If we persist the ChannelManager here, we should get the PaymentSent event after
 	// deserialization.
@@ -1128,13 +1121,7 @@ fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, co
 		chan_manager_serialized = nodes[0].node.encode();
 	}
 
-	// Now persist the ChannelMonitor and inform the ChainMonitor that we're done, generating the
-	// payment sent event.
-	chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
 	let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
-	for update in mon_updates {
-		nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_txo, update).unwrap();
-	}
 	if payment_timeout {
 		expect_payment_failed!(nodes[0], payment_hash, false);
 	} else {
@@ -1168,13 +1155,13 @@ fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, co
 }
 
 #[test]
-fn test_dup_htlc_onchain_fails_on_reload() {
-	do_test_dup_htlc_onchain_fails_on_reload(true, true, true);
-	do_test_dup_htlc_onchain_fails_on_reload(true, true, false);
-	do_test_dup_htlc_onchain_fails_on_reload(true, false, false);
-	do_test_dup_htlc_onchain_fails_on_reload(false, true, true);
-	do_test_dup_htlc_onchain_fails_on_reload(false, true, false);
-	do_test_dup_htlc_onchain_fails_on_reload(false, false, false);
+fn test_dup_htlc_onchain_doesnt_fail_on_reload() {
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, true);
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, true, false);
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(true, false, false);
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, true);
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, true, false);
+	do_test_dup_htlc_onchain_doesnt_fail_on_reload(false, false, false);
 }
 
 #[test]
-- 
2.39.5
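
A note on the idempotent handling the commit message asks of clients: after this change, a payment event such as PaymentSent may be delivered more than once, for example when a block is replayed on restart before the ChannelManager was persisted. The sketch below shows one way to make event handling idempotent. It is deliberately self-contained and does not use the actual LDK Event type or event-handling traits; the PaymentEvent enum, the PaymentStore type, and the 32-byte payment-hash key are illustrative assumptions, not LDK APIs.

use std::collections::HashMap;

/// Stand-in for the payment-related events a node implementation receives.
/// (Hypothetical type for illustration; not the LDK `Event` enum.)
#[derive(Debug, Clone)]
enum PaymentEvent {
    Sent { payment_hash: [u8; 32], fee_paid_msat: Option<u64> },
    Failed { payment_hash: [u8; 32] },
}

/// Terminal outcome we record exactly once per payment.
#[derive(Debug, Clone, PartialEq)]
enum PaymentOutcome {
    Succeeded { fee_paid_msat: Option<u64> },
    Failed,
}

/// Outcome store keyed by payment hash. In a real node this would be a
/// durable store (database or file) rather than an in-memory map.
#[derive(Default)]
struct PaymentStore {
    outcomes: HashMap<[u8; 32], PaymentOutcome>,
}

impl PaymentStore {
    /// Returns true if the event changed state, false if it was a duplicate.
    /// Acting on an event only the first time its payment hash is seen is
    /// what makes redelivered events harmless.
    fn handle_event(&mut self, event: &PaymentEvent) -> bool {
        let (hash, outcome) = match event {
            PaymentEvent::Sent { payment_hash, fee_paid_msat } =>
                (*payment_hash, PaymentOutcome::Succeeded { fee_paid_msat: *fee_paid_msat }),
            PaymentEvent::Failed { payment_hash } =>
                (*payment_hash, PaymentOutcome::Failed),
        };
        if self.outcomes.contains_key(&hash) {
            // Duplicate delivery (e.g. a block replayed on restart): ignore.
            return false;
        }
        self.outcomes.insert(hash, outcome);
        // Side effects (notify the user, update accounting, release goods)
        // belong here, after the first-and-only state transition.
        true
    }
}

fn main() {
    let mut store = PaymentStore::default();
    let sent = PaymentEvent::Sent { payment_hash: [7u8; 32], fee_paid_msat: Some(1_000) };
    let failed = PaymentEvent::Failed { payment_hash: [9u8; 32] };

    assert!(store.handle_event(&sent));   // first delivery: acted upon
    assert!(!store.handle_event(&sent));  // replayed delivery: no-op
    assert!(store.handle_event(&failed));
    println!("recorded outcomes: {}", store.outcomes.len());
}

The point is that observable side effects happen only on the first state transition recorded for a given payment hash, so an event redelivered after a restart or block replay becomes a no-op.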