From 38374dde42991ec1829ac07033a313e883c4242e Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Fri, 3 Feb 2023 00:46:50 +0000 Subject: [PATCH] Expect callers to hold read locks before `channel_monitor_updated` Our existing lockorder tests assume that a read lock on a thread that is already holding the same read lock is totally fine. This isn't at all true. The `std` `RwLock` behavior is platform-dependent - on most platforms readers can starve writers as readers will never block for a pending writer. However, on platforms where this is not the case, one thread trying to take a write lock may deadlock with another thread that both already has, and is attempting to take again, a read lock. Worse, our in-tree `FairRwLock` exhibits this behavior explicitly on all platforms to avoid the starvation issue. Sadly, a user ended up hitting this deadlock in production in the form of a call to `get_and_clear_pending_msg_events` which holds the `ChannelManager::total_consistency_lock` before calling `process_pending_monitor_events` and eventually `channel_monitor_updated`, which tries to take the same read lock again. Luckily, the fix is trivial: simply remove the redundant read lock in `channel_monitor_updated`. 
Fixes #2000 --- lightning/src/ln/channelmanager.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 6035f5dc0..48c73a5be 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -4176,7 +4176,7 @@ where } fn channel_monitor_updated(&self, funding_txo: &OutPoint, highest_applied_update_id: u64, counterparty_node_id: Option<&PublicKey>) { - let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(&self.total_consistency_lock, &self.persistence_notifier); + debug_assert!(self.total_consistency_lock.try_write().is_err()); // Caller holds read lock let counterparty_node_id = match counterparty_node_id { Some(cp_id) => cp_id.clone(), @@ -5116,6 +5116,8 @@ where /// Process pending events from the `chain::Watch`, returning whether any events were processed. fn process_pending_monitor_events(&self) -> bool { + debug_assert!(self.total_consistency_lock.try_write().is_err()); // Caller holds read lock + let mut failed_channels = Vec::new(); let mut pending_monitor_events = self.chain_monitor.release_pending_monitor_events(); let has_pending_monitor_events = !pending_monitor_events.is_empty(); @@ -5193,7 +5195,13 @@ where /// update events as a separate process method here. #[cfg(fuzzing)] pub fn process_monitor_events(&self) { - self.process_pending_monitor_events(); + PersistenceNotifierGuard::optionally_notify(&self.total_consistency_lock, &self.persistence_notifier, || { + if self.process_pending_monitor_events() { + NotifyOption::DoPersist + } else { + NotifyOption::SkipPersist + } + }); } /// Check the holding cell in each channel and free any pending HTLCs in them if possible. -- 2.39.5