Merge pull request #1238 from TheBlueMatt/2022-01-lockorder-checks
authorMatt Corallo <649246+TheBlueMatt@users.noreply.github.com>
Fri, 14 Jan 2022 16:59:42 +0000 (16:59 +0000)
committerGitHub <noreply@github.com>
Fri, 14 Jan 2022 16:59:42 +0000 (16:59 +0000)
Fix and test lockorder

.github/workflows/build.yml
lightning/Cargo.toml
lightning/src/chain/chainmonitor.rs
lightning/src/debug_sync.rs [new file with mode: 0644]
lightning/src/lib.rs
lightning/src/ln/channelmanager.rs
lightning/src/ln/functional_tests.rs
lightning/src/ln/shutdown_tests.rs

index 002005f1809ef32adbb464a9a5491383512a0dbd..fef042beee6a79f7c2e5812890b698641e093834 100644 (file)
@@ -101,6 +101,10 @@ jobs:
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client,rest-client
           RUSTFLAGS="-C link-dead-code" cargo build --verbose --color always --features rpc-client,rest-client,tokio
+      - name: Test backtrace-debug builds on Rust ${{ matrix.toolchain }}
+        if: "matrix.build-no-std"
+        run: |
+          cd lightning && cargo test --verbose --color always --features backtrace
       - name: Test on Rust ${{ matrix.toolchain }} with net-tokio
         if: "matrix.build-net-tokio && !matrix.coverage"
         run: cargo test --verbose --color always
index 445e92234439ecee2324d8c8e190f2bae59607db..55ee177996c00259cf25842475d4a7e8a6d4bccf 100644 (file)
@@ -39,6 +39,7 @@ secp256k1 = { version = "0.20.2", default-features = false, features = ["alloc"]
 hashbrown = { version = "0.11", optional = true }
 hex = { version = "0.3", optional = true }
 regex = { version = "0.1.80", optional = true }
+backtrace = { version = "0.3", optional = true }
 
 core2 = { version = "0.3.0", optional = true, default-features = false }
 
index 9db666bb03d0ff4e706069d3eba5576258eabb1d..05f8832917603095cfc72693a2f4bd1034bf0092 100644 (file)
@@ -920,9 +920,10 @@ mod tests {
                                merkle_root: Default::default() };
                        nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS);
                } else {
-                       for (funding_outpoint, update_ids) in chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().iter() {
+                       let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone();
+                       for (funding_outpoint, update_ids) in persistences {
                                for update_id in update_ids {
-                                       nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(*funding_outpoint, *update_id).unwrap();
+                                       nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap();
                                }
                        }
                }
diff --git a/lightning/src/debug_sync.rs b/lightning/src/debug_sync.rs
new file mode 100644 (file)
index 0000000..7ee5ee5
--- /dev/null
@@ -0,0 +1,213 @@
+pub use ::alloc::sync::Arc;
+use core::ops::{Deref, DerefMut};
+use core::time::Duration;
+
+use std::collections::HashSet;
+use std::cell::RefCell;
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use std::sync::Mutex as StdMutex;
+use std::sync::MutexGuard as StdMutexGuard;
+use std::sync::RwLock as StdRwLock;
+use std::sync::RwLockReadGuard as StdRwLockReadGuard;
+use std::sync::RwLockWriteGuard as StdRwLockWriteGuard;
+use std::sync::Condvar as StdCondvar;
+
+#[cfg(feature = "backtrace")]
+use backtrace::Backtrace;
+
+pub type LockResult<Guard> = Result<Guard, ()>;
+
+pub struct Condvar {
+       inner: StdCondvar,
+}
+
+impl Condvar {
+       pub fn new() -> Condvar {
+               Condvar { inner: StdCondvar::new() }
+       }
+
+       pub fn wait<'a, T>(&'a self, guard: MutexGuard<'a, T>) -> LockResult<MutexGuard<'a, T>> {
+               let mutex: &'a Mutex<T> = guard.mutex;
+               self.inner.wait(guard.into_inner()).map(|lock| MutexGuard { mutex, lock }).map_err(|_| ())
+       }
+
+       #[allow(unused)]
+       pub fn wait_timeout<'a, T>(&'a self, guard: MutexGuard<'a, T>, dur: Duration) -> LockResult<(MutexGuard<'a, T>, ())> {
+               let mutex = guard.mutex;
+               self.inner.wait_timeout(guard.into_inner(), dur).map(|(lock, _)| (MutexGuard { mutex, lock }, ())).map_err(|_| ())
+       }
+
+       pub fn notify_all(&self) { self.inner.notify_all(); }
+}
+
+thread_local! {
+       /// We track the set of locks currently held by a reference to their `MutexMetadata`
+       static MUTEXES_HELD: RefCell<HashSet<Arc<MutexMetadata>>> = RefCell::new(HashSet::new());
+}
+static MUTEX_IDX: AtomicUsize = AtomicUsize::new(0);
+
+/// Metadata about a single mutex, by id, the set of things locked-before it, and the backtrace of
+/// when the Mutex itself was constructed.
+struct MutexMetadata {
+       mutex_idx: u64,
+       locked_before: StdMutex<HashSet<Arc<MutexMetadata>>>,
+       #[cfg(feature = "backtrace")]
+       mutex_construction_bt: Backtrace,
+}
+impl PartialEq for MutexMetadata {
+       fn eq(&self, o: &MutexMetadata) -> bool { self.mutex_idx == o.mutex_idx }
+}
+impl Eq for MutexMetadata {}
+impl std::hash::Hash for MutexMetadata {
+       fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) { hasher.write_u64(self.mutex_idx); }
+}
+
+pub struct Mutex<T: Sized> {
+       inner: StdMutex<T>,
+       deps: Arc<MutexMetadata>,
+}
+
+#[must_use = "if unused the Mutex will immediately unlock"]
+pub struct MutexGuard<'a, T: Sized + 'a> {
+       mutex: &'a Mutex<T>,
+       lock: StdMutexGuard<'a, T>,
+}
+
+impl<'a, T: Sized> MutexGuard<'a, T> {
+       fn into_inner(self) -> StdMutexGuard<'a, T> {
+               // Somewhat unclear why we cannot move out of self.lock, but doing so gets E0509.
+               unsafe {
+                       let v: StdMutexGuard<'a, T> = std::ptr::read(&self.lock);
+                       std::mem::forget(self);
+                       v
+               }
+       }
+}
+
+impl<T: Sized> Drop for MutexGuard<'_, T> {
+       fn drop(&mut self) {
+               MUTEXES_HELD.with(|held| {
+                       held.borrow_mut().remove(&self.mutex.deps);
+               });
+       }
+}
+
+impl<T: Sized> Deref for MutexGuard<'_, T> {
+       type Target = T;
+
+       fn deref(&self) -> &T {
+               &self.lock.deref()
+       }
+}
+
+impl<T: Sized> DerefMut for MutexGuard<'_, T> {
+       fn deref_mut(&mut self) -> &mut T {
+               self.lock.deref_mut()
+       }
+}
+
+impl<T> Mutex<T> {
+       pub fn new(inner: T) -> Mutex<T> {
+               Mutex {
+                       inner: StdMutex::new(inner),
+                       deps: Arc::new(MutexMetadata {
+                               locked_before: StdMutex::new(HashSet::new()),
+                               mutex_idx: MUTEX_IDX.fetch_add(1, Ordering::Relaxed) as u64,
+                               #[cfg(feature = "backtrace")]
+                               mutex_construction_bt: Backtrace::new(),
+                       }),
+               }
+       }
+
+       pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
+               MUTEXES_HELD.with(|held| {
+                       // For each mutex which is currently locked, check that no mutex's locked-before
+                       // set includes the mutex we're about to lock, which would imply a lockorder
+                       // inversion.
+                       for locked in held.borrow().iter() {
+                               for locked_dep in locked.locked_before.lock().unwrap().iter() {
+                                       if *locked_dep == self.deps {
+                                               #[cfg(feature = "backtrace")]
+                                               panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\n{:?}", locked.mutex_construction_bt);
+                                               #[cfg(not(feature = "backtrace"))]
+                                               panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
+                                       }
+                               }
+                               // Insert any already-held mutexes in our locked-before set.
+                               self.deps.locked_before.lock().unwrap().insert(Arc::clone(locked));
+                       }
+                       held.borrow_mut().insert(Arc::clone(&self.deps));
+               });
+               self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ())
+       }
+
+       pub fn try_lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
+               let res = self.inner.try_lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ());
+               if res.is_ok() {
+                       MUTEXES_HELD.with(|held| {
+                               // Since a try-lock will simply fail if the lock is held already, we do not
+                               // consider try-locks to ever generate lockorder inversions. However, if a try-lock
+                               // succeeds, we do consider it to have created lockorder dependencies.
+                               for locked in held.borrow().iter() {
+                                       self.deps.locked_before.lock().unwrap().insert(Arc::clone(locked));
+                               }
+                               held.borrow_mut().insert(Arc::clone(&self.deps));
+                       });
+               }
+               res
+       }
+}
+
+pub struct RwLock<T: ?Sized> {
+       inner: StdRwLock<T>
+}
+
+pub struct RwLockReadGuard<'a, T: ?Sized + 'a> {
+       lock: StdRwLockReadGuard<'a, T>,
+}
+
+pub struct RwLockWriteGuard<'a, T: ?Sized + 'a> {
+       lock: StdRwLockWriteGuard<'a, T>,
+}
+
+impl<T: ?Sized> Deref for RwLockReadGuard<'_, T> {
+       type Target = T;
+
+       fn deref(&self) -> &T {
+               &self.lock.deref()
+       }
+}
+
+impl<T: ?Sized> Deref for RwLockWriteGuard<'_, T> {
+       type Target = T;
+
+       fn deref(&self) -> &T {
+               &self.lock.deref()
+       }
+}
+
+impl<T: ?Sized> DerefMut for RwLockWriteGuard<'_, T> {
+       fn deref_mut(&mut self) -> &mut T {
+               self.lock.deref_mut()
+       }
+}
+
+impl<T> RwLock<T> {
+       pub fn new(inner: T) -> RwLock<T> {
+               RwLock { inner: StdRwLock::new(inner) }
+       }
+
+       pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
+               self.inner.read().map(|lock| RwLockReadGuard { lock }).map_err(|_| ())
+       }
+
+       pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
+               self.inner.write().map(|lock| RwLockWriteGuard { lock }).map_err(|_| ())
+       }
+
+       pub fn try_write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
+               self.inner.try_write().map(|lock| RwLockWriteGuard { lock }).map_err(|_| ())
+       }
+}
index 3338803f9a0c771a7ef976a0afba9bd28d6ded7f..ca18d7bb5cc0ba5ae64eadb5310e023d62b634b2 100644 (file)
@@ -143,8 +143,16 @@ mod prelude {
        pub use alloc::string::ToString;
 }
 
+#[cfg(all(feature = "std", test))]
+mod debug_sync;
+#[cfg(all(feature = "backtrace", feature = "std", test))]
+extern crate backtrace;
+
 #[cfg(feature = "std")]
 mod sync {
+       #[cfg(test)]
+       pub use debug_sync::*;
+       #[cfg(not(test))]
        pub use ::std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard};
 }
 
index e9af5b1868193094ed2e9c4be9a1b53696c398ff..77e9152d4c1d3ad5af0ff6283345301261d3dcbe 100644 (file)
@@ -3931,12 +3931,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
        }
 
        fn finalize_claims(&self, mut sources: Vec<HTLCSource>) {
+               let mut outbounds = self.pending_outbound_payments.lock().unwrap();
                let mut pending_events = self.pending_events.lock().unwrap();
                for source in sources.drain(..) {
                        if let HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } = source {
                                let mut session_priv_bytes = [0; 32];
                                session_priv_bytes.copy_from_slice(&session_priv[..]);
-                               let mut outbounds = self.pending_outbound_payments.lock().unwrap();
                                if let hash_map::Entry::Occupied(mut payment) = outbounds.entry(payment_id) {
                                        assert!(payment.get().is_fulfilled());
                                        if payment.get_mut().remove(&session_priv_bytes, None) {
@@ -5321,8 +5321,8 @@ where
                        inbound_payment.expiry_time > header.time as u64
                });
 
-               let mut pending_events = self.pending_events.lock().unwrap();
                let mut outbounds = self.pending_outbound_payments.lock().unwrap();
+               let mut pending_events = self.pending_events.lock().unwrap();
                outbounds.retain(|payment_id, payment| {
                        if payment.remaining_parts() != 0 { return true }
                        if let PendingOutboundPayment::Retryable { starting_block_height, payment_hash, .. } = payment {
@@ -6151,6 +6151,8 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> Writeable f
                        peer_state.latest_features.write(writer)?;
                }
 
+               let pending_inbound_payments = self.pending_inbound_payments.lock().unwrap();
+               let pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
                let events = self.pending_events.lock().unwrap();
                (events.len() as u64).write(writer)?;
                for event in events.iter() {
@@ -6172,14 +6174,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> Writeable f
                (self.last_node_announcement_serial.load(Ordering::Acquire) as u32).write(writer)?;
                (self.highest_seen_timestamp.load(Ordering::Acquire) as u32).write(writer)?;
 
-               let pending_inbound_payments = self.pending_inbound_payments.lock().unwrap();
                (pending_inbound_payments.len() as u64).write(writer)?;
                for (hash, pending_payment) in pending_inbound_payments.iter() {
                        hash.write(writer)?;
                        pending_payment.write(writer)?;
                }
 
-               let pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
                // For backwards compat, write the session privs and their total length.
                let mut num_pending_outbounds_compat: u64 = 0;
                for (_, outbound) in pending_outbound_payments.iter() {
index fe67188f280f317345384c1b83e588ed47304606..0993caf4c1d1bd74708ed35c66be20e107f6f78e 100644 (file)
@@ -2481,7 +2481,7 @@ fn revoked_output_claim() {
        mine_transaction(&nodes[1], &revoked_local_txn[0]);
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(node_txn.len(), 2); // ChannelMonitor: justice tx against revoked to_local output, ChannelManager: local commitment tx
 
        check_spends!(node_txn[0], revoked_local_txn[0]);
@@ -4645,7 +4645,7 @@ fn test_claim_sizeable_push_msat() {
        check_closed_broadcast!(nodes[1], true);
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed);
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(node_txn.len(), 1);
        check_spends!(node_txn[0], chan.3);
        assert_eq!(node_txn[0].output.len(), 2); // We can't force trimming of to_remote output as channel_reserve_satoshis block us to do so at channel opening
@@ -4713,7 +4713,7 @@ fn test_claim_on_remote_revoked_sizeable_push_msat() {
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        mine_transaction(&nodes[1], &node_txn[0]);
        connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
 
@@ -4756,7 +4756,7 @@ fn test_static_spendable_outputs_preimage_tx() {
        }
 
        // Check B's monitor was able to send back output descriptor event for preimage tx on A's commitment tx
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap(); // ChannelManager : 2 (local commitment tx + HTLC-Success), ChannelMonitor: preimage tx
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone(); // ChannelManager : 2 (local commitment tx + HTLC-Success), ChannelMonitor: preimage tx
        assert_eq!(node_txn.len(), 3);
        check_spends!(node_txn[0], commitment_tx[0]);
        assert_eq!(node_txn[0].input[0].witness.last().unwrap().len(), OFFERED_HTLC_SCRIPT_WEIGHT);
@@ -4842,7 +4842,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_commitment_tx() {
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(node_txn.len(), 2);
        assert_eq!(node_txn[0].input.len(), 2);
        check_spends!(node_txn[0], revoked_local_txn[0]);
@@ -4895,7 +4895,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_timeout_tx() {
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
 
-       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(node_txn.len(), 3); // ChannelMonitor: bogus justice tx, justice tx on revoked outputs, ChannelManager: local commitment tx
        // The first transaction generated is bogus - it spends both outputs of revoked_local_txn[0]
        // including the one already spent by revoked_htlc_txn[1]. That's OK, we'll spend with valid
@@ -4951,7 +4951,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_success_tx() {
        check_closed_broadcast!(nodes[1], true);
        check_added_monitors!(nodes[1], 1);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-       let revoked_htlc_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let revoked_htlc_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
 
        assert_eq!(revoked_htlc_txn.len(), 2);
        assert_eq!(revoked_htlc_txn[0].input.len(), 1);
@@ -4969,7 +4969,7 @@ fn test_static_spendable_outputs_justice_tx_revoked_htlc_success_tx() {
        check_added_monitors!(nodes[0], 1);
        check_closed_event!(nodes[0], 1, ClosureReason::CommitmentTxConfirmed);
 
-       let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(node_txn.len(), 3); // ChannelMonitor: justice tx on revoked commitment, justice tx on revoked HTLC-success, ChannelManager: local commitment tx
 
        // The first transaction generated is bogus - it spends both outputs of revoked_local_txn[0]
@@ -5110,7 +5110,7 @@ fn test_onchain_to_onchain_claim() {
        let commitment_tx = get_local_commitment_txn!(nodes[0], chan_1.2);
        mine_transaction(&nodes[1], &commitment_tx[0]);
        check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed);
-       let b_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let b_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        // ChannelMonitor: HTLC-Success tx, ChannelManager: local commitment tx + HTLC-Success tx
        assert_eq!(b_txn.len(), 3);
        check_spends!(b_txn[1], chan_1.3);
@@ -8685,7 +8685,7 @@ fn do_test_onchain_htlc_settlement_after_close(broadcast_alice: bool, go_onchain
                };
                let header = BlockHeader { version: 0x20000000, prev_blockhash: nodes[1].best_block_hash(), merkle_root: Default::default(), time: 42, bits: 42, nonce: 42};
                connect_block(&nodes[1], &Block { header, txdata: vec![txn_to_broadcast[0].clone()]});
-               let mut bob_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+               let mut bob_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
                if broadcast_alice {
                        check_closed_broadcast!(nodes[1], true);
                        check_added_monitors!(nodes[1], 1);
index b712212ab75a7208431949765324a63b8e7b32a7..7afc982faabd3b8975646ea8f669172887974340 100644 (file)
@@ -778,7 +778,7 @@ fn do_test_closing_signed_reinit_timeout(timeout_step: TimeoutStep) {
        nodes[1].node.timer_tick_occurred();
        nodes[1].node.timer_tick_occurred();
 
-       let txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+       let txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
        assert_eq!(txn.len(), 1);
        assert_eq!(txn[0].output.len(), 2);