Merge pull request #2009 from TheBlueMatt/2023-02-no-racey-retries
[rust-lightning] / lightning / src / ln / channelmanager.rs
index e3a67d75c9085fefd8e42a17160bb3e128ed298e..d47fbe0babd17d98d1674c58377d7bf3942bbeb8 100644 (file)
@@ -70,7 +70,7 @@ use crate::prelude::*;
 use core::{cmp, mem};
 use core::cell::RefCell;
 use crate::io::Read;
-use crate::sync::{Arc, Mutex, RwLock, RwLockReadGuard, FairRwLock};
+use crate::sync::{Arc, Mutex, RwLock, RwLockReadGuard, FairRwLock, LockTestExt, LockHeldState};
 use core::sync::atomic::{AtomicUsize, Ordering};
 use core::time::Duration;
 use core::ops::Deref;
@@ -493,6 +493,18 @@ pub(super) struct PeerState<Signer: ChannelSigner> {
        is_connected: bool,
 }
 
+impl <Signer: ChannelSigner> PeerState<Signer> {
+       /// Indicates that a peer meets the criteria where we're ok to remove it from our storage.
+       /// If true is passed for `require_disconnected`, the function will return false if we haven't
+       /// disconnected from the node already, ie. `PeerState::is_connected` is set to `true`.
+       fn ok_to_remove(&self, require_disconnected: bool) -> bool {
+               if require_disconnected && self.is_connected {
+                       return false
+               }
+               self.channel_by_id.len() == 0
+       }
+}
+
 /// Stores a PaymentSecret and any other data we may need to validate an inbound payment is
 /// actually ours and not some duplicate HTLC sent to us by a node along the route.
 ///
@@ -1178,9 +1190,9 @@ pub enum RecentPaymentDetails {
                /// made before LDK version 0.0.104.
                payment_hash: Option<PaymentHash>,
        },
-       /// After a payment is explicitly abandoned by calling [`ChannelManager::abandon_payment`], it
-       /// is marked as abandoned until an [`Event::PaymentFailed`] is generated. A payment could also
-       /// be marked as abandoned if pathfinding fails repeatedly or retries have been exhausted.
+       /// After a payment's retries are exhausted per the provided [`Retry`], or it is explicitly
+       /// abandoned via [`ChannelManager::abandon_payment`], it is marked as abandoned until all
+       /// pending HTLCs for this payment resolve and an [`Event::PaymentFailed`] is generated.
        Abandoned {
                /// Hash of the payment that we have given up trying to send.
                payment_hash: PaymentHash,
@@ -1206,13 +1218,10 @@ macro_rules! handle_error {
                match $internal {
                        Ok(msg) => Ok(msg),
                        Err(MsgHandleErrInternal { err, chan_id, shutdown_finish }) => {
-                               #[cfg(any(feature = "_test_utils", test))]
-                               {
-                                       // In testing, ensure there are no deadlocks where the lock is already held upon
-                                       // entering the macro.
-                                       debug_assert!($self.pending_events.try_lock().is_ok());
-                                       debug_assert!($self.per_peer_state.try_write().is_ok());
-                               }
+                               // In testing, ensure there are no deadlocks where the lock is already held upon
+                               // entering the macro.
+                               debug_assert_ne!($self.pending_events.held_by_thread(), LockHeldState::HeldByThread);
+                               debug_assert_ne!($self.per_peer_state.held_by_thread(), LockHeldState::HeldByThread);
 
                                let mut msg_events = Vec::with_capacity(2);
 
@@ -1706,7 +1715,7 @@ where
        ///
        /// This can be useful for payments that may have been prepared, but ultimately not sent, as a
        /// result of a crash. If such a payment exists, is not listed here, and an
-       /// [`Event::PaymentSent`] has not been received, you may consider retrying the payment.
+       /// [`Event::PaymentSent`] has not been received, you may consider resending the payment.
        ///
        /// [`Event::PaymentSent`]: events::Event::PaymentSent
        pub fn list_recent_payments(&self) -> Vec<RecentPaymentDetails> {
@@ -2463,8 +2472,8 @@ where
        /// If a pending payment is currently in-flight with the same [`PaymentId`] provided, this
        /// method will error with an [`APIError::InvalidRoute`]. Note, however, that once a payment
        /// is no longer pending (either via [`ChannelManager::abandon_payment`], or handling of an
-       /// [`Event::PaymentSent`]) LDK will not stop you from sending a second payment with the same
-       /// [`PaymentId`].
+       /// [`Event::PaymentSent`] or [`Event::PaymentFailed`]) LDK will not stop you from sending a
+       /// second payment with the same [`PaymentId`].
        ///
        /// Thus, in order to ensure duplicate payments are not sent, you should implement your own
        /// tracking of payments, including state to indicate once a payment has completed. Because you
@@ -2509,6 +2518,7 @@ where
        /// [`Route`], we assume the invoice had the basic_mpp feature set.
        ///
        /// [`Event::PaymentSent`]: events::Event::PaymentSent
+       /// [`Event::PaymentFailed`]: events::Event::PaymentFailed
        /// [`PeerManager::process_events`]: crate::ln::peer_handler::PeerManager::process_events
        /// [`ChannelMonitorUpdateStatus::InProgress`]: crate::chain::ChannelMonitorUpdateStatus::InProgress
        pub fn send_payment(&self, route: &Route, payment_hash: PaymentHash, payment_secret: &Option<PaymentSecret>, payment_id: PaymentId) -> Result<(), PaymentSendFailure> {
@@ -2525,7 +2535,7 @@ where
                let best_block_height = self.best_block.read().unwrap().height();
                self.pending_outbound_payments
                        .send_payment(payment_hash, payment_secret, payment_id, retry_strategy, route_params,
-                               &self.router, self.list_usable_channels(), self.compute_inflight_htlcs(),
+                               &self.router, self.list_usable_channels(), || self.compute_inflight_htlcs(),
                                &self.entropy_source, &self.node_signer, best_block_height, &self.logger,
                                |path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv|
                                self.send_payment_along_path(path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv))
@@ -2546,48 +2556,25 @@ where
        }
 
 
-       /// Retries a payment along the given [`Route`].
-       ///
-       /// Errors returned are a superset of those returned from [`send_payment`], so see
-       /// [`send_payment`] documentation for more details on errors. This method will also error if the
-       /// retry amount puts the payment more than 10% over the payment's total amount, if the payment
-       /// for the given `payment_id` cannot be found (likely due to timeout or success), or if
-       /// further retries have been disabled with [`abandon_payment`].
-       ///
-       /// [`send_payment`]: [`ChannelManager::send_payment`]
-       /// [`abandon_payment`]: [`ChannelManager::abandon_payment`]
-       pub fn retry_payment(&self, route: &Route, payment_id: PaymentId) -> Result<(), PaymentSendFailure> {
-               let best_block_height = self.best_block.read().unwrap().height();
-               self.pending_outbound_payments.retry_payment_with_route(route, payment_id, &self.entropy_source, &self.node_signer, best_block_height,
-                       |path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv|
-                       self.send_payment_along_path(path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv))
-       }
-
-       /// Signals that no further retries for the given payment will occur.
+       /// Signals that no further retries for the given payment should occur. Useful if you have a
+       /// pending outbound payment with retries remaining, but wish to stop retrying the payment before
+       /// retries are exhausted.
        ///
-       /// After this method returns, no future calls to [`retry_payment`] for the given `payment_id`
-       /// are allowed. If no [`Event::PaymentFailed`] event had been generated before, one will be
-       /// generated as soon as there are no remaining pending HTLCs for this payment.
+       /// If no [`Event::PaymentFailed`] event had been generated before, one will be generated as soon
+       /// as there are no remaining pending HTLCs for this payment.
        ///
        /// Note that calling this method does *not* prevent a payment from succeeding. You must still
        /// wait until you receive either a [`Event::PaymentFailed`] or [`Event::PaymentSent`] event to
        /// determine the ultimate status of a payment.
        ///
        /// If an [`Event::PaymentFailed`] event is generated and we restart without this
-       /// [`ChannelManager`] having been persisted, the payment may still be in the pending state
-       /// upon restart. This allows further calls to [`retry_payment`] (and requiring a second call
-       /// to [`abandon_payment`] to mark the payment as failed again). Otherwise, future calls to
-       /// [`retry_payment`] will fail with [`PaymentSendFailure::ParameterError`].
+       /// [`ChannelManager`] having been persisted, another [`Event::PaymentFailed`] may be generated.
        ///
-       /// [`abandon_payment`]: Self::abandon_payment
-       /// [`retry_payment`]: Self::retry_payment
        /// [`Event::PaymentFailed`]: events::Event::PaymentFailed
        /// [`Event::PaymentSent`]: events::Event::PaymentSent
        pub fn abandon_payment(&self, payment_id: PaymentId) {
                let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(&self.total_consistency_lock, &self.persistence_notifier);
-               if let Some(payment_failed_ev) = self.pending_outbound_payments.abandon_payment(payment_id) {
-                       self.pending_events.lock().unwrap().push(payment_failed_ev);
-               }
+               self.pending_outbound_payments.abandon_payment(payment_id, &self.pending_events);
        }
 
        /// Send a spontaneous payment, which is a payment that does not require the recipient to have
@@ -2616,11 +2603,16 @@ where
 
        /// Similar to [`ChannelManager::send_spontaneous_payment`], but will automatically find a route
        /// based on `route_params` and retry failed payment paths based on `retry_strategy`.
+       ///
+       /// See [`PaymentParameters::for_keysend`] for help in constructing `route_params` for spontaneous
+       /// payments.
+       ///
+       /// [`PaymentParameters::for_keysend`]: crate::routing::router::PaymentParameters::for_keysend
        pub fn send_spontaneous_payment_with_retry(&self, payment_preimage: Option<PaymentPreimage>, payment_id: PaymentId, route_params: RouteParameters, retry_strategy: Retry) -> Result<PaymentHash, PaymentSendFailure> {
                let best_block_height = self.best_block.read().unwrap().height();
                self.pending_outbound_payments.send_spontaneous_payment(payment_preimage, payment_id,
                        retry_strategy, route_params, &self.router, self.list_usable_channels(),
-                       self.compute_inflight_htlcs(),  &self.entropy_source, &self.node_signer, best_block_height,
+                       || self.compute_inflight_htlcs(),  &self.entropy_source, &self.node_signer, best_block_height,
                        &self.logger,
                        |path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv|
                        self.send_payment_along_path(path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv))
@@ -3355,7 +3347,8 @@ where
 
                let best_block_height = self.best_block.read().unwrap().height();
                self.pending_outbound_payments.check_retry_payments(&self.router, || self.list_usable_channels(),
-                       || self.compute_inflight_htlcs(), &self.entropy_source, &self.node_signer, best_block_height, &self.logger,
+                       || self.compute_inflight_htlcs(), &self.entropy_source, &self.node_signer, best_block_height,
+                       &self.pending_events, &self.logger,
                        |path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv|
                        self.send_payment_along_path(path, payment_params, payment_hash, payment_secret, total_value, cur_height, payment_id, keysend_preimage, session_priv));
 
@@ -3521,8 +3514,7 @@ where
 
                                                true
                                        });
-                                       let peer_should_be_removed = !peer_state.is_connected && peer_state.channel_by_id.len() == 0;
-                                       if peer_should_be_removed {
+                                       if peer_state.ok_to_remove(true) {
                                                pending_peers_awaiting_removal.push(counterparty_node_id);
                                        }
                                }
@@ -3544,7 +3536,7 @@ where
                                                        // have no channels to the peer.
                                                        let remove_entry = {
                                                                let peer_state = entry.get().lock().unwrap();
-                                                               !peer_state.is_connected && peer_state.channel_by_id.len() == 0
+                                                               peer_state.ok_to_remove(true)
                                                        };
                                                        if remove_entry {
                                                                entry.remove_entry();
@@ -3727,17 +3719,12 @@ where
        /// Fails an HTLC backwards to the sender of it to us.
        /// Note that we do not assume that channels corresponding to failed HTLCs are still available.
        fn fail_htlc_backwards_internal(&self, source: &HTLCSource, payment_hash: &PaymentHash, onion_error: &HTLCFailReason, destination: HTLCDestination) {
-               #[cfg(any(feature = "_test_utils", test))]
-               {
-                       // Ensure that the peer state channel storage lock is not held when calling this
-                       // function.
-                       // This ensures that future code doesn't introduce a lock_order requirement for
-                       // `forward_htlcs` to be locked after the `per_peer_state` peer locks, which calling
-                       // this function with any `per_peer_state` peer lock aquired would.
-                       let per_peer_state = self.per_peer_state.read().unwrap();
-                       for (_, peer) in per_peer_state.iter() {
-                               debug_assert!(peer.try_lock().is_ok());
-                       }
+               // Ensure that no peer state channel storage lock is held when calling this function.
+               // This ensures that future code doesn't introduce a lock-order requirement for
+               // `forward_htlcs` to be locked after the `per_peer_state` peer locks, which calling
+               // this function with any `per_peer_state` peer lock acquired would.
+               for (_, peer) in self.per_peer_state.read().unwrap().iter() {
+                       debug_assert_ne!(peer.held_by_thread(), LockHeldState::HeldByThread);
                }
 
                //TODO: There is a timing attack here where if a node fails an HTLC back to us they can
@@ -5049,7 +5036,7 @@ where
                                        ), chan),
                                        // Note that announcement_signatures fails if the channel cannot be announced,
                                        // so get_channel_update_for_broadcast will never fail by the time we get here.
-                                       update_msg: self.get_channel_update_for_broadcast(chan.get()).unwrap(),
+                                       update_msg: Some(self.get_channel_update_for_broadcast(chan.get()).unwrap()),
                                });
                        },
                        hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
@@ -5424,7 +5411,8 @@ where
        /// [`PaymentHash`] and [`PaymentPreimage`] for you.
        ///
        /// The [`PaymentPreimage`] will ultimately be returned to you in the [`PaymentClaimable`], which
-       /// will have the [`PaymentClaimable::payment_preimage`] field filled in. That should then be
+       /// will have the [`PaymentClaimable::purpose`] be [`PaymentPurpose::InvoicePayment`] with
+       /// its [`PaymentPurpose::InvoicePayment::payment_preimage`] field filled in. That should then be
        /// passed directly to [`claim_funds`].
        ///
        /// See [`create_inbound_payment_for_hash`] for detailed documentation on behavior and requirements.
@@ -5444,7 +5432,9 @@ where
        ///
        /// [`claim_funds`]: Self::claim_funds
        /// [`PaymentClaimable`]: events::Event::PaymentClaimable
-       /// [`PaymentClaimable::payment_preimage`]: events::Event::PaymentClaimable::payment_preimage
+       /// [`PaymentClaimable::purpose`]: events::Event::PaymentClaimable::purpose
+       /// [`PaymentPurpose::InvoicePayment`]: events::PaymentPurpose::InvoicePayment
+       /// [`PaymentPurpose::InvoicePayment::payment_preimage`]: events::PaymentPurpose::InvoicePayment::payment_preimage
        /// [`create_inbound_payment_for_hash`]: Self::create_inbound_payment_for_hash
        pub fn create_inbound_payment(&self, min_value_msat: Option<u64>, invoice_expiry_delta_secs: u32,
                min_final_cltv_expiry_delta: Option<u16>) -> Result<(PaymentHash, PaymentSecret), ()> {
@@ -5974,7 +5964,7 @@ where
                                                                                msg: announcement,
                                                                                // Note that announcement_signatures fails if the channel cannot be announced,
                                                                                // so get_channel_update_for_broadcast will never fail by the time we get here.
-                                                                               update_msg: self.get_channel_update_for_broadcast(channel).unwrap(),
+                                                                               update_msg: Some(self.get_channel_update_for_broadcast(channel).unwrap()),
                                                                        });
                                                                }
                                                        }
@@ -6254,9 +6244,8 @@ where
        fn peer_disconnected(&self, counterparty_node_id: &PublicKey, no_connection_possible: bool) {
                let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(&self.total_consistency_lock, &self.persistence_notifier);
                let mut failed_channels = Vec::new();
-               let mut no_channels_remain = true;
                let mut per_peer_state = self.per_peer_state.write().unwrap();
-               {
+               let remove_peer = {
                        log_debug!(self.logger, "Marking channels with {} disconnected and generating channel_updates. We believe we {} make future connections to this peer.",
                                log_pubkey!(counterparty_node_id), if no_connection_possible { "cannot" } else { "can" });
                        if let Some(peer_state_mutex) = per_peer_state.get(counterparty_node_id) {
@@ -6269,8 +6258,6 @@ where
                                                update_maps_on_chan_removal!(self, chan);
                                                self.issue_channel_close_events(chan, ClosureReason::DisconnectedPeer);
                                                return false;
-                                       } else {
-                                               no_channels_remain = false;
                                        }
                                        true
                                });
@@ -6290,6 +6277,7 @@ where
                                                &events::MessageSendEvent::SendChannelAnnouncement { .. } => false,
                                                &events::MessageSendEvent::BroadcastChannelAnnouncement { .. } => true,
                                                &events::MessageSendEvent::BroadcastChannelUpdate { .. } => true,
+                                               &events::MessageSendEvent::BroadcastNodeAnnouncement { .. } => true,
                                                &events::MessageSendEvent::SendChannelUpdate { .. } => false,
                                                &events::MessageSendEvent::HandleError { .. } => false,
                                                &events::MessageSendEvent::SendChannelRangeQuery { .. } => false,
@@ -6300,9 +6288,10 @@ where
                                });
                                debug_assert!(peer_state.is_connected, "A disconnected peer cannot disconnect");
                                peer_state.is_connected = false;
-                       }
-               }
-               if no_channels_remain {
+                               peer_state.ok_to_remove(true)
+                       } else { true }
+               };
+               if remove_peer {
                        per_peer_state.remove(counterparty_node_id);
                }
                mem::drop(per_peer_state);
@@ -6896,6 +6885,7 @@ where
                        best_block.block_hash().write(writer)?;
                }
 
+               let mut serializable_peer_count: u64 = 0;
                {
                        let per_peer_state = self.per_peer_state.read().unwrap();
                        let mut unfunded_channels = 0;
@@ -6903,6 +6893,9 @@ where
                        for (_, peer_state_mutex) in per_peer_state.iter() {
                                let mut peer_state_lock = peer_state_mutex.lock().unwrap();
                                let peer_state = &mut *peer_state_lock;
+                               if !peer_state.ok_to_remove(false) {
+                                       serializable_peer_count += 1;
+                               }
                                number_of_channels += peer_state.channel_by_id.len();
                                for (_, channel) in peer_state.channel_by_id.iter() {
                                        if !channel.is_funding_initiated() {
@@ -6953,11 +6946,18 @@ where
                        htlc_purposes.push(purpose);
                }
 
-               (per_peer_state.len() as u64).write(writer)?;
+               (serializable_peer_count).write(writer)?;
                for (peer_pubkey, peer_state_mutex) in per_peer_state.iter() {
-                       peer_pubkey.write(writer)?;
-                       let peer_state = peer_state_mutex.lock().unwrap();
-                       peer_state.latest_features.write(writer)?;
+                       let peer_state_lock = peer_state_mutex.lock().unwrap();
+                       let peer_state = &*peer_state_lock;
+                       // Peers which we have no channels to should be dropped once disconnected. As we
+                       // disconnect all peers when shutting down and serializing the ChannelManager, we
+                       // consider all peers as disconnected here. There's therefore no need write peers with
+                       // no channels.
+                       if !peer_state.ok_to_remove(false) {
+                               peer_pubkey.write(writer)?;
+                               peer_state.latest_features.write(writer)?;
+                       }
                }
 
                let events = self.pending_events.lock().unwrap();
@@ -7694,7 +7694,7 @@ where
 
                        inbound_payment_key: expanded_inbound_key,
                        pending_inbound_payments: Mutex::new(pending_inbound_payments),
-                       pending_outbound_payments: OutboundPayments { pending_outbound_payments: Mutex::new(pending_outbound_payments.unwrap()) },
+                       pending_outbound_payments: OutboundPayments { pending_outbound_payments: Mutex::new(pending_outbound_payments.unwrap()), retry_lock: Mutex::new(()), },
                        pending_intercepted_htlcs: Mutex::new(pending_intercepted_htlcs.unwrap()),
 
                        forward_htlcs: Mutex::new(forward_htlcs),
@@ -7968,7 +7968,7 @@ mod tests {
                let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
                let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
                create_announced_chan_between_nodes(&nodes, 0, 1);
-               let scorer = test_utils::TestScorer::with_penalty(0);
+               let scorer = test_utils::TestScorer::new();
                let random_seed_bytes = chanmon_cfgs[1].keys_manager.get_secure_random_bytes();
 
                // To start (1), send a regular payment but don't claim it.
@@ -8074,7 +8074,7 @@ mod tests {
                };
                let network_graph = nodes[0].network_graph.clone();
                let first_hops = nodes[0].node.list_usable_channels();
-               let scorer = test_utils::TestScorer::with_penalty(0);
+               let scorer = test_utils::TestScorer::new();
                let random_seed_bytes = chanmon_cfgs[1].keys_manager.get_secure_random_bytes();
                let route = find_route(
                        &payer_pubkey, &route_params, &network_graph, Some(&first_hops.iter().collect::<Vec<_>>()),
@@ -8117,7 +8117,7 @@ mod tests {
                };
                let network_graph = nodes[0].network_graph.clone();
                let first_hops = nodes[0].node.list_usable_channels();
-               let scorer = test_utils::TestScorer::with_penalty(0);
+               let scorer = test_utils::TestScorer::new();
                let random_seed_bytes = chanmon_cfgs[1].keys_manager.get_secure_random_bytes();
                let route = find_route(
                        &payer_pubkey, &route_params, &network_graph, Some(&first_hops.iter().collect::<Vec<_>>()),
@@ -8487,7 +8487,8 @@ pub mod bench {
                let tx_broadcaster = test_utils::TestBroadcaster{txn_broadcasted: Mutex::new(Vec::new()), blocks: Arc::new(Mutex::new(Vec::new()))};
                let fee_estimator = test_utils::TestFeeEstimator { sat_per_kw: Mutex::new(253) };
                let logger_a = test_utils::TestLogger::with_id("node a".to_owned());
-               let router = test_utils::TestRouter::new(Arc::new(NetworkGraph::new(genesis_hash, &logger_a)));
+               let scorer = Mutex::new(test_utils::TestScorer::new());
+               let router = test_utils::TestRouter::new(Arc::new(NetworkGraph::new(genesis_hash, &logger_a)), &scorer);
 
                let mut config: UserConfig = Default::default();
                config.channel_handshake_config.minimum_depth = 1;
@@ -8578,7 +8579,7 @@ pub mod bench {
                                let usable_channels = $node_a.list_usable_channels();
                                let payment_params = PaymentParameters::from_node_id($node_b.get_our_node_id(), TEST_FINAL_CLTV)
                                        .with_features($node_b.invoice_features());
-                               let scorer = test_utils::TestScorer::with_penalty(0);
+                               let scorer = test_utils::TestScorer::new();
                                let seed = [3u8; 32];
                                let keys_manager = KeysManager::new(&seed, 42, 42);
                                let random_seed_bytes = keys_manager.get_secure_random_bytes();