Delay removal of fulfilled outbound payments for a few timer ticks
author Matt Corallo <git@bluematt.me>
Sat, 8 Oct 2022 23:26:18 +0000 (23:26 +0000)
committer Matt Corallo <git@bluematt.me>
Wed, 2 Nov 2022 01:09:07 +0000 (01:09 +0000)
Previously, once a fulfilled outbound payment completed and all
associated HTLCs were resolved, we'd immediately remove the payment
entry from the `pending_outbound_payments` map.

Now that we're using the `pending_outbound_payments` map for send
idempotency, this presents a race condition - if the user makes a
redundant `send_payment` call at the same time that the original
payment's last HTLC is resolved, the user would reasonably expect
the `send_payment` call to fail due to our idempotency guarantees.

However, because the `pending_outbound_payments` entry is being
removed, if the removal completes first the `send_payment` call
will succeed even though the user has not had a chance to see the
corresponding `Event::PaymentSent`.

Instead, here, we delay removal of `Fulfilled`
`pending_outbound_payments` entries until several timer ticks have
passed without any corresponding event or HTLC pending.

lightning/src/ln/channelmanager.rs
lightning/src/ln/payment_tests.rs

index c223a74131c9ecbc7c07f27ae5a24838fa2137cc..5a98289c528b2a86dad6e0657e40bf88aeea356e 100644 (file)
@@ -473,6 +473,7 @@ pub(crate) enum PendingOutboundPayment {
        Fulfilled {
                session_privs: HashSet<[u8; 32]>,
                payment_hash: Option<PaymentHash>,
+               timer_ticks_without_htlcs: u8,
        },
        /// When a payer gives up trying to retry a payment, they inform us, letting us generate a
        /// `PaymentFailed` event when all HTLCs have irrevocably failed. This avoids a number of race
@@ -526,7 +527,7 @@ impl PendingOutboundPayment {
                                => session_privs,
                });
                let payment_hash = self.payment_hash();
-               *self = PendingOutboundPayment::Fulfilled { session_privs, payment_hash };
+               *self = PendingOutboundPayment::Fulfilled { session_privs, payment_hash, timer_ticks_without_htlcs: 0 };
        }
 
        fn mark_abandoned(&mut self) -> Result<(), ()> {
@@ -960,6 +961,11 @@ pub(crate) const PAYMENT_EXPIRY_BLOCKS: u32 = 3;
 /// The number of ticks of [`ChannelManager::timer_tick_occurred`] until expiry of incomplete MPPs
 pub(crate) const MPP_TIMEOUT_TICKS: u8 = 3;
 
+/// The number of ticks of [`ChannelManager::timer_tick_occurred`] until we time-out the
+/// idempotency of payments by [`PaymentId`]. See
+/// [`ChannelManager::remove_stale_resolved_payments`].
+pub(crate) const IDEMPOTENCY_TIMEOUT_TICKS: u8 = 7;
+
 /// Information needed for constructing an invoice route hint for this channel.
 #[derive(Clone, Debug, PartialEq)]
 pub struct CounterpartyForwardingInfo {
@@ -3628,6 +3634,45 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                });
        }
 
+       fn remove_stale_resolved_payments(&self) {
+               // If an outbound payment was completed, and no pending HTLCs remain, we should remove it
+               // from the map. However, if we did that immediately when the last payment HTLC is claimed,
+               // this could race the user making a duplicate send_payment call and our idempotency
+               // guarantees would be violated. Instead, we count timer ticks during which the payment has
+               // neither remaining `session_privs` nor a queued event, and only remove the entry once
+               // that count exceeds `IDEMPOTENCY_TIMEOUT_TICKS`. This should be more than sufficient to
+               // cover any `send_payment` calls made while the `PaymentSent` event was being processed.
+               let mut pending_outbound_payments = self.pending_outbound_payments.lock().unwrap();
+               let pending_events = self.pending_events.lock().unwrap(); // both locks are held for the whole scan
+               pending_outbound_payments.retain(|payment_id, payment| { // returning `false` drops the entry
+                       if let PendingOutboundPayment::Fulfilled { session_privs, timer_ticks_without_htlcs, .. } = payment {
+                               let mut no_remaining_entries = session_privs.is_empty();
+                               if no_remaining_entries {
+                                       for ev in pending_events.iter() { // a queued event for this payment keeps it alive
+                                               match ev {
+                                                       events::Event::PaymentSent { payment_id: Some(ev_payment_id), .. } |
+                                                       events::Event::PaymentPathSuccessful { payment_id: ev_payment_id, .. } |
+                                                       events::Event::PaymentPathFailed { payment_id: Some(ev_payment_id), .. } => {
+                                                               if payment_id == ev_payment_id {
+                                                                       no_remaining_entries = false;
+                                                                       break;
+                                                               }
+                                                       },
+                                                       _ => {}, // other event kinds never hold a payment open
+                                               }
+                                       }
+                               }
+                               if no_remaining_entries {
+                                       *timer_ticks_without_htlcs += 1; // one more tick with nothing outstanding
+                                       *timer_ticks_without_htlcs <= IDEMPOTENCY_TIMEOUT_TICKS // keep until the count exceeds the limit
+                               } else {
+                                       *timer_ticks_without_htlcs = 0; // activity seen: restart the countdown
+                                       true
+                               }
+                       } else { true } // only `Fulfilled` payments are ever pruned here
+               });
+       }
+
        /// Performs actions which should happen on startup and roughly once per minute thereafter.
        ///
        /// This currently includes:
@@ -3731,6 +3776,9 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                        for (err, counterparty_node_id) in handle_errors.drain(..) {
                                let _ = handle_error!(self, err, counterparty_node_id);
                        }
+
+                       self.remove_stale_resolved_payments();
+
                        should_persist
                });
        }
@@ -4248,9 +4296,6 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                                                        }
                                                );
                                        }
-                                       if payment.get().remaining_parts() == 0 {
-                                               payment.remove();
-                                       }
                                }
                        }
                }
@@ -4296,10 +4341,6 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                                                                }
                                                        );
                                                }
-
-                                               if payment.get().remaining_parts() == 0 {
-                                                       payment.remove();
-                                               }
                                        }
                                } else {
                                        log_trace!(self.logger, "Received duplicative fulfill for HTLC with payment_preimage {}", log_bytes!(payment_preimage.0));
@@ -6624,6 +6665,7 @@ impl_writeable_tlv_based_enum_upgradable!(PendingOutboundPayment,
        (1, Fulfilled) => {
                (0, session_privs, required),
                (1, payment_hash, option),
+               (3, timer_ticks_without_htlcs, (default_value, 0)),
        },
        (2, Retryable) => {
                (0, session_privs, required),
index 5ba78382a91bf6176b30edc49af4b6e8212f4e63..83eb84334a0720671f83cfc419134598ce73a189 100644 (file)
@@ -16,7 +16,7 @@ use crate::chain::channelmonitor::{ANTI_REORG_DELAY, ChannelMonitor, LATENCY_GRA
 use crate::chain::transaction::OutPoint;
 use crate::chain::keysinterface::KeysInterface;
 use crate::ln::channel::EXPIRE_PREV_CONFIG_TICKS;
-use crate::ln::channelmanager::{self, BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure};
+use crate::ln::channelmanager::{self, BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure, IDEMPOTENCY_TIMEOUT_TICKS};
 use crate::ln::msgs;
 use crate::ln::msgs::ChannelMessageHandler;
 use crate::routing::router::{PaymentParameters, get_route};
@@ -1255,3 +1255,74 @@ fn onchain_failed_probe_yields_event() {
        }
        assert!(found_probe_failed);
 }
+
+#[test]
+fn claimed_send_payment_idempotent() {
+       // Tests that `send_payment` (and friends) are (reasonably) idempotent.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+       let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       create_announced_chan_between_nodes(&nodes, 0, 1, channelmanager::provided_init_features(), channelmanager::provided_init_features()).2;
+
+       let (route, second_payment_hash, second_payment_preimage, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000);
+       let (first_payment_preimage, _, _, payment_id) = send_along_route(&nodes[0], route.clone(), &[&nodes[1]], 100_000);
+
+       macro_rules! check_send_rejected {
+               () => {
+                       // If we try to resend a new payment with a different payment_hash but with the same
+                       // payment_id, it should be rejected.
+                       let send_result = nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret), payment_id);
+                       match send_result {
+                               Err(PaymentSendFailure::ParameterError(APIError::RouteError { err: "Payment already in progress" })) => {},
+                               _ => panic!("Unexpected send result: {:?}", send_result),
+                       }
+
+                       // Further, if we try to send a spontaneous payment with the same payment_id it should
+                       // also be rejected.
+                       let send_result = nodes[0].node.send_spontaneous_payment(&route, None, payment_id);
+                       match send_result {
+                               Err(PaymentSendFailure::ParameterError(APIError::RouteError { err: "Payment already in progress" })) => {},
+                               _ => panic!("Unexpected send result: {:?}", send_result),
+                       }
+               }
+       }
+
+       check_send_rejected!();
+
+       // Claim the payment backwards, but note that the PaymentSent event is still pending and has
+       // not been seen by the user. At this point, from the user perspective nothing has changed, so
+       // we must remain just as idempotent as we were before.
+       do_claim_payment_along_route(&nodes[0], &[&[&nodes[1]]], false, first_payment_preimage);
+
+       for _ in 0..=IDEMPOTENCY_TIMEOUT_TICKS { // tick past the timeout while the event is still unhandled
+               nodes[0].node.timer_tick_occurred();
+       }
+
+       check_send_rejected!();
+
+       // Once the user sees and handles the `PaymentSent` event, we expect them to no longer call
+       // `send_payment`, and our idempotency guarantees are off - they should have atomically marked
+       // the payment complete. However, they could have called `send_payment` while the event was
+       // being processed, leading to a race in our idempotency guarantees. Thus, even immediately
+       // after the event is handled a duplicate payment should still be rejected.
+       expect_payment_sent!(&nodes[0], first_payment_preimage, Some(0));
+       check_send_rejected!();
+
+       // If relatively little time has passed, a duplicate payment should still fail.
+       nodes[0].node.timer_tick_occurred();
+       check_send_rejected!();
+
+       // However, after some time has passed (at least more than the one timer tick above), a
+       // duplicate payment should go through, as ChannelManager should no longer have any remaining
+       // references to the old payment data.
+       for _ in 0..IDEMPOTENCY_TIMEOUT_TICKS { // together with the tick above, this exceeds the timeout
+               nodes[0].node.timer_tick_occurred();
+       }
+
+       nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret), payment_id).unwrap();
+       check_added_monitors!(nodes[0], 1);
+       pass_along_route(&nodes[0], &[&[&nodes[1]]], 100_000, second_payment_hash, second_payment_secret);
+       claim_payment(&nodes[0], &[&nodes[1]], second_payment_preimage);
+}