Handle double-HTLC-claims without failing the backwards channel
author Matt Corallo <git@bluematt.me>
Tue, 29 Jun 2021 21:05:45 +0000 (21:05 +0000)
committer Matt Corallo <git@bluematt.me>
Wed, 28 Jul 2021 00:34:53 +0000 (00:34 +0000)
When receiving an update_fulfill_htlc message, we immediately
forward the claim backwards along the payment path before waiting
for a full commitment_signed dance. This is great, but it can
cause duplicate claims if a node sends an update_fulfill_htlc
message, disconnects, reconnects, and then has to re-send its
update_fulfill_htlc message.

While there was code to handle this, it treated the duplicate as
a channel error on the inbound channel, which is incorrect - this
is an expected, albeit incredibly rare, condition. Instead, we
now handle these double-claims correctly by simply ignoring them.
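
For illustration only, a minimal sketch of the new behaviour (a
hypothetical standalone helper, not the actual
`Channel::get_update_fulfill_htlc` signature, which also returns a
`ChannelMonitorUpdate`): the not-found case becomes a no-op rather
than an error that fails the channel.

    use std::collections::HashSet;

    // Hypothetical sketch of the duplicate-claim handling described above;
    // `pending` stands in for the channel's pending inbound HTLC ids.
    fn claim_htlc_sketch(pending: &mut HashSet<u64>, htlc_id: u64) -> Result<bool, &'static str> {
        if !pending.remove(&htlc_id) {
            // Old behaviour: Err("Unable to find a pending HTLC which matched
            // the given HTLC ID"), which failed the inbound channel.
            // New behaviour: the claim was already processed before the
            // reconnect, so ignore the duplicate and report nothing to send.
            return Ok(false);
        }
        Ok(true) // a real implementation would build the update_fulfill_htlc here
    }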

With debug_assertions enabled, we also check that the previous
close of the same HTLC was a fulfill, and that we are not moving
from an HTLC failure to an HTLC claim after it's too late.
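
In test and fuzz builds this is backed by a set of HTLC ids we
have previously fulfilled (the `historical_inbound_htlc_fulfills`
field added below). A rough sketch of the sanity check, assuming
the set is updated on every successful claim:

    use std::collections::HashSet;

    // Hypothetical sketch: distinguish an expected duplicate claim from the
    // buggy case where we already failed the HTLC backwards and lost funds.
    fn sanity_check_duplicate_claim(historical_fulfills: &HashSet<u64>, htlc_id: u64) {
        debug_assert!(historical_fulfills.contains(&htlc_id),
            "Tried to fulfill an HTLC that was already failed");
    }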

A test is also added, which hits all three failure cases in
`Channel::get_update_fulfill_htlc`.

Found by the chanmon_consistency fuzzer.

lightning/src/ln/chanmon_update_fail_tests.rs
lightning/src/ln/channel.rs

index 2c62abc55f85c00defe3f608eb6dba56824a6a23..2918b4cef13aaf2baa2c83c280aacc6c075f2fae 100644
@@ -2215,3 +2215,119 @@ fn channel_holding_cell_serialize() {
        do_channel_holding_cell_serialize(true, false);
        do_channel_holding_cell_serialize(false, true); // last arg doesn't matter
 }
+
+#[derive(PartialEq)]
+enum HTLCStatusAtDupClaim {
+       Received,
+       HoldingCell,
+       Cleared,
+}
+fn do_test_reconnect_dup_htlc_claims(htlc_status: HTLCStatusAtDupClaim, second_fails: bool) {
+       // When receiving an update_fulfill_htlc message, we immediately forward the claim backwards
+       // along the payment path before waiting for a full commitment_signed dance. This is great, but
+       // can cause duplicative claims if a node sends an update_fulfill_htlc message, disconnects,
+       // reconnects, and then has to re-send its update_fulfill_htlc message again.
+       // In previous code, we didn't handle the double-claim correctly, spuriously closing the
+       // channel on which the inbound HTLC was received.
+       let chanmon_cfgs = create_chanmon_cfgs(3);
+       let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+       let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+       create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+       let chan_2 = create_announced_chan_between_nodes(&nodes, 1, 2, InitFeatures::known(), InitFeatures::known()).2;
+
+       let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
+
+       let mut as_raa = None;
+       if htlc_status == HTLCStatusAtDupClaim::HoldingCell {
+               // In order to get the HTLC claim into the holding cell at nodes[1], we need nodes[1] to be
+               // awaiting a remote revoke_and_ack from nodes[0].
+               let (_, second_payment_hash, second_payment_secret) = get_payment_preimage_hash!(nodes[1]);
+               let route = get_route(&nodes[0].node.get_our_node_id(), &nodes[0].net_graph_msg_handler.network_graph.read().unwrap(),
+                       &nodes[1].node.get_our_node_id(), Some(InvoiceFeatures::known()), None, &Vec::new(), 100_000, TEST_FINAL_CLTV, nodes[1].logger).unwrap();
+               nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)).unwrap();
+               check_added_monitors!(nodes[0], 1);
+
+               let send_event = SendEvent::from_event(nodes[0].node.get_and_clear_pending_msg_events().remove(0));
+               nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &send_event.msgs[0]);
+               nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &send_event.commitment_msg);
+               check_added_monitors!(nodes[1], 1);
+
+               let (bs_raa, bs_cs) = get_revoke_commit_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+               nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_raa);
+               check_added_monitors!(nodes[0], 1);
+               nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_cs);
+               check_added_monitors!(nodes[0], 1);
+
+               as_raa = Some(get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id()));
+       }
+
+       let fulfill_msg = msgs::UpdateFulfillHTLC {
+               channel_id: chan_2,
+               htlc_id: 0,
+               payment_preimage,
+       };
+       if second_fails {
+               assert!(nodes[2].node.fail_htlc_backwards(&payment_hash));
+               expect_pending_htlcs_forwardable!(nodes[2]);
+               check_added_monitors!(nodes[2], 1);
+               get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+       } else {
+               assert!(nodes[2].node.claim_funds(payment_preimage));
+               check_added_monitors!(nodes[2], 1);
+               let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+               assert_eq!(cs_updates.update_fulfill_htlcs.len(), 1);
+               // Check that the message we're about to deliver matches the one generated:
+               assert_eq!(fulfill_msg, cs_updates.update_fulfill_htlcs[0]);
+       }
+       nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &fulfill_msg);
+       check_added_monitors!(nodes[1], 1);
+
+       let mut bs_updates = None;
+       if htlc_status != HTLCStatusAtDupClaim::HoldingCell {
+               bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
+               assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
+               nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
+               expect_payment_sent!(nodes[0], payment_preimage);
+               if htlc_status == HTLCStatusAtDupClaim::Cleared {
+                       commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
+               }
+       } else {
+               assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+       }
+
+       nodes[1].node.peer_disconnected(&nodes[2].node.get_our_node_id(), false);
+       nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+
+       if second_fails {
+               reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0), (false, false));
+               expect_pending_htlcs_forwardable!(nodes[1]);
+       } else {
+               reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (0, 0), (false, false));
+       }
+
+       if htlc_status == HTLCStatusAtDupClaim::HoldingCell {
+               nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_raa.unwrap());
+               check_added_monitors!(nodes[1], 1);
+               expect_pending_htlcs_forwardable_ignore!(nodes[1]); // We finally receive the second payment, but don't claim it
+
+               bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
+               assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
+               nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
+               expect_payment_sent!(nodes[0], payment_preimage);
+       }
+       if htlc_status != HTLCStatusAtDupClaim::Cleared {
+               commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
+       }
+}
+
+#[test]
+fn test_reconnect_dup_htlc_claims() {
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, false);
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, false);
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, false);
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, true);
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, true);
+       do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, true);
+}
index d8a284fa8184cd9e1d030043c351c09c144d4039..6d95525773445363361f5c6b5ba0d9f5da499d9f 100644
@@ -444,6 +444,15 @@ pub(super) struct Channel<Signer: Sign> {
        ///
        /// See-also <https://github.com/lightningnetwork/lnd/issues/4006>
        pub workaround_lnd_bug_4006: Option<msgs::FundingLocked>,
+
+       #[cfg(any(test, feature = "fuzztarget"))]
+       // When we receive an HTLC fulfill on an outbound path, we may immediately fulfill the
+       // corresponding HTLC on the inbound path. If, then, the outbound path channel is
+       // disconnected and reconnected (before we've exchanged commitment_signed and revoke_and_ack
+       // messages), they may re-broadcast their update_fulfill_htlc, causing a duplicate claim. This
+       // is fine, but as a sanity check in our failure to generate the second claim, we check here
+       // that the original was a claim, and that we aren't now trying to fulfill a failed HTLC.
+       historical_inbound_htlc_fulfills: HashSet<u64>,
 }
 
 #[cfg(any(test, feature = "fuzztarget"))]
@@ -645,6 +654,9 @@ impl<Signer: Sign> Channel<Signer> {
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
                        workaround_lnd_bug_4006: None,
+
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       historical_inbound_htlc_fulfills: HashSet::new(),
                })
        }
 
@@ -890,6 +902,9 @@ impl<Signer: Sign> Channel<Signer> {
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
                        workaround_lnd_bug_4006: None,
+
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       historical_inbound_htlc_fulfills: HashSet::new(),
                };
 
                Ok(chan)
@@ -1249,8 +1264,8 @@ impl<Signer: Sign> Channel<Signer> {
                                                if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
                                                } else {
                                                        log_warn!(logger, "Have preimage and want to fulfill HTLC with payment hash {} we already failed against channel {}", log_bytes!(htlc.payment_hash.0), log_bytes!(self.channel_id()));
+                                                       debug_assert!(false, "Tried to fulfill an HTLC that was already failed");
                                                }
-                                               debug_assert!(false, "Tried to fulfill an HTLC that was already fail/fulfilled");
                                                return Ok((None, None));
                                        },
                                        _ => {
@@ -1263,7 +1278,11 @@ impl<Signer: Sign> Channel<Signer> {
                        }
                }
                if pending_idx == core::usize::MAX {
-                       return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       // If we failed to find an HTLC to fulfill, make sure it was previously fulfilled and
+                       // this is simply a duplicate claim, not previously failed and we lost funds.
+                       debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+                       return Ok((None, None));
                }
 
                // Now update local state:
@@ -1285,7 +1304,8 @@ impl<Signer: Sign> Channel<Signer> {
                                                if htlc_id_arg == htlc_id {
                                                        // Make sure we don't leave latest_monitor_update_id incremented here:
                                                        self.latest_monitor_update_id -= 1;
-                                                       debug_assert!(false, "Tried to fulfill an HTLC that was already fulfilled");
+                                                       #[cfg(any(test, feature = "fuzztarget"))]
+                                                       debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
                                                        return Ok((None, None));
                                                }
                                        },
@@ -1305,8 +1325,12 @@ impl<Signer: Sign> Channel<Signer> {
                        self.holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::ClaimHTLC {
                                payment_preimage: payment_preimage_arg, htlc_id: htlc_id_arg,
                        });
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       self.historical_inbound_htlc_fulfills.insert(htlc_id_arg);
                        return Ok((None, Some(monitor_update)));
                }
+               #[cfg(any(test, feature = "fuzztarget"))]
+               self.historical_inbound_htlc_fulfills.insert(htlc_id_arg);
 
                {
                        let htlc = &mut self.pending_inbound_htlcs[pending_idx];
@@ -1366,8 +1390,11 @@ impl<Signer: Sign> Channel<Signer> {
                        if htlc.htlc_id == htlc_id_arg {
                                match htlc.state {
                                        InboundHTLCState::Committed => {},
-                                       InboundHTLCState::LocalRemoved(_) => {
-                                               debug_assert!(false, "Tried to fail an HTLC that was already fail/fulfilled");
+                                       InboundHTLCState::LocalRemoved(ref reason) => {
+                                               if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
+                                               } else {
+                                                       debug_assert!(false, "Tried to fail an HTLC that was already failed");
+                                               }
                                                return Ok(None);
                                        },
                                        _ => {
@@ -1379,7 +1406,11 @@ impl<Signer: Sign> Channel<Signer> {
                        }
                }
                if pending_idx == core::usize::MAX {
-                       return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       // If we failed to find an HTLC to fail, make sure it was previously fulfilled and this
+                       // is simply a duplicate fail, not previously failed and we failed-back too early.
+                       debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+                       return Ok(None);
                }
 
                // Now update local state:
@@ -1388,8 +1419,9 @@ impl<Signer: Sign> Channel<Signer> {
                                match pending_update {
                                        &HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => {
                                                if htlc_id_arg == htlc_id {
-                                                       debug_assert!(false, "Tried to fail an HTLC that was already fulfilled");
-                                                       return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+                                                       #[cfg(any(test, feature = "fuzztarget"))]
+                                                       debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+                                                       return Ok(None);
                                                }
                                        },
                                        &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } => {
@@ -2453,7 +2485,14 @@ impl<Signer: Sign> Channel<Signer> {
                                        },
                                        &HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => {
                                                match self.get_update_fail_htlc(htlc_id, err_packet.clone(), logger) {
-                                                       Ok(update_fail_msg_option) => update_fail_htlcs.push(update_fail_msg_option.unwrap()),
+                                                       Ok(update_fail_msg_option) => {
+                                                               // If an HTLC failure was previously added to the holding cell (via
+                                                               // `get_update_fail_htlc`) then generating the fail message itself
+                                                               // must not fail - we should never end up in a state where we
+                                                               // double-fail an HTLC or fail-then-claim an HTLC as it indicates
+                                                               // we didn't wait for a full revocation before failing.
+                                                               update_fail_htlcs.push(update_fail_msg_option.unwrap())
+                                                       },
                                                        Err(e) => {
                                                                if let ChannelError::Ignore(_) = e {}
                                                                else {
@@ -4690,6 +4729,13 @@ impl<Signer: Sign> Writeable for Channel<Signer> {
 
                self.channel_update_status.write(writer)?;
 
+               #[cfg(any(test, feature = "fuzztarget"))]
+               (self.historical_inbound_htlc_fulfills.len() as u64).write(writer)?;
+               #[cfg(any(test, feature = "fuzztarget"))]
+               for htlc in self.historical_inbound_htlc_fulfills.iter() {
+                       htlc.write(writer)?;
+               }
+
                write_tlv_fields!(writer, {
                        (0, self.announcement_sigs, option),
                        // minimum_depth and counterparty_selected_channel_reserve_satoshis used to have a
@@ -4882,6 +4928,16 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
 
                let channel_update_status = Readable::read(reader)?;
 
+               #[cfg(any(test, feature = "fuzztarget"))]
+               let mut historical_inbound_htlc_fulfills = HashSet::new();
+               #[cfg(any(test, feature = "fuzztarget"))]
+               {
+                       let htlc_fulfills_len: u64 = Readable::read(reader)?;
+                       for _ in 0..htlc_fulfills_len {
+                               assert!(historical_inbound_htlc_fulfills.insert(Readable::read(reader)?));
+                       }
+               }
+
                let mut announcement_sigs = None;
                read_tlv_fields!(reader, {
                        (0, announcement_sigs, option),
@@ -4973,6 +5029,9 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
                        workaround_lnd_bug_4006: None,
+
+                       #[cfg(any(test, feature = "fuzztarget"))]
+                       historical_inbound_htlc_fulfills,
                })
        }
 }