From: Matt Corallo
Date: Tue, 29 Jun 2021 21:05:45 +0000 (+0000)
Subject: Handle double-HTLC-claims without failing the backwards channel
X-Git-Tag: v0.0.100~20^2~2
X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=rust-lightning;a=commitdiff_plain;h=7e78fa660cec8a73286c94c1073ee588140e7a01

Handle double-HTLC-claims without failing the backwards channel

When receiving an update_fulfill_htlc message, we immediately forward the
claim backwards along the payment path before waiting for a full
commitment_signed dance. This is great, but can cause duplicative claims if
a node sends an update_fulfill_htlc message, disconnects, reconnects, and
then has to re-send its update_fulfill_htlc message again.

While there was code to handle this, it treated it as a channel error on the
inbound channel, which is incorrect - this is an expected, albeit incredibly
rare, condition. Instead, we handle these double-claims correctly, simply
ignoring them.

With debug_assertions enabled, we also check that the previous close of the
same HTLC was a fulfill, and that we are not moving from an HTLC failure to
an HTLC claim after it's too late.

A test is also added, which hits all three failure cases in
`Channel::get_update_fulfill_htlc`.

Found by the chanmon_consistency fuzzer.
---

diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 2c62abc5..2918b4ce 100644
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -2215,3 +2215,119 @@ fn channel_holding_cell_serialize() {
 	do_channel_holding_cell_serialize(true, false);
 	do_channel_holding_cell_serialize(false, true); // last arg doesn't matter
 }
+
+#[derive(PartialEq)]
+enum HTLCStatusAtDupClaim {
+	Received,
+	HoldingCell,
+	Cleared,
+}
+fn do_test_reconnect_dup_htlc_claims(htlc_status: HTLCStatusAtDupClaim, second_fails: bool) {
+	// When receiving an update_fulfill_htlc message, we immediately forward the claim backwards
+	// along the payment path before waiting for a full commitment_signed dance. This is great, but
+	// can cause duplicative claims if a node sends an update_fulfill_htlc message, disconnects,
+	// reconnects, and then has to re-send its update_fulfill_htlc message again.
+	// In previous code, we didn't handle the double-claim correctly, spuriously closing the
+	// channel on which the inbound HTLC was received.
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+	let chan_2 = create_announced_chan_between_nodes(&nodes, 1, 2, InitFeatures::known(), InitFeatures::known()).2;
+
+	let (payment_preimage, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 100_000);
+
+	let mut as_raa = None;
+	if htlc_status == HTLCStatusAtDupClaim::HoldingCell {
+		// In order to get the HTLC claim into the holding cell at nodes[1], we need nodes[1] to be
+		// awaiting a remote revoke_and_ack from nodes[0].
+		let (_, second_payment_hash, second_payment_secret) = get_payment_preimage_hash!(nodes[1]);
+		let route = get_route(&nodes[0].node.get_our_node_id(), &nodes[0].net_graph_msg_handler.network_graph.read().unwrap(),
+			&nodes[1].node.get_our_node_id(), Some(InvoiceFeatures::known()), None, &Vec::new(), 100_000, TEST_FINAL_CLTV, nodes[1].logger).unwrap();
+		nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)).unwrap();
+		check_added_monitors!(nodes[0], 1);
+
+		let send_event = SendEvent::from_event(nodes[0].node.get_and_clear_pending_msg_events().remove(0));
+		nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &send_event.msgs[0]);
+		nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &send_event.commitment_msg);
+		check_added_monitors!(nodes[1], 1);
+
+		let (bs_raa, bs_cs) = get_revoke_commit_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+		nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_raa);
+		check_added_monitors!(nodes[0], 1);
+		nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_cs);
+		check_added_monitors!(nodes[0], 1);
+
+		as_raa = Some(get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id()));
+	}
+
+	let fulfill_msg = msgs::UpdateFulfillHTLC {
+		channel_id: chan_2,
+		htlc_id: 0,
+		payment_preimage,
+	};
+	if second_fails {
+		assert!(nodes[2].node.fail_htlc_backwards(&payment_hash));
+		expect_pending_htlcs_forwardable!(nodes[2]);
+		check_added_monitors!(nodes[2], 1);
+		get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+	} else {
+		assert!(nodes[2].node.claim_funds(payment_preimage));
+		check_added_monitors!(nodes[2], 1);
+		let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+		assert_eq!(cs_updates.update_fulfill_htlcs.len(), 1);
+		// Check that the message we're about to deliver matches the one generated:
+		assert_eq!(fulfill_msg, cs_updates.update_fulfill_htlcs[0]);
+	}
+	nodes[1].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &fulfill_msg);
+	check_added_monitors!(nodes[1], 1);
+
+	let mut bs_updates = None;
+	if htlc_status != HTLCStatusAtDupClaim::HoldingCell {
+		bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
+		assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
+		nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
+		expect_payment_sent!(nodes[0], payment_preimage);
+		if htlc_status == HTLCStatusAtDupClaim::Cleared {
+			commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
+		}
+	} else {
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
+	nodes[1].node.peer_disconnected(&nodes[2].node.get_our_node_id(), false);
+	nodes[2].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+
+	if second_fails {
+		reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0), (false, false));
+		expect_pending_htlcs_forwardable!(nodes[1]);
+	} else {
+		reconnect_nodes(&nodes[1], &nodes[2], (false, false), (0, 0), (1, 0), (0, 0), (0, 0), (0, 0), (false, false));
+	}
+
+	if htlc_status == HTLCStatusAtDupClaim::HoldingCell {
+		nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_raa.unwrap());
+		check_added_monitors!(nodes[1], 1);
+		expect_pending_htlcs_forwardable_ignore!(nodes[1]); // We finally receive the second payment, but don't claim it
+
+		bs_updates = Some(get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()));
+		assert_eq!(bs_updates.as_ref().unwrap().update_fulfill_htlcs.len(), 1);
+		nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.as_ref().unwrap().update_fulfill_htlcs[0]);
+		expect_payment_sent!(nodes[0], payment_preimage);
+	}
+	if htlc_status != HTLCStatusAtDupClaim::Cleared {
+		commitment_signed_dance!(nodes[0], nodes[1], &bs_updates.as_ref().unwrap().commitment_signed, false);
+	}
+}
+
+#[test]
+fn test_reconnect_dup_htlc_claims() {
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, false);
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, false);
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, false);
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Received, true);
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::HoldingCell, true);
+	do_test_reconnect_dup_htlc_claims(HTLCStatusAtDupClaim::Cleared, true);
+}
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index d8a284fa..6d955257 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -444,6 +444,15 @@ pub(super) struct Channel<Signer: Sign> {
 	///
 	/// See-also <https://github.com/lightningnetwork/lnd/issues/4006>
 	pub workaround_lnd_bug_4006: Option<msgs::FundingLocked>,
+
+	#[cfg(any(test, feature = "fuzztarget"))]
+	// When we receive an HTLC fulfill on an outbound path, we may immediately fulfill the
+	// corresponding HTLC on the inbound path. If, then, the outbound path channel is
+	// disconnected and reconnected (before we've exchanged commitment_signed and revoke_and_ack
+	// messages), they may re-broadcast their update_fulfill_htlc, causing a duplicate claim. This
+	// is fine, but as a sanity check when we fail to generate the second claim, we check here
+	// that the original was a claim, and that we aren't now trying to fulfill a failed HTLC.
+	historical_inbound_htlc_fulfills: HashSet<u64>,
 }
 
 #[cfg(any(test, feature = "fuzztarget"))]
@@ -645,6 +654,9 @@ impl<Signer: Sign> Channel<Signer> {
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
 			workaround_lnd_bug_4006: None,
+
+			#[cfg(any(test, feature = "fuzztarget"))]
+			historical_inbound_htlc_fulfills: HashSet::new(),
 		})
 	}
 
@@ -890,6 +902,9 @@ impl<Signer: Sign> Channel<Signer> {
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
 			workaround_lnd_bug_4006: None,
+
+			#[cfg(any(test, feature = "fuzztarget"))]
+			historical_inbound_htlc_fulfills: HashSet::new(),
 		};
 
 		Ok(chan)
@@ -1249,8 +1264,8 @@ impl<Signer: Sign> Channel<Signer> {
 						if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
 						} else {
 							log_warn!(logger, "Have preimage and want to fulfill HTLC with payment hash {} we already failed against channel {}", log_bytes!(htlc.payment_hash.0), log_bytes!(self.channel_id()));
+							debug_assert!(false, "Tried to fulfill an HTLC that was already failed");
 						}
-						debug_assert!(false, "Tried to fulfill an HTLC that was already fail/fulfilled");
 						return Ok((None, None));
 					},
 					_ => {
@@ -1263,7 +1278,11 @@ impl<Signer: Sign> Channel<Signer> {
 			}
 		}
 		if pending_idx == core::usize::MAX {
-			return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+			#[cfg(any(test, feature = "fuzztarget"))]
+			// If we failed to find an HTLC to fulfill, make sure it was previously fulfilled and
+			// this is simply a duplicate claim, not previously failed and we lost funds.
+			debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+			return Ok((None, None));
 		}
 
 		// Now update local state:
@@ -1285,7 +1304,8 @@ impl<Signer: Sign> Channel<Signer> {
 					if htlc_id_arg == htlc_id {
 						// Make sure we don't leave latest_monitor_update_id incremented here:
 						self.latest_monitor_update_id -= 1;
-						debug_assert!(false, "Tried to fulfill an HTLC that was already fulfilled");
+						#[cfg(any(test, feature = "fuzztarget"))]
+						debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
 						return Ok((None, None));
 					}
 				},
@@ -1305,8 +1325,12 @@ impl<Signer: Sign> Channel<Signer> {
 				self.holding_cell_htlc_updates.push(HTLCUpdateAwaitingACK::ClaimHTLC {
 					payment_preimage: payment_preimage_arg, htlc_id: htlc_id_arg,
 				});
+				#[cfg(any(test, feature = "fuzztarget"))]
+				self.historical_inbound_htlc_fulfills.insert(htlc_id_arg);
 				return Ok((None, Some(monitor_update)));
 			}
+			#[cfg(any(test, feature = "fuzztarget"))]
+			self.historical_inbound_htlc_fulfills.insert(htlc_id_arg);
 
 			{
 				let htlc = &mut self.pending_inbound_htlcs[pending_idx];
@@ -1366,8 +1390,11 @@ impl<Signer: Sign> Channel<Signer> {
 			if htlc.htlc_id == htlc_id_arg {
 				match htlc.state {
 					InboundHTLCState::Committed => {},
-					InboundHTLCState::LocalRemoved(_) => {
-						debug_assert!(false, "Tried to fail an HTLC that was already fail/fulfilled");
+					InboundHTLCState::LocalRemoved(ref reason) => {
+						if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
+						} else {
+							debug_assert!(false, "Tried to fail an HTLC that was already failed");
+						}
 						return Ok(None);
 					},
 					_ => {
@@ -1379,7 +1406,11 @@ impl<Signer: Sign> Channel<Signer> {
 			}
 		}
 		if pending_idx == core::usize::MAX {
-			return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+			#[cfg(any(test, feature = "fuzztarget"))]
+			// If we failed to find an HTLC to fail, make sure it was previously fulfilled and this
+			// is simply a duplicate fail, not previously failed and we failed-back too early.
+			debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+			return Ok(None);
 		}
 
 		// Now update local state:
@@ -1388,8 +1419,9 @@ impl<Signer: Sign> Channel<Signer> {
 			match pending_update {
 				&HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => {
 					if htlc_id_arg == htlc_id {
-						debug_assert!(false, "Tried to fail an HTLC that was already fulfilled");
-						return Err(ChannelError::Ignore("Unable to find a pending HTLC which matched the given HTLC ID".to_owned()));
+						#[cfg(any(test, feature = "fuzztarget"))]
+						debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id_arg));
+						return Ok(None);
 					}
 				},
 				&HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. } => {
@@ -2453,7 +2485,14 @@ impl<Signer: Sign> Channel<Signer> {
 					},
 					&HTLCUpdateAwaitingACK::FailHTLC { htlc_id, ref err_packet } => {
 						match self.get_update_fail_htlc(htlc_id, err_packet.clone(), logger) {
-							Ok(update_fail_msg_option) => update_fail_htlcs.push(update_fail_msg_option.unwrap()),
+							Ok(update_fail_msg_option) => {
+								// If an HTLC failure was previously added to the holding cell (via
+								// `get_update_fail_htlc`) then generating the fail message itself
+								// must not fail - we should never end up in a state where we
+								// double-fail an HTLC or fail-then-claim an HTLC as it indicates
+								// we didn't wait for a full revocation before failing.
+								update_fail_htlcs.push(update_fail_msg_option.unwrap())
+							},
 							Err(e) => {
 								if let ChannelError::Ignore(_) = e {}
 								else {
@@ -4690,6 +4729,13 @@ impl<Signer: Sign> Writeable for Channel<Signer> {
 
 		self.channel_update_status.write(writer)?;
 
+		#[cfg(any(test, feature = "fuzztarget"))]
+		(self.historical_inbound_htlc_fulfills.len() as u64).write(writer)?;
+		#[cfg(any(test, feature = "fuzztarget"))]
+		for htlc in self.historical_inbound_htlc_fulfills.iter() {
+			htlc.write(writer)?;
+		}
+
 		write_tlv_fields!(writer, {
 			(0, self.announcement_sigs, option),
 			// minimum_depth and counterparty_selected_channel_reserve_satoshis used to have a
@@ -4882,6 +4928,16 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
 
 		let channel_update_status = Readable::read(reader)?;
 
+		#[cfg(any(test, feature = "fuzztarget"))]
+		let mut historical_inbound_htlc_fulfills = HashSet::new();
+		#[cfg(any(test, feature = "fuzztarget"))]
+		{
+			let htlc_fulfills_len: u64 = Readable::read(reader)?;
+			for _ in 0..htlc_fulfills_len {
+				assert!(historical_inbound_htlc_fulfills.insert(Readable::read(reader)?));
+			}
+		}
+
 		let mut announcement_sigs = None;
 		read_tlv_fields!(reader, {
 			(0, announcement_sigs, option),
@@ -4973,6 +5029,9 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
 
 			workaround_lnd_bug_4006: None,
+
+			#[cfg(any(test, feature = "fuzztarget"))]
+			historical_inbound_htlc_fulfills,
 		})
 	}
 }
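The core pattern the commit message describes can be boiled down to a small, self-contained sketch: under debug_assertions, remember every inbound HTLC id that has ever been fulfilled, and when a later claim cannot be matched to a pending HTLC, assert that it was one of those previously-fulfilled ids and simply ignore it rather than treating it as a channel-fatal error. The names below (MiniChannel, claim_htlc, ClaimResult) are illustrative only and are not part of the rust-lightning Channel API; this is a simplified sketch of the idea, not the actual implementation.

// Standalone sketch of the duplicate-claim handling idea described above. NOT the
// rust-lightning `Channel` API; `MiniChannel`, `claim_htlc` and `ClaimResult` are
// hypothetical names used only for illustration.
use std::collections::HashSet;

#[derive(Debug, PartialEq)]
enum ClaimResult {
	/// A new claim was generated for the HTLC.
	NewClaim,
	/// The HTLC was already claimed earlier; the duplicate is ignored rather than
	/// treated as a channel-fatal error.
	DuplicateIgnored,
}

struct MiniChannel {
	/// HTLCs (by id) currently pending on the inbound path.
	pending_inbound_htlcs: HashSet<u64>,
	/// Debug-only record of every HTLC id ever fulfilled, used purely as a sanity
	/// check when a claim cannot be matched to a pending HTLC.
	historical_inbound_htlc_fulfills: HashSet<u64>,
}

impl MiniChannel {
	fn claim_htlc(&mut self, htlc_id: u64) -> ClaimResult {
		if self.pending_inbound_htlcs.remove(&htlc_id) {
			// First claim: remember it so a later re-delivery can be recognized.
			self.historical_inbound_htlc_fulfills.insert(htlc_id);
			ClaimResult::NewClaim
		} else {
			// No pending HTLC matched. If this is a re-sent update_fulfill_htlc after a
			// disconnect/reconnect, we must have fulfilled it before; assert that in debug
			// builds and ignore the duplicate instead of failing the channel.
			debug_assert!(self.historical_inbound_htlc_fulfills.contains(&htlc_id),
				"claim for an HTLC we never fulfilled");
			ClaimResult::DuplicateIgnored
		}
	}
}

fn main() {
	let mut chan = MiniChannel {
		pending_inbound_htlcs: HashSet::new(),
		historical_inbound_htlc_fulfills: HashSet::new(),
	};
	chan.pending_inbound_htlcs.insert(0);

	assert_eq!(chan.claim_htlc(0), ClaimResult::NewClaim);
	// Simulate the peer re-sending its update_fulfill_htlc after a disconnect/reconnect:
	assert_eq!(chan.claim_htlc(0), ClaimResult::DuplicateIgnored);
	println!("duplicate claim ignored without failing the channel");
}

Running the sketch mirrors the reconnect scenario exercised by do_test_reconnect_dup_htlc_claims: the second delivery of the same fulfill is recognized as a duplicate and dropped without any channel-level failure.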