From: Matt Corallo <git@bluematt.me>
Date: Fri, 2 Sep 2022 21:10:43 +0000 (+0000)
Subject: Correct payment resolution after on chain failure of dust HTLCs
X-Git-Tag: v0.0.111~16^2
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=refs%2Fheads%2F2022-08-dust-retry;p=rust-lightning

Correct payment resolution after on chain failure of dust HTLCs

Previously, we wouldn't mark a dust HTLC as permanently resolved if
the commitment transaction went on chain. This resulted in us
always considering the HTLC as pending on restart, when we load the
pending payments set from the monitors.

Fixes #1653.
---

diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs
index 52cfe9972..ed8475059 100644
--- a/lightning/src/chain/channelmonitor.rs
+++ b/lightning/src/chain/channelmonitor.rs
@@ -619,7 +619,7 @@ pub enum Balance {
 /// An HTLC which has been irrevocably resolved on-chain, and has reached ANTI_REORG_DELAY.
 #[derive(PartialEq)]
 struct IrrevocablyResolvedHTLC {
-	commitment_tx_output_idx: u32,
+	commitment_tx_output_idx: Option<u32>,
 	/// The txid of the transaction which resolved the HTLC, this may be a commitment (if the HTLC
 	/// was not present in the confirmed commitment transaction), HTLC-Success, or HTLC-Timeout
 	/// transaction.
@@ -628,11 +628,39 @@ struct IrrevocablyResolvedHTLC {
 	payment_preimage: Option<PaymentPreimage>,
 }
 
-impl_writeable_tlv_based!(IrrevocablyResolvedHTLC, {
-	(0, commitment_tx_output_idx, required),
-	(1, resolving_txid, option),
-	(2, payment_preimage, option),
-});
+// In LDK versions prior to 0.0.111 commitment_tx_output_idx was not Option-al and
+// IrrevocablyResolvedHTLC objects only existed for non-dust HTLCs. This was a bug, but to maintain
+// backwards compatibility we must ensure we always write out a commitment_tx_output_idx field,
+// using `u32::max_value()` as a sentinal to indicate the HTLC was dust.
+impl Writeable for IrrevocablyResolvedHTLC {
+	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), io::Error> {
+		let mapped_commitment_tx_output_idx = self.commitment_tx_output_idx.unwrap_or(u32::max_value());
+		write_tlv_fields!(writer, {
+			(0, mapped_commitment_tx_output_idx, required),
+			(1, self.resolving_txid, option),
+			(2, self.payment_preimage, option),
+		});
+		Ok(())
+	}
+}
+
+impl Readable for IrrevocablyResolvedHTLC {
+	fn read<R: io::Read>(reader: &mut R) -> Result<Self, DecodeError> {
+		let mut mapped_commitment_tx_output_idx = 0;
+		let mut resolving_txid = None;
+		let mut payment_preimage = None;
+		read_tlv_fields!(reader, {
+			(0, mapped_commitment_tx_output_idx, required),
+			(1, resolving_txid, option),
+			(2, payment_preimage, option),
+		});
+		Ok(Self {
+			commitment_tx_output_idx: if mapped_commitment_tx_output_idx == u32::max_value() { None } else { Some(mapped_commitment_tx_output_idx) },
+			resolving_txid,
+			payment_preimage,
+		})
+	}
+}
 
 /// A ChannelMonitor handles chain events (blocks connected and disconnected) and generates
 /// on-chain transactions to ensure no loss of funds occurs.
@@ -1485,7 +1513,7 @@ impl<Signer: Sign> ChannelMonitorImpl<Signer> {
 			}
 		}
 		let htlc_resolved = self.htlcs_resolved_on_chain.iter()
-			.find(|v| if v.commitment_tx_output_idx == htlc_commitment_tx_output_idx {
+			.find(|v| if v.commitment_tx_output_idx == Some(htlc_commitment_tx_output_idx) {
 				debug_assert!(htlc_spend_txid_opt.is_none());
 				htlc_spend_txid_opt = v.resolving_txid;
 				true
@@ -1775,7 +1803,7 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
 		macro_rules! walk_htlcs {
 			($holder_commitment: expr, $htlc_iter: expr) => {
 				for (htlc, source) in $htlc_iter {
-					if us.htlcs_resolved_on_chain.iter().any(|v| Some(v.commitment_tx_output_idx) == htlc.transaction_output_index) {
+					if us.htlcs_resolved_on_chain.iter().any(|v| v.commitment_tx_output_idx == htlc.transaction_output_index) {
 						// We should assert that funding_spend_confirmed is_some() here, but we
 						// have some unit tests which violate HTLC transaction CSVs entirely and
 						// would fail.
@@ -2902,12 +2930,10 @@ impl<Signer: Sign> ChannelMonitorImpl<Signer> {
 						source: source.clone(),
 						htlc_value_satoshis,
 					}));
-					if let Some(idx) = commitment_tx_output_idx {
-						self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC {
-							commitment_tx_output_idx: idx, resolving_txid: Some(entry.txid),
-							payment_preimage: None,
-						});
-					}
+					self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC {
+						commitment_tx_output_idx, resolving_txid: Some(entry.txid),
+						payment_preimage: None,
+					});
 				},
 				OnchainEvent::MaturingOutput { descriptor } => {
 					log_debug!(logger, "Descriptor {} has got enough confirmations to be passed upstream", log_spendable!(descriptor));
@@ -2917,7 +2943,7 @@ impl<Signer: Sign> ChannelMonitorImpl<Signer> {
 				},
 				OnchainEvent::HTLCSpendConfirmation { commitment_tx_output_idx, preimage, .. } => {
 					self.htlcs_resolved_on_chain.push(IrrevocablyResolvedHTLC {
-						commitment_tx_output_idx, resolving_txid: Some(entry.txid),
+						commitment_tx_output_idx: Some(commitment_tx_output_idx), resolving_txid: Some(entry.txid),
 						payment_preimage: preimage,
 					});
 				},
diff --git a/lightning/src/ln/payment_tests.rs b/lightning/src/ln/payment_tests.rs
index cc8f4ee27..315f76635 100644
--- a/lightning/src/ln/payment_tests.rs
+++ b/lightning/src/ln/payment_tests.rs
@@ -16,7 +16,7 @@ use chain::channelmonitor::{ANTI_REORG_DELAY, ChannelMonitor, LATENCY_GRACE_PERI
 use chain::transaction::OutPoint;
 use chain::keysinterface::KeysInterface;
 use ln::channel::EXPIRE_PREV_CONFIG_TICKS;
-use ln::channelmanager::{BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, PaymentId, PaymentSendFailure};
+use ln::channelmanager::{BREAKDOWN_TIMEOUT, ChannelManager, ChannelManagerReadArgs, MPP_TIMEOUT_TICKS, MIN_CLTV_EXPIRY_DELTA, PaymentId, PaymentSendFailure};
 use ln::features::{InitFeatures, InvoiceFeatures};
 use ln::msgs;
 use ln::msgs::ChannelMessageHandler;
@@ -563,6 +563,231 @@ fn retry_with_no_persist() {
 	do_retry_with_no_persist(false);
 }
 
+fn do_test_completed_payment_not_retryable_on_reload(use_dust: bool) {
+	// Test that an off-chain completed payment is not retryable on restart. This was previously
+	// broken for dust payments, but we test for both dust and non-dust payments.
+	//
+	// `use_dust` switches to using a dust HTLC, which results in the HTLC not having an on-chain
+	// output at all.
+	let chanmon_cfgs = create_chanmon_cfgs(3);
+	let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+
+	let mut manually_accept_config = test_default_channel_config();
+	manually_accept_config.manually_accept_inbound_channels = true;
+
+	let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, Some(manually_accept_config), None]);
+
+	let first_persister: test_utils::TestPersister;
+	let first_new_chain_monitor: test_utils::TestChainMonitor;
+	let first_nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+	let second_persister: test_utils::TestPersister;
+	let second_new_chain_monitor: test_utils::TestChainMonitor;
+	let second_nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+	let third_persister: test_utils::TestPersister;
+	let third_new_chain_monitor: test_utils::TestChainMonitor;
+	let third_nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+	let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+	// Because we set nodes[1] to manually accept channels, just open a 0-conf channel.
+	let (funding_tx, chan_id) = open_zero_conf_channel(&nodes[0], &nodes[1], None);
+	confirm_transaction(&nodes[0], &funding_tx);
+	confirm_transaction(&nodes[1], &funding_tx);
+	// Ignore the announcement_signatures messages
+	nodes[0].node.get_and_clear_pending_msg_events();
+	nodes[1].node.get_and_clear_pending_msg_events();
+	let chan_id_2 = create_announced_chan_between_nodes(&nodes, 1, 2, InitFeatures::known(), InitFeatures::known()).2;
+
+	// Serialize the ChannelManager prior to sending payments
+	let mut nodes_0_serialized = nodes[0].node.encode();
+
+	let route = get_route_and_payment_hash!(nodes[0], nodes[2], if use_dust { 1_000 } else { 1_000_000 }).0;
+	let (payment_preimage, payment_hash, payment_secret, payment_id) = send_along_route(&nodes[0], route, &[&nodes[1], &nodes[2]], if use_dust { 1_000 } else { 1_000_000 });
+
+	// The ChannelMonitor should always be the latest version, as we're required to persist it
+	// during the `commitment_signed_dance!()`.
+	let mut chan_0_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+	get_monitor!(nodes[0], chan_id).write(&mut chan_0_monitor_serialized).unwrap();
+
+	let mut chan_1_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+
+	macro_rules! reload_node {
+		($chain_monitor: ident, $chan_manager: ident, $persister: ident) => { {
+			$persister = test_utils::TestPersister::new();
+			let keys_manager = &chanmon_cfgs[0].keys_manager;
+			$chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[0].chain_source), nodes[0].tx_broadcaster.clone(), nodes[0].logger, node_cfgs[0].fee_estimator, &$persister, keys_manager);
+			nodes[0].chain_monitor = &$chain_monitor;
+			let mut chan_0_monitor_read = &chan_0_monitor_serialized.0[..];
+			let (_, mut chan_0_monitor) = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
+				&mut chan_0_monitor_read, keys_manager).unwrap();
+			assert!(chan_0_monitor_read.is_empty());
+
+			let mut chan_1_monitor = None;
+			let mut channel_monitors = HashMap::new();
+			channel_monitors.insert(chan_0_monitor.get_funding_txo().0, &mut chan_0_monitor);
+
+			if !chan_1_monitor_serialized.0.is_empty() {
+				let mut chan_1_monitor_read = &chan_1_monitor_serialized.0[..];
+				chan_1_monitor = Some(<(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
+					&mut chan_1_monitor_read, keys_manager).unwrap().1);
+				assert!(chan_1_monitor_read.is_empty());
+				channel_monitors.insert(chan_1_monitor.as_ref().unwrap().get_funding_txo().0, chan_1_monitor.as_mut().unwrap());
+			}
+
+			let mut nodes_0_read = &nodes_0_serialized[..];
+			let (_, nodes_0_deserialized_tmp) = {
+				<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_0_read, ChannelManagerReadArgs {
+					default_config: test_default_channel_config(),
+					keys_manager,
+					fee_estimator: node_cfgs[0].fee_estimator,
+					chain_monitor: nodes[0].chain_monitor,
+					tx_broadcaster: nodes[0].tx_broadcaster.clone(),
+					logger: nodes[0].logger,
+					channel_monitors,
+				}).unwrap()
+			};
+			$chan_manager = nodes_0_deserialized_tmp;
+			assert!(nodes_0_read.is_empty());
+
+			assert!(nodes[0].chain_monitor.watch_channel(chan_0_monitor.get_funding_txo().0, chan_0_monitor).is_ok());
+			if !chan_1_monitor_serialized.0.is_empty() {
+				let funding_txo = chan_1_monitor.as_ref().unwrap().get_funding_txo().0;
+				assert!(nodes[0].chain_monitor.watch_channel(funding_txo, chan_1_monitor.unwrap()).is_ok());
+			}
+			nodes[0].node = &$chan_manager;
+			check_added_monitors!(nodes[0], if !chan_1_monitor_serialized.0.is_empty() { 2 } else { 1 });
+
+			nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+		} }
+	}
+
+	reload_node!(first_new_chain_monitor, first_nodes_0_deserialized, first_persister);
+
+	// On reload, the ChannelManager should realize it is stale compared to the ChannelMonitor and
+	// force-close the channel.
+	check_closed_event!(nodes[0], 1, ClosureReason::OutdatedChannelManager);
+	assert!(nodes[0].node.list_channels().is_empty());
+	assert!(nodes[0].node.has_pending_payments());
+	assert_eq!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0).len(), 1);
+
+	nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init { features: InitFeatures::known(), remote_network_address: None });
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+
+	// Now nodes[1] should send a channel reestablish, which nodes[0] will respond to with an
+	// error, as the channel has hit the chain.
+	nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init { features: InitFeatures::known(), remote_network_address: None });
+	let bs_reestablish = get_event_msg!(nodes[1], MessageSendEvent::SendChannelReestablish, nodes[0].node.get_our_node_id());
+	nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &bs_reestablish);
+	let as_err = nodes[0].node.get_and_clear_pending_msg_events();
+	assert_eq!(as_err.len(), 1);
+	let bs_commitment_tx;
+	match as_err[0] {
+		MessageSendEvent::HandleError { node_id, action: msgs::ErrorAction::SendErrorMessage { ref msg } } => {
+			assert_eq!(node_id, nodes[1].node.get_our_node_id());
+			nodes[1].node.handle_error(&nodes[0].node.get_our_node_id(), msg);
+			check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyForceClosed { peer_msg: "Failed to find corresponding channel".to_string() });
+			check_added_monitors!(nodes[1], 1);
+			bs_commitment_tx = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		},
+		_ => panic!("Unexpected event"),
+	}
+	check_closed_broadcast!(nodes[1], false);
+
+	// Now fail back the payment from nodes[2] to nodes[1]. This doesn't really matter as the
+	// previous hop channel is already on-chain, but it makes nodes[2] willing to see additional
+	// incoming HTLCs with the same payment hash later.
+	nodes[2].node.fail_htlc_backwards(&payment_hash);
+	expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[2], [HTLCDestination::FailedPayment { payment_hash }]);
+	check_added_monitors!(nodes[2], 1);
+
+	let htlc_fulfill_updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
+	nodes[1].node.handle_update_fail_htlc(&nodes[2].node.get_our_node_id(), &htlc_fulfill_updates.update_fail_htlcs[0]);
+	commitment_signed_dance!(nodes[1], nodes[2], htlc_fulfill_updates.commitment_signed, false);
+	expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1],
+		[HTLCDestination::NextHopChannel { node_id: Some(nodes[2].node.get_our_node_id()), channel_id: chan_id_2 }]);
+
+	// Connect the HTLC-Timeout transaction, timing out the HTLC on both nodes (but not confirming
+	// the HTLC-Timeout transaction beyond 1 conf). For dust HTLCs, the HTLC is considered resolved
+	// after the commitment transaction, so always connect the commitment transaction.
+	mine_transaction(&nodes[0], &bs_commitment_tx[0]);
+	mine_transaction(&nodes[1], &bs_commitment_tx[0]);
+	if !use_dust {
+		connect_blocks(&nodes[0], TEST_FINAL_CLTV - 1 + (MIN_CLTV_EXPIRY_DELTA as u32));
+		connect_blocks(&nodes[1], TEST_FINAL_CLTV - 1 + (MIN_CLTV_EXPIRY_DELTA as u32));
+		let as_htlc_timeout = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+		check_spends!(as_htlc_timeout[0], bs_commitment_tx[0]);
+		assert_eq!(as_htlc_timeout.len(), 1);
+
+		mine_transaction(&nodes[0], &as_htlc_timeout[0]);
+		// nodes[0] may rebroadcast (or RBF-bump) its HTLC-Timeout, so wipe the announced set.
+		nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().clear();
+		mine_transaction(&nodes[1], &as_htlc_timeout[0]);
+	}
+
+	// Create a new channel on which to retry the payment before we fail the payment via the
+	// HTLC-Timeout transaction. This avoids ChannelManager timing out the payment due to us
+	// connecting several blocks while creating the channel (implying time has passed).
+	// We do this with a zero-conf channel to avoid connecting blocks as a side-effect.
+	let (_, chan_id_3) = open_zero_conf_channel(&nodes[0], &nodes[1], None);
+	assert_eq!(nodes[0].node.list_usable_channels().len(), 1);
+
+	// If we attempt to retry prior to the HTLC-Timeout (or commitment transaction, for dust HTLCs)
+	// confirming, we will fail as it's considered still-pending...
+	let (new_route, _, _, _) = get_route_and_payment_hash!(nodes[0], nodes[2], if use_dust { 1_000 } else { 1_000_000 });
+	assert!(nodes[0].node.retry_payment(&new_route, payment_id).is_err());
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+
+	// After ANTI_REORG_DELAY confirmations, the HTLC should be failed and we can try the payment
+	// again. We serialize the node first as we'll then test retrying the HTLC after a restart
+	// (which should also still work).
+	connect_blocks(&nodes[0], ANTI_REORG_DELAY - 1);
+	connect_blocks(&nodes[1], ANTI_REORG_DELAY - 1);
+	// We set mpp_parts_remain to avoid having abandon_payment called
+	expect_payment_failed_conditions(&nodes[0], payment_hash, false, PaymentFailedConditions::new().mpp_parts_remain());
+
+	chan_0_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+	get_monitor!(nodes[0], chan_id).write(&mut chan_0_monitor_serialized).unwrap();
+	chan_1_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+	get_monitor!(nodes[0], chan_id_3).write(&mut chan_1_monitor_serialized).unwrap();
+	nodes_0_serialized = nodes[0].node.encode();
+
+	assert!(nodes[0].node.retry_payment(&new_route, payment_id).is_ok());
+	assert!(!nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+
+	reload_node!(second_new_chain_monitor, second_nodes_0_deserialized, second_persister);
+	reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+
+	// Now resend the payment, delivering the HTLC and actually claiming it this time. This ensures
+	// the payment is not (spuriously) listed as still pending.
+	assert!(nodes[0].node.retry_payment(&new_route, payment_id).is_ok());
+	check_added_monitors!(nodes[0], 1);
+	pass_along_route(&nodes[0], &[&[&nodes[1], &nodes[2]]], if use_dust { 1_000 } else { 1_000_000 }, payment_hash, payment_secret);
+	claim_payment(&nodes[0], &[&nodes[1], &nodes[2]], payment_preimage);
+
+	assert!(nodes[0].node.retry_payment(&new_route, payment_id).is_err());
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+
+	chan_0_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+	get_monitor!(nodes[0], chan_id).write(&mut chan_0_monitor_serialized).unwrap();
+	chan_1_monitor_serialized = test_utils::TestVecWriter(Vec::new());
+	get_monitor!(nodes[0], chan_id_3).write(&mut chan_1_monitor_serialized).unwrap();
+	nodes_0_serialized = nodes[0].node.encode();
+
+	// Ensure that after reload we cannot retry the payment.
+	reload_node!(third_new_chain_monitor, third_nodes_0_deserialized, third_persister);
+	reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+
+	assert!(nodes[0].node.retry_payment(&new_route, payment_id).is_err());
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+}
+
+#[test]
+fn test_completed_payment_not_retryable_on_reload() {
+	do_test_completed_payment_not_retryable_on_reload(true);
+	do_test_completed_payment_not_retryable_on_reload(false);
+}
+
+
 fn do_test_dup_htlc_onchain_fails_on_reload(persist_manager_post_event: bool, confirm_commitment_tx: bool, payment_timeout: bool) {
 	// When a Channel is closed, any outbound HTLCs which were relayed through it are simply
 	// dropped when the Channel is. From there, the ChannelManager relies on the ChannelMonitor