From: Matt Corallo
Date: Thu, 9 Nov 2023 04:14:15 +0000 (+0000)
Subject: Handle missing case in reestablish local commitment number checks
X-Git-Tag: v0.0.119~38^2
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=refs%2Fheads%2F2023-11-log-forward-peer;p=rust-lightning

Handle missing case in reestablish local commitment number checks

If we're behind by exactly one commitment (which we've already revoked),
we'd previously force-close the channel, guaranteeing we'll lose funds
because the counterparty holds the revocation secret for our latest local
commitment state. While this shouldn't happen because users should never
lose data, issues do happen in practice, so we should ensure we always
panic in this case rather than force-closing. Further,
`test_data_loss_protect` is updated to test this case.
---
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 8332cf322..182ba2bdb 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -4152,6 +4152,7 @@ impl Channel where
 			return Err(ChannelError::Close("Peer sent an invalid channel_reestablish to force close in a non-standard way".to_owned()));
 		}

+		let our_commitment_transaction = INITIAL_COMMITMENT_NUMBER - self.context.cur_holder_commitment_transaction_number - 1;
 		if msg.next_remote_commitment_number > 0 {
 			let expected_point = self.context.holder_signer.as_ref().get_per_commitment_point(INITIAL_COMMITMENT_NUMBER - msg.next_remote_commitment_number + 1, &self.context.secp_ctx);
 			let given_secret = SecretKey::from_slice(&msg.your_last_per_commitment_secret)
@@ -4159,7 +4160,7 @@ impl Channel where
 			if expected_point != PublicKey::from_secret_key(&self.context.secp_ctx, &given_secret) {
 				return Err(ChannelError::Close("Peer sent a garbage channel_reestablish with secret key not matching the commitment height provided".to_owned()));
 			}
-			if msg.next_remote_commitment_number > INITIAL_COMMITMENT_NUMBER - self.context.cur_holder_commitment_transaction_number {
+			if msg.next_remote_commitment_number > our_commitment_transaction {
 				macro_rules! log_and_panic {
 					($err_msg: expr) => {
 						log_error!(logger, $err_msg, &self.context.channel_id, log_pubkey!(self.context.counterparty_node_id));
@@ -4179,7 +4180,6 @@ impl Channel where

 		// Before we change the state of the channel, we check if the peer is sending a very old
 		// commitment transaction number, if yes we send a warning message.
-		let our_commitment_transaction = INITIAL_COMMITMENT_NUMBER - self.context.cur_holder_commitment_transaction_number - 1;
 		if msg.next_remote_commitment_number + 1 < our_commitment_transaction {
 			return Err(ChannelError::Warn(format!(
 				"Peer attempted to reestablish channel with a very old local commitment transaction: {} (received) vs {} (expected)",
@@ -4239,6 +4239,7 @@ impl Channel where
 				Some(self.get_last_revoke_and_ack())
 			}
 		} else {
+			debug_assert!(false, "All values should have been handled in the four cases above");
 			return Err(ChannelError::Close(format!(
 				"Peer attempted to reestablish channel expecting a future local commitment transaction: {} (received) vs {} (expected)",
 				msg.next_remote_commitment_number,
diff --git a/lightning/src/ln/reload_tests.rs b/lightning/src/ln/reload_tests.rs
index 909f76e06..4a52fa08a 100644
--- a/lightning/src/ln/reload_tests.rs
+++ b/lightning/src/ln/reload_tests.rs
@@ -15,9 +15,10 @@ use crate::chain::channelmonitor::{CLOSED_CHANNEL_UPDATE_ID, ChannelMonitor};
 use crate::sign::EntropySource;
 use crate::chain::transaction::OutPoint;
 use crate::events::{ClosureReason, Event, HTLCDestination, MessageSendEvent, MessageSendEventsProvider};
-use crate::ln::channelmanager::{ChannelManager, ChannelManagerReadArgs, PaymentId, RecipientOnionFields};
+use crate::ln::channelmanager::{ChannelManager, ChannelManagerReadArgs, PaymentId, Retry, RecipientOnionFields};
 use crate::ln::msgs;
 use crate::ln::msgs::{ChannelMessageHandler, RoutingMessageHandler, ErrorAction};
+use crate::routing::router::{RouteParameters, PaymentParameters};
 use crate::util::test_channel_signer::TestChannelSigner;
 use crate::util::test_utils;
 use crate::util::errors::APIError;
@@ -494,7 +495,7 @@ fn test_manager_serialize_deserialize_inconsistent_monitor() {
 }

 #[cfg(feature = "std")]
-fn do_test_data_loss_protect(reconnect_panicing: bool) {
+fn do_test_data_loss_protect(reconnect_panicing: bool, substantially_old: bool, not_stale: bool) {
 	// When we get a data_loss_protect proving we're behind, we immediately panic as the
 	// chain::Watch API requirements have been violated (e.g. the user restored from a backup). The
 	// panic message informs the user they should force-close without broadcasting, which is tested
@@ -518,8 +519,38 @@ fn do_test_data_loss_protect(reconnect_panicing: bool) {
 	let previous_node_state = nodes[0].node.encode();
 	let previous_chain_monitor_state = get_monitor!(nodes[0], chan.2).encode();

-	send_payment(&nodes[0], &vec!(&nodes[1])[..], 8000000);
-	send_payment(&nodes[0], &vec!(&nodes[1])[..], 8000000);
+	assert!(!substantially_old || !not_stale, "substantially_old and not_stale doesn't make sense");
+	if not_stale || !substantially_old {
+		// Previously, we'd only hit the data_loss_protect assertion if we had a state which
+		// revoked at least two revocations ago, not the latest revocation. Here, we use
+		// `not_stale` to test the boundary condition.
+		let pay_params = PaymentParameters::for_keysend(nodes[1].node.get_our_node_id(), 100, false);
+		let route_params = RouteParameters::from_payment_params_and_value(pay_params, 40000);
+		nodes[0].node.send_spontaneous_payment_with_retry(None, RecipientOnionFields::spontaneous_empty(), PaymentId([0; 32]), route_params, Retry::Attempts(0));
+		check_added_monitors(&nodes[0], 1);
+		let update_add_commit = SendEvent::from_node(&nodes[0]);
+
+		nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &update_add_commit.msgs[0]);
+		nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &update_add_commit.commitment_msg);
+		check_added_monitors(&nodes[1], 1);
+		let (raa, cs) = get_revoke_commit_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+
+		nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &raa);
+		check_added_monitors(&nodes[0], 1);
+		assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+		if !not_stale {
+			nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &cs);
+			check_added_monitors(&nodes[0], 1);
+			// A now revokes their original state, at which point reconnect should panic
+			let raa = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+			nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &raa);
+			check_added_monitors(&nodes[1], 1);
+			expect_pending_htlcs_forwardable_ignore!(nodes[1]);
+		}
+	} else {
+		send_payment(&nodes[0], &[&nodes[1]], 8000000);
+		send_payment(&nodes[0], &[&nodes[1]], 8000000);
+	}

 	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
 	nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id());
@@ -536,99 +567,131 @@ fn do_test_data_loss_protect(reconnect_panicing: bool) {
 		let reestablish_1 = get_chan_reestablish_msgs!(nodes[0], nodes[1]);

-		// Check we close channel detecting A is fallen-behind
-		// Check that we sent the warning message when we detected that A has fallen behind,
-		// and give the possibility for A to recover from the warning.
+		// If A has fallen behind substantially, B should send it a message letting it know
+		// that.
 		nodes[1].node.handle_channel_reestablish(&nodes[0].node.get_our_node_id(), &reestablish_1[0]);
-		let warn_msg = "Peer attempted to reestablish channel with a very old local commitment transaction: 0 (received) vs 4 (expected)".to_owned();
-
-		let warn_reestablish = nodes[1].node.get_and_clear_pending_msg_events();
-		assert_eq!(warn_reestablish.len(), 2);
-		match warn_reestablish[1] {
-			MessageSendEvent::HandleError { action: ErrorAction::SendWarningMessage { ref msg, .. }, .. } => {
-				assert_eq!(msg.data, warn_msg);
-			},
-			_ => panic!("Unexpected event"),
+		let reestablish_msg;
+		if substantially_old {
+			let warn_msg = "Peer attempted to reestablish channel with a very old local commitment transaction: 0 (received) vs 4 (expected)".to_owned();
+
+			let warn_reestablish = nodes[1].node.get_and_clear_pending_msg_events();
+			assert_eq!(warn_reestablish.len(), 2);
+			match warn_reestablish[1] {
+				MessageSendEvent::HandleError { action: ErrorAction::SendWarningMessage { ref msg, .. }, .. } => {
+					assert_eq!(msg.data, warn_msg);
+				},
+				_ => panic!("Unexpected events: {:?}", warn_reestablish),
+			}
+			reestablish_msg = match &warn_reestablish[0] {
+				MessageSendEvent::SendChannelReestablish { msg, .. } => msg.clone(),
+				_ => panic!("Unexpected events: {:?}", warn_reestablish),
+			};
+		} else {
+			let msgs = nodes[1].node.get_and_clear_pending_msg_events();
+			assert!(msgs.len() >= 4);
+			match msgs.last() {
+				Some(MessageSendEvent::SendChannelUpdate { .. }) => {},
+				_ => panic!("Unexpected events: {:?}", msgs),
+			}
+			assert!(msgs.iter().any(|msg| matches!(msg, MessageSendEvent::SendRevokeAndACK { .. })));
+			assert!(msgs.iter().any(|msg| matches!(msg, MessageSendEvent::UpdateHTLCs { .. })));
+			reestablish_msg = match &msgs[0] {
+				MessageSendEvent::SendChannelReestablish { msg, .. } => msg.clone(),
+				_ => panic!("Unexpected events: {:?}", msgs),
+			};
+		}
-		let reestablish_msg = match &warn_reestablish[0] {
-			MessageSendEvent::SendChannelReestablish { msg, .. } => msg.clone(),
-			_ => panic!("Unexpected event"),
-		};

 		{
-			let mut node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().clone();
-			// The node B should not broadcast the transaction to force close the channel!
+			let mut node_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap();
+			// The node B should never force-close the channel.
 			assert!(node_txn.is_empty());
 		}

 		// Check A panics upon seeing proof it has fallen behind.
-		assert!(std::panic::catch_unwind(|| {
+		let reconnect_res = std::panic::catch_unwind(|| {
 			nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &reestablish_msg);
-		}).is_err());
-		std::mem::forget(nodes); // Skip the `Drop` handler for `Node`
-		return; // By this point we should have panic'ed!
-	}
+		});
+		if not_stale {
+			assert!(reconnect_res.is_ok());
+			// At this point A gets confused because B expects a commitment state newer than A
+			// has sent, but not a newer revocation secret, so A just (correctly) closes.
+			check_closed_broadcast(&nodes[0], 1, true);
+			check_added_monitors(&nodes[0], 1);
+			check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError {
+				err: "Peer attempted to reestablish channel with a future remote commitment transaction: 2 (received) vs 1 (expected)".to_owned()
+			}, [nodes[1].node.get_our_node_id()], 1000000);
+		} else {
+			assert!(reconnect_res.is_err());
+			// Skip the `Drop` handler for `Node`s as some may be in an invalid (panicked) state.
+			std::mem::forget(nodes);
+		}
+	} else {
+		assert!(!not_stale, "We only care about the stale case when not testing panicking");

-	nodes[0].node.force_close_without_broadcasting_txn(&chan.2, &nodes[1].node.get_our_node_id()).unwrap();
-	check_added_monitors!(nodes[0], 1);
-	check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed, [nodes[1].node.get_our_node_id()], 1000000);
-	{
-		let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap();
-		assert_eq!(node_txn.len(), 0);
-	}
+		nodes[0].node.force_close_without_broadcasting_txn(&chan.2, &nodes[1].node.get_our_node_id()).unwrap();
+		check_added_monitors!(nodes[0], 1);
+		check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed, [nodes[1].node.get_our_node_id()], 1000000);
+		{
+			let node_txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap();
+			assert_eq!(node_txn.len(), 0);
+		}

-	for msg in nodes[0].node.get_and_clear_pending_msg_events() {
-		if let MessageSendEvent::BroadcastChannelUpdate { .. } = msg {
-		} else if let MessageSendEvent::HandleError { ref action, .. } = msg {
+				match action {
+					&ErrorAction::DisconnectPeer { ref msg } => {
+						assert_eq!(msg.as_ref().unwrap().data, "Channel force-closed");
+					},
+					_ => panic!("Unexpected event!"),
+				}
+			} else {
+				panic!("Unexpected event {:?}", msg)
+			}
+		}
+
+		// after the warning message sent by B, we should not able to
+		// use the channel, or reconnect with success to the channel.
+		assert!(nodes[0].node.list_usable_channels().is_empty());
+		nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init {
+			features: nodes[1].node.init_features(), networks: None, remote_network_address: None
+		}, true).unwrap();
+		nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init {
+			features: nodes[0].node.init_features(), networks: None, remote_network_address: None
+		}, false).unwrap();
+		let retry_reestablish = get_chan_reestablish_msgs!(nodes[1], nodes[0]);
+
+		nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &retry_reestablish[0]);
+		let mut err_msgs_0 = Vec::with_capacity(1);
+		if let MessageSendEvent::HandleError { ref action, .. } = nodes[0].node.get_and_clear_pending_msg_events()[1] {
 			match action {
-				&ErrorAction::DisconnectPeer { ref msg } => {
-					assert_eq!(msg.as_ref().unwrap().data, "Channel force-closed");
+				&ErrorAction::SendErrorMessage { ref msg } => {
+					assert_eq!(msg.data, format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", &nodes[1].node.get_our_node_id()));
+					err_msgs_0.push(msg.clone());
 				},
 				_ => panic!("Unexpected event!"),
 			}
 		} else {
-			panic!("Unexpected event {:?}", msg)
-		}
-	}
-
-	// after the warning message sent by B, we should not able to
-	// use the channel, or reconnect with success to the channel.
-	assert!(nodes[0].node.list_usable_channels().is_empty());
-	nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init {
-		features: nodes[1].node.init_features(), networks: None, remote_network_address: None
-	}, true).unwrap();
-	nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init {
-		features: nodes[0].node.init_features(), networks: None, remote_network_address: None
-	}, false).unwrap();
-	let retry_reestablish = get_chan_reestablish_msgs!(nodes[1], nodes[0]);
-
-	nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &retry_reestablish[0]);
-	let mut err_msgs_0 = Vec::with_capacity(1);
-	if let MessageSendEvent::HandleError { ref action, .. } = nodes[0].node.get_and_clear_pending_msg_events()[1] {
-		match action {
-			&ErrorAction::SendErrorMessage { ref msg } => {
-				assert_eq!(msg.data, format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", &nodes[1].node.get_our_node_id()));
-				err_msgs_0.push(msg.clone());
-			},
-			_ => panic!("Unexpected event!"),
+			panic!("Unexpected event!");
 		}
-	} else {
-		panic!("Unexpected event!");
+		assert_eq!(err_msgs_0.len(), 1);
+		nodes[1].node.handle_error(&nodes[0].node.get_our_node_id(), &err_msgs_0[0]);
+		assert!(nodes[1].node.list_usable_channels().is_empty());
+		check_added_monitors!(nodes[1], 1);
+		check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyForceClosed { peer_msg: UntrustedString(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", &nodes[1].node.get_our_node_id())) }
+			, [nodes[0].node.get_our_node_id()], 1000000);
+		check_closed_broadcast!(nodes[1], false);
 	}
-	assert_eq!(err_msgs_0.len(), 1);
-	nodes[1].node.handle_error(&nodes[0].node.get_our_node_id(), &err_msgs_0[0]);
-	assert!(nodes[1].node.list_usable_channels().is_empty());
-	check_added_monitors!(nodes[1], 1);
-	check_closed_event!(nodes[1], 1, ClosureReason::CounterpartyForceClosed { peer_msg: UntrustedString(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", &nodes[1].node.get_our_node_id())) }
-		, [nodes[0].node.get_our_node_id()], 1000000);
-	check_closed_broadcast!(nodes[1], false);
 }

 #[test]
 #[cfg(feature = "std")]
 fn test_data_loss_protect() {
-	do_test_data_loss_protect(true);
-	do_test_data_loss_protect(false);
+	do_test_data_loss_protect(true, false, true);
+	do_test_data_loss_protect(true, true, false);
+	do_test_data_loss_protect(true, false, false);
+	do_test_data_loss_protect(false, true, false);
+	do_test_data_loss_protect(false, false, false);
 }

 #[test]
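
Editor's note (not part of the patch above): the following is a minimal, self-contained sketch of the boundary condition the changed comparison fixes. `INITIAL_COMMITMENT_NUMBER`, `our_commitment_transaction` and `next_remote_commitment_number` are taken from the diff; the `ReestablishAction` enum, the `classify_reestablish` helper and the `proved_latest_secret` flag are invented here purely for illustration and simplify away most of the real `channel_reestablish` handling.

// Illustrative only -- not the channel.rs implementation.
const INITIAL_COMMITMENT_NUMBER: u64 = (1 << 48) - 1; // assumed LDK value

#[derive(Debug, PartialEq)]
enum ReestablishAction {
	PanicLostData,    // peer proved we're behind a state we already revoked: never broadcast
	WarnVeryOld,      // peer is several states behind us: warn, don't force-close
	Continue,         // heights line up (or peer is one state behind): normal reestablish
	CloseFutureState, // peer claims a commitment we never sent (unproven): close the channel
}

fn classify_reestablish(
	next_remote_commitment_number: u64,
	cur_holder_commitment_transaction_number: u64,
	proved_latest_secret: bool,
) -> ReestablishAction {
	let our_commitment_transaction =
		INITIAL_COMMITMENT_NUMBER - cur_holder_commitment_transaction_number - 1;
	if proved_latest_secret && next_remote_commitment_number > our_commitment_transaction {
		// Pre-patch this compared against `our_commitment_transaction + 1`, so a peer that was
		// ahead of us by exactly one state (which we had already revoked) skipped this branch
		// and fell through to CloseFutureState, force-closing with a revoked commitment.
		ReestablishAction::PanicLostData
	} else if next_remote_commitment_number + 1 < our_commitment_transaction {
		ReestablishAction::WarnVeryOld
	} else if next_remote_commitment_number <= our_commitment_transaction {
		ReestablishAction::Continue
	} else {
		// With the panic check above widened, this branch should be unreachable when the
		// secret was proven, which is why the patch adds a debug_assert before the
		// "future local commitment transaction" close.
		ReestablishAction::CloseFutureState
	}
}

Under this framing, a peer that is ahead by exactly one state and proves it with our last per-commitment secret now lands in the panic branch instead of the force-close branch, which is the case the new `test_data_loss_protect` variants exercise.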