From: Matt Corallo <git@bluematt.me>
Date: Mon, 29 Jul 2019 17:45:35 +0000 (-0400)
Subject: Handle monitor update failure during funding on the fundee side
X-Git-Tag: v0.0.12~194^2
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=8ba352952242421041cef4019d0bf2172e712986;p=rust-lightning

Handle monitor update failure during funding on the fundee side

This carries a surprising amount of complexity despite only being
possible in the case where monitor updating failed during the
processing of funding_generated. Specifically, this requires
handling rebroadcasting funding_locked once we successfully persist
our monitor again.

As an alternative we could never send funding_signed when the
monitor failed to persist, but this approach avoids needless
delays during funding.
---

diff --git a/src/ln/chanmon_update_fail_tests.rs b/src/ln/chanmon_update_fail_tests.rs
index 4803b0054..137d98346 100644
--- a/src/ln/chanmon_update_fail_tests.rs
+++ b/src/ln/chanmon_update_fail_tests.rs
@@ -1557,7 +1557,9 @@ fn monitor_update_claim_fail_no_response() {
 // Note that restore_between_fails with !fail_on_generate is useless
 // Also note that !fail_on_generate && !fail_on_signed is useless
 // Finally, note that !fail_on_signed is not possible with fail_on_generate && !restore_between_fails
-fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool) {
+// confirm_a_first and restore_b_before_conf are wholly unrelated to earlier bools and
+// restore_b_before_conf has no meaning if !confirm_a_first
+fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool, confirm_a_first: bool, restore_b_before_conf: bool) {
 	// Test that if the monitor update generated by funding_transaction_generated fails we continue
 	// the channel setup happily after the update is restored.
 	let mut nodes = create_network(2, &[None, None]);
@@ -1574,6 +1576,7 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
 	nodes[0].node.funding_transaction_generated(&temporary_channel_id, funding_output);
 	check_added_monitors!(nodes[0], 1);
 
+	*nodes[1].chan_monitor.update_ret.lock().unwrap() = Err(ChannelMonitorUpdateErr::TemporaryFailure);
 	nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id())).unwrap();
 	check_added_monitors!(nodes[1], 1);
 
@@ -1623,8 +1626,45 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
 		_ => panic!("Unexpected event"),
 	};
 
-	let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm(&nodes[0], &nodes[1], &funding_tx);
-	let (announcement, as_update, bs_update) = create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked);
+	if confirm_a_first {
+		confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version);
+		nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingLocked, nodes[1].node.get_our_node_id())).unwrap();
+	} else {
+		assert!(!restore_b_before_conf);
+		confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+	}
+
+	// Make sure nodes[1] isn't stupid enough to re-send the FundingLocked on reconnect
+	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+	nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+	reconnect_nodes(&nodes[0], &nodes[1], (false, confirm_a_first), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	if !restore_b_before_conf {
+		confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+		assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
+	}
+
+	*nodes[1].chan_monitor.update_ret.lock().unwrap() = Ok(());
+	nodes[1].node.test_restore_channel_monitor();
+	check_added_monitors!(nodes[1], 1);
+
+	let (channel_id, (announcement, as_update, bs_update)) = if !confirm_a_first {
+		nodes[0].node.handle_funding_locked(&nodes[1].node.get_our_node_id(), &get_event_msg!(nodes[1], MessageSendEvent::SendFundingLocked, nodes[0].node.get_our_node_id())).unwrap();
+
+		confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version);
+		let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[1], &nodes[0]);
+		(channel_id, create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked))
+	} else {
+		if restore_b_before_conf {
+			confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+		}
+		let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[0], &nodes[1]);
+		(channel_id, create_chan_between_nodes_with_value_b(&nodes[1], &nodes[0], &funding_locked))
+	};
 	for node in nodes.iter() {
 		assert!(node.router.handle_channel_announcement(&announcement).unwrap());
 		node.router.handle_channel_update(&as_update).unwrap();
@@ -1637,8 +1677,8 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
 
 #[test]
 fn during_funding_monitor_fail() {
-	do_during_funding_monitor_fail(false, false, true);
-	do_during_funding_monitor_fail(true, false, true);
-	do_during_funding_monitor_fail(true, true, true);
-	do_during_funding_monitor_fail(true, true, false);
+	do_during_funding_monitor_fail(false, false, true, true, true);
+	do_during_funding_monitor_fail(true, false, true, false, false);
+	do_during_funding_monitor_fail(true, true, true, true, false);
+	do_during_funding_monitor_fail(true, true, false, false, false);
 }
diff --git a/src/ln/channel.rs b/src/ln/channel.rs
index 6dd7e8c82..bcc91508f 100644
--- a/src/ln/channel.rs
+++ b/src/ln/channel.rs
@@ -181,9 +181,9 @@ enum ChannelState {
 	/// "disconnected" and no updates are allowed until after we've done a channel_reestablish
 	/// dance.
 	PeerDisconnected = (1 << 7),
-	/// Flag which is set on ChannelFunded and FundingSent indicating the user has told us they
-	/// failed to update our ChannelMonitor somewhere and we should pause sending any outbound
-	/// messages until they've managed to do so.
+	/// Flag which is set on ChannelFunded, FundingCreated, and FundingSent indicating the user has
+	/// told us they failed to update our ChannelMonitor somewhere and we should pause sending any
+	/// outbound messages until they've managed to do so.
 	MonitorUpdateFailed = (1 << 8),
 	/// Flag which implies that we have sent a commitment_signed but are awaiting the responding
 	/// revoke_and_ack message. During this time period, we can't generate new commitment_signed
@@ -248,6 +248,7 @@ pub(super) struct Channel {
 	/// send it first.
 	resend_order: RAACommitmentOrder,
 
+	monitor_pending_funding_locked: bool,
 	monitor_pending_revoke_and_ack: bool,
 	monitor_pending_commitment_signed: bool,
 	monitor_pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>,
@@ -457,6 +458,7 @@ impl Channel {
 
 			resend_order: RAACommitmentOrder::CommitmentFirst,
 
+			monitor_pending_funding_locked: false,
 			monitor_pending_revoke_and_ack: false,
 			monitor_pending_commitment_signed: false,
 			monitor_pending_forwards: Vec::new(),
@@ -672,6 +674,7 @@ impl Channel {
 
 			resend_order: RAACommitmentOrder::CommitmentFirst,
 
+			monitor_pending_funding_locked: false,
 			monitor_pending_revoke_and_ack: false,
 			monitor_pending_commitment_signed: false,
 			monitor_pending_forwards: Vec::new(),
@@ -2352,11 +2355,29 @@ impl Channel {
 	/// Indicates that the latest ChannelMonitor update has been committed by the client
 	/// successfully and we should restore normal operation. Returns messages which should be sent
 	/// to the remote side.
-	pub fn monitor_updating_restored(&mut self) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool) {
+	pub fn monitor_updating_restored(&mut self) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
 		self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);
+
 		let needs_broadcast_safe = self.channel_state & (ChannelState::FundingSent as u32) != 0 && self.channel_outbound;
 
+		// Because we will never generate a FundingBroadcastSafe event when we're in
+		// MonitorUpdateFailed, if we assume the user only broadcast the funding transaction when
+		// they received the FundingBroadcastSafe event, we can only ever hit
+		// monitor_pending_funding_locked when we're an inbound channel which failed to persist the
+		// monitor on funding_created, and we even got the funding transaction confirmed before the
+		// monitor was persisted.
+		let funding_locked = if self.monitor_pending_funding_locked {
+			assert!(!self.channel_outbound, "Funding transaction broadcast without FundingBroadcastSafe!");
+			self.monitor_pending_funding_locked = false;
+			let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
+			let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
+			Some(msgs::FundingLocked {
+				channel_id: self.channel_id(),
+				next_per_commitment_point: next_per_commitment_point,
+			})
+		} else { None };
+
 		let mut forwards = Vec::new();
 		mem::swap(&mut forwards, &mut self.monitor_pending_forwards);
 		let mut failures = Vec::new();
@@ -2365,7 +2386,7 @@ impl Channel {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
-			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe);
+			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
 		}
 
 		let raa = if self.monitor_pending_revoke_and_ack {
@@ -2383,7 +2404,7 @@ impl Channel {
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
 			match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe)
+		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
 	}
 
 	pub fn update_fee(&mut self, fee_estimator: &FeeEstimator, msg: &msgs::UpdateFee) -> Result<(), ChannelError> {
@@ -2493,7 +2514,9 @@ impl Channel {
 		} else { None };
 
 		if self.channel_state & (ChannelState::FundingSent as u32) == ChannelState::FundingSent as u32 {
-			if self.channel_state & ChannelState::OurFundingLocked as u32 == 0 {
+			// If we're waiting on a monitor update, we shouldn't re-send any funding_locked's.
+			if self.channel_state & (ChannelState::OurFundingLocked as u32) == 0 ||
+					self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
 				if msg.next_remote_commitment_number != 0 {
 					return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet"));
 				}
@@ -2984,12 +3007,17 @@ impl Channel {
 					//they can by sending two revoke_and_acks back-to-back, but not really). This appears to be
 					//a protocol oversight, but I assume I'm just missing something.
 					if need_commitment_update {
-						let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
-						let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
-						return Ok(Some(msgs::FundingLocked {
-							channel_id: self.channel_id,
-							next_per_commitment_point: next_per_commitment_point,
-						}));
+						if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
+							let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
+							let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
+							return Ok(Some(msgs::FundingLocked {
+								channel_id: self.channel_id,
+								next_per_commitment_point: next_per_commitment_point,
+							}));
+						} else {
+							self.monitor_pending_funding_locked = true;
+							return Ok(None);
+						}
 					}
 				}
 			}
@@ -3708,6 +3736,7 @@ impl Writeable for Channel {
 			RAACommitmentOrder::RevokeAndACKFirst => 1u8.write(writer)?,
 		}
 
+		self.monitor_pending_funding_locked.write(writer)?;
 		self.monitor_pending_revoke_and_ack.write(writer)?;
 		self.monitor_pending_commitment_signed.write(writer)?;
 
@@ -3875,6 +3904,7 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 			_ => return Err(DecodeError::InvalidValue),
 		};
 
+		let monitor_pending_funding_locked = Readable::read(reader)?;
 		let monitor_pending_revoke_and_ack = Readable::read(reader)?;
 		let monitor_pending_commitment_signed = Readable::read(reader)?;
 
@@ -3971,6 +4001,7 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 
 			resend_order,
 
+			monitor_pending_funding_locked,
 			monitor_pending_revoke_and_ack,
 			monitor_pending_commitment_signed,
 			monitor_pending_forwards,
diff --git a/src/ln/channelmanager.rs b/src/ln/channelmanager.rs
index 3336aa31d..afabcd7cc 100644
--- a/src/ln/channelmanager.rs
+++ b/src/ln/channelmanager.rs
@@ -1684,7 +1684,7 @@ impl ChannelManager {
 							ChannelMonitorUpdateErr::TemporaryFailure => true,
 						}
 					} else {
-						let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe) = channel.monitor_updating_restored();
+						let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe, funding_locked) = channel.monitor_updating_restored();
 						if !pending_forwards.is_empty() {
 							htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards));
 						}
@@ -1722,6 +1722,19 @@ impl ChannelManager {
 								user_channel_id: channel.get_user_id(),
 							});
 						}
+						if let Some(msg) = funding_locked {
+							pending_msg_events.push(events::MessageSendEvent::SendFundingLocked {
+								node_id: channel.get_their_node_id(),
+								msg,
+							});
+							if let Some(announcement_sigs) = self.get_announcement_sigs(channel) {
+								pending_msg_events.push(events::MessageSendEvent::SendAnnouncementSignatures {
+									node_id: channel.get_their_node_id(),
+									msg: announcement_sigs,
+								});
+							}
+							short_to_id.insert(channel.get_short_channel_id().unwrap(), channel.channel_id());
+						}
 						true
 					}
 				} else { true }
@@ -1790,7 +1803,7 @@ impl ChannelManager {
 	}
 
 	fn internal_funding_created(&self, their_node_id: &PublicKey, msg: &msgs::FundingCreated) -> Result<(), MsgHandleErrInternal> {
-		let ((funding_msg, monitor_update), chan) = {
+		let ((funding_msg, monitor_update), mut chan) = {
 			let mut channel_lock = self.channel_state.lock().unwrap();
 			let channel_state = channel_lock.borrow_parts();
 			match channel_state.by_id.entry(msg.temporary_channel_id.clone()) {
@@ -1806,8 +1819,23 @@ impl ChannelManager {
 		};
 		// Because we have exclusive ownership of the channel here we can release the channel_state
 		// lock before add_update_monitor
-		if let Err(_e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) {
-			unimplemented!();
+		if let Err(e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) {
+			match e {
+				ChannelMonitorUpdateErr::PermanentFailure => {
+					// Note that we reply with the new channel_id in error messages if we gave up on the
+					// channel, not the temporary_channel_id. This is compatible with ourselves, but the
+					// spec is somewhat ambiguous here. Not a huge deal since we'll send error messages for
+					// any messages referencing a previously-closed channel anyway.
+					return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", funding_msg.channel_id, chan.force_shutdown(), None));
+				},
+				ChannelMonitorUpdateErr::TemporaryFailure => {
+					// There's no problem signing a counterparty's funding transaction if our monitor
+					// hasn't persisted to disk yet - we can't lose money on a transaction that we haven't
+					// accepted payment from yet. We do, however, need to wait to send our funding_locked
+					// until we have persisted our monitor.
+					chan.monitor_update_failed(false, false, Vec::new(), Vec::new());
+				},
+			}
 		}
 		let mut channel_state_lock = self.channel_state.lock().unwrap();
 		let channel_state = channel_state_lock.borrow_parts();
diff --git a/src/ln/functional_test_utils.rs b/src/ln/functional_test_utils.rs
index 5ea6961bd..001da7009 100644
--- a/src/ln/functional_test_utils.rs
+++ b/src/ln/functional_test_utils.rs
@@ -221,31 +221,37 @@ pub fn create_chan_between_nodes_with_value_init(node_a: &Node, node_b: &Node, c
 	tx
 }
 
-pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
-	confirm_transaction(&node_b.chain_monitor, &tx, tx.version);
-	node_a.node.handle_funding_locked(&node_b.node.get_our_node_id(), &get_event_msg!(node_b, MessageSendEvent::SendFundingLocked, node_a.node.get_our_node_id())).unwrap();
+pub fn create_chan_between_nodes_with_value_confirm_first(node_recv: &Node, node_conf: &Node, tx: &Transaction) {
+	confirm_transaction(&node_conf.chain_monitor, &tx, tx.version);
+	node_recv.node.handle_funding_locked(&node_conf.node.get_our_node_id(), &get_event_msg!(node_conf, MessageSendEvent::SendFundingLocked, node_recv.node.get_our_node_id())).unwrap();
+}
 
+pub fn create_chan_between_nodes_with_value_confirm_second(node_recv: &Node, node_conf: &Node) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
 	let channel_id;
-
-	confirm_transaction(&node_a.chain_monitor, &tx, tx.version);
-	let events_6 = node_a.node.get_and_clear_pending_msg_events();
+	let events_6 = node_conf.node.get_and_clear_pending_msg_events();
 	assert_eq!(events_6.len(), 2);
 	((match events_6[0] {
 		MessageSendEvent::SendFundingLocked { ref node_id, ref msg } => {
 			channel_id = msg.channel_id.clone();
-			assert_eq!(*node_id, node_b.node.get_our_node_id());
+			assert_eq!(*node_id, node_recv.node.get_our_node_id());
 			msg.clone()
 		},
 		_ => panic!("Unexpected event"),
 	}, match events_6[1] {
 		MessageSendEvent::SendAnnouncementSignatures { ref node_id, ref msg } => {
-			assert_eq!(*node_id, node_b.node.get_our_node_id());
+			assert_eq!(*node_id, node_recv.node.get_our_node_id());
 			msg.clone()
 		},
 		_ => panic!("Unexpected event"),
 	}), channel_id)
 }
 
+pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
+	create_chan_between_nodes_with_value_confirm_first(node_a, node_b, tx);
+	confirm_transaction(&node_a.chain_monitor, &tx, tx.version);
+	create_chan_between_nodes_with_value_confirm_second(node_b, node_a)
+}
+
 pub fn create_chan_between_nodes_with_value_a(node_a: &Node, node_b: &Node, channel_value: u64, push_msat: u64, a_flags: LocalFeatures, b_flags: LocalFeatures) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32], Transaction) {
 	let tx = create_chan_between_nodes_with_value_init(node_a, node_b, channel_value, push_msat, a_flags, b_flags);
 	let (msgs, chan_id) = create_chan_between_nodes_with_value_confirm(node_a, node_b, &tx);