From: Matt Corallo
Date: Tue, 5 Mar 2019 20:36:11 +0000 (-0500)
Subject: Rewrite Channel resend tracking to make it much more reliable
X-Git-Tag: v0.0.12~212^2~1
X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=50e16c9573ef5af43abf4837e6199c6526429d42;p=rust-lightning

Rewrite Channel resend tracking to make it much more reliable

Resending revoke_and_ack and commitment_signed (+update) messages
after monitor-update-failure or disconnection has been a highly
unreliable part of our codebase for some time (as evidenced by the
number of bugs caught in the chanmon_fail_consistency fuzz target).
This is due to the code's rather ad-hoc nature: it checks a number
of different flags to try to deduce which messages were and were
not delivered, and works out what to resend from there.

Instead, this commit rewrites it to simply keep track of the order
in which messages were originally generated, as we always resend in
the originally-generated order. I'm anticipating this will be way
more robust than the old code, in addition to being much simpler.
---
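As a note on the approach, here is a minimal standalone sketch of the invariant the new field relies on (hypothetical, simplified types; not the actual Channel struct). Because at most one commitment_signed and one revoke_and_ack can be in flight at any time, recording the opposite of whichever message we just generated is enough: if a message of the other type is still pending on resend, it must have been generated earlier and so goes first.

// Simplified illustration only -- not the real rust-lightning types.
#[derive(Clone, Copy, Debug, PartialEq)]
enum RAACommitmentOrder {
    CommitmentFirst,
    RevokeAndACKFirst,
}

struct ResendTracker {
    resend_order: RAACommitmentOrder,
}

impl ResendTracker {
    // Only one commitment_signed and one revoke_and_ack can be in flight at
    // once, so when we generate one message type, any un-acked message of the
    // *other* type must have been generated earlier and must be resent first.
    fn generated_commitment_signed(&mut self) {
        self.resend_order = RAACommitmentOrder::RevokeAndACKFirst;
    }
    fn generated_revoke_and_ack(&mut self) {
        self.resend_order = RAACommitmentOrder::CommitmentFirst;
    }
    fn order_to_resend(&self) -> RAACommitmentOrder {
        self.resend_order
    }
}

On monitor-update restoration or reconnection, whatever messages are pending are then simply replayed in resend_order, with no flag-based deduction involved.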
diff --git a/src/ln/channel.rs b/src/ln/channel.rs
index b5a2f5ec2..e73abf92c 100644
--- a/src/ln/channel.rs
+++ b/src/ln/channel.rs
@@ -237,19 +237,21 @@ pub(super) struct Channel {
 	cur_local_commitment_transaction_number: u64,
 	cur_remote_commitment_transaction_number: u64,
 	value_to_self_msat: u64, // Excluding all pending_htlcs, excluding fees
-	/// Upon receipt of a channel_reestablish we have to figure out whether to send a
-	/// revoke_and_ack first or a commitment update first. Generally, we prefer to send
-	/// revoke_and_ack first, but if we had a pending commitment update of our own waiting on a
-	/// remote revoke when we received the latest commitment update from the remote we have to make
-	/// sure that commitment update gets resent first.
-	received_commitment_while_awaiting_raa: bool,
 
 	pending_inbound_htlcs: Vec<InboundHTLCOutput>,
 	pending_outbound_htlcs: Vec<OutboundHTLCOutput>,
 	holding_cell_htlc_updates: Vec<HTLCUpdateAwaitingACK>,
 
+	/// When resending CS/RAA messages on channel monitor restoration or on reconnect, we always
+	/// need to ensure we resend them in the order we originally generated them. Note that because
+	/// there can only ever be one in-flight CS and/or one in-flight RAA at any time, it is
+	/// sufficient to simply set this to the opposite of any message we are generating as we
+	/// generate it, i.e. when we generate a CS, we set this to RAAFirst as, if there is a pending
+	/// in-flight RAA to resend, it will have been the first thing we generated, and thus we should
+	/// send it first.
+	resend_order: RAACommitmentOrder,
+
 	monitor_pending_revoke_and_ack: bool,
 	monitor_pending_commitment_signed: bool,
-	monitor_pending_order: Option<RAACommitmentOrder>,
 	monitor_pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>,
 	monitor_pending_failures: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>,
 
@@ -457,7 +459,6 @@ impl Channel {
 			cur_local_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
 			cur_remote_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
 			value_to_self_msat: channel_value_satoshis * 1000 - push_msat,
-			received_commitment_while_awaiting_raa: false,
 
 			pending_inbound_htlcs: Vec::new(),
 			pending_outbound_htlcs: Vec::new(),
@@ -468,9 +469,10 @@ impl Channel {
 			next_remote_htlc_id: 0,
 			channel_update_count: 1,
 
+			resend_order: RAACommitmentOrder::CommitmentFirst,
+
 			monitor_pending_revoke_and_ack: false,
 			monitor_pending_commitment_signed: false,
-			monitor_pending_order: None,
 			monitor_pending_forwards: Vec::new(),
 			monitor_pending_failures: Vec::new(),
 
@@ -646,7 +648,6 @@ impl Channel {
 			cur_local_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
 			cur_remote_commitment_transaction_number: INITIAL_COMMITMENT_NUMBER,
 			value_to_self_msat: msg.push_msat,
-			received_commitment_while_awaiting_raa: false,
 
 			pending_inbound_htlcs: Vec::new(),
 			pending_outbound_htlcs: Vec::new(),
@@ -657,9 +658,10 @@ impl Channel {
 			next_remote_htlc_id: 0,
 			channel_update_count: 1,
 
+			resend_order: RAACommitmentOrder::CommitmentFirst,
+
 			monitor_pending_revoke_and_ack: false,
 			monitor_pending_commitment_signed: false,
-			monitor_pending_order: None,
 			monitor_pending_forwards: Vec::new(),
 			monitor_pending_failures: Vec::new(),
 
@@ -1812,12 +1814,6 @@ impl Channel {
 			}
 		}
 
-		if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
-			// This is a response to our post-monitor-failed unfreeze messages, so we can clear the
-			// monitor_pending_order requirement as we won't re-send the monitor_pending messages.
-			self.monitor_pending_order = None;
-		}
-
 		self.channel_monitor.provide_latest_local_commitment_tx_info(local_commitment_tx.0, local_keys, self.feerate_per_kw, htlcs_and_sigs);
 
 		for htlc in self.pending_inbound_htlcs.iter_mut() {
@@ -1840,14 +1836,13 @@ impl Channel {
 		self.cur_local_commitment_transaction_number -= 1;
 		self.last_local_commitment_txn = new_local_commitment_txn;
 
-		self.received_commitment_while_awaiting_raa = (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32)) != 0;
+		// Note that if we need_our_commitment & !AwaitingRemoteRevoke we'll call
+		// send_commitment_no_status_check() next which will reset this to RAAFirst.
+		self.resend_order = RAACommitmentOrder::CommitmentFirst;
 
 		if (self.channel_state & ChannelState::MonitorUpdateFailed as u32) != 0 {
 			// In case we initially failed monitor updating without requiring a response, we need
 			// to make sure the RAA gets sent first.
-			if !self.monitor_pending_commitment_signed {
-				self.monitor_pending_order = Some(RAACommitmentOrder::RevokeAndACKFirst);
-			}
 			self.monitor_pending_revoke_and_ack = true;
 			if need_our_commitment && (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32)) == 0 {
 				// If we were going to send a commitment_signed after the RAA, go ahead and do all
@@ -2021,12 +2016,6 @@ impl Channel {
 		self.their_prev_commitment_point = self.their_cur_commitment_point;
 		self.their_cur_commitment_point = Some(msg.next_per_commitment_point);
 		self.cur_remote_commitment_transaction_number -= 1;
-		self.received_commitment_while_awaiting_raa = false;
-		if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
-			// This is a response to our post-monitor-failed unfreeze messages, so we can clear the
-			// monitor_pending_order requirement as we won't re-send the monitor_pending messages.
-			self.monitor_pending_order = None;
-		}
 
 		log_trace!(self, "Updating HTLCs on receipt of RAA...");
 		let mut to_forward_infos = Vec::new();
@@ -2144,7 +2133,7 @@ impl Channel {
 				// When the monitor updating is restored we'll call get_last_commitment_update(),
 				// which does not update state, but we're definitely now awaiting a remote revoke
 				// before we can step forward any more, so set it here.
-				self.channel_state |= ChannelState::AwaitingRemoteRevoke as u32;
+				self.send_commitment_no_status_check()?;
 			}
 			self.monitor_pending_forwards.append(&mut to_forward_infos);
 			self.monitor_pending_failures.append(&mut revoked_htlcs);
@@ -2292,15 +2281,13 @@ impl Channel {
 	/// Indicates that a ChannelMonitor update failed to be stored by the client and further
 	/// updates are partially paused.
 	/// This must be called immediately after the call which generated the ChannelMonitor update
-	/// which failed, with the order argument set to the type of call it represented (ie a
-	/// commitment update or a revoke_and_ack generation). The messages which were generated from
-	/// that original call must *not* have been sent to the remote end, and must instead have been
-	/// dropped. They will be regenerated when monitor_updating_restored is called.
-	pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder, resend_raa: bool, resend_commitment: bool, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>) {
+	/// which failed. The messages which were generated from that call must *not* have been sent
+	/// to the remote end, and must instead have been dropped. They will be regenerated when
+	/// monitor_updating_restored is called.
+	pub fn monitor_update_failed(&mut self, resend_raa: bool, resend_commitment: bool, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>) {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
 		self.monitor_pending_revoke_and_ack = resend_raa;
 		self.monitor_pending_commitment_signed = resend_commitment;
-		self.monitor_pending_order = Some(order);
 		assert!(self.monitor_pending_forwards.is_empty());
 		mem::swap(&mut pending_forwards, &mut self.monitor_pending_forwards);
 		assert!(self.monitor_pending_failures.is_empty());
@@ -2321,7 +2308,6 @@ impl Channel {
 		mem::swap(&mut failures, &mut self.monitor_pending_failures);
 
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
-			// Leave monitor_pending_order so we can order our channel_reestablish responses
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
 			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures);
@@ -2336,7 +2322,7 @@ impl Channel {
 		self.monitor_pending_revoke_and_ack = false;
 		self.monitor_pending_commitment_signed = false;
-		let order = self.monitor_pending_order.clone().unwrap();
+		let order = self.resend_order.clone();
 		log_trace!(self, "Restored monitor updating resulting in {} commitment update and {} RAA, with {} first",
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
@@ -2499,12 +2485,6 @@ impl Channel {
 			})
 		} else { None };
 
-		let order = self.monitor_pending_order.clone().unwrap_or(if self.received_commitment_while_awaiting_raa {
-			RAACommitmentOrder::CommitmentFirst
-		} else {
-			RAACommitmentOrder::RevokeAndACKFirst
-		});
-
 		if msg.next_local_commitment_number == our_next_remote_commitment_number {
 			if required_revoke.is_some() {
 				log_debug!(self, "Reconnected channel {} with only lost outbound RAA", log_bytes!(self.channel_id()));
 			} else {
 				log_debug!(self, "Reconnected channel {} with no loss", log_bytes!(self.channel_id()));
 			}
 
-			if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 &&
-					self.monitor_pending_order.is_none() { // monitor_pending_order indicates we're waiting on a response to a unfreeze
+			if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
 				// We're up-to-date and not waiting on a remote revoke (if we are our
 				// channel_reestablish should result in them sending a revoke_and_ack), but we may
 				// have received some updates while we were disconnected. Free the holding cell
@@ -2521,11 +2500,11 @@ impl Channel {
 				match self.free_holding_cell_htlcs() {
 					Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
 					Err(ChannelError::Ignore(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
-					Ok(Some((commitment_update, channel_monitor))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(channel_monitor), order, shutdown_msg)),
-					Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, order, shutdown_msg)),
+					Ok(Some((commitment_update, channel_monitor))) => return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(channel_monitor), self.resend_order.clone(), shutdown_msg)),
+					Ok(None) => return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg)),
 				}
 			} else {
-				return Ok((resend_funding_locked, required_revoke, None, None, order, shutdown_msg));
+				return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
 			}
 		} else if msg.next_local_commitment_number == our_next_remote_commitment_number - 1 {
 			if required_revoke.is_some() {
@@ -2536,10 +2515,10 @@ impl Channel {
 
 			if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
 				self.monitor_pending_commitment_signed = true;
-				return Ok((resend_funding_locked, None, None, None, order, shutdown_msg));
+				return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), shutdown_msg));
 			}
 
-			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update()), None, order, shutdown_msg));
+			return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update()), None, self.resend_order.clone(), shutdown_msg));
 		} else {
 			return Err(ChannelError::Close("Peer attempted to reestablish channel with a very old remote commitment transaction"));
 		}
@@ -3360,6 +3339,7 @@ impl Channel {
 				htlc.state = OutboundHTLCState::AwaitingRemovedRemoteRevoke(fail_reason);
 			}
 		}
+		self.resend_order = RAACommitmentOrder::RevokeAndACKFirst;
 
 		let (res, remote_commitment_tx, htlcs) = match self.send_commitment_no_state_update() {
 			Ok((res, (remote_commitment_tx, mut htlcs))) => {
@@ -3570,8 +3550,6 @@ impl Writeable for Channel {
 		self.cur_remote_commitment_transaction_number.write(writer)?;
 		self.value_to_self_msat.write(writer)?;
 
-		self.received_commitment_while_awaiting_raa.write(writer)?;
-
 		let mut dropped_inbound_htlcs = 0;
 		for htlc in self.pending_inbound_htlcs.iter() {
 			if let InboundHTLCState::RemoteAnnounced(_) = htlc.state {
@@ -3671,13 +3649,13 @@ impl Writeable for Channel {
 			}
 		}
 
+		match self.resend_order {
+			RAACommitmentOrder::CommitmentFirst => 0u8.write(writer)?,
+			RAACommitmentOrder::RevokeAndACKFirst => 1u8.write(writer)?,
+		}
+
 		self.monitor_pending_revoke_and_ack.write(writer)?;
 		self.monitor_pending_commitment_signed.write(writer)?;
-		match self.monitor_pending_order {
-			None => 0u8.write(writer)?,
-			Some(RAACommitmentOrder::CommitmentFirst) => 1u8.write(writer)?,
-			Some(RAACommitmentOrder::RevokeAndACKFirst) => 2u8.write(writer)?,
-		}
 
 		(self.monitor_pending_forwards.len() as u64).write(writer)?;
 		for &(ref pending_forward, ref htlc_id) in self.monitor_pending_forwards.iter() {
@@ -3775,8 +3753,6 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 		let cur_remote_commitment_transaction_number = Readable::read(reader)?;
 		let value_to_self_msat = Readable::read(reader)?;
 
-		let received_commitment_while_awaiting_raa = Readable::read(reader)?;
-
 		let pending_inbound_htlc_count: u64 = Readable::read(reader)?;
 		let mut pending_inbound_htlcs = Vec::with_capacity(cmp::min(pending_inbound_htlc_count as usize, OUR_MAX_HTLCS as usize));
 		for _ in 0..pending_inbound_htlc_count {
@@ -3839,16 +3815,15 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 			});
 		}
 
-		let monitor_pending_revoke_and_ack = Readable::read(reader)?;
-		let monitor_pending_commitment_signed = Readable::read(reader)?;
-
-		let monitor_pending_order = match <u8 as Readable<R>>::read(reader)? {
-			0 => None,
-			1 => Some(RAACommitmentOrder::CommitmentFirst),
-			2 => Some(RAACommitmentOrder::RevokeAndACKFirst),
+		let resend_order = match <u8 as Readable<R>>::read(reader)? {
+			0 => RAACommitmentOrder::CommitmentFirst,
+			1 => RAACommitmentOrder::RevokeAndACKFirst,
 			_ => return Err(DecodeError::InvalidValue),
 		};
 
+		let monitor_pending_revoke_and_ack = Readable::read(reader)?;
+		let monitor_pending_commitment_signed = Readable::read(reader)?;
+
 		let monitor_pending_forwards_count: u64 = Readable::read(reader)?;
 		let mut monitor_pending_forwards = Vec::with_capacity(cmp::min(monitor_pending_forwards_count as usize, OUR_MAX_HTLCS as usize));
 		for _ in 0..monitor_pending_forwards_count {
@@ -3935,14 +3910,14 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 			cur_remote_commitment_transaction_number,
 			value_to_self_msat,
 
-			received_commitment_while_awaiting_raa,
 
 			pending_inbound_htlcs,
 			pending_outbound_htlcs,
 			holding_cell_htlc_updates,
 
+			resend_order,
+
 			monitor_pending_revoke_and_ack,
 			monitor_pending_commitment_signed,
-			monitor_pending_order,
 			monitor_pending_forwards,
 			monitor_pending_failures,

diff --git a/src/ln/channelmanager.rs b/src/ln/channelmanager.rs
index e3de3c09e..3ffd080e9 100644
--- a/src/ln/channelmanager.rs
+++ b/src/ln/channelmanager.rs
@@ -494,7 +494,7 @@ macro_rules! handle_monitor_err {
 				if !$resend_raa {
 					debug_assert!($action_type == RAACommitmentOrder::CommitmentFirst || !$resend_commitment);
 				}
-				$entry.get_mut().monitor_update_failed($action_type, $resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
+				$entry.get_mut().monitor_update_failed($resend_raa, $resend_commitment, $failed_forwards, $failed_fails);
 				Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor"), *$entry.key()))
 			},
 		}
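A side note on the serialization change: resend_order is now written as a bare byte (0 or 1) before the monitor_pending_* flags, whereas the old format wrote an Option byte (0/1/2) for monitor_pending_order after them, so a Channel serialized by older code will generally not read back correctly under the new format. Below is a minimal sketch of the new encoding, using std::io in place of rust-lightning's Writer/Readable traits; the helper names are hypothetical.

// Standalone sketch of the new resend_order on-disk encoding, mirroring the
// Writeable/ReadableArgs hunks above (not rust-lightning's actual ser code).
use std::io::{self, Read, Write};

#[derive(Clone, Copy, Debug, PartialEq)]
enum RAACommitmentOrder { CommitmentFirst, RevokeAndACKFirst }

fn write_order<W: Write>(w: &mut W, order: RAACommitmentOrder) -> io::Result<()> {
    // CommitmentFirst => 0u8, RevokeAndACKFirst => 1u8, as in the diff.
    let byte = match order {
        RAACommitmentOrder::CommitmentFirst => 0u8,
        RAACommitmentOrder::RevokeAndACKFirst => 1u8,
    };
    w.write_all(&[byte])
}

fn read_order<R: Read>(r: &mut R) -> io::Result<RAACommitmentOrder> {
    let mut byte = [0u8; 1];
    r.read_exact(&mut byte)?;
    match byte[0] {
        0 => Ok(RAACommitmentOrder::CommitmentFirst),
        1 => Ok(RAACommitmentOrder::RevokeAndACKFirst),
        // The real reader returns DecodeError::InvalidValue here.
        _ => Err(io::Error::new(io::ErrorKind::InvalidData, "bad resend_order byte")),
    }
}

fn main() -> io::Result<()> {
    // Round-trip check of the encoding.
    let mut buf = Vec::new();
    write_order(&mut buf, RAACommitmentOrder::RevokeAndACKFirst)?;
    assert_eq!(read_order(&mut buf.as_slice())?, RAACommitmentOrder::RevokeAndACKFirst);
    Ok(())
}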