From: Matt Corallo
Date: Thu, 19 Nov 2020 21:09:56 +0000 (-0500)
Subject: Free holding cell on monitor-updating-restored when there's no upd
X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=eb0b66460da45d65922b0ef703e1cd503d9b5206;p=rust-lightning

Free holding cell on monitor-updating-restored when there's no upd

If there are no pending channel update messages when monitor updating
is restored (though there may be an RAA to send), and we're connected
to our peer and not awaiting a remote RAA, we need to free anything in
our holding cell.

Without this, chanmon_fail_consistency was able to find a stuck
condition where we sit on an HTLC failure in our holding cell and
don't ever handle it (at least until we have other actions to take
which empty the holding cell).

Still, this approach sucks - it introduces reentrancy in a
particularly dangerous form:
 a) we re-enter user code around monitor updates while being called
    from user code around monitor updates, making deadlocks very
    likely (in fact, our current tests have a bug here!),
 b) the re-entrancy only occurs in a very rare case, making it likely
    users will not hit it in testing, only deadlocking in production.

I'm not entirely sure what the alternative is, however - we could move
to a world where we poll for holding cell events that can be freed on
our 1-minute timer, but we still have a super rare reentrancy case,
just in timer_chan_freshness_every_min() instead.
---

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index d9c7fc5ad..0ce5561ef 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -2756,7 +2756,10 @@ impl<Signer: Sign> Channel<Signer> {
 	/// Indicates that the latest ChannelMonitor update has been committed by the client
 	/// successfully and we should restore normal operation. Returns messages which should be sent
 	/// to the remote side.
-	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) where L::Target: Logger {
+	pub fn monitor_updating_restored<L: Deref>(&mut self, logger: &L) -> (
+		Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Option<ChannelMonitorUpdate>,
+		Vec<(PendingHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, Vec<(HTLCSource, PaymentHash)>,
+		bool, Option<msgs::FundingLocked>) where L::Target: Logger {
 		assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
 		self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);

@@ -2786,25 +2789,39 @@ impl<Signer: Sign> Channel<Signer> {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
 			self.monitor_pending_revoke_and_ack = false;
 			self.monitor_pending_commitment_signed = false;
-			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
+			return (None, None, RAACommitmentOrder::RevokeAndACKFirst, None, forwards, failures, Vec::new(), needs_broadcast_safe, funding_locked);
 		}

 		let raa = if self.monitor_pending_revoke_and_ack {
 			Some(self.get_last_revoke_and_ack())
 		} else { None };
-		let commitment_update = if self.monitor_pending_commitment_signed {
+		let mut commitment_update = if self.monitor_pending_commitment_signed {
 			Some(self.get_last_commitment_update(logger))
 		} else { None };
+		let mut order = self.resend_order.clone();

 		self.monitor_pending_revoke_and_ack = false;
 		self.monitor_pending_commitment_signed = false;
-		let order = self.resend_order.clone();
+
+		let mut htlcs_failed_to_forward = Vec::new();
+		let mut chanmon_update = None;
+		if commitment_update.is_none() && self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32) == 0 {
+			order = RAACommitmentOrder::RevokeAndACKFirst;
+
+			let (update_opt, mut failed_htlcs) = self.free_holding_cell_htlcs(logger).unwrap();
+			htlcs_failed_to_forward.append(&mut failed_htlcs);
+			if let Some((com_update, mon_update)) = update_opt {
+				commitment_update = Some(com_update);
+				chanmon_update = Some(mon_update);
+			}
+		}
+
 		log_trace!(logger, "Restored monitor updating resulting in {}{} commitment update and {} RAA, with {} first",
 			if needs_broadcast_safe { "a funding broadcast safe, " } else { "" },
 			if commitment_update.is_some() { "a" } else { "no" },
 			if raa.is_some() { "an" } else { "no" },
 			match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-		(raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
+		(raa, commitment_update, order, chanmon_update, forwards, failures, htlcs_failed_to_forward, needs_broadcast_safe, funding_locked)
 	}

 	pub fn update_fee<F: Deref>(&mut self, fee_estimator: &F, msg: &msgs::UpdateFee) -> Result<(), ChannelError>
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index afb8d3e82..8ae2915ae 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -745,20 +745,30 @@ macro_rules! maybe_break_monitor_err {
 	}
 }

 macro_rules! handle_chan_restoration_locked {
-	($self: expr, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
-	 $raa: expr, $commitment_update: expr, $order: expr,
+	($self: ident, $channel_lock: expr, $channel_state: expr, $channel_entry: expr,
+	 $raa: expr, $commitment_update: expr, $order: expr, $chanmon_update: expr,
 	 $pending_forwards: expr, $broadcast_safe: expr, $funding_locked: expr) => { {
 		let mut htlc_forwards = None;
 		let mut funding_broadcast_safe = None;
 		let counterparty_node_id = $channel_entry.get().get_counterparty_node_id();
+		let channel_id = $channel_entry.get().channel_id();

-		{
+		let res = loop {
 			if !$pending_forwards.is_empty() {
 				htlc_forwards = Some(($channel_entry.get().get_short_channel_id().expect("We can't have pending forwards before funding confirmation"),
 					$channel_entry.get().get_funding_txo().unwrap(), $pending_forwards));
 			}

 			macro_rules! handle_cs { () => {
+				if let Some(monitor_update) = $chanmon_update {
+					assert!($order == RAACommitmentOrder::RevokeAndACKFirst);
+					assert!(!$broadcast_safe);
+					assert!($funding_locked.is_none());
+					assert!($commitment_update.is_some());
+					if let Err(e) = $self.chain_monitor.update_channel($channel_entry.get().get_funding_txo().unwrap(), monitor_update) {
+						break handle_monitor_err!($self, e, $channel_state, $channel_entry, RAACommitmentOrder::CommitmentFirst, false, true);
+					}
+				}
 				if let Some(update) = $commitment_update {
 					$channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
 						node_id: counterparty_node_id,
@@ -801,21 +811,26 @@ macro_rules! handle_chan_restoration_locked {
 						msg: announcement_sigs,
 					});
 				}
-				$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), $channel_entry.get().channel_id());
+				$channel_state.short_to_id.insert($channel_entry.get().get_short_channel_id().unwrap(), channel_id);
 			}
-		}
-		(htlc_forwards, funding_broadcast_safe)
+			break Ok(());
+		};
+
+		(htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id)
 	} }
 }

 macro_rules! post_handle_chan_restoration {
-	($self: expr, $locked_res: expr, $pending_failures: expr) => { {
-		let (htlc_forwards, funding_broadcast_safe) = $locked_res;
+	($self: ident, $locked_res: expr, $pending_failures: expr, $forwarding_failures: expr) => { {
+		let (htlc_forwards, funding_broadcast_safe, res, channel_id, counterparty_node_id) = $locked_res;
+
+		let _ = handle_error!($self, res, counterparty_node_id);

 		if let Some(ev) = funding_broadcast_safe {
 			$self.pending_events.lock().unwrap().push(ev);
 		}

+		$self.fail_holding_cell_htlcs($forwarding_failures, channel_id);
 		for failure in $pending_failures.drain(..) {
 			$self.fail_htlc_backwards_internal($self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
 		}
@@ -2332,6 +2347,12 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
 	/// exists largely only to prevent races between this and concurrent update_monitor calls.
 	///
+	/// In some cases, this may generate a monitor update, resulting in a call to the
+	/// `chain::Watch`'s `update_channel` method for the same channel monitor which is being
+	/// notified of a successful update here. Because of this, please be very careful with
+	/// reentrancy bugs! It is incredibly easy to write an implementation of `update_channel` which
+	/// will take a lock that is also held when calling this method.
+	///
 	/// Thus, the anticipated use is, at a high level:
 	/// 1) You register a chain::Watch with this ChannelManager,
 	/// 2) it stores each update to disk, and begins updating any remote (eg watchtower) copies of
@@ -2343,7 +2364,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	pub fn channel_monitor_updated(&self, funding_txo: &OutPoint, highest_applied_update_id: u64) {
 		let _persistence_guard = PersistenceNotifierGuard::new(&self.total_consistency_lock, &self.persistence_notifier);

-		let (mut pending_failures, chan_restoration_res) = {
+		let (mut pending_failures, forwarding_failures, chan_restoration_res) = {
 			let mut channel_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_lock;
 			let mut channel = match channel_state.by_id.entry(funding_txo.to_channel_id()) {
@@ -2354,10 +2375,10 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 				return;
 			}

-			let (raa, commitment_update, order, pending_forwards, pending_failures, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
-			(pending_failures, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, pending_forwards, needs_broadcast_safe, funding_locked))
+			let (raa, commitment_update, order, chanmon_update, pending_forwards, pending_failures, forwarding_failures, needs_broadcast_safe, funding_locked) = channel.get_mut().monitor_updating_restored(&self.logger);
+			(pending_failures, forwarding_failures, handle_chan_restoration_locked!(self, channel_lock, channel_state, channel, raa, commitment_update, order, chanmon_update, pending_forwards, needs_broadcast_safe, funding_locked))
 		};
-		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures);
+		post_handle_chan_restoration!(self, chan_restoration_res, pending_failures, forwarding_failures);
 	}

 	fn internal_open_channel(&self, counterparty_node_id: &PublicKey, their_features: InitFeatures, msg: &msgs::OpenChannel) -> Result<(), MsgHandleErrInternal> {
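
To make the reentrancy warning above concrete, here is a minimal, self-contained, std-only sketch of the deadlock pattern. MyWatcher, MyManager, and their methods are hypothetical stand-ins, not the real chain::Watch or ChannelManager API: a watcher that still holds its own lock while reporting a completed monitor update gets re-entered through update_channel on the rare holding-cell path added here, and the second lock() on a non-reentrant std::sync::Mutex never succeeds.

use std::sync::Mutex;

// Hypothetical stand-ins used only to model the locking pattern; these are not
// the real chain::Watch / ChannelManager types.
struct MyWatcher {
	// Lock protecting whatever per-update state the watcher persists.
	persisted_updates: Mutex<Vec<u64>>,
}

struct MyManager;

impl MyManager {
	// Models ChannelManager::channel_monitor_updated: on the rare path added by
	// this patch, freeing the holding cell generates a fresh monitor update,
	// which calls straight back into the watcher's update_channel.
	fn channel_monitor_updated(&self, watcher: &MyWatcher, update_id: u64) {
		watcher.update_channel(update_id + 1);
	}
}

impl MyWatcher {
	// Models chain::Watch::update_channel: takes the watcher's internal lock.
	fn update_channel(&self, update_id: u64) {
		self.persisted_updates.lock().unwrap().push(update_id);
	}

	// BUGGY: notifies the manager while still holding the lock. The manager
	// re-enters update_channel, and the second lock() on the same non-reentrant
	// std::sync::Mutex deadlocks or panics on this thread.
	#[allow(dead_code)]
	fn notify_updated_while_locked(&self, manager: &MyManager, update_id: u64) {
		let mut updates = self.persisted_updates.lock().unwrap();
		updates.push(update_id);
		manager.channel_monitor_updated(self, update_id); // deadlocks or panics here
	}

	// SAFE: drop the lock before calling back into the manager.
	fn notify_updated(&self, manager: &MyManager, update_id: u64) {
		{
			self.persisted_updates.lock().unwrap().push(update_id);
		} // lock released here
		manager.channel_monitor_updated(self, update_id);
	}
}

fn main() {
	let watcher = MyWatcher { persisted_updates: Mutex::new(Vec::new()) };
	let manager = MyManager;
	watcher.notify_updated(&manager, 1);
	// watcher.notify_updated_while_locked(&manager, 2); // would deadlock
	assert_eq!(watcher.persisted_updates.lock().unwrap().len(), 2);
}

The safe variant releases its lock before notifying the manager, which is the discipline the new channel_monitor_updated documentation asks update_channel implementations to keep in mind.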