From: Matt Corallo Date: Fri, 26 Feb 2021 17:02:11 +0000 (-0500) Subject: Process monitor update events in block_[dis]connected asynchronously X-Git-Tag: v0.0.13~8^2~2 X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=rust-lightning;a=commitdiff_plain;h=d4810087c111da74a80b85117c62fd6d012e697d Process monitor update events in block_[dis]connected asynchronously The instructions for `ChannelManagerReadArgs` indicate that you need to connect blocks on a newly-deserialized `ChannelManager` in a separate pass from the newly-deserialized `ChannelMontiors` as the `ChannelManager` assumes the ability to update the monitors during block [dis]connected events, saying that users need to: ``` 4) Reconnect blocks on your ChannelMonitors 5) Move the ChannelMonitors into your local chain::Watch. 6) Disconnect/connect blocks on the ChannelManager. ``` This is fine for `ChannelManager`'s purpose, but is very awkward for users. Notably, our new `lightning-block-sync` implemented on-load reconnection in the most obvious (and performant) way - connecting the blocks all at once, violating the `ChannelManagerReadArgs` API. Luckily, the events in question really don't need to be processed with the same urgency as most channel monitor updates. The only two monitor updates which can occur in block_[dis]connected is either a) in block_connected, we identify a now-confirmed commitment transaction, closing one of our channels, or b) in block_disconnected, the funding transaction is reorganized out of the chain, making our channel no longer funded. In the case of (a), sending a monitor update which broadcasts a conflicting holder commitment transaction is far from time-critical, though we should still ensure we do it. In the case of (b), we should try to broadcast our holder commitment transaction when we can, but within a few minutes is fine on the scale of block mining anyway. Note that in both cases cannot simply move the logic to ChannelMonitor::block[dis]_connected, as this could result in us broadcasting a commitment transaction from ChannelMonitor, then revoking the now-broadcasted state, and only then receiving the block_[dis]connected event in the ChannelManager. Thus, we move both events into an internal invent queue and process them in timer_chan_freshness_every_min(). --- diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 16d1a0f8..a5f4b16e 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -4180,6 +4180,10 @@ impl Channel { /// Also returns the list of payment_hashes for channels which we can safely fail backwards /// immediately (others we will have to allow to time out). pub fn force_shutdown(&mut self, should_broadcast: bool) -> (Option<(OutPoint, ChannelMonitorUpdate)>, Vec<(HTLCSource, PaymentHash)>) { + // Note that we MUST only generate a monitor update that indicates force-closure - we're + // called during initialization prior to the chain_monitor in the encompassing ChannelManager + // being fully configured in some cases. Thus, its likely any monitor events we generate will + // be delayed in being processed! See the docs for `ChannelManagerReadArgs` for more. assert!(self.channel_state != ChannelState::ShutdownComplete as u32); // We go ahead and "free" any holding cell HTLCs or HTLCs we haven't yet committed to and diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 4ac2833d..51920a4f 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -333,6 +333,15 @@ pub(super) struct ChannelHolder { pub(super) pending_msg_events: Vec, } +/// Events which we process internally but cannot be procsesed immediately at the generation site +/// for some reason. They are handled in timer_chan_freshness_every_min, so may be processed with +/// quite some time lag. +enum BackgroundEvent { + /// Handle a ChannelMonitorUpdate that closes a channel, broadcasting its current latest holder + /// commitment transaction. + ClosingMonitorUpdate((OutPoint, ChannelMonitorUpdate)), +} + /// State we hold per-peer. In the future we should put channels in here, but for now we only hold /// the latest Init features we heard from the peer. struct PeerState { @@ -436,6 +445,7 @@ pub struct ChannelManager>>, pending_events: Mutex>, + pending_background_events: Mutex>, /// Used when we have to take a BIG lock to make sure everything is self-consistent. /// Essentially just when we're serializing ourselves out. /// Taken first everywhere where we are making changes before any other locks. @@ -794,6 +804,7 @@ impl ChannelMana per_peer_state: RwLock::new(HashMap::new()), pending_events: Mutex::new(Vec::new()), + pending_background_events: Mutex::new(Vec::new()), total_consistency_lock: RwLock::new(()), persistence_notifier: PersistenceNotifier::new(), @@ -1854,13 +1865,42 @@ impl ChannelMana events.append(&mut new_events); } + /// Free the background events, generally called from timer_chan_freshness_every_min. + /// + /// Exposed for testing to allow us to process events quickly without generating accidental + /// BroadcastChannelUpdate events in timer_chan_freshness_every_min. + /// + /// Expects the caller to have a total_consistency_lock read lock. + fn process_background_events(&self) { + let mut background_events = Vec::new(); + mem::swap(&mut *self.pending_background_events.lock().unwrap(), &mut background_events); + for event in background_events.drain(..) { + match event { + BackgroundEvent::ClosingMonitorUpdate((funding_txo, update)) => { + // The channel has already been closed, so no use bothering to care about the + // monitor updating completing. + let _ = self.chain_monitor.update_channel(funding_txo, update); + }, + } + } + } + + #[cfg(any(test, feature = "_test_utils"))] + pub(crate) fn test_process_background_events(&self) { + self.process_background_events(); + } + /// If a peer is disconnected we mark any channels with that peer as 'disabled'. /// After some time, if channels are still disabled we need to broadcast a ChannelUpdate /// to inform the network about the uselessness of these channels. /// /// This method handles all the details, and must be called roughly once per minute. + /// + /// Note that in some rare cases this may generate a `chain::Watch::update_channel` call. pub fn timer_chan_freshness_every_min(&self) { let _persistence_guard = PersistenceNotifierGuard::new(&self.total_consistency_lock, &self.persistence_notifier); + self.process_background_events(); + let mut channel_state_lock = self.channel_state.lock().unwrap(); let channel_state = &mut *channel_state_lock; for (_, chan) in channel_state.by_id.iter_mut() { @@ -1953,6 +1993,10 @@ impl ChannelMana //identify whether we sent it or not based on the (I presume) very different runtime //between the branches here. We should make this async and move it into the forward HTLCs //timer handling. + + // Note that we MUST NOT end up calling methods on self.chain_monitor here - we're called + // from block_connected which may run during initialization prior to the chain_monitor + // being fully configured. See the docs for `ChannelManagerReadArgs` for more. match source { HTLCSource::OutboundRoute { ref path, .. } => { log_trace!(self.logger, "Failing outbound payment HTLC with payment_hash {}", log_bytes!(payment_hash.0)); @@ -3100,6 +3144,29 @@ impl ChannelMana self.finish_force_close_channel(failure); } } + + /// Handle a list of channel failures during a block_connected or block_disconnected call, + /// pushing the channel monitor update (if any) to the background events queue and removing the + /// Channel object. + fn handle_init_event_channel_failures(&self, mut failed_channels: Vec) { + for mut failure in failed_channels.drain(..) { + // Either a commitment transactions has been confirmed on-chain or + // Channel::block_disconnected detected that the funding transaction has been + // reorganized out of the main chain. + // We cannot broadcast our latest local state via monitor update (as + // Channel::force_shutdown tries to make us do) as we may still be in initialization, + // so we track the update internally and handle it when the user next calls + // timer_chan_freshness_every_min, guaranteeing we're running normally. + if let Some((funding_txo, update)) = failure.0.take() { + assert_eq!(update.updates.len(), 1); + if let ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast } = update.updates[0] { + assert!(should_broadcast); + } else { unreachable!(); } + self.pending_background_events.lock().unwrap().push(BackgroundEvent::ClosingMonitorUpdate((funding_txo, update))); + } + self.finish_force_close_channel(failure); + } + } } impl MessageSendEventsProvider for ChannelManager @@ -3167,6 +3234,9 @@ impl ChannelMana { /// Updates channel state based on transactions seen in a connected block. pub fn block_connected(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) { + // Note that we MUST NOT end up calling methods on self.chain_monitor here - we're called + // during initialization prior to the chain_monitor being fully configured in some cases. + // See the docs for `ChannelManagerReadArgs` for more. let header_hash = header.block_hash(); log_trace!(self.logger, "Block {} at height {} connected", header_hash, height); let _persistence_guard = PersistenceNotifierGuard::new(&self.total_consistency_lock, &self.persistence_notifier); @@ -3218,9 +3288,7 @@ impl ChannelMana if let Some(short_id) = channel.get_short_channel_id() { short_to_id.remove(&short_id); } - // It looks like our counterparty went on-chain. We go ahead and - // broadcast our latest local state as well here, just in case its - // some kind of SPV attack, though we expect these to be dropped. + // It looks like our counterparty went on-chain. Close the channel. failed_channels.push(channel.force_shutdown(true)); if let Ok(update) = self.get_channel_update(&channel) { pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate { @@ -3254,9 +3322,8 @@ impl ChannelMana !htlcs.is_empty() // Only retain this entry if htlcs has at least one entry. }); } - for failure in failed_channels.drain(..) { - self.finish_force_close_channel(failure); - } + + self.handle_init_event_channel_failures(failed_channels); for (source, payment_hash, reason) in timed_out_htlcs.drain(..) { self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), source, &payment_hash, reason); @@ -3282,6 +3349,9 @@ impl ChannelMana /// If necessary, the channel may be force-closed without letting the counterparty participate /// in the shutdown. pub fn block_disconnected(&self, header: &BlockHeader) { + // Note that we MUST NOT end up calling methods on self.chain_monitor here - we're called + // during initialization prior to the chain_monitor being fully configured in some cases. + // See the docs for `ChannelManagerReadArgs` for more. let _persistence_guard = PersistenceNotifierGuard::new(&self.total_consistency_lock, &self.persistence_notifier); let mut failed_channels = Vec::new(); { @@ -3306,9 +3376,7 @@ impl ChannelMana } }); } - for failure in failed_channels.drain(..) { - self.finish_force_close_channel(failure); - } + self.handle_init_event_channel_failures(failed_channels); self.latest_block_height.fetch_sub(1, Ordering::AcqRel); *self.last_block_hash.try_lock().expect("block_(dis)connected must not be called in parallel") = header.block_hash(); } @@ -3914,6 +3982,18 @@ impl Writeable f event.write(writer)?; } + let background_events = self.pending_background_events.lock().unwrap(); + (background_events.len() as u64).write(writer)?; + for event in background_events.iter() { + match event { + BackgroundEvent::ClosingMonitorUpdate((funding_txo, monitor_update)) => { + 0u8.write(writer)?; + funding_txo.write(writer)?; + monitor_update.write(writer)?; + }, + } + } + (self.last_node_announcement_serial.load(Ordering::Acquire) as u32).write(writer)?; Ok(()) @@ -3932,8 +4012,11 @@ impl Writeable f /// 3) Register all relevant ChannelMonitor outpoints with your chain watch mechanism using /// ChannelMonitor::get_outputs_to_watch() and ChannelMonitor::get_funding_txo(). /// 4) Reconnect blocks on your ChannelMonitors. -/// 5) Move the ChannelMonitors into your local chain::Watch. -/// 6) Disconnect/connect blocks on the ChannelManager. +/// 5) Disconnect/connect blocks on the ChannelManager. +/// 6) Move the ChannelMonitors into your local chain::Watch. +/// +/// Note that the ordering of #4-6 is not of importance, however all three must occur before you +/// call any other methods on the newly-deserialized ChannelManager. /// /// Note that because some channels may be closed during deserialization, it is critical that you /// always deserialize only the latest version of a ChannelManager and ChannelMonitors available to @@ -4135,6 +4218,15 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> } } + let background_event_count: u64 = Readable::read(reader)?; + let mut pending_background_events_read: Vec = Vec::with_capacity(cmp::min(background_event_count as usize, MAX_ALLOC_SIZE/mem::size_of::())); + for _ in 0..background_event_count { + match ::read(reader)? { + 0 => pending_background_events_read.push(BackgroundEvent::ClosingMonitorUpdate((Readable::read(reader)?, Readable::read(reader)?))), + _ => return Err(DecodeError::InvalidValue), + } + } + let last_node_announcement_serial: u32 = Readable::read(reader)?; let mut secp_ctx = Secp256k1::new(); @@ -4164,6 +4256,7 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> per_peer_state: RwLock::new(per_peer_state), pending_events: Mutex::new(pending_events_read), + pending_background_events: Mutex::new(pending_background_events_read), total_consistency_lock: RwLock::new(()), persistence_notifier: PersistenceNotifier::new(), diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index d5d85666..b0ffef2a 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -83,11 +83,13 @@ pub fn connect_block<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, block: &Block, let txdata: Vec<_> = block.txdata.iter().enumerate().collect(); node.chain_monitor.chain_monitor.block_connected(&block.header, &txdata, height); node.node.block_connected(&block.header, &txdata, height); + node.node.test_process_background_events(); } pub fn disconnect_block<'a, 'b, 'c, 'd>(node: &'a Node<'b, 'c, 'd>, header: &BlockHeader, height: u32) { node.chain_monitor.chain_monitor.block_disconnected(header, height); node.node.block_disconnected(header); + node.node.test_process_background_events(); } pub struct TestChanMonCfg { diff --git a/lightning/src/ln/reorg_tests.rs b/lightning/src/ln/reorg_tests.rs index be5dc43d..c666e603 100644 --- a/lightning/src/ln/reorg_tests.rs +++ b/lightning/src/ln/reorg_tests.rs @@ -207,6 +207,7 @@ fn test_unconf_chan() { nodes[0].node.block_disconnected(&headers.pop().unwrap()); } check_closed_broadcast!(nodes[0], false); + nodes[0].node.test_process_background_events(); // Required to free the pending background monitor update check_added_monitors!(nodes[0], 1); let channel_state = nodes[0].node.channel_state.lock().unwrap(); assert_eq!(channel_state.by_id.len(), 0);