ChannelManager support for monitor update failure in one place
authorMatt Corallo <git@bluematt.me>
Wed, 17 Oct 2018 22:21:06 +0000 (18:21 -0400)
committerMatt Corallo <git@bluematt.me>
Tue, 23 Oct 2018 20:03:30 +0000 (16:03 -0400)
src/ln/channelmanager.rs
src/ln/channelmonitor.rs
src/util/errors.rs

index 55962d95b16e026cd35b4dc74ed59c1c98a49cb2..f2a191fed71ad50bbc9d32c70d0fca29f7c59872 100644 (file)
@@ -23,7 +23,7 @@ use secp256k1;
 use chain::chaininterface::{BroadcasterInterface,ChainListener,ChainWatchInterface,FeeEstimator};
 use chain::transaction::OutPoint;
 use ln::channel::{Channel, ChannelError, ChannelKeys};
-use ln::channelmonitor::{ManyChannelMonitor, CLTV_CLAIM_BUFFER, HTLC_FAIL_TIMEOUT_BLOCKS};
+use ln::channelmonitor::{ChannelMonitorUpdateErr, ManyChannelMonitor, CLTV_CLAIM_BUFFER, HTLC_FAIL_TIMEOUT_BLOCKS};
 use ln::router::{Route,RouteHop};
 use ln::msgs;
 use ln::msgs::{ChannelMessageHandler, HandleError, RAACommitmentOrder};
@@ -586,6 +586,33 @@ impl ChannelManager {
                }
        }
 
+       fn handle_monitor_update_fail(&self, mut channel_state_lock: MutexGuard<ChannelHolder>, channel_id: &[u8; 32], err: ChannelMonitorUpdateErr, reason: RAACommitmentOrder) {
+               match err {
+                       ChannelMonitorUpdateErr::PermanentFailure => {
+                               let mut chan = {
+                                       let channel_state = channel_state_lock.borrow_parts();
+                                       let chan = channel_state.by_id.remove(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
+                                       if let Some(short_id) = chan.get_short_channel_id() {
+                                               channel_state.short_to_id.remove(&short_id);
+                                       }
+                                       chan
+                               };
+                               mem::drop(channel_state_lock);
+                               self.finish_force_close_channel(chan.force_shutdown());
+                               let mut events = self.pending_events.lock().unwrap();
+                               if let Ok(update) = self.get_channel_update(&chan) {
+                                       events.push(events::Event::BroadcastChannelUpdate {
+                                               msg: update
+                                       });
+                               }
+                       },
+                       ChannelMonitorUpdateErr::TemporaryFailure => {
+                               let channel = channel_state_lock.by_id.get_mut(channel_id).expect("monitor_update_failed must be called within the same lock as the channel get!");
+                               channel.monitor_update_failed(reason);
+                       },
+               }
+       }
+
        #[inline]
        fn gen_rho_mu_from_shared_secret(shared_secret: &SharedSecret) -> ([u8; 32], [u8; 32]) {
                ({
@@ -984,6 +1011,11 @@ impl ChannelManager {
                                if let Some((err, code, chan_update)) = loop {
                                        let chan = channel_state.as_mut().unwrap().by_id.get_mut(&forwarding_id).unwrap();
 
+                                       // Note that we could technically not return an error yet here and just hope
+                                       // that the connection is reestablished or monitor updated by the time we get
+                                       // around to doing the actual forward, but better to fail early if we can and
+                                       // hopefully an attacker trying to path-trace payments cannot make this occur
+                                       // on a small/per-node/per-channel scale.
                                        if !chan.is_live() { // channel_disabled
                                                break Some(("Forwarding channel is not in a ready state.", 0x1000 | 20, Some(self.get_channel_update(chan).unwrap())));
                                        }
@@ -1027,6 +1059,7 @@ impl ChannelManager {
        }
 
        /// only fails if the channel does not yet have an assigned short_id
+       /// May be called with channel_state already locked!
        fn get_channel_update(&self, chan: &Channel) -> Result<msgs::ChannelUpdate, HandleError> {
                let short_channel_id = match chan.get_short_channel_id() {
                        None => return Err(HandleError{err: "Channel not yet established", action: None}),
@@ -1097,8 +1130,7 @@ impl ChannelManager {
                let onion_packet = ChannelManager::construct_onion_packet(onion_payloads, onion_keys, &payment_hash);
 
                let (first_hop_node_id, update_add, commitment_signed) = {
-                       let mut channel_state_lock = self.channel_state.lock().unwrap();
-                       let channel_state = channel_state_lock.borrow_parts();
+                       let mut channel_state = self.channel_state.lock().unwrap();
 
                        let id = match channel_state.short_to_id.get(&route.hops.first().unwrap().short_channel_id) {
                                None => return Err(APIError::ChannelUnavailable{err: "No channel available with first hop!"}),
@@ -1106,21 +1138,28 @@ impl ChannelManager {
                        };
 
                        let res = {
-                               let chan = channel_state.by_id.get_mut(&id).unwrap();
-                               if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
-                                       return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
-                               }
-                               if !chan.is_live() {
-                                       return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
-                               }
-                               match chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
-                                       route: route.clone(),
-                                       session_priv: session_priv.clone(),
-                                       first_hop_htlc_msat: htlc_msat,
-                               }, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})? {
+                               let res = {
+                                       let chan = channel_state.by_id.get_mut(&id).unwrap();
+                                       if chan.get_their_node_id() != route.hops.first().unwrap().pubkey {
+                                               return Err(APIError::RouteError{err: "Node ID mismatch on first hop!"});
+                                       }
+                                       if chan.is_awaiting_monitor_update() {
+                                               return Err(APIError::MonitorUpdateFailed);
+                                       }
+                                       if !chan.is_live() {
+                                               return Err(APIError::ChannelUnavailable{err: "Peer for first hop currently disconnected!"});
+                                       }
+                                       chan.send_htlc_and_commit(htlc_msat, payment_hash.clone(), htlc_cltv, HTLCSource::OutboundRoute {
+                                               route: route.clone(),
+                                               session_priv: session_priv.clone(),
+                                               first_hop_htlc_msat: htlc_msat,
+                                       }, onion_packet).map_err(|he| APIError::ChannelUnavailable{err: he.err})?
+                               };
+                               match res {
                                        Some((update_add, commitment_signed, chan_monitor)) => {
-                                               if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
-                                                       unimplemented!();
+                                               if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+                                                       self.handle_monitor_update_fail(channel_state, &id, e, RAACommitmentOrder::CommitmentFirst);
+                                                       return Err(APIError::MonitorUpdateFailed);
                                                }
                                                Some((update_add, commitment_signed))
                                        },
@@ -1540,7 +1579,83 @@ impl ChannelManager {
        /// ChannelMonitorUpdateErr::TemporaryFailure was returned from a channel monitor update
        /// operation.
        pub fn test_restore_channel_monitor(&self) {
-               unimplemented!();
+               let mut new_events = Vec::new();
+               let mut close_results = Vec::new();
+               let mut htlc_forwards = Vec::new();
+               let mut htlc_failures = Vec::new();
+
+               {
+                       let mut channel_lock = self.channel_state.lock().unwrap();
+                       let channel_state = channel_lock.borrow_parts();
+                       let short_to_id = channel_state.short_to_id;
+                       channel_state.by_id.retain(|_, channel| {
+                               if channel.is_awaiting_monitor_update() {
+                                       let chan_monitor = channel.channel_monitor();
+                                       if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+                                               match e {
+                                                       ChannelMonitorUpdateErr::PermanentFailure => {
+                                                               if let Some(short_id) = channel.get_short_channel_id() {
+                                                                       short_to_id.remove(&short_id);
+                                                               }
+                                                               close_results.push(channel.force_shutdown());
+                                                               if let Ok(update) = self.get_channel_update(&channel) {
+                                                                       new_events.push(events::Event::BroadcastChannelUpdate {
+                                                                               msg: update
+                                                                       });
+                                                               }
+                                                               false
+                                                       },
+                                                       ChannelMonitorUpdateErr::TemporaryFailure => true,
+                                               }
+                                       } else {
+                                               let (raa, commitment_update, order, pending_forwards, mut pending_failures) = channel.monitor_updating_restored();
+                                               if !pending_forwards.is_empty() {
+                                                       htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards));
+                                               }
+                                               htlc_failures.append(&mut pending_failures);
+
+                                               macro_rules! handle_cs { () => {
+                                                       if let Some(update) = commitment_update {
+                                                               new_events.push(events::Event::UpdateHTLCs {
+                                                                       node_id: channel.get_their_node_id(),
+                                                                       updates: update,
+                                                               });
+                                                       }
+                                               } }
+                                               macro_rules! handle_raa { () => {
+                                                       if let Some(revoke_and_ack) = raa {
+                                                               new_events.push(events::Event::SendRevokeAndACK {
+                                                                       node_id: channel.get_their_node_id(),
+                                                                       msg: revoke_and_ack,
+                                                               });
+                                                       }
+                                               } }
+                                               match order {
+                                                       RAACommitmentOrder::CommitmentFirst => {
+                                                               handle_cs!();
+                                                               handle_raa!();
+                                                       },
+                                                       RAACommitmentOrder::RevokeAndACKFirst => {
+                                                               handle_raa!();
+                                                               handle_cs!();
+                                                       },
+                                               }
+                                               true
+                                       }
+                               } else { true }
+                       });
+               }
+
+               for failure in htlc_failures.drain(..) {
+                       self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), failure.0, &failure.1, failure.2);
+               }
+               self.forward_htlcs(&mut htlc_forwards[..]);
+
+               for res in close_results.drain(..) {
+                       self.finish_force_close_channel(res);
+               }
+
+               self.pending_events.lock().unwrap().append(&mut new_events);
        }
 
        fn internal_open_channel(&self, their_node_id: &PublicKey, msg: &msgs::OpenChannel) -> Result<msgs::AcceptChannel, MsgHandleErrInternal> {
@@ -2211,6 +2326,9 @@ impl ChannelManager {
                                if !chan.is_outbound() {
                                        return Err(APIError::APIMisuseError{err: "update_fee cannot be sent for an inbound channel"});
                                }
+                               if chan.is_awaiting_monitor_update() {
+                                       return Err(APIError::MonitorUpdateFailed);
+                               }
                                if !chan.is_live() {
                                        return Err(APIError::ChannelUnavailable{err: "Channel is either not yet fully established or peer is currently disconnected"});
                                }
index 59ed177b38fb3da0743e4a2ee71332f0e9abc1b0..f67ae5dd6d559977196f10f84764ca785dbd8437 100644 (file)
@@ -47,6 +47,22 @@ pub enum ChannelMonitorUpdateErr {
        /// submitting new commitment transactions to the remote party.
        /// ChannelManager::test_restore_channel_monitor can be used to retry the update(s) and restore
        /// the channel to an operational state.
+       ///
+       /// Note that continuing to operate when no copy of the updated ChannelMonitor could be
+       /// persisted is unsafe - if you failed to store the update on your own local disk you should
+       /// instead return PermanentFailure to force closure of the channel ASAP.
+       ///
+       /// Even when a channel has been "frozen" updates to the ChannelMonitor can continue to occur
+       /// (eg if an inbound HTLC which we forwarded was claimed upstream resulting in us attempting
+       /// to claim it on this channel) and those updates must be applied wherever they can be. At
+       /// least one such updated ChannelMonitor must be persisted otherwise PermanentFailure should
+       /// be returned to get things on-chain ASAP using only the in-memory copy. Obviously updates to
+       /// the channel which would invalidate previous ChannelMonitors are not made when a channel has
+       /// been "frozen".
+       ///
+       /// Note that even if updates made after TemporaryFailure succeed you must still call
+       /// test_restore_channel_monitor to ensure you have the latest monitor and re-enable normal
+       /// channel operation.
        TemporaryFailure,
        /// Used to indicate no further channel monitor updates will be allowed (eg we've moved on to a
        /// different watchtower and cannot update with all watchtowers that were previously informed
index 9446b35854baf9e34f665d3e1755ab7f599c4992..66ddf84d96871857ed027cc5c3c74f3bfb749dfb 100644 (file)
@@ -32,7 +32,10 @@ pub enum APIError {
        ChannelUnavailable {
                /// A human-readable error message
                err: &'static str
-       }
+       },
+       /// An attempt to call add_update_monitor returned an Err (ie you did this!), causing the
+       /// attempted action to fail.
+       MonitorUpdateFailed,
 }
 
 impl fmt::Debug for APIError {
@@ -42,6 +45,7 @@ impl fmt::Debug for APIError {
                        APIError::FeeRateTooHigh {ref err, ref feerate} => write!(f, "{} feerate: {}", err, feerate),
                        APIError::RouteError {ref err} => f.write_str(err),
                        APIError::ChannelUnavailable {ref err} => f.write_str(err),
+                       APIError::MonitorUpdateFailed => f.write_str("Client indicated a channel monitor update failed"),
                }
        }
 }