Handle monitor update failures in msg-recv functions

author Matt Corallo <git@bluematt.me>

Tue, 27 Nov 2018 02:54:14 +0000 (21:54 -0500)

committer Matt Corallo <git@bluematt.me>

Tue, 11 Dec 2018 18:17:45 +0000 (13:17 -0500)
author Matt Corallo <git@bluematt.me>
Tue, 27 Nov 2018 02:54:14 +0000 (21:54 -0500)
committer Matt Corallo <git@bluematt.me>
Tue, 11 Dec 2018 18:17:45 +0000 (13:17 -0500)
diff --git a/src/ln/channel.rs b/src/ln/channel.rs

index 296d97c164ea967839fa2f6ec93ab66f215f56fe..c2ec53fb353d8fad52d1e6d8d5074ed361546752 100644 (file)
--- a/src/ln/channel.rs
+++ b/src/ln/channel.rs
@@ -2154,7 +2154,7 @@ impl Channel {
         /// commitment update or a revoke_and_ack generation). The messages which were generated from
         /// that original call must *not* have been sent to the remote end, and must instead have been
         /// dropped. They will be regenerated when monitor_updating_restored is called.
-       pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder) {
+       pub fn monitor_update_failed(&mut self, order: RAACommitmentOrder, mut pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, mut pending_fails: Vec<(HTLCSource, [u8; 32], HTLCFailReason)>, raa_first_dropped_cs: bool) {
                 assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, 0);
                 match order {
                         RAACommitmentOrder::CommitmentFirst => {
@@ -2163,9 +2163,13 @@ impl Channel {
                         },
                         RAACommitmentOrder::RevokeAndACKFirst => {
                                 self.monitor_pending_revoke_and_ack = true;
-                               self.monitor_pending_commitment_signed = false;
+                               self.monitor_pending_commitment_signed = raa_first_dropped_cs;
                         },
                 }
+               assert!(self.monitor_pending_forwards.is_empty());
+               mem::swap(&mut pending_forwards, &mut self.monitor_pending_forwards);
+               assert!(self.monitor_pending_failures.is_empty());
+               mem::swap(&mut pending_fails, &mut self.monitor_pending_failures);
                 self.monitor_pending_order = Some(order);
                 self.channel_state |= ChannelState::MonitorUpdateFailed as u32;
         }
diff --git a/src/ln/channelmanager.rs b/src/ln/channelmanager.rs

index f6ee5f8cd9ef6d0b53972143715797d5e346a113..87326f2be940a672773d8db19f69b322e1979c4e 100644 (file)
--- a/src/ln/channelmanager.rs
+++ b/src/ln/channelmanager.rs
@@ -442,6 +442,43 @@ macro_rules! try_chan_entry {
         }
  }
  
+macro_rules! return_monitor_err {
+       ($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
+               return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new())
+       };
+       ($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $raa_first_dropped_cs: expr) => {
+               if $action_type != RAACommitmentOrder::RevokeAndACKFirst { panic!("Bad return_monitor_err call!"); }
+               return_monitor_err!($self, $err, $channel_state, $entry, $action_type, Vec::new(), Vec::new(), $raa_first_dropped_cs)
+       };
+       ($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr) => {
+               return_monitor_err!($self, $err, $channel_state, $entry, $action_type, $failed_forwards, $failed_fails, false)
+       };
+       ($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path, $failed_forwards: expr, $failed_fails: expr, $raa_first_dropped_cs: expr) => {
+               match $err {
+                       ChannelMonitorUpdateErr::PermanentFailure => {
+                               let (channel_id, mut chan) = $entry.remove_entry();
+                               if let Some(short_id) = chan.get_short_channel_id() {
+                                       $channel_state.short_to_id.remove(&short_id);
+                               }
+                               // TODO: $failed_fails is dropped here, which will cause other channels to hit the
+                               // chain in a confused state! We need to move them into the ChannelMonitor which
+                               // will be responsible for failing backwards once things confirm on-chain.
+                               // It's ok that we drop $failed_forwards here - at this point we'd rather they
+                               // broadcast HTLC-Timeout and pay the associated fees to get their funds back than
+                               // us bother trying to claim it just to forward on to another peer. If we're
+                               // splitting hairs we'd prefer to claim payments that were to us, but we haven't
+                               // given up the preimage yet, so might as well just wait until the payment is
+                               // retried, avoiding the on-chain fees.
+                               return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
+                       },
+                       ChannelMonitorUpdateErr::TemporaryFailure => {
+                               $entry.get_mut().monitor_update_failed($action_type, $failed_forwards, $failed_fails, $raa_first_dropped_cs);
+                               return Err(MsgHandleErrInternal::from_chan_no_close(ChannelError::Ignore("Failed to update ChannelMonitor"), *$entry.key()));
+                       },
+               }
+       }
+}
+
  // Does not break in case of TemporaryFailure!
  macro_rules! maybe_break_monitor_err {
         ($self: expr, $err: expr, $channel_state: expr, $entry: expr, $action_type: path) => {
@@ -454,7 +491,7 @@ macro_rules! maybe_break_monitor_err {
                                 break Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", channel_id, chan.force_shutdown(), $self.get_channel_update(&chan).ok()))
                         },
                         ChannelMonitorUpdateErr::TemporaryFailure => {
-                               $entry.get_mut().monitor_update_failed($action_type);
+                               $entry.get_mut().monitor_update_failed($action_type, Vec::new(), Vec::new(), false);
                         },
                 }
         }
@@ -1681,6 +1718,13 @@ impl ChannelManager {
                                         if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
                                                 match e {
                                                         ChannelMonitorUpdateErr::PermanentFailure => {
+                                                               // TODO: There may be some pending HTLCs that we intended to fail
+                                                               // backwards when a monitor update failed. We should make sure
+                                                               // knowledge of those gets moved into the appropriate in-memory
+                                                               // ChannelMonitor and they get failed backwards once we get
+                                                               // on-chain confirmations.
+                                                               // Note I think #198 addresses this, so once its merged a test
+                                                               // should be written.
                                                                 if let Some(short_id) = channel.get_short_channel_id() {
                                                                         short_to_id.remove(&short_id);
                                                                 }
@@ -2283,8 +2327,9 @@ impl ChannelManager {
                                 }
                                 let (revoke_and_ack, commitment_signed, closing_signed, chan_monitor) =
                                         try_chan_entry!(self, chan.get_mut().commitment_signed(&msg, &*self.fee_estimator), channel_state, chan);
-                               if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
-                                       unimplemented!();
+                               if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+                                       return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::RevokeAndACKFirst, commitment_signed.is_some());
+                                       //TODO: Rebroadcast closing_signed if present on monitor update restoration
                                 }
                                 channel_state.pending_msg_events.push(events::MessageSendEvent::SendRevokeAndACK {
                                         node_id: their_node_id.clone(),
@@ -2360,8 +2405,8 @@ impl ChannelManager {
                                         }
                                         let (commitment_update, pending_forwards, pending_failures, closing_signed, chan_monitor) =
                                                 try_chan_entry!(self, chan.get_mut().revoke_and_ack(&msg, &*self.fee_estimator), channel_state, chan);
-                                       if let Err(_e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
-                                               unimplemented!();
+                                       if let Err(e) = self.monitor.add_update_monitor(chan_monitor.get_funding_txo().unwrap(), chan_monitor) {
+                                               return_monitor_err!(self, e, channel_state, chan, RAACommitmentOrder::CommitmentFirst, pending_forwards, pending_failures);
                                         }
                                         if let Some(updates) = commitment_update {
                                                 channel_state.pending_msg_events.push(events::MessageSendEvent::UpdateHTLCs {
@@ -2455,11 +2500,21 @@ impl ChannelManager {
                                 if chan.get().get_their_node_id() != *their_node_id {
                                         return Err(MsgHandleErrInternal::send_err_msg_no_close("Got a message for a channel from the wrong node!", msg.channel_id));
                                 }
-                               let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, order, shutdown) =
+                               let (funding_locked, revoke_and_ack, commitment_update, channel_monitor, mut order, shutdown) =
                                         try_chan_entry!(self, chan.get_mut().channel_reestablish(msg), channel_state, chan);
                                 if let Some(monitor) = channel_monitor {
-                                       if let Err(_e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
-                                               unimplemented!();
+                                       if let Err(e) = self.monitor.add_update_monitor(monitor.get_funding_txo().unwrap(), monitor) {
+                                               // channel_reestablish doesn't guarantee the order it returns is sensical
+                                               // for the messages it returns, but if we're setting what messages to
+                                               // re-transmit on monitor update success, we need to make sure it is sane.
+                                               if revoke_and_ack.is_none() {
+                                                       order = RAACommitmentOrder::CommitmentFirst;
+                                               }
+                                               if commitment_update.is_none() {
+                                                       order = RAACommitmentOrder::RevokeAndACKFirst;
+                                               }
+                                               return_monitor_err!(self, e, channel_state, chan, order);
+                                               //TODO: Resend the funding_locked if needed once we get the monitor running again
                                         }
                                 }
                                 if let Some(msg) = funding_locked {
author	Matt Corallo <git@bluematt.me>
	Tue, 27 Nov 2018 02:54:14 +0000 (21:54 -0500)
committer	Matt Corallo <git@bluematt.me>
	Tue, 11 Dec 2018 18:17:45 +0000 (13:17 -0500)
src/ln/channel.rs		patch \| blob \| history
src/ln/channelmanager.rs		patch \| blob \| history