Skip to content

Commit 114f6b5

Browse files
committed
Fix race condition in async UtxoFuture resolution
Previously, we refactored the `GossipVerifier` to not require holding a circular reference. As part of this, we moved to a model where the `UtxoFuture`s are now polled by the background processor which checks for completion through `get_and_clear_pending_msg_events`. However, as part of this refactor we introduced race-condition: as we only held `Weak` references in `PendingChecksContext` and the `UtxoFuture` was directly dropped by the `GossipVerifier` after calling `resolve`, the actual data was dropped with the future and gone when the background processor attempted to retrieve and apply it via `check_resolved_futures`. Here, we fix this issue by simply holding on to the `state` `Arc`s in a separate `pending_states` `Vec` that is only pruned in `check_resolved_futures`, ensuring any completed results are collected first. Signed-off-by: Elias Rohrer <dev@tnull.de>
1 parent 821559b commit 114f6b5

1 file changed

Lines changed: 36 additions & 1 deletion

File tree

lightning/src/routing/utxo.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ impl UtxoFuture {
166166
}
167167

168168
struct PendingChecksContext {
169+
pending_states: Vec<Arc<Mutex<UtxoMessages>>>,
169170
channels: HashMap<u64, Weak<Mutex<UtxoMessages>>>,
170171
nodes: HashMap<NodeId, Vec<Weak<Mutex<UtxoMessages>>>>,
171172
}
@@ -180,6 +181,7 @@ impl PendingChecks {
180181
pub(super) fn new() -> Self {
181182
PendingChecks {
182183
internal: Mutex::new(PendingChecksContext {
184+
pending_states: Vec::new(),
183185
channels: new_hash_map(),
184186
nodes: new_hash_map(),
185187
}),
@@ -413,6 +415,20 @@ impl PendingChecks {
413415
// handle the result in-line.
414416
handle_result(res)
415417
} else {
418+
// To avoid cases where we drop the resolved data before it can be
419+
// collected by `check_resolved_futures`, we here track all pending
420+
// states at least until the next call of `check_resolved_futures`.
421+
let pending_states = &mut pending_checks.pending_states;
422+
if pending_states
423+
.iter()
424+
.find(|s| Arc::ptr_eq(s, &future.state))
425+
.is_none()
426+
{
427+
// We're not already tracking the future state, keep the `Arc`
428+
// around.
429+
pending_states.push(Arc::clone(&future.state));
430+
}
431+
416432
Self::check_replace_previous_entry(
417433
msg,
418434
full_msg,
@@ -574,6 +590,21 @@ impl PendingChecks {
574590
let mut completed_states = Vec::new();
575591
{
576592
let mut lck = self.internal.lock().unwrap();
593+
lck.pending_states.retain(|state| {
594+
if state.lock().unwrap().complete.is_some() {
595+
// We're done, collect the result and clean up.
596+
completed_states.push(Arc::clone(&state));
597+
false
598+
} else {
599+
if Arc::strong_count(state) == 1 {
600+
// The future has been dropped.
601+
false
602+
} else {
603+
// It's still inflight.
604+
true
605+
}
606+
}
607+
});
577608
lck.channels.retain(|_, state| {
578609
if let Some(state) = state.upgrade() {
579610
if state.lock().unwrap().complete.is_some() {
@@ -1069,8 +1100,12 @@ mod tests {
10691100
assert!(network_graph.pending_checks.too_many_checks_pending());
10701101

10711102
// Once the future is drop'd (by resetting the `utxo_ret` value) the "too many checks" flag
1072-
// should reset to false.
1103+
// should not yet reset to false.
10731104
*chain_source.utxo_ret.lock().unwrap() = UtxoResult::Sync(Err(UtxoLookupError::UnknownTx));
1105+
assert!(network_graph.pending_checks.too_many_checks_pending());
1106+
1107+
// .. but it should once we called check_resolved_futures clearing the `pending_states`.
1108+
network_graph.pending_checks.check_resolved_futures(&network_graph);
10741109
assert!(!network_graph.pending_checks.too_many_checks_pending());
10751110
}
10761111
}

0 commit comments

Comments
 (0)