From 6587677f648dd87a240696bfe57bcdd585acc3d5 Mon Sep 17 00:00:00 2001 From: Konstantin Akimov Date: Sat, 25 Apr 2026 19:07:04 +0700 Subject: [PATCH] fix: close TOCTOU race in islock reconstruction PR #6994 stopped sending ISDLOCK invs to peers requesting recsigs on the premise that they can reconstruct the islock from the recsig. That reconstruction has a pre-existing race that become critical now. It causes intermittent failure in interface_zmq_dash.py - wait_for_instantlock timing out. The previous reconstruction path checked HasRecoveredSigForId before emplacing into creatingInstantSendLocks, which leaves a window: thread A: ProcessRecoveredSig writes recsig to db, listener fires HandleNewRecoveredSig, finds creating empty, no-ops thread B: TrySignInstantSendLock saw HasRecoveredSigForId=false, now emplaces and calls AsyncSignIfMember It causes recsig to be in in db but no one ever builds the islock: - db.HasRecoveredSigForHash blocks any subsequent listener firing for that recsig - TrySignInstantSendLock has already passed its check Before #6994 this was masked because the peer would still receive the ISDLOCK inv from another node, but on develop the peer no longer receives an ISDLOCK inv from peers as a fallback. Reorder to emplace first, then check: 1. Emplace into creatingInstantSendLocks. Any listener that fires after this point sees the entry and reconstructs via the existing HandleNewInstantSendLockRecoveredSig path. 2. Then check HasRecoveredSigForId. If already in db, find+erase the entry; if find misses, the listener already handled it. 3. Only call AsyncSignIfMember when no recsig exists yet. --- src/instantsend/signing.cpp | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/instantsend/signing.cpp b/src/instantsend/signing.cpp index 7c46f8d389aa..22569760b3c7 100644 --- a/src/instantsend/signing.cpp +++ b/src/instantsend/signing.cpp @@ -384,10 +384,6 @@ void InstantSendSigner::TrySignInstantSendLock(const CTransaction& tx) auto id = islock.GetRequestId(); - if (m_sigman.HasRecoveredSigForId(llmqType, id)) { - return; - } - const auto& llmq_params_opt = Params().GetLLMQ(llmqType); assert(llmq_params_opt); const auto quorum = llmq::SelectQuorumForSigning(llmq_params_opt.value(), m_chainstate.m_chain, m_qman, id); @@ -402,6 +398,9 @@ void InstantSendSigner::TrySignInstantSendLock(const CTransaction& tx) quorum->m_quorum_base_block_index->nHeight % llmq_params_opt->dkgInterval; islock.cycleHash = quorum->m_quorum_base_block_index->GetAncestor(cycle_height)->GetBlockHash(); + // Emplace into creatingInstantSendLocks first so that any concurrent recsig listener + // (firing on a parallel verification worker) finds the entry and handles reconstruction + // through HandleNewInstantSendLockRecoveredSig. { LOCK(cs_creating); auto e = creatingInstantSendLocks.emplace(id, std::move(islock)); @@ -411,6 +410,33 @@ void InstantSendSigner::TrySignInstantSendLock(const CTransaction& tx) txToCreatingInstantSendLocks.emplace(tx.GetHash(), &e.first->second); } + // After emplacing, check whether the recsig is already in db. If yes, the listener + // either already ran while creating was empty (so we must reconstruct here), or it + // ran after our emplace and already removed the entry (so the find below misses). + // Either case is handled correctly by the find/erase pattern. + if (llmq::CRecoveredSig recSig; m_sigman.GetRecoveredSigForId(llmqType, id, recSig)) { + InstantSendLockPtr islock_ptr; + { + LOCK(cs_creating); + auto it = creatingInstantSendLocks.find(id); + if (it == creatingInstantSendLocks.end()) { + // The listener already fired after our emplace and reconstructed; nothing to do. + return; + } + islock_ptr = std::make_shared(std::move(it->second)); + txToCreatingInstantSendLocks.erase(islock_ptr->txid); + creatingInstantSendLocks.erase(it); + } + if (recSig.getMsgHash() != tx.GetHash()) { + LogPrintf("%s -- txid=%s: existing recovered islock sig has conflicting msgHash %s\n", + __func__, tx.GetHash().ToString(), recSig.getMsgHash().ToString()); + return; + } + islock_ptr->sig = recSig.sig; + m_isman.TryEmplacePendingLock(::SerializeHash(*islock_ptr), /*id=*/-1, islock_ptr); + return; + } + m_shareman.AsyncSignIfMember(llmqType, m_sigman, id, tx.GetHash(), quorum->m_quorum_base_block_index->GetBlockHash()); } } // namespace instantsend