From 0d933cec0134a473f7561b4ac131c064c42a3587 Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Mon, 4 May 2026 15:28:24 +0100 Subject: [PATCH 01/10] fix: wrap blocking operations in validator and remote prover with spawn_blocking/block_in_place Proof verification, VM execution, local signing, and local proving are CPU-bound operations that were running directly on Tokio worker threads, stalling the async runtime. Wraps them in block_in_place or spawn_blocking to isolate blocking work from the async executor. - validator: wrap proof verification and VM execution in block_in_place - validator: wrap local SecretKey signing in block_in_place - remote-prover: wrap LocalBatchProver::prove in spawn_blocking - remote-prover: wrap LocalBlockProver::prove in block_in_place Closes #1976 --- bin/remote-prover/src/server/prover.rs | 16 ++++++++++++---- crates/validator/src/signers/mod.rs | 5 ++++- crates/validator/src/tx_validation/mod.rs | 23 ++++++++++++++--------- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/bin/remote-prover/src/server/prover.rs b/bin/remote-prover/src/server/prover.rs index 2931cc70fe..346921c1b7 100644 --- a/bin/remote-prover/src/server/prover.rs +++ b/bin/remote-prover/src/server/prover.rs @@ -112,8 +112,14 @@ impl ProveRequest for LocalBatchProver { type Output = ProvenBatch; async fn prove(&self, input: Self::Input) -> Result { - self.prove(input) - .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove batch"))) + let prover = self.clone(); + tokio::task::spawn_blocking(move || { + prover + .prove(input) + .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove batch"))) + }) + .await + .map_err(|e| tonic::Status::internal(format!("batch prover task panicked: {e}")))? 
} } @@ -124,7 +130,9 @@ impl ProveRequest for LocalBlockProver { async fn prove(&self, input: Self::Input) -> Result { let BlockProofRequest { tx_batches, block_header, block_inputs } = input; - self.prove(tx_batches, &block_header, block_inputs) - .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove block"))) + tokio::task::block_in_place(|| { + self.prove(tx_batches, &block_header, block_inputs) + .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove block"))) + }) } } diff --git a/crates/validator/src/signers/mod.rs b/crates/validator/src/signers/mod.rs index 21bbeaa7ae..ad7583fa0e 100644 --- a/crates/validator/src/signers/mod.rs +++ b/crates/validator/src/signers/mod.rs @@ -36,7 +36,10 @@ impl ValidatorSigner { Ok(sig) }, Self::Local(signer) => { - let sig = ::sign(signer, header).await?; + let sig = tokio::task::block_in_place(|| { + tokio::runtime::Handle::current() + .block_on(::sign(signer, header)) + })?; Ok(sig) }, } diff --git a/crates/validator/src/tx_validation/mod.rs b/crates/validator/src/tx_validation/mod.rs index f2d1250a20..6aa057a999 100644 --- a/crates/validator/src/tx_validation/mod.rs +++ b/crates/validator/src/tx_validation/mod.rs @@ -39,23 +39,28 @@ pub async fn validate_transaction( proven_tx: ProvenTransaction, tx_inputs: TransactionInputs, ) -> Result { - // First, verify the transaction proof - info_span!("verify").in_scope(|| { - let tx_verifier = TransactionVerifier::new(MIN_PROOF_SECURITY_LEVEL); - tx_verifier.verify(&proven_tx) + // Proof verification is CPU-intensive; run it in a blocking context. + tokio::task::block_in_place(|| { + info_span!("verify").in_scope(|| { + let tx_verifier = TransactionVerifier::new(MIN_PROOF_SECURITY_LEVEL); + tx_verifier.verify(&proven_tx) + }) })?; // Create a DataStore from the transaction inputs. let data_store = TransactionInputsDataStore::new(tx_inputs.clone()); - // Execute the transaction. 
+ // VM execution may not yield; run it in a blocking context to avoid stalling the runtime. let (account, block_header, _, input_notes, tx_args) = tx_inputs.into_parts(); let executor: TransactionExecutor<'_, '_, _, UnreachableAuth> = TransactionExecutor::new(&data_store); - let executed_tx = executor - .execute_transaction(account.id(), block_header.block_num(), input_notes, tx_args) - .instrument(info_span!("execute")) - .await?; + let executed_tx = tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on( + executor + .execute_transaction(account.id(), block_header.block_num(), input_notes, tx_args) + .instrument(info_span!("execute")), + ) + })?; // Validate that the executed transaction matches the submitted transaction. let executed_tx_header: TransactionHeader = (&executed_tx).into(); From 46ccdd3505c108e6c4c0a653a0fcbab0e9a3f39a Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 09:44:28 +0100 Subject: [PATCH 02/10] fix: replace block_in_place with spawn_blocking Replaces all uses of tokio::task::block_in_place with spawn_blocking across the validator and remote-prover crates, as requested in review. spawn_blocking is preferred because it works with both multi-threaded and current_thread runtimes (including #[tokio::test]). For the TX executor which uses an async API that may not yield, a dedicated current_thread runtime is created inside the spawn_blocking closure to drive the future to completion. 
--- bin/remote-prover/src/server/prover.rs | 8 +++-- crates/validator/src/signers/mod.rs | 14 ++++++--- crates/validator/src/tx_validation/mod.rs | 38 ++++++++++++++--------- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/bin/remote-prover/src/server/prover.rs b/bin/remote-prover/src/server/prover.rs index 346921c1b7..f9287064a6 100644 --- a/bin/remote-prover/src/server/prover.rs +++ b/bin/remote-prover/src/server/prover.rs @@ -129,10 +129,14 @@ impl ProveRequest for LocalBlockProver { type Output = BlockProof; async fn prove(&self, input: Self::Input) -> Result { + let prover = self.clone(); let BlockProofRequest { tx_batches, block_header, block_inputs } = input; - tokio::task::block_in_place(|| { - self.prove(tx_batches, &block_header, block_inputs) + tokio::task::spawn_blocking(move || { + prover + .prove(tx_batches, &block_header, block_inputs) .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove block"))) }) + .await + .map_err(|e| tonic::Status::internal(format!("block prover task panicked: {e}")))? 
} } diff --git a/crates/validator/src/signers/mod.rs b/crates/validator/src/signers/mod.rs index ad7583fa0e..6e6092350f 100644 --- a/crates/validator/src/signers/mod.rs +++ b/crates/validator/src/signers/mod.rs @@ -36,10 +36,16 @@ impl ValidatorSigner { Ok(sig) }, Self::Local(signer) => { - let sig = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current() - .block_on(::sign(signer, header)) - })?; + let signer = signer.clone(); + let header = header.clone(); + let sig = tokio::task::spawn_blocking(move || { + tokio::runtime::Builder::new_current_thread() + .build() + .expect("failed to build tokio runtime") + .block_on(::sign(&signer, &header)) + }) + .await + .unwrap_or_else(|e| std::panic::resume_unwind(e.into_panic()))?; Ok(sig) }, } diff --git a/crates/validator/src/tx_validation/mod.rs b/crates/validator/src/tx_validation/mod.rs index 6aa057a999..f0707ddd71 100644 --- a/crates/validator/src/tx_validation/mod.rs +++ b/crates/validator/src/tx_validation/mod.rs @@ -39,28 +39,36 @@ pub async fn validate_transaction( proven_tx: ProvenTransaction, tx_inputs: TransactionInputs, ) -> Result { - // Proof verification is CPU-intensive; run it in a blocking context. - tokio::task::block_in_place(|| { + // Proof verification is CPU-intensive; run it on a dedicated blocking thread. + let proven_tx_clone = proven_tx.clone(); + tokio::task::spawn_blocking(move || { info_span!("verify").in_scope(|| { - let tx_verifier = TransactionVerifier::new(MIN_PROOF_SECURITY_LEVEL); - tx_verifier.verify(&proven_tx) + TransactionVerifier::new(MIN_PROOF_SECURITY_LEVEL).verify(&proven_tx_clone) }) - })?; + }) + .await + .unwrap_or_else(|e| std::panic::resume_unwind(e.into_panic()))?; // Create a DataStore from the transaction inputs. let data_store = TransactionInputsDataStore::new(tx_inputs.clone()); - // VM execution may not yield; run it in a blocking context to avoid stalling the runtime. + // VM execution may not yield; run it on a dedicated blocking thread. 
let (account, block_header, _, input_notes, tx_args) = tx_inputs.into_parts(); - let executor: TransactionExecutor<'_, '_, _, UnreachableAuth> = - TransactionExecutor::new(&data_store); - let executed_tx = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on( - executor - .execute_transaction(account.id(), block_header.block_num(), input_notes, tx_args) - .instrument(info_span!("execute")), - ) - })?; + let execute_span = info_span!("execute"); + let executed_tx = tokio::task::spawn_blocking(move || { + let executor: TransactionExecutor<'_, '_, _, UnreachableAuth> = + TransactionExecutor::new(&data_store); + tokio::runtime::Builder::new_current_thread() + .build() + .expect("failed to build tokio runtime") + .block_on( + executor + .execute_transaction(account.id(), block_header.block_num(), input_notes, tx_args) + .instrument(execute_span), + ) + }) + .await + .unwrap_or_else(|e| std::panic::resume_unwind(e.into_panic()))?; // Validate that the executed transaction matches the submitted transaction. let executed_tx_header: TransactionHeader = (&executed_tx).into(); From fa8dc3df59663d46db69d1d0bd8ee3695ba1c21c Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 11:46:39 +0100 Subject: [PATCH 03/10] fix(ntx-builder): wrap local transaction proving in spawn_blocking LocalTransactionProver::prove generates a ZK proof which is CPU-intensive and does not yield. Wrap it in spawn_blocking with a current_thread runtime when no remote prover is configured. 
--- crates/ntx-builder/src/actor/execute.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/ntx-builder/src/actor/execute.rs b/crates/ntx-builder/src/actor/execute.rs index 2888e1b774..edcd898ef5 100644 --- a/crates/ntx-builder/src/actor/execute.rs +++ b/crates/ntx-builder/src/actor/execute.rs @@ -316,13 +316,21 @@ impl NtxContext { #[instrument(target = COMPONENT, name = "ntx.execute_transaction.prove", skip_all, err)] async fn prove(&self, tx_inputs: &TransactionInputs) -> NtxResult { if let Some(remote) = &self.prover { - remote.prove(tx_inputs).await + remote.prove(tx_inputs).await.map_err(NtxError::Proving) } else { - // Only perform tx inputs clone for local proving. + // ZK proof generation is CPU-intensive; run it on a dedicated blocking thread. let tx_inputs = tx_inputs.clone(); - LocalTransactionProver::default().prove(tx_inputs).await + tokio::task::spawn_blocking(move || { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("failed to build tokio runtime") + .block_on(LocalTransactionProver::default().prove(tx_inputs)) + }) + .await + .unwrap_or_else(|e| std::panic::resume_unwind(e.into_panic())) + .map_err(NtxError::Proving) } - .map_err(NtxError::Proving) } /// Submits the transaction to the block producer. From 2f780a4cf9a0aa896c8526d470177cb237135c46 Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 13:13:33 +0100 Subject: [PATCH 04/10] fix(ntx-builder): wrap VM execution and note filtering in spawn_blocking The filter_notes and execute steps call executor.execute_transaction / NoteConsumptionChecker.check_notes_consumability, which are CPU-intensive and may not yield between await points. Move data-store creation + filter + execute onto a dedicated blocking thread via spawn_blocking. Use Handle::current().block_on() (not a new runtime) so that the gRPC data-store callbacks made by the VM are driven by the existing I/O driver on the parent runtime threads. 
--- crates/ntx-builder/src/actor/execute.rs | 43 +++++++++++++++---------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/crates/ntx-builder/src/actor/execute.rs b/crates/ntx-builder/src/actor/execute.rs index edcd898ef5..c13943a479 100644 --- a/crates/ntx-builder/src/actor/execute.rs +++ b/crates/ntx-builder/src/actor/execute.rs @@ -194,25 +194,34 @@ impl NtxContext { async move { Box::pin(async move { - let data_store = NtxDataStore::new( - account, - chain_tip_header, - chain_mmr, - self.store.clone(), - self.script_cache.clone(), - self.db.clone(), - ); - - // Filter notes. let notes = notes.into_iter().map(Note::from).collect::>(); - let (successful_notes, failed_notes) = - self.filter_notes(&data_store, notes).await?; - // Execute transaction. - let executed_tx = Box::pin(self.execute(&data_store, successful_notes)).await?; - - // Collect scripts fetched from the remote store during execution. - let scripts_to_cache = data_store.take_fetched_scripts(); + // VM execution (note filtering + transaction execution) is CPU-intensive and + // may not yield between await points. Run on a dedicated blocking thread, + // using the parent runtime handle so that async data-store callbacks (gRPC + // calls to the store) are driven by the existing I/O driver. 
+ let ctx = self.clone(); + let (executed_tx, failed_notes, scripts_to_cache) = + tokio::task::spawn_blocking(move || { + let data_store = NtxDataStore::new( + account, + chain_tip_header, + chain_mmr, + ctx.store.clone(), + ctx.script_cache.clone(), + ctx.db.clone(), + ); + tokio::runtime::Handle::current().block_on(async { + let (successful_notes, failed_notes) = + ctx.filter_notes(&data_store, notes).await?; + let executed_tx = + Box::pin(ctx.execute(&data_store, successful_notes)).await?; + let scripts_to_cache = data_store.take_fetched_scripts(); + Ok::<_, NtxError>((executed_tx, failed_notes, scripts_to_cache)) + }) + }) + .await + .unwrap_or_else(|e| std::panic::resume_unwind(e.into_panic()))?; // Prove transaction. let tx_inputs: TransactionInputs = executed_tx.into(); From 6f5535a92125e5a71b285c548565e06f3f942220 Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 15:52:09 +0100 Subject: [PATCH 05/10] fix(ntx-builder): capture Handle::current() outside spawn_blocking Move the tokio runtime handle capture to before the spawn_blocking closure so it is explicit that the same runtime is used for execution. --- crates/ntx-builder/src/actor/execute.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ntx-builder/src/actor/execute.rs b/crates/ntx-builder/src/actor/execute.rs index c13943a479..f001883dcd 100644 --- a/crates/ntx-builder/src/actor/execute.rs +++ b/crates/ntx-builder/src/actor/execute.rs @@ -201,6 +201,7 @@ impl NtxContext { // using the parent runtime handle so that async data-store callbacks (gRPC // calls to the store) are driven by the existing I/O driver. 
let ctx = self.clone(); + let handle = tokio::runtime::Handle::current(); let (executed_tx, failed_notes, scripts_to_cache) = tokio::task::spawn_blocking(move || { let data_store = NtxDataStore::new( @@ -211,7 +212,7 @@ impl NtxContext { ctx.script_cache.clone(), ctx.db.clone(), ); - tokio::runtime::Handle::current().block_on(async { + handle.block_on(async { let (successful_notes, failed_notes) = ctx.filter_notes(&data_store, notes).await?; let executed_tx = From 67cfc66a6230d396cb7d9af444921cacfbf7fd6d Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 19:23:08 +0100 Subject: [PATCH 06/10] style: wrap long execute_transaction call to satisfy rustfmt --- crates/validator/src/tx_validation/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/validator/src/tx_validation/mod.rs b/crates/validator/src/tx_validation/mod.rs index f0707ddd71..d33c0934a0 100644 --- a/crates/validator/src/tx_validation/mod.rs +++ b/crates/validator/src/tx_validation/mod.rs @@ -63,7 +63,12 @@ pub async fn validate_transaction( .expect("failed to build tokio runtime") .block_on( executor - .execute_transaction(account.id(), block_header.block_num(), input_notes, tx_args) + .execute_transaction( + account.id(), + block_header.block_num(), + input_notes, + tx_args, + ) .instrument(execute_span), ) }) From 49587f281c1fda104cbf79756302235637135a38 Mon Sep 17 00:00:00 2001 From: Ollie202 Date: Tue, 5 May 2026 19:44:09 +0100 Subject: [PATCH 07/10] chore: add changelog entry for PR #2041 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d2031391a..59a710e028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## v0.14.11 (TBD) +- Replaced blocking-in-async operations in the validator, remote prover, and ntx-builder with `spawn_blocking` to avoid starving the Tokio runtime ([#2041](https://github.com/0xMiden/node/pull/2041)). 
- Implement persistent RocksDB backend for `AccountStateForest`, improving startup time ([#2020](https://github.com/0xMiden/node/pull/2020)). ## v0.14.10 (2026-05-29) From 73d6c8305d7be9fc965b88f9453d5cead55ac942 Mon Sep 17 00:00:00 2001 From: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Wed, 6 May 2026 11:28:40 +0200 Subject: [PATCH 08/10] Fixup error context --- bin/remote-prover/src/server/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/remote-prover/src/server/prover.rs b/bin/remote-prover/src/server/prover.rs index f9287064a6..c51bd639be 100644 --- a/bin/remote-prover/src/server/prover.rs +++ b/bin/remote-prover/src/server/prover.rs @@ -119,7 +119,7 @@ impl ProveRequest for LocalBatchProver { .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove batch"))) }) .await - .map_err(|e| tonic::Status::internal(format!("batch prover task panicked: {e}")))? + .map_err(|e| tonic::Status::internal(e.as_report("batch prover task panicked")))? } } From 13a170c8aa55bf9ab0e80a994125bf1fc2c04ed2 Mon Sep 17 00:00:00 2001 From: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Wed, 6 May 2026 11:28:57 +0200 Subject: [PATCH 09/10] Fixup error context --- bin/remote-prover/src/server/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/remote-prover/src/server/prover.rs b/bin/remote-prover/src/server/prover.rs index c51bd639be..035480c0ac 100644 --- a/bin/remote-prover/src/server/prover.rs +++ b/bin/remote-prover/src/server/prover.rs @@ -137,6 +137,6 @@ impl ProveRequest for LocalBlockProver { .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove block"))) }) .await - .map_err(|e| tonic::Status::internal(format!("block prover task panicked: {e}")))? + .map_err(|e| tonic::Status::internal(e.as_report("block prover task panicked")))? 
} } From 09fdac94c7f76c47a1f34821d407d84f9b88c2cf Mon Sep 17 00:00:00 2001 From: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Wed, 6 May 2026 11:31:54 +0200 Subject: [PATCH 10/10] Fixup --- bin/remote-prover/src/server/prover.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/remote-prover/src/server/prover.rs b/bin/remote-prover/src/server/prover.rs index 035480c0ac..f8c552138f 100644 --- a/bin/remote-prover/src/server/prover.rs +++ b/bin/remote-prover/src/server/prover.rs @@ -119,7 +119,7 @@ impl ProveRequest for LocalBatchProver { .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove batch"))) }) .await - .map_err(|e| tonic::Status::internal(e.as_report("batch prover task panicked")))? + .map_err(|e| tonic::Status::internal(e.as_report_context("batch prover task panicked")))? } } @@ -137,6 +137,6 @@ impl ProveRequest for LocalBlockProver { .map_err(|e| tonic::Status::internal(e.as_report_context("failed to prove block"))) }) .await - .map_err(|e| tonic::Status::internal(e.as_report("block prover task panicked")))? + .map_err(|e| tonic::Status::internal(e.as_report_context("block prover task panicked")))? } }