From 4178444a6054bda88b02753eff584f1d6220f7cd Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Sat, 30 May 2026 20:48:53 +0200 Subject: [PATCH] fix: stack overflow when compiling rules if YARA-X uses `musl` (#666) When using musl instead of glibc, the default stack size for threads can be very small (typically 128 KB), which is insufficient for the deep call stacks required by Wasmtime/Cranelift during WebAssembly compilation. To avoid stack overflow crashes, on musl targets we compile the WebAssembly module in a separate thread with a guaranteed 8 MB stack size; on other targets compilation still happens on the calling thread. To make this possible, the native runtime now wraps `wasmtime::Module` in its own `Module` newtype. As a side effect, `Module::deserialize` becomes a safe function in both `common.rs` and `native.rs`; the `unsafe` call into Wasmtime is kept inside the native wrapper, so callers must still only pass bytes produced by a trusted `Module::serialize`. --- lib/Cargo.toml | 2 +- lib/src/compiler/rules.rs | 4 +-- lib/src/wasm/runtime/common.rs | 2 +- lib/src/wasm/runtime/native.rs | 57 +++++++++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a2c5a25bb..cf95e60db 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -104,7 +104,7 @@ parallel-compilation = ["wasmtime/parallel-compilation"] # compiled directly to native host machine instructions at runtime. This yields # maximum execution performance but requires executable memory pages (which may # be restricted in sandbox/secure environments) and is limited to architectures -#natively supported by Cranelift. +# natively supported by Cranelift. # # Enabling the `pulley` feature tells wasmtime to interpret the conditions using # the portable Pulley bytecode instruction virtual machine instead. 
This diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index c532169a3..97232d6c0 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -621,9 +621,7 @@ where { let bytes: Option<&[u8]> = Deserialize::deserialize(deserializer)?; let module = if let Some(bytes) = bytes { - unsafe { - wasm::runtime::Module::deserialize(wasm::get_engine(), bytes).ok() - } + wasm::runtime::Module::deserialize(wasm::get_engine(), bytes).ok() } else { None }; diff --git a/lib/src/wasm/runtime/common.rs b/lib/src/wasm/runtime/common.rs index 86279e653..a6b935545 100644 --- a/lib/src/wasm/runtime/common.rs +++ b/lib/src/wasm/runtime/common.rs @@ -793,7 +793,7 @@ impl Module { /// /// Custom runtimes simply rebuild the module from raw WASM bytes, while /// the native runtime preserves Wasmtime's unsafe deserialization API. - pub unsafe fn deserialize(engine: &Engine, bytes: &[u8]) -> Result { + pub fn deserialize(engine: &Engine, bytes: &[u8]) -> Result { Self::from_binary(engine, bytes) } } diff --git a/lib/src/wasm/runtime/native.rs b/lib/src/wasm/runtime/native.rs index a9152c2cb..036505658 100644 --- a/lib/src/wasm/runtime/native.rs +++ b/lib/src/wasm/runtime/native.rs @@ -3,17 +3,64 @@ //! This adapter exists only to normalize a couple of APIs so the rest of the //! crate can talk to native and custom runtimes through the same interface. +use std::mem::transmute; + use crate::errors::SerializationError; use anyhow::anyhow; -use std::mem::transmute; + pub use wasmtime::Caller; + /// Wasmtime types re-exported by the native runtime. 
pub(crate) use wasmtime::{ AsContext, AsContextMut, Config, Engine, Extern, FuncType, Global, - GlobalType, Instance, Memory, MemoryType, Module, Mutability, OptLevel, - Store, TypedFunc, Val, ValRaw, ValType, + GlobalType, Instance, Memory, MemoryType, Mutability, OptLevel, Store, + TypedFunc, Val, ValRaw, ValType, }; +#[derive(Clone)] +pub(crate) struct Module(wasmtime::Module); + +impl Module { + pub fn from_binary( + engine: &Engine, + binary: &[u8], + ) -> wasmtime::Result { + if cfg!(target_env = "musl") { + // Under musl, the default stack size for threads can be very small + // (typically 128 KB), which is insufficient for the deep call stacks + // required by Wasmtime/Cranelift during WebAssembly compilation. + // To avoid stack overflow crashes, we compile the WebAssembly module + // in a separate thread with a guaranteed 8 MB stack size. + std::thread::scope(|s| { + std::thread::Builder::new() + .name("yara-x-wasm-compiler".to_string()) + .stack_size(8 * 1024 * 1024) // 8MB stack size + .spawn_scoped(s, || { + wasmtime::Module::from_binary(engine, binary) + .map(Module) + }) + .unwrap() + .join() + .unwrap() + }) + } else { + wasmtime::Module::from_binary(engine, binary).map(Module) + } + } + + pub fn deserialize( + engine: &Engine, + bytes: impl AsRef<[u8]>, + ) -> wasmtime::Result { + unsafe { wasmtime::Module::deserialize(engine, bytes).map(Module) } + } + + #[allow(dead_code)] + pub fn serialize(&self) -> wasmtime::Result> { + self.0.serialize() + } +} + /// Thin wrapper around [`wasmtime::Linker`] with a backend-neutral API. pub(crate) struct Linker(wasmtime::Linker); @@ -28,7 +75,7 @@ pub(crate) type TrampolineResult = wasmtime::Result<()>; impl Linker { /// Creates a new linker. 
- pub fn new(engine: &Engine) -> Self { + pub fn new(engine: &wasmtime::Engine) -> Self { Self(wasmtime::Linker::new(engine)) } @@ -80,7 +127,7 @@ impl Linker { module: &Module, ) -> Result { self.0 - .instantiate(store, module) + .instantiate(store, &module.0) .map_err(|e| SerializationError::InvalidWASM(anyhow!(e))) } }