From a1eccf99ea0d9aae04cd91ffa69ac3bd2a50719c Mon Sep 17 00:00:00 2001 From: shanu Date: Thu, 14 May 2026 14:42:39 +0530 Subject: [PATCH 1/3] fix: pre-CEF single-instance mutex guard on Windows + provider retry for 502s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent production fixes: 1. Windows CEF init race (Sentry OPENHUMAN-TAURI-A, 598 events): `tauri_plugin_single_instance` detects duplicate launches inside `.setup()`, which runs AFTER `Builder::build()` triggers `CefRuntime::init` → `cef::initialize()`. On a second launch, `cef::initialize()` returns 0 (primary holds the CEF cache lock) and the vendored runtime asserts `result == 1`, panicking with `assertion left == right failed left: 0 right: 1` (fatal, Windows-only). Added a `#[cfg(windows)]` pre-build named Win32 mutex guard (`com.openhuman.app-cef-init`) at the top of `run()`, mirroring the macOS `cef_preflight::check_default_cache()` pattern. Secondary instances now exit cleanly before touching CEF. Added `Win32_System_Threading` feature to `windows-sys` accordingly. 2. Agent 502 surfacing as fatal (Sentry agent.run_single failed): `create_intelligent_routing_provider` wrapped the backend in a raw `OpenAiCompatibleProvider` with no retry logic. A single transient 502 from the backend bypassed `ReliableProvider` entirely and propagated as a fatal error to `run_single`. Now wraps the raw provider in `ReliableProvider` (same `reliability.provider_retries` / `provider_backoff_ms` config as all other provider paths). --- app/src-tauri/Cargo.toml | 4 +++ app/src-tauri/src/lib.rs | 60 ++++++++++++++++++++++++++++++++++ src/openhuman/providers/ops.rs | 29 +++++++++++++--- 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index 2746b5a352..ee5e81b853 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -160,6 +160,10 @@ windows-sys = { version = "0.59", features = [ # cef::Window internal handle, not the visible Chrome_WidgetWin_1 # top-level frame, so we walk the OS window list ourselves (#1607). "Win32_UI_WindowsAndMessaging", + # CreateMutexW / CloseHandle — used by the pre-CEF single-instance guard + # (see run() in lib.rs) that detects a second launch before CefRuntime::init + # fires (Sentry OPENHUMAN-TAURI-A). + "Win32_System_Threading", ] } [features] diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 8de76f7788..dcdf14b987 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -1404,6 +1404,66 @@ pub fn run() { // Install the ring provider once before any HTTPS client is built. let _ = rustls::crypto::ring::default_provider().install_default(); + // ── Windows pre-CEF single-instance guard (Sentry OPENHUMAN-TAURI-A) ── + // + // `tauri_plugin_single_instance` detects a second launch inside its + // `.setup()` hook — but `.setup()` runs AFTER `Builder::build()` which + // calls `CefRuntime::init` → `cef::initialize()`. On a second launch, + // `cef::initialize()` returns 0 because the primary holds the CEF + // cache lock; the vendored runtime asserts `result == 1` and panics + // (left: 0, right: 1, fatal, Windows-only, 598 events). + // + // Fix: acquire a named Win32 mutex at the very top of `run()` — before + // any CEF or builder work — so any secondary instance sees + // `ERROR_ALREADY_EXISTS` and exits immediately. The mutex name uses + // a `-cef-init` suffix distinct from the plugin's own `-sim` mutex so + // the two guards don't interfere; the plugin still handles WM_COPYDATA + // forwarding for graceful "focus primary" behaviour once the app is + // fully initialised. + // + // The RAII guard holds the mutex handle for the lifetime of `run()`. + // Windows releases all process handles automatically on exit, so + // explicit cleanup is only needed if `run()` returns normally. + #[cfg(windows)] + let _cef_init_mutex_guard = { + use windows_sys::Win32::Foundation::{CloseHandle, ERROR_ALREADY_EXISTS, GetLastError}; + use windows_sys::Win32::System::Threading::CreateMutexW; + + // Must match the bundle identifier in tauri.conf.json. + // Changing the app identifier requires updating this string too. + let mutex_name: Vec = "com.openhuman.app-cef-init\0" + .encode_utf16() + .collect(); + + // SAFETY: mutex_name is null-terminated UTF-16; handle is checked below. + let handle = unsafe { CreateMutexW(std::ptr::null(), 0, mutex_name.as_ptr()) }; + + if unsafe { GetLastError() } == ERROR_ALREADY_EXISTS { + // Another instance is already past this point — exit before we + // touch CEF at all. The plugin's WM_COPYDATA path won't run + // here (it needs an AppHandle from setup()), but the primary + // is already showing its window so the user experience is fine. + if !handle.is_null() { + unsafe { CloseHandle(handle) }; + } + log::info!( + "[single-instance] pre-CEF mutex held by primary; secondary exiting (OPENHUMAN-TAURI-A fix)" + ); + std::process::exit(0); + } + + // Primary: hold the handle until run() returns. + struct OwnedMutex(isize); + impl Drop for OwnedMutex { + fn drop(&mut self) { + if self.0 != 0 { + unsafe { CloseHandle(self.0 as _) }; + } + } + } + OwnedMutex(handle as isize) + }; + // CEF cache-lock preflight (macOS only): if another OpenHuman instance // is already holding the CEF user-data-dir, the vendored // `tauri-runtime-cef` panics inside `cef::initialize` with a Rust diff --git a/src/openhuman/providers/ops.rs b/src/openhuman/providers/ops.rs index ec62b44243..fd314f0a17 100644 --- a/src/openhuman/providers/ops.rs +++ b/src/openhuman/providers/ops.rs @@ -404,15 +404,34 @@ pub fn create_intelligent_routing_provider( config: &crate::openhuman::config::Config, options: &ProviderRuntimeOptions, ) -> anyhow::Result> { - let backend = create_backend_inference_provider(inference_url, backend_url, api_key, options)?; + let raw_backend = + create_backend_inference_provider(inference_url, backend_url, api_key, options)?; + // Wrap the raw backend in ReliableProvider so transient 502/503/504 errors + // are retried before propagating to the agent turn. Without this, a single + // 502 from the backend bypasses the retry layer entirely and surfaces as a + // fatal `run_single` failure. + log::debug!( + "[providers] initialising reliable wrapper: retries={} backoff_ms={} fallbacks={}", + config.reliability.provider_retries, + config.reliability.provider_backoff_ms, + config.reliability.model_fallbacks.len() + ); + let reliable_backend: Box = Box::new( + reliable::ReliableProvider::new( + vec![(INFERENCE_BACKEND_ID.to_string(), raw_backend)], + config.reliability.provider_retries, + config.reliability.provider_backoff_ms, + ) + .with_model_fallbacks(config.reliability.model_fallbacks.clone()), + ); let default_model = config .default_model .as_deref() .unwrap_or(crate::openhuman::config::DEFAULT_MODEL); // When the user has configured `model_routes` (custom provider via - // BackendProviderPanel), wrap the remote in a RouterProvider so abstract - // tier names like `reasoning-v1` get translated to the configured + // BackendProviderPanel), wrap the reliable remote in a RouterProvider so + // abstract tier names like `reasoning-v1` get translated to the configured // provider-specific model id (e.g. `gpt-5.5`) BEFORE the request leaves // the host. Without this step the abstract tier name would reach // `custom_openai` and 404. The OpenHuman backend can dispatch tier names @@ -424,10 +443,10 @@ pub fn create_intelligent_routing_provider( inference_url.is_some() ); let remote: Box = if config.model_routes.is_empty() { - backend + reliable_backend } else { let providers: Vec<(String, Box)> = - vec![(INFERENCE_BACKEND_ID.to_string(), backend)]; + vec![(INFERENCE_BACKEND_ID.to_string(), reliable_backend)]; let routes: Vec<(String, router::Route)> = config .model_routes .iter() From 5c68f15c743293eaa7dfcd6e75ead7983a25b9b4 Mon Sep 17 00:00:00 2001 From: shanu Date: Thu, 14 May 2026 14:43:28 +0530 Subject: [PATCH 2/3] chore: cargo fmt fixes for pre-CEF mutex guard --- app/src-tauri/src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index dcdf14b987..e68f5dd4b3 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -1426,14 +1426,12 @@ pub fn run() { // explicit cleanup is only needed if `run()` returns normally. #[cfg(windows)] let _cef_init_mutex_guard = { - use windows_sys::Win32::Foundation::{CloseHandle, ERROR_ALREADY_EXISTS, GetLastError}; + use windows_sys::Win32::Foundation::{CloseHandle, GetLastError, ERROR_ALREADY_EXISTS}; use windows_sys::Win32::System::Threading::CreateMutexW; // Must match the bundle identifier in tauri.conf.json. // Changing the app identifier requires updating this string too. - let mutex_name: Vec = "com.openhuman.app-cef-init\0" - .encode_utf16() - .collect(); + let mutex_name: Vec = "com.openhuman.app-cef-init\0".encode_utf16().collect(); // SAFETY: mutex_name is null-terminated UTF-16; handle is checked below. let handle = unsafe { CreateMutexW(std::ptr::null(), 0, mutex_name.as_ptr()) }; From ff2457c53c75dc687a15142c8c48d98a33cdfa62 Mon Sep 17 00:00:00 2001 From: shanu Date: Thu, 14 May 2026 15:34:32 +0530 Subject: [PATCH 3/3] fix(windows): add Win32_Security feature for CreateMutexW in windows-sys 0.59 CreateMutexW's SECURITY_ATTRIBUTES parameter is individually gated behind the Win32_Security feature in windows-sys 0.59 in addition to the module-level Win32_System_Threading gate. Without it the Windows E2E build fails with "no `CreateMutexW` in `Win32::System::Threading`". --- app/src-tauri/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index ee5e81b853..7b8bf2d0ec 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -163,7 +163,10 @@ windows-sys = { version = "0.59", features = [ # CreateMutexW / CloseHandle — used by the pre-CEF single-instance guard # (see run() in lib.rs) that detects a second launch before CefRuntime::init # fires (Sentry OPENHUMAN-TAURI-A). + # Win32_Security is required because CreateMutexW's SECURITY_ATTRIBUTES + # parameter is gated behind it in windows-sys 0.59. "Win32_System_Threading", + "Win32_Security", ] } [features]