From 74f36de5c5cd192c4387453063d2cc6071b24349 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Wed, 20 May 2026 13:50:53 +0530 Subject: [PATCH 1/2] fix(observability): demote 11 custom-provider config-rejection shapes (Wave 4 Lane O) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend `is_provider_config_rejection_message` PHRASES with 8 new substrings covering wire shapes that the Wave 1-3 phrases miss: - `not available in your region` — R1 (region block) - `modelnotallowed` — R4 (Doubao/ChatGLM allowlist) - `invalid_authentication_error` — YC (user key rejected by upstream) - `requires more credits` — S5 (OpenRouter 402 out-of-credits) - `invalid model name passed in model=` — Y0 (litellm proxy pre-routing reject) - `no active credentials for provider` — JN + KB (upstream API key gap) - `litellm.badrequesterror` — JK (github_copilot OAuth gap) - `not_found_error` — J2 + J5 + J4 (litellm envelope `type`) Each is a deterministic user-state error (wrong model, wrong region, bad key, out of credits, missing OAuth scope) — the reliable-provider stack already falls back to OpenHuman's hosted tier, so the UX is intact; only the Sentry spam was leaking. Closes ~250 events across 11 issue IDs. Pinned tests against the literal Sentry event bodies from each ID so a future provider rename doesn't silently un-classify them. 
Closes OPENHUMAN-TAURI-R1 Closes OPENHUMAN-TAURI-R4 Closes OPENHUMAN-TAURI-YC Closes OPENHUMAN-TAURI-S5 Closes OPENHUMAN-TAURI-Y0 Closes OPENHUMAN-TAURI-JN Closes OPENHUMAN-TAURI-KB Closes OPENHUMAN-TAURI-JK Closes OPENHUMAN-TAURI-J2 Closes OPENHUMAN-TAURI-J5 Closes OPENHUMAN-TAURI-J4 --- .../inference/provider/config_rejection.rs | 133 +++++++++++++++++- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/src/openhuman/inference/provider/config_rejection.rs b/src/openhuman/inference/provider/config_rejection.rs index 542ef8723b..c64eaefa4c 100644 --- a/src/openhuman/inference/provider/config_rejection.rs +++ b/src/openhuman/inference/provider/config_rejection.rs @@ -15,15 +15,32 @@ //! Moonshot Kimi K2) //! - `"The model \`gpt-5.5\` does not exist or you do not have access to //! it."` / `"model_not_found"` (stale model pin) +//! - `"This model is not available in your region."` (R1 — region-blocked +//! model on a custom cloud provider) +//! - `"ModelNotAllowed"` (R4 — Doubao/ChatGLM model-allowlist enforcement) +//! - `"invalid_authentication_error"` (YC — user pasted a malformed / +//! revoked API key into the provider config) +//! - `"This request requires more credits"` (S5 — OpenRouter `402` when +//! the user's account is out of credits) +//! - `"Invalid model name passed in model="` (Y0 — litellm-style proxy +//! rejecting a model id pre-routing) +//! - `"No active credentials for provider:"` (JN / KB — user hasn't +//! plugged in their API key for the selected provider yet) +//! - `"litellm.BadRequestError"` (JK — litellm github_copilot proxy 400 +//! from a user OAuth/scope gap) +//! - `"not_found_error"` (J2 / J5 / J4 — litellm-compatible envelope +//! `type` field carrying "model 'X' not found") //! //! These are **deterministic user-configuration state**, not bugs the //! maintainers can act on: the user pointed OpenHuman at a custom -//! provider with a model / temperature that provider does not accept. The -//! 
remediation is "fix the model or routing in Settings", which the UI -//! surfaces. Yet every agent turn produces a fresh Sentry event -//! (OPENHUMAN-TAURI-WJ / -QW / -HB / -NH — 88 + 146 + 39 events). This is -//! the same class as budget-exhaustion ([`super::billing_error`]) and -//! must be demoted from Sentry to an info log the same way. +//! provider with a model / temperature / region / credential that +//! provider does not accept. The remediation is "fix the model, key, or +//! routing in Settings", which the UI surfaces. Yet every agent turn +//! produces a fresh Sentry event (OPENHUMAN-TAURI-WJ / -QW / -HB / -NH / +//! -R1 / -R4 / -YC / -S5 / -Y0 / -JN / -KB / -JK / -J2 / -J5 / -J4 — +//! ~250 additional events on top of the Wave 1-3 IDs). This is the +//! same class as budget-exhaustion ([`super::billing_error`]) and must +//! be demoted from Sentry to an info log the same way. //! //! ## Provider-aware polarity (important) //! @@ -72,6 +89,53 @@ pub fn is_provider_config_rejection_message(body: &str) -> bool { // Our own actionable error once a proper tier→model resolution // is in place (keeps this classifier stable across that fix). "is an abstract tier", + // OPENHUMAN-TAURI-R1 — custom_openai upstream 403 with body + // `{"error":{"message":"This model is not available in your region.","code":403}}`. + // User picked a model the provider blocks for their account's + // region. Sentry has no remediation; user must switch model. + "not available in your region", + // OPENHUMAN-TAURI-R4 — Doubao / ChatGLM-style model allowlist + // enforcement. Body: `{"reason":"ModelNotAllowed",...}`. Match + // lowercased — the provider sends the camelCase token as a + // sentinel `reason` value. + "modelnotallowed", + // OPENHUMAN-TAURI-YC — user-supplied custom_openai API key was + // rejected by upstream with the OpenAI-compatible + // `{"error":{"type":"invalid_authentication_error",...}}` + // envelope. 
Anchored on the type token (stable across providers + // that emit this OpenAI-compatible body). + "invalid_authentication_error", + // OPENHUMAN-TAURI-S5 — OpenRouter 402 when the user is out of + // credits. Body always carries "requires more credits, or fewer + // max_tokens"; pin to the unique-enough credits phrase. (The + // separate `billing_error` classifier handles our own + // OpenHuman-backend balance gate; this catches the third-party + // OpenRouter shape that re-emits via `agent.run_single`.) + "requires more credits", + // OPENHUMAN-TAURI-Y0 — litellm-style proxy rejected the model + // id pre-routing with `Invalid model name passed in model=…`. + // Anchored on the `passed in model=` suffix so a stray "invalid + // model name" log line elsewhere does not classify. + "invalid model name passed in model=", + // OPENHUMAN-TAURI-JN / -KB — custom provider proxy that fronts + // multiple upstream APIs surfaces a "you haven't configured the + // upstream provider yet" 401/404 as `{"error":{"message":"No + // active credentials for provider: openai",...}}`. The + // remediation is "add the upstream API key in Settings". + "no active credentials for provider", + // OPENHUMAN-TAURI-JK — litellm github_copilot proxy 400 driven + // by the user's missing / expired Copilot OAuth scope. The body + // always starts with the `litellm.BadRequestError:` envelope. + // Anchor to that prefix-shaped substring so we don't catch + // unrelated 400s that merely mention litellm in passing. + "litellm.badrequesterror", + // OPENHUMAN-TAURI-J2 / -J5 / -J4 — litellm-compatible + // envelope with `"type":"not_found_error"` carrying "model 'X' + // not found". Distinct from the existing `model_not_found` + // phrase: that's the `code` field used by OpenAI-native bodies; + // this is the `type` field used by litellm/Anthropic-style + // envelopes for the same class of user-state error. 
+ "not_found_error", ]; let lower = body.to_ascii_lowercase(); @@ -102,6 +166,63 @@ mod tests { } } + #[test] + fn detects_wave4_sentry_bodies() { + // Real wire bodies pulled from the OPENHUMAN-TAURI-* Sentry + // events the Wave 4 phrases drop. + for (sentry_id, body) in [ + ( + "R1", + r#"custom_openai API error (403 Forbidden): {"error":{"message":"This model is not available in your region.","code":403}}"#, + ), + ( + "R4", + r#"custom_openai API error (403 Forbidden): {"code":403,"reason":"ModelNotAllowed","message":"模型不允许访问","metadata":{"request_id":"2026051706431574423265420620337"}}"#, + ), + ( + "YC", + r#"custom_openai API error (401 Unauthorized): {"error":{"message":"Invalid Authentication","type":"invalid_authentication_error"}}"#, + ), + ( + "S5", + r#"custom_openai API error (402 Payment Required): {"error":{"message":"This request requires more credits, or fewer max_tokens. You requested up to 65536 tokens, but can only afford 597.","type":"insufficient_credits"}}"#, + ), + ( + "Y0", + r#"custom_openai API error (400 Bad Request): {"error":{"message":"{'error': '/chat/completions: Invalid model name passed in model=reasoning-v1. Call `/v1/models` to view available models for your key.'}","type":"None"}}"#, + ), + ( + "JN", + r#"custom_openai Responses API error: {"error":{"message":"No active credentials for provider: openai","type":"invalid_request_error","code":"model_not_found"}}"#, + ), + ( + "KB", + r#"OpenHuman API error (404 Not Found): {"error":{"message":"No active credentials for provider: openai","type":"invalid_request_error","code":"model_not_found"}}"#, + ), + ( + "JK", + r#"custom_openai API error (400 Bad Request): {"error":{"message":"litellm.BadRequestError: Github_copilotException - Bad Request. 
Received Model Group=github_copilot/claude-haiku-4.5\nAvailable Model Group Fallbacks=None","type":null}}"#, + ), + ( + "J2", + r#"custom_openai Responses API error: {"error":{"message":"model 'llama3.3' not found","type":"not_found_error","param":null,"code":null}}"#, + ), + ( + "J5", + r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'llama3.3' not found","type":"not_found_error","param":null,"code":null}}"#, + ), + ( + "J4", + r#"custom_openai streaming API error (404 Not Found): {"error":{"message":"model 'llama3.3' not found","type":"not_found_error","param":null,"code":null}}"#, + ), + ] { + assert!( + is_provider_config_rejection_message(body), + "OPENHUMAN-TAURI-{sentry_id} body must classify as provider config-rejection: {body:?}" + ); + } + } + #[test] fn detection_is_case_insensitive() { assert!(is_provider_config_rejection_message( From e16414ac6a73659c24e2994819345695602e70c6 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Wed, 20 May 2026 13:51:05 +0530 Subject: [PATCH 2/2] fix(observability): classify socket transport wire-shape variants (Wave 4 Lane N) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend `is_network_unreachable_message` with three substring arms for wire shapes the existing `dns error` / status-bearing matchers miss: - `failed to lookup address` — libc `getaddrinfo()` rendering when tungstenite wraps the resolver fail as an `IO error` without the `dns error` prefix (OPENHUMAN-TAURI-44 ~50 events). - `nodename nor servname` — companion phrase from the macOS/BSD libc resolver — same OPENHUMAN-TAURI-44 wire shape, second anchor. - `http error: 200 ok` — tungstenite's `WsError::Http(200)` rendering when a captive portal / corporate proxy intercepts the WS upgrade handshake and returns a plain HTML 200 page (OPENHUMAN-TAURI-4P ~66 events). Tungstenite-only — reqwest renders HTTP 200 as `HTTP status server error (200)` so there is no collision with the regular HTTP path. 
A precedence test (`http_200_classifier_does_not_silence_unrelated_log_lines`) pins the substring against benign `HTTP/1.1 200 OK` / `status: 200 OK` prose so a future broadening does not silence success traces. Sentry has no remediation path for any of these — the user must change their network (firewall / proxy / DNS). Closes ~116 additional events. Closes OPENHUMAN-TAURI-44 Closes OPENHUMAN-TAURI-4P --- src/core/observability.rs | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/core/observability.rs b/src/core/observability.rs index 2b285dcd2c..cd74790af7 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -296,15 +296,34 @@ fn is_loopback_unavailable(lower: &str) -> bool { /// through [`is_loopback_unavailable`] *before* this matcher so the /// boot-window race against the embedded core keeps its own bucket — see /// the precedence comment in [`expected_error_kind`]. +/// +/// Three additional substrings cover wire-shape variants observed in +/// Wave 4 that the original `"dns error"` / status-code matchers miss: +/// +/// - `"failed to lookup address"` / `"nodename nor servname"` — +/// `getaddrinfo()` failure renderings on macOS / BSD libc and POSIX +/// resolvers (`OPENHUMAN-TAURI-44` ~50 events, +/// `[socket] Connection failed: WebSocket connect: IO error: failed to +/// lookup address information: nodename nor servname provided, or not +/// known`). +/// - `"http error: 200 ok"` — tungstenite's `WsError::Http(200)` render +/// when a corporate proxy / captive portal intercepts the WebSocket +/// handshake and returns a plain HTML 200 page (`OPENHUMAN-TAURI-4P` +/// ~66 events). Tungstenite-only — reqwest only reports a status +/// error for 4xx/5xx responses, so this can't collide with the +/// regular HTTP call path. 
fn is_network_unreachable_message(lower: &str) -> bool { lower.contains("error sending request for url") || lower.contains("dns error") + || lower.contains("failed to lookup address") + || lower.contains("nodename nor servname") || lower.contains("connection refused") || lower.contains("connection reset") || lower.contains("network is unreachable") || lower.contains("no route to host") || lower.contains("tls handshake") || lower.contains("certificate verify failed") + || lower.contains("http error: 200 ok") } /// Detect transient upstream HTTP failures that have bubbled up out of the @@ -1243,6 +1262,51 @@ mod tests { ); } + #[test] + fn classifies_wave4_socket_transport_wire_shapes() { + // OPENHUMAN-TAURI-44 (~50 events): libc `getaddrinfo()` rendering + // without the `dns error` token, wrapped by the socket emit site. + // The Wave 4 matcher arms catch the literal resolver phrases that + // the original `dns error` substring would miss when reqwest's + // wrapper isn't in the chain (e.g. tungstenite IO errors). + assert_eq!( + expected_error_kind( + "[socket] Connection failed (sustained outage after 5 attempts): \ + WebSocket connect: IO error: failed to lookup address information: \ + nodename nor servname provided, or not known" + ), + Some(ExpectedErrorKind::NetworkUnreachable) + ); + + // OPENHUMAN-TAURI-4P (~66 events): tungstenite renders a captive + // portal / corporate proxy that intercepts the WS handshake as + // `WsError::Http(200)` → `"HTTP error: 200 OK"`. Classify as + // network-unreachable since no amount of app-side retry can pierce + // an intercepting proxy. 
+ assert_eq!( + expected_error_kind( + "[socket] Connection failed (sustained outage after 5 attempts): \ + WebSocket connect: HTTP error: 200 OK" + ), + Some(ExpectedErrorKind::NetworkUnreachable) + ); + } + + #[test] + fn http_200_classifier_does_not_silence_unrelated_log_lines() { + // The captive-portal arm anchors on `"http error: 200 ok"` (the + // exact tungstenite `WsError::Http(200)` Display rendering). + // Adjacent non-WebSocket log lines that mention `"HTTP/1.1 200 OK"` + // or `"status: 200 OK"` MUST NOT classify — those are normal-flow + // success traces, not failure events. Pin this precedence so a + // future refactor doesn't broaden the substring. + assert_eq!(expected_error_kind("HTTP/1.1 200 OK"), None); + assert_eq!( + expected_error_kind("upstream returned status: 200 OK after retry"), + None + ); + } + #[test] fn classifies_transient_upstream_http_errors() { // OPENHUMAN-TAURI-5Z: the canonical shape emitted by