From eefdb3ee51d0a2189d4bdfd1c6d8cc648db5c459 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 28 May 2026 03:08:27 +0530 Subject: [PATCH] fix(observability): classify list_models 404 as ProviderUserState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `inference/provider/ops.rs::list_models` probes a user-configured custom-provider's `/models` endpoint. When the upstream returns 404 because the user pointed the base URL at something that doesn't host a `/models` listing (wrong base, model-only proxy, typo'd path), the client emits: "provider returned 404: {}" The model-dropdown / connection-test UI already surfaces this failure inline; Sentry has no remediation path. Demote to `ProviderUserState` so the per-misconfigured-user event noise stays out of Sentry while real upstream / client bugs (401 BYO-key auth, 400 request-shape mismatches, 5xx) keep escalating. Anchor: `lower.starts_with("provider returned 404")` — the `"provider returned NNN: "` prefix is emitted ONLY from this one site (verified via grep across `src/`), so the prefix alone is a sufficient anchor. **404 only**: - 401 / 403 → BYO-key auth wall, must stay actionable (#2286 contract preserved). - 400 → typically a client-shape bug worth investigating. - 429 / 5xx → transient / server faults, retried at provider layer. Targets Sentry OPENHUMAN-TAURI-YJ (issue 1207): 4 events between 2026-05-19 and 2026-05-27 across v0.54.0 and v0.56.0, all `domain=rpc method=openhuman.inference_list_models`. Tests pin the verbatim Sentry payload, three additional body-shape variants (FastAPI `{"detail":...}`, bare HTML, post-`truncate_with_ellipsis` clipped body), and a discrimination guard exercising every sibling 4xx / 5xx code from the same emit site to confirm only 404 demotes. --- src/core/observability.rs | 86 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/src/core/observability.rs b/src/core/observability.rs index c68387104..9ffd3775c 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -963,6 +963,33 @@ fn is_provider_user_state_message(lower: &str) -> bool { return true; } + // OPENHUMAN-TAURI-YJ: `inference/provider/ops.rs::list_models` probed a + // user-configured custom-provider's `/models` endpoint and the upstream + // server returned 404. Wire shape emitted at `ops.rs:118-122`: + // + // "provider returned 404: {\"error\":\"path \\\"/api/v1/models\\\" not found\"}" + // + // (the trailing body is whatever the upstream server wrote — `{"error":...}`, + // `{"detail":...}`, bare HTML, etc.; we only anchor on the `provider returned + // 404` prefix). The semantic is unambiguous: the user pointed a custom + // OpenAI-compatible provider at a base URL that does not host a `/models` + // listing endpoint (wrong base, model-only proxy, typo'd path). The model + // dropdown already surfaces the failure inline — Sentry has no remediation. + // + // **404 only**. Other 4xx from the same emit site stay actionable: + // - 401 / 403: BYO-key auth wall — actionable misconfiguration; the + // `does_not_classify_byo_key_provider_401_as_session_expired` contract + // (#2286) intentionally keeps these in Sentry. + // - 400: typically request-shape bugs in OUR client; must escalate. + // - 429 / 5xx: transient — handled by other matchers / retry policy. + // + // No `inference/provider/ops.rs::list_models` other than this site emits + // the `provider returned NNN` prefix (verified via grep), so the prefix + // alone is a sufficient anchor. + if lower.starts_with("provider returned 404") { + return true; + } + false } @@ -3422,6 +3449,65 @@ mod tests { ); } + #[test] + fn classifies_list_models_404_as_provider_user_state() { + // OPENHUMAN-TAURI-YJ: `inference/provider/ops.rs::list_models` probed + // a custom-provider's `/models` endpoint and the upstream server + // returned 404 because the base URL is wrong / doesn't host a models + // listing. User-config state — the model-dropdown probe already + // surfaces it inline. Pin the verbatim Sentry payload plus a few + // body-shape variants (different upstreams emit different 404 bodies) + // so the path-agnostic prefix anchor stays the source of truth. + for raw in [ + // Verbatim shape from the Sentry event. + r#"provider returned 404: {"error":"path \"/api/v1/models\" not found"}"#, + // FastAPI-style: `{"detail":"Not Found"}`. + r#"provider returned 404: {"detail":"Not Found"}"#, + // Bare HTML — happens when the user pointed at a non-API origin + // (e.g. the provider's docs site). + "provider returned 404: Not Found", + // After `truncate_with_ellipsis(.., 300)` clips a longer body — + // prefix anchor must still match. + r#"provider returned 404: {"error":{"message":"The requested URL /api/v1/models was not found on this server. Please check the URL or co…"#, + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::ProviderUserState), + "OPENHUMAN-TAURI-YJ list_models 404 must classify as ProviderUserState: {raw}" + ); + } + } + + #[test] + fn does_not_classify_non_404_list_models_failures_as_user_state() { + // Discrimination guard: only the 404 prefix demotes. Sibling 4xx / + // 5xx codes from the same `provider returned NNN:` emit site must + // stay actionable in Sentry — they map to BYO-key auth walls (401 / + // 403), client-shape bugs (400), and transient / server faults + // (429 / 5xx) respectively. Pinning each shape here protects the + // #2286 BYO-key 401 contract and prevents the arm from silently + // widening to all 4xx. + for raw in [ + // BYO-key auth wall — must still escalate (`does_not_classify_byo_key_provider_401_as_session_expired` sibling guard). + r#"provider returned 401: {"error":"Invalid API key"}"#, + r#"provider returned 403: {"error":"Forbidden: API key revoked"}"#, + // Request-shape mismatch — likely a bug in our client. + r#"provider returned 400: {"error":"Bad Request"}"#, + // Transient — caught by retry/backoff at the provider layer, + // does NOT belong in the user-state bucket. + r#"provider returned 429: {"error":"rate_limited"}"#, + r#"provider returned 503: upstream temporarily unavailable"#, + // 500 — a real upstream bug; must reach Sentry. + r#"provider returned 500: {"error":"internal_server_error"}"#, + ] { + assert_ne!( + expected_error_kind(raw), + Some(ExpectedErrorKind::ProviderUserState), + "non-404 list_models failure must NOT demote to ProviderUserState: {raw}" + ); + } + } + #[test] fn classifies_local_ai_binary_missing_errors() { // OPENHUMAN-TAURI-9N: `local_ai_tts` returns this exact string