diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs
index 6c7ab7205..bb882b9cd 100644
--- a/src/openhuman/inference/ops.rs
+++ b/src/openhuman/inference/ops.rs
@@ -29,6 +29,31 @@ fn is_unknown_provider_user_config(err: &str) -> bool {
     err.contains("no cloud provider with id or slug")
 }
 
+/// Returns `true` when `err`, the message of a failed provider chat attempt,
+/// is classified by `crate::core::observability::expected_error_kind` (that
+/// call returns `Some`): a known user-state or provider-state condition that
+/// already has its own Sentry report or has no remediation path, such as:
+///
+/// - **401 Unauthorized** — API key revoked / wrong key. Already reported by
+///   the provider layer's `api_error` path. An ops-level duplicate adds noise
+///   with no additional context.
+/// - **429 Too Many Requests / rate-limit** — Quota exhaustion. Already
+///   covered by the `is_upstream_rate_limit_message` classifier in
+///   `expected_error_kind`; the reliable-provider layer retries with
+///   backoff before propagating.
+/// - **Model not found** — User selected a model that doesn't exist for
+///   their key. The provider layer already classifies this as a config
+///   rejection (TAURI-RUST-68, ~1,309 events).
+///
+/// The matcher is intentionally broad so the ops-level wrapper stays out
+/// of the Sentry funnel for all provider-state failures — the underlying
+/// call site (`compatible.rs` / `report_error_or_expected`) is already
+/// responsible for the authoritative report. Unclassified failures (5xx,
+/// unexpected payloads, network errors) are NOT matched and still escalate.
+fn is_expected_chat_failure(err: &str) -> bool {
+    crate::core::observability::expected_error_kind(err).is_some()
+}
+
 #[derive(Debug, Clone, serde::Serialize)]
 pub struct InferenceTestProviderModelResult {
     pub reply: String,
@@ -179,7 +204,22 @@ pub async fn inference_test_provider_model(
             output_len = outcome.value.reply.len(),
             "{LOG_PREFIX} test_provider_model:ok"
         ),
-        Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"),
+        Err(err) => {
+            if is_expected_chat_failure(err) {
+                // Expected provider-state / user-config failure (401, 429, model
+                // not found, API key missing, etc.): log at `warn!` only. The
+                // provider layer already emitted its own Sentry event or classified
+                // this as expected, so an ops-level `error!` would be a duplicate.
+                // Targets TAURI-RUST-68 (~1,309 events).
+                warn!(
+                    provider,
+                    error = %err,
+                    "{LOG_PREFIX} test_provider_model:expected-error (no Sentry)"
+                );
+            } else {
+                error!(error = %err, "{LOG_PREFIX} test_provider_model:error");
+            }
+        }
     }
     result
 }
diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs
index f3b4235b8..90ca5440f 100644
--- a/src/openhuman/inference/ops_tests.rs
+++ b/src/openhuman/inference/ops_tests.rs
@@ -316,3 +316,74 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() {
         );
     }
 }
+
+// ── is_expected_chat_failure (TAURI-RUST-68) ─────────────────────────────
+//
+// `inference_test_provider_model` calls `simple_chat` which can fail with
+// known provider-state or user-config conditions (401, 429, model not
+// found). Before this fix every failure escalated to `error!`, which
+// sentry-tracing shipped to Sentry as `"[inference::ops]
+// test_provider_model:error"` — 1,309 events — while the same underlying
+// errors already had their own report or were classified as expected by
+// `expected_error_kind`. The gate demotes them to `warn!` so they stay in
+// local logs but don't generate duplicate Sentry noise.
+//
+// Anchored on the shared `expected_error_kind` classifier so both the unit
+// tests here and the production gate stay in sync with the central
+// suppression logic in `core::observability`.
+
+#[test]
+fn is_expected_chat_failure_matches_api_key_missing() {
+    // Two phrasings of a missing / unconfigured API key must both be demoted.
+    assert!(is_expected_chat_failure("api key not set for openai"));
+    assert!(is_expected_chat_failure(
+        "missing api key: openai_api_key is not configured"
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_matches_rate_limit() {
+    // 429 / rate-limit wording, including a 500 whose body mentions a 429.
+    assert!(is_expected_chat_failure(
+        "openai API error (429 Too Many Requests): You exceeded your current quota"
+    ));
+    assert!(is_expected_chat_failure(
+        "openai API error (500): 429 rate limit exceeded"
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_matches_provider_config_rejection() {
+    // OpenAI-style model-not-found code in error body.
+    assert!(is_expected_chat_failure(
+        r#"custom_openai API error (404 Not Found): {"error":{"message":"The model does not exist or you do not have access","code":"model_not_found"}}"#
+    ));
+    // Temperature-unsupported model (e.g. o1/o3/o4 reasoning models).
+    assert!(is_expected_chat_failure(
+        "custom_openai API error (400 Bad Request): invalid temperature: only 1 is allowed"
+    ));
+    // litellm-style not_found_error envelope.
+    assert!(is_expected_chat_failure(
+        r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'gpt-99' not found","type":"not_found_error"}}"#
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_does_not_match_real_errors() {
+    // Real errors that must still reach Sentry must NOT be demoted.
+    for raw in [
+        // Genuine 500 server error — actionable, must escalate
+        "openai API error (500 Internal Server Error): Something went wrong",
+        // Unexpected JSON from provider — potential provider bug
+        "openai API returned an unexpected chat-completions payload: missing field",
+        // Local I/O error — real infrastructure problem
+        "failed to open config file: permission denied",
+        // Empty string — nothing to classify, so it must not be demoted
+        "",
+    ] {
+        assert!(
+            !is_expected_chat_failure(raw),
+            "must NOT demote real error: {raw:?}"
+        );
+    }
+}