Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion src/openhuman/inference/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,31 @@ fn is_unknown_provider_user_config(err: &str) -> bool {
err.contains("no cloud provider with id or slug")
}

/// Reports whether a provider chat error message is an *expected* failure
/// that should be logged locally instead of escalated.
///
/// This is a thin wrapper: the decision is delegated entirely to
/// `crate::core::observability::expected_error_kind`, and the result is
/// `true` exactly when that classifier returns `Some(_)` for `err`. Keeping
/// the gate anchored on the shared classifier means the ops layer cannot
/// drift from the central suppression rules.
///
/// Per the accompanying unit tests, the classifier is expected to accept:
///
/// - **Missing API key** — e.g. `"api key not set for openai"`.
/// - **Rate limiting / quota exhaustion** — 429-style messages.
/// - **Provider config rejections** — model not found, unsupported
///   temperature, `not_found_error` envelopes (TAURI-RUST-68).
///
/// and to reject genuine failures (5xx responses, unexpected payloads,
/// local I/O errors, the empty string), which must keep escalating.
///
/// NOTE(review): the original rationale also lists 401 Unauthorized as a
/// matched case; no test in view covers it — confirm against
/// `expected_error_kind`.
fn is_expected_chat_failure(err: &str) -> bool {
    let classification = crate::core::observability::expected_error_kind(err);
    classification.is_some()
}

#[derive(Debug, Clone, serde::Serialize)]
pub struct InferenceTestProviderModelResult {
pub reply: String,
Expand Down Expand Up @@ -179,7 +204,22 @@ pub async fn inference_test_provider_model(
output_len = outcome.value.reply.len(),
"{LOG_PREFIX} test_provider_model:ok"
),
Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"),
Err(err) => {
if is_expected_chat_failure(err) {
// Provider-state / user-config failure (401, 429, model not
// found, API key missing, etc.). The underlying provider
// layer already emitted its own Sentry event or classified
// this as expected. An ops-level duplicate adds noise.
// Targets TAURI-RUST-68 (~1,309 events).
warn!(
provider,
error = %err,
"{LOG_PREFIX} test_provider_model:expected-error (no Sentry)"
);
} else {
error!(error = %err, "{LOG_PREFIX} test_provider_model:error");
}
}
}
result
}
Expand Down
71 changes: 71 additions & 0 deletions src/openhuman/inference/ops_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,3 +316,74 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() {
);
}
}

// ── is_expected_chat_failure (TAURI-RUST-68) ─────────────────────────────
//
// `inference_test_provider_model` calls `simple_chat` which can fail with
// known provider-state or user-config conditions (401, 429, model not
// found). Before this fix every failure escalated to `error!`, which
// sentry-tracing shipped to Sentry as `"[inference::ops]
// test_provider_model:error"` — 1,309 events — while the same underlying
// errors already had their own report or were classified as expected by
// `expected_error_kind`. The gate demotes them to `warn!` so they stay in
// local logs but don't generate duplicate Sentry noise.
//
// Anchored on the shared `expected_error_kind` classifier so both the unit
// tests here and the production gate stay in sync with the central
// suppression logic in `core::observability`.

#[test]
fn is_expected_chat_failure_matches_api_key_missing() {
    // Two phrasings of the "no API key configured" condition; both must be
    // treated as expected user-config failures.
    let key_not_set = "api key not set for openai";
    let key_missing = "missing api key: openai_api_key is not configured";

    assert!(is_expected_chat_failure(key_not_set));
    assert!(is_expected_chat_failure(key_missing));
}

#[test]
fn is_expected_chat_failure_matches_rate_limit() {
    // A 429 status line carrying a quota-exhausted body.
    let quota_exhausted =
        "openai API error (429 Too Many Requests): You exceeded your current quota";
    // A rate-limit phrase that only appears in the body of an error
    // reported with a 500 status.
    let rate_limit_in_body = "openai API error (500): 429 rate limit exceeded";

    assert!(is_expected_chat_failure(quota_exhausted));
    assert!(is_expected_chat_failure(rate_limit_in_body));
}

#[test]
fn is_expected_chat_failure_matches_provider_config_rejection() {
    // 404 whose JSON body carries the OpenAI-style `model_not_found` code.
    let model_not_found = r#"custom_openai API error (404 Not Found): {"error":{"message":"The model does not exist or you do not have access","code":"model_not_found"}}"#;
    // 400 raised when the selected model rejects the requested temperature
    // (e.g. o1/o3/o4 reasoning models).
    let temperature_rejected =
        "custom_openai API error (400 Bad Request): invalid temperature: only 1 is allowed";
    // 404 wrapped in a litellm-style `not_found_error` envelope.
    let litellm_not_found = r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'gpt-99' not found","type":"not_found_error"}}"#;

    assert!(is_expected_chat_failure(model_not_found));
    assert!(is_expected_chat_failure(temperature_rejected));
    assert!(is_expected_chat_failure(litellm_not_found));
}

#[test]
fn is_expected_chat_failure_does_not_match_real_errors() {
    // Negative fixtures: failures that must keep escalating and therefore
    // must never be classified as expected.
    let must_escalate: [&str; 4] = [
        // Genuine server-side 500: actionable.
        "openai API error (500 Internal Server Error): Something went wrong",
        // Malformed provider response: potential provider bug.
        "openai API returned an unexpected chat-completions payload: missing field",
        // Local I/O failure: real infrastructure problem.
        "failed to open config file: permission denied",
        // Empty message: nothing to classify, so it falls through.
        "",
    ];

    for raw in must_escalate {
        let demoted = is_expected_chat_failure(raw);
        assert!(!demoted, "must NOT demote real error: {raw:?}");
    }
}
Loading