tinyhumansai · graycyrus · May 29, 2026 · May 29, 2026
@@ -29,6 +29,31 @@ fn is_unknown_provider_user_config(err: &str) -> bool {
     err.contains("no cloud provider with id or slug")
 }
 
+/// Returns `true` when the error from a provider chat attempt is a known,
+/// expected user-state or provider-state condition. This is a thin wrapper:
+/// it is `true` exactly when `core::observability::expected_error_kind`
+/// returns `Some`, so the set of matched errors is owned by that classifier.
+///
+/// - **401 Unauthorized** — API key revoked / wrong key. Already reported by
+///   the provider layer's `api_error` path. NOTE(review): no test visible in
+///   this change exercises a 401 message; confirm the classifier covers it.
+/// - **Missing API key** — no key configured for the selected provider.
+/// - **429 Too Many Requests / rate-limit** — Quota exhaustion. Already
+///   covered by the `is_upstream_rate_limit_message` classifier in
+///   `expected_error_kind`; the reliable-provider layer retries with
+///   backoff before propagating.
+/// - **Model not found / rejected parameter** — User selected a model that
+///   doesn't exist for their key, or one that rejects the request settings.
+///   Classified as a config rejection (TAURI-RUST-68, ~1,309 events).
+///
+/// The matcher is intentionally broad so the ops-level wrapper stays out
+/// of the Sentry funnel; the underlying call site (`compatible.rs` /
+/// `report_error_or_expected`) owns the authoritative report. Failures the
+/// classifier does not recognize (plain 5xx, bad payloads) still escalate.
+fn is_expected_chat_failure(err: &str) -> bool {
+    crate::core::observability::expected_error_kind(err).is_some()
+}
+
 #[derive(Debug, Clone, serde::Serialize)]
 pub struct InferenceTestProviderModelResult {
     pub reply: String,
@@ -179,7 +204,22 @@ pub async fn inference_test_provider_model(
             output_len = outcome.value.reply.len(),
             "{LOG_PREFIX} test_provider_model:ok"
         ),
-        Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"),
+        Err(err) => {
+            if is_expected_chat_failure(err) {
+                // Provider-state / user-config failure (401, 429, model not
+                // found, API key missing, etc.). The underlying provider
+                // layer already emitted its own Sentry event or classified
+                // this as expected. An ops-level duplicate adds noise.
+                // Targets TAURI-RUST-68 (~1,309 events).
+                warn!(
+                    provider,
+                    error = %err,
+                    "{LOG_PREFIX} test_provider_model:expected-error (no Sentry)"
+                );
+            } else {
+                error!(error = %err, "{LOG_PREFIX} test_provider_model:error");
+            }
+        }
     }
     result
 }

@@ -316,3 +316,74 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() {
         );
     }
 }
+
+// ── is_expected_chat_failure (TAURI-RUST-68) ─────────────────────────────
+//
+// `inference_test_provider_model` calls `simple_chat`, which can fail with
+// known provider-state or user-config conditions (401, 429, model not
+// found, missing API key). Before this fix every failure escalated to
+// `error!`, which sentry-tracing shipped to Sentry as `"[inference::ops]
+// test_provider_model:error"` — 1,309 events — even though the same
+// underlying errors already had their own report or were classified as
+// expected by `expected_error_kind`. The gate demotes them to `warn!` so
+// they stay in local logs but don't generate duplicate Sentry noise.
+//
+// The gate delegates to the shared `expected_error_kind` classifier, so
+// these tests and the production gate stay in sync with the central
+// suppression logic in `core::observability`.
+
+#[test]
+fn is_expected_chat_failure_matches_api_key_missing() {
+    // No API key configured: both phrasings must be classified as an expected failure.
+    assert!(is_expected_chat_failure("api key not set for openai"));
+    assert!(is_expected_chat_failure(
+        "missing api key: openai_api_key is not configured"
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_matches_rate_limit() {
+    // Rate-limit errors: a direct 429 status, and a 429 message wrapped in a 500 status line.
+    assert!(is_expected_chat_failure(
+        "openai API error (429 Too Many Requests): You exceeded your current quota"
+    ));
+    assert!(is_expected_chat_failure(
+        "openai API error (500): 429 rate limit exceeded"
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_matches_provider_config_rejection() {
+    // OpenAI-style `model_not_found` code inside a 404 error body.
+    assert!(is_expected_chat_failure(
+        r#"custom_openai API error (404 Not Found): {"error":{"message":"The model does not exist or you do not have access","code":"model_not_found"}}"#
+    ));
+    // Temperature rejected with a 400 (e.g. o1/o3/o4 reasoning models).
+    assert!(is_expected_chat_failure(
+        "custom_openai API error (400 Bad Request): invalid temperature: only 1 is allowed"
+    ));
+    // litellm-style envelope whose `type` is `not_found_error`, also a 404.
+    assert!(is_expected_chat_failure(
+        r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'gpt-99' not found","type":"not_found_error"}}"#
+    ));
+}
+
+#[test]
+fn is_expected_chat_failure_does_not_match_real_errors() {
+    // Unclassified errors must return `false` so the caller still logs them at `error!`.
+    for raw in [
+        // Genuine 500 server error with no expected phrase — must escalate.
+        "openai API error (500 Internal Server Error): Something went wrong",
+        // Unexpected JSON from provider — potential provider bug.
+        "openai API returned an unexpected chat-completions payload: missing field",
+        // Local I/O error — real infrastructure problem.
+        "failed to open config file: permission denied",
+        // Empty string — nothing to classify, so it must not be demoted.
+        "",
+    ] {
+        assert!(
+            !is_expected_chat_failure(raw),
+            "must NOT demote real error: {raw:?}"
+        );
+    }
+}