Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
848 changes: 15 additions & 833 deletions src/core/observability.rs

Large diffs are not rendered by default.

52 changes: 0 additions & 52 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,6 @@ fn main() {
if openhuman_core::core::observability::is_budget_event(&event) {
return None;
}
// CORE-RUST-EK (~827 events): drop all HTTP 401 responses from the
// embeddings call path (domain=embeddings, failure=non_2xx,
// status=401). The primary suppression for the OpenHuman-backend
// "Invalid token" shape lives in `expected_error_kind` /
// `is_session_expired_message`. This is defense-in-depth that also
// catches third-party provider 401s (e.g. OpenAI `invalid_api_key`
// body) that don't carry the OpenHuman envelope and therefore fall
// through the string-based classifier to Sentry.
if openhuman_core::core::observability::is_embeddings_api_key_401_event(&event) {
log::debug!(
"[sentry-embeddings-401-filter] dropping embeddings api-key 401 event_id={:?}",
event.event_id
);
return None;
}
// Defense-in-depth: drop max-tool-iterations cap events that
// slipped past the call-site filters in
// `agent::harness::session::runtime::run_single`,
Expand All @@ -98,43 +83,6 @@ fn main() {
{
return None;
}
// Defense-in-depth: upstream rate-limit events that slipped past
// the call-site suppressors in `ops::api_error` (primary guard)
// and `report_error_or_expected` (secondary guard via
// `expected_error_kind`). Catches the three major shapes:
// · `rate_limit_error` type in the JSON body (OPENHUMAN-TAURI-2E,
// OPENHUMAN-TAURI-RQ — ~2 223 events combined)
// · `"upstream rate limit exceeded"` in a 500 body (TAURI-6Y —
// ~19 849 events)
// · `"429 rate limit exceeded"` in a 500 body (TAURI-S — ~6 984
// events)
// The primary per-attempt suppression lives in
// `openhuman::inference::provider::ops::api_error` (skips
// `report_error` entirely for rate-limit bodies) and in
// `embeddings::openai::embed` (uses `report_error_or_expected` with
// the canonical `"Embedding API error ({status}): …"` format so
// `is_transient_upstream_http_message` catches it). This filter is
// the last line of defense for any future call site that adds a new
// report path without routing through one of those two guards.
{
let direct = event.message.as_deref();
let from_logentry = event.logentry.as_ref().map(|l| l.message.as_str());
let from_exception = event.exception.last().and_then(|e| e.value.as_deref());
let is_rate_limited = [direct, from_logentry, from_exception]
.into_iter()
.flatten()
.map(str::to_ascii_lowercase)
.any(|lower| {
openhuman_core::core::observability::is_upstream_rate_limit_message(&lower)
});
if is_rate_limited {
log::debug!(
"[sentry-rate-limit-filter] dropping upstream rate-limit event_id={:?}",
event.event_id
);
return None;
}
}
// Defense-in-depth: 404 on PATCH/DELETE to a channel-message path
// is an expected state (provider-side delete or backend GC). Primary
// suppression lives in `authed_json`; this catches any future call
Expand Down
42 changes: 1 addition & 41 deletions src/openhuman/inference/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,6 @@ fn is_unknown_provider_user_config(err: &str) -> bool {
err.contains("no cloud provider with id or slug")
}

/// Reports whether an error from a provider chat attempt should be treated
/// as an expected condition instead of being escalated.
///
/// The decision is delegated entirely to the shared classifier
/// `crate::core::observability::expected_error_kind`: the error counts as
/// expected exactly when that classifier yields `Some(_)` for `err`.
///
/// Categories this gate is meant to cover, per the original author's notes
/// (the actual matching rules live in the classifier, not in this function):
///
/// - **401 Unauthorized** — revoked or wrong API key. Described as already
///   reported by the provider layer's `api_error` path, so an ops-level
///   duplicate would only add noise.
/// - **429 Too Many Requests / rate-limit** — quota exhaustion. Described as
///   covered by the `is_upstream_rate_limit_message` classifier inside
///   `expected_error_kind`, with the reliable-provider layer retrying with
///   backoff before propagating.
/// - **Model not found** — the user picked a model that does not exist for
///   their key; described as a provider-layer config rejection
///   (TAURI-RUST-68, ~1309 events).
///
/// The gate is deliberately broad: anything the classifier recognises is
/// kept out of the Sentry funnel at the ops level, on the assumption that
/// the underlying call site (`compatible.rs` / `report_error_or_expected`)
/// owns the authoritative report. Errors the classifier does not recognise
/// (5xx, unexpected payloads, network errors) return `false` and still
/// escalate.
fn is_expected_chat_failure(err: &str) -> bool {
    let classification = crate::core::observability::expected_error_kind(err);
    classification.is_some()
}

#[derive(Debug, Clone, serde::Serialize)]
pub struct InferenceTestProviderModelResult {
pub reply: String,
Expand Down Expand Up @@ -204,22 +179,7 @@ pub async fn inference_test_provider_model(
output_len = outcome.value.reply.len(),
"{LOG_PREFIX} test_provider_model:ok"
),
Err(err) => {
if is_expected_chat_failure(err) {
// Provider-state / user-config failure (401, 429, model not
// found, API key missing, etc.). The underlying provider
// layer already emitted its own Sentry event or classified
// this as expected. An ops-level duplicate adds noise.
// Targets TAURI-RUST-68 (~1,309 events).
warn!(
provider,
error = %err,
"{LOG_PREFIX} test_provider_model:expected-error (no Sentry)"
);
} else {
error!(error = %err, "{LOG_PREFIX} test_provider_model:error");
}
}
Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"),
}
result
}
Expand Down
70 changes: 0 additions & 70 deletions src/openhuman/inference/ops_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,73 +317,3 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() {
}
}

// ── is_expected_chat_failure (TAURI-RUST-68) ─────────────────────────────
//
// `inference_test_provider_model` calls `simple_chat` which can fail with
// known provider-state or user-config conditions (401, 429, model not
// found). Before this fix every failure escalated to `error!`, which
// sentry-tracing shipped to Sentry as `"[inference::ops]
// test_provider_model:error"` — 1,309 events — while the same underlying
// errors already had their own report or were classified as expected by
// `expected_error_kind`. The gate demotes them to `warn!` so they stay in
// local logs but don't generate duplicate Sentry noise.
//
// Anchored on the shared `expected_error_kind` classifier so both the unit
// tests here and the production gate stay in sync with the central
// suppression logic in `core::observability`.

#[test]
fn is_expected_chat_failure_matches_api_key_missing() {
    // Phrases the provider layer emits when no API key has been configured.
    let key_not_set = "api key not set for openai";
    let key_not_configured = "missing api key: openai_api_key is not configured";

    assert!(is_expected_chat_failure(key_not_set));
    assert!(is_expected_chat_failure(key_not_configured));
}

#[test]
fn is_expected_chat_failure_matches_rate_limit() {
    // Rate-limit shapes produced by the provider / OpenHuman backend:
    // a genuine 429 status, and a 429 message wrapped inside a 500 body.
    let quota_exhausted =
        "openai API error (429 Too Many Requests): You exceeded your current quota";
    let wrapped_in_500 = "openai API error (500): 429 rate limit exceeded";

    assert!(is_expected_chat_failure(quota_exhausted));
    assert!(is_expected_chat_failure(wrapped_in_500));
}

#[test]
fn is_expected_chat_failure_matches_provider_config_rejection() {
    // OpenAI-style `model_not_found` code carried in the error body.
    let openai_model_not_found = r#"custom_openai API error (404 Not Found): {"error":{"message":"The model does not exist or you do not have access","code":"model_not_found"}}"#;
    // Model that rejects a custom temperature (e.g. o1/o3/o4 reasoning models).
    let temperature_unsupported =
        "custom_openai API error (400 Bad Request): invalid temperature: only 1 is allowed";
    // litellm-style `not_found_error` envelope.
    let litellm_not_found = r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'gpt-99' not found","type":"not_found_error"}}"#;

    assert!(is_expected_chat_failure(openai_model_not_found));
    assert!(is_expected_chat_failure(temperature_unsupported));
    assert!(is_expected_chat_failure(litellm_not_found));
}

#[test]
fn is_expected_chat_failure_does_not_match_real_errors() {
    // Errors that have to keep reaching Sentry; none of them may be demoted.
    let real_errors = [
        // Genuine 500 server error: actionable, must escalate.
        "openai API error (500 Internal Server Error): Something went wrong",
        // Unexpected JSON from the provider: potential provider bug.
        "openai API returned an unexpected chat-completions payload: missing field",
        // Local I/O error: real infrastructure problem.
        "failed to open config file: permission denied",
        // Completely empty string: falls through the classifier.
        "",
    ];

    for raw in real_errors {
        let demoted = is_expected_chat_failure(raw);
        assert!(!demoted, "must NOT demote real error: {raw:?}");
    }
}
Loading
Loading