Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions src/core/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,86 @@ pub fn expected_error_kind(message: &str) -> Option<ExpectedErrorKind> {
if is_filesystem_user_path_invalid_message(&lower) {
return Some(ExpectedErrorKind::FilesystemUserPathInvalid);
}
// Upstream rate-limit responses — provider throttles the account (429) or
// wraps the 429 inside an HTTP 500 (`"429 rate limit exceeded"` in the
// body). In both cases the reliable-provider layer already retries with
// backoff, and the embeddings path has a proactive token-bucket limiter
// (`embeddings::rate_limit`). The upstream quota is an account-capacity
// signal, not a code bug — Sentry has no remediation path and the
// per-attempt events generate pure noise (OPENHUMAN-TAURI-S: ~6 984
// events from HTTP 500 wrapping a "429 rate limit exceeded" body;
// OPENHUMAN-TAURI-6Y: ~19 849 events from direct 429s; OPENHUMAN-TAURI-2E:
// ~1 482 events carrying a `"rate_limit_error"` type in the JSON body;
// OPENHUMAN-TAURI-RQ: ~741 events from the embeddings path).
//
// Checked LAST inside `expected_error_kind` — transient HTTP status matches
// (`is_transient_upstream_http_message`) are already caught by the earlier
// arm, so this arm only adds coverage for the 500-wrapping-429 body shape
// and provider JSON envelopes that name the error type explicitly.
if is_upstream_rate_limit_message(&lower) {
return Some(ExpectedErrorKind::TransientUpstreamHttp);
}
None
}

/// Detect upstream rate-limit error bodies that bubble up from any provider
/// or embedding API call site.
///
/// Covers three observed wire shapes:
///
/// 1. **OpenAI / Anthropic JSON body** — `"rate_limit_error"` is the `"type"`
/// field in the structured error object:
/// `{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}`
/// (OPENHUMAN-TAURI-2E / -RQ).
///
/// 2. **OpenHuman backend wrapping upstream** — `"Upstream rate limit exceeded
/// for model 'summarization-v1'. Please retry shortly."` embedded in a 500
/// response body (OPENHUMAN-TAURI-6Y / -7H).
///
/// 3. **Plain phrase** — `"429 rate limit exceeded, please try again later"` /
/// `"rate limit exceeded"` from any other upstream (OPENHUMAN-TAURI-S).
///
/// The match is against the full lowercased error string (including any
/// caller wrapping prefix), so it survives `agent.run_single` / `rpc.invoke_method`
/// re-reports as well as the original call-site emit.
///
/// **Polarity contract**: this predicate is *inclusive* — it returns `true`
/// only for messages that are unambiguously rate-limit throttle signals. It
/// must NOT match unrelated errors that incidentally mention "limit" or "rate"
/// (e.g. action-budget `"Rate limit exceeded: action budget exhausted"`
/// from `security::policy` — distinguished by the `"action budget"` anchor).
pub fn is_upstream_rate_limit_message(lower: &str) -> bool {
// `"rate_limit_error"` is the structured error type from OpenAI / Anthropic
// compatible APIs. Tight anchor — colons and underscores don't appear in
// ordinary log text.
if lower.contains("rate_limit_error") {
return true;
}
// `"upstream rate limit exceeded"` is the OpenHuman backend's own phrase
// when it wraps an upstream provider 429 as an HTTP 500.
if lower.contains("upstream rate limit exceeded") {
return true;
}
// `"429 rate limit exceeded"` is the numeric-prefix form emitted by some
// backends (e.g. OPENHUMAN-TAURI-S: `"error":"429 rate limit exceeded"`).
// Anchored on the `"429 rate limit"` substring so a plain `"rate limit
// exceeded"` mention (which could appear in the `security::policy` action-
// budget message) is NOT matched here — the next arm handles clean phrase
// matches only when scoped by a provider API error prefix.
if lower.contains("429 rate limit") {
return true;
}
// `"rate limit exceeded"` on its own is matched ONLY when it appears inside
// a canonical provider API error envelope (`"api error ("` prefix from
// `ops::api_error` / `embeddings::openai`). This keeps the security::policy
// `"Rate limit exceeded: action budget exhausted"` message from being
// silently swallowed — that phrase does not carry an API error prefix.
if lower.contains("api error (") && lower.contains("rate limit exceeded") {
return true;
}
false
}

/// Detect filesystem-out-of-space errors that bubble up from any syscall
/// (`open`, `write`, `mkdir`, `rename`). Three platform-stable renderings:
///
Expand Down Expand Up @@ -2524,6 +2601,132 @@ mod tests {
);
}

// ── Upstream rate-limit suppression (OPENHUMAN-TAURI-S / -6Y / -2E / -RQ) ─

/// Canonical Anthropic / OpenAI body with a structured `"rate_limit_error"`
/// type — OPENHUMAN-TAURI-2E (~1 482 events) and -RQ (~741 events).
#[test]
fn classifies_rate_limit_error_type_as_transient() {
for raw in [
// Direct 429 from the embeddings path (OPENHUMAN-TAURI-RQ):
r#"Embedding API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#,
// Via llm_provider.api_error (OPENHUMAN-TAURI-2E):
r#"[observability] llm_provider.api_error failed: OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#,
// Re-reported by agent.run_single:
r#"run_chat_task failed client_id=abc thread_id=t1 request_id=r1 error=OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#,
] {
assert_eq!(
expected_error_kind(raw),
Some(ExpectedErrorKind::TransientUpstreamHttp),
"should classify rate_limit_error body as transient: {raw}"
);
}
}

/// OpenHuman backend wrapping an upstream 429 as HTTP 500 with a
/// `"upstream rate limit exceeded"` body — OPENHUMAN-TAURI-6Y (~19 849
/// events).
#[test]
fn classifies_upstream_rate_limit_in_500_body_as_transient() {
for raw in [
r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly."}"#,
r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi","upstreamModel":"deepseek-ai/DeepSeek-V3-0324"}}"#,
// Re-wrapped by rpc.invoke_method:
r#"rpc.invoke_method failed: LLM summarisation failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'."}"#,
] {
assert_eq!(
expected_error_kind(raw),
Some(ExpectedErrorKind::TransientUpstreamHttp),
"should classify upstream-rate-limit-in-500 as transient: {raw}"
);
}
}

/// Backend returning HTTP 500 with a numeric `"429 rate limit exceeded"`
/// body — OPENHUMAN-TAURI-S (~6 984 events).
#[test]
fn classifies_429_rate_limit_in_500_body_as_transient() {
for raw in [
r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#,
r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#,
] {
assert_eq!(
expected_error_kind(raw),
Some(ExpectedErrorKind::TransientUpstreamHttp),
"should classify 429-in-500-body as transient: {raw}"
);
}
}

/// The security::policy `"Rate limit exceeded: action budget exhausted"`
/// must NOT be silenced — it's a user-facing hard stop, not a transient
/// upstream quota hit.
#[test]
fn does_not_classify_security_policy_rate_limit_as_transient() {
let msg = "Rate limit exceeded: action budget exhausted (0 actions/hour). \
Increase the limit in Settings -> Advanced -> Agent autonomy";
assert_eq!(
expected_error_kind(msg),
None,
"security policy action-budget error must reach Sentry: {msg}"
);
// Wrapped by rpc.invoke_method — the prefix must not accidentally
// trigger the `api error (` anchor.
assert_eq!(
expected_error_kind(&format!("rpc.invoke_method failed: {msg}")),
None,
"wrapped security policy action-budget error must reach Sentry"
);
}

/// Standalone `"rate limit exceeded"` without the `"api error ("` anchor
/// must NOT be silenced — keeps loose phrases from accidentally demoting
/// unrelated errors.
#[test]
fn does_not_classify_bare_rate_limit_exceeded_as_transient() {
assert_eq!(
expected_error_kind("rate limit exceeded"),
None,
"bare 'rate limit exceeded' without API error anchor must reach Sentry"
);
}

/// `is_upstream_rate_limit_message` predicate unit tests — verifies the
/// polarity contract independently of `expected_error_kind`.
#[test]
fn upstream_rate_limit_predicate_matches_expected_shapes() {
for lower in [
r#"{"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#,
"upstream rate limit exceeded for model 'summarization-v1'",
"429 rate limit exceeded, please try again later",
r#"openai api error (429 too many requests): {"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#,
] {
assert!(
is_upstream_rate_limit_message(lower),
"should match: {lower}"
);
}
}

#[test]
fn upstream_rate_limit_predicate_does_not_match_unrelated() {
for lower in [
// security::policy budget message — must not be swallowed
"rate limit exceeded: action budget exhausted (0 actions/hour)",
// bare phrase without anchor
"rate limit exceeded",
// unrelated 500 body
r#"{"success":false,"error":"internal server error"}"#,
// budget exhausted — different concept
"budget exhausted, add credits to continue",
] {
assert!(
!is_upstream_rate_limit_message(lower),
"should not match: {lower}"
);
}
}

#[test]
fn does_not_classify_unrelated_messages_as_capability_unavailable() {
// The classifier anchors on the exact "for this RAM tier" substring.
Expand Down
37 changes: 37 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,43 @@ fn main() {
{
return None;
}
// Defense-in-depth: upstream rate-limit events that slipped past
// the call-site suppressors in `ops::api_error` (primary guard)
// and `report_error_or_expected` (secondary guard via
// `expected_error_kind`). Catches the three major shapes:
// · `rate_limit_error` type in the JSON body (OPENHUMAN-TAURI-2E,
// OPENHUMAN-TAURI-RQ — ~2 223 events combined)
// · `"upstream rate limit exceeded"` in a 500 body (TAURI-6Y —
// ~19 849 events)
// · `"429 rate limit exceeded"` in a 500 body (TAURI-S — ~6 984
// events)
// The primary per-attempt suppression lives in
// `openhuman::inference::provider::ops::api_error` (skips
// `report_error` entirely for rate-limit bodies) and in
// `embeddings::openai::embed` (uses `report_error_or_expected` with
// the canonical `"Embedding API error ({status}): …"` format so
// `is_transient_upstream_http_message` catches it). This filter is
// the last line of defense for any future call site that adds a new
// report path without routing through one of those two guards.
{
let direct = event.message.as_deref();
let from_logentry = event.logentry.as_ref().map(|l| l.message.as_str());
let from_exception = event.exception.last().and_then(|e| e.value.as_deref());
let is_rate_limited = [direct, from_logentry, from_exception]
.into_iter()
.flatten()
.map(str::to_ascii_lowercase)
.any(|lower| {
openhuman_core::core::observability::is_upstream_rate_limit_message(&lower)
});
if is_rate_limited {
log::debug!(
"[sentry-rate-limit-filter] dropping upstream rate-limit event_id={:?}",
event.event_id
);
return None;
}
}
// Defense-in-depth: 404 on PATCH/DELETE to a channel-message path
// is an expected state (provider-side delete or backend GC). Primary
// suppression lives in `authed_json`; this catches any future call
Expand Down
98 changes: 88 additions & 10 deletions src/openhuman/inference/provider/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -883,16 +883,38 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E
} else if is_context_window_exceeded {
log_context_window_exceeded("api_error", provider, None, status);
} else if should_report_provider_http_failure(status) {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
"api_error",
&[
("provider", provider),
("status", status_str.as_str()),
("failure", "non_2xx"),
],
);
// Defense-in-depth: some backends (e.g. OpenHuman) wrap an upstream
// provider 429 as an HTTP 500 with a rate-limit phrase in the body
// (`"429 rate limit exceeded"`, `"upstream rate limit exceeded"`).
// `should_report_provider_http_failure(500)` would otherwise let this
// through to Sentry — suppress it here before the report fires so the
// noise stays off Sentry (OPENHUMAN-TAURI-S: ~6 984 events).
// The `expected_error_kind` classifier in `report_error_or_expected`
// catches the same shape at re-report sites (agent / web_channel).
let lower_body = body.to_ascii_lowercase();
let is_rate_limit_body =
crate::core::observability::is_upstream_rate_limit_message(&lower_body);
if is_rate_limit_body {
tracing::warn!(
domain = "llm_provider",
operation = "api_error",
provider = provider,
status = status_str.as_str(),
"[llm_provider] api_error: skipping Sentry report — rate-limit body in \
non-429 response ({status})"
);
} else {
crate::core::observability::report_error(
message.as_str(),
"llm_provider",
"api_error",
&[
("provider", provider),
("status", status_str.as_str()),
("failure", "non_2xx"),
],
);
}
}
anyhow::anyhow!(message)
}
Expand Down Expand Up @@ -1580,6 +1602,62 @@ mod tests {
}
}

// Tests for the rate-limit body suppression guard added to `api_error`.
// Exercises `is_upstream_rate_limit_message` with the exact body shapes that
// produced OPENHUMAN-TAURI-S (~6 984 events from HTTP 500 wrapping a
// "429 rate limit exceeded" body) and OPENHUMAN-TAURI-6Y / -2E.
mod rate_limit_body_suppression {
use crate::core::observability::is_upstream_rate_limit_message;

/// HTTP 500 with a `"429 rate limit exceeded"` body must be detected
/// as a rate-limit signal so the guard in `api_error` can skip the
/// Sentry report (OPENHUMAN-TAURI-S).
#[test]
fn http_500_with_429_body_phrase_is_rate_limited() {
let body =
r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"#
.to_ascii_lowercase();
assert!(
is_upstream_rate_limit_message(&body),
"500-body with '429 rate limit exceeded' must be detected as rate-limited"
);
}

/// HTTP 500 with an `"upstream rate limit exceeded"` body
/// (OPENHUMAN-TAURI-6Y shape).
#[test]
fn http_500_with_upstream_rate_limit_body_is_rate_limited() {
let body = r#"{"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi"}}"#
.to_ascii_lowercase();
assert!(
is_upstream_rate_limit_message(&body),
"500-body with 'upstream rate limit exceeded' must be detected"
);
}

/// OpenAI / Anthropic `"rate_limit_error"` type body.
#[test]
fn body_with_rate_limit_error_type_is_rate_limited() {
let body = r#"{"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#
.to_ascii_lowercase();
assert!(
is_upstream_rate_limit_message(&body),
"body with 'rate_limit_error' type must be detected"
);
}

/// Unrelated 500 body must NOT be detected as rate-limited.
#[test]
fn http_500_unrelated_body_is_not_rate_limited() {
let body = r#"{"success":false,"error":"internal server error: database unavailable"}"#
.to_ascii_lowercase();
assert!(
!is_upstream_rate_limit_message(&body),
"unrelated 500 body must not be detected as rate-limited"
);
}
}

mod provider_access_policy_suppression {
use super::*;

Expand Down
Loading