diff --git a/src/core/observability.rs b/src/core/observability.rs index c31ee199b2..9d2223e61a 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -407,9 +407,86 @@ pub fn expected_error_kind(message: &str) -> Option { if is_filesystem_user_path_invalid_message(&lower) { return Some(ExpectedErrorKind::FilesystemUserPathInvalid); } + // Upstream rate-limit responses — provider throttles the account (429) or + // wraps the 429 inside an HTTP 500 (`"429 rate limit exceeded"` in the + // body). In both cases the reliable-provider layer already retries with + // backoff, and the embeddings path has a proactive token-bucket limiter + // (`embeddings::rate_limit`). The upstream quota is an account-capacity + // signal, not a code bug — Sentry has no remediation path and the + // per-attempt events generate pure noise (OPENHUMAN-TAURI-S: ~6 984 + // events from HTTP 500 wrapping a "429 rate limit exceeded" body; + // OPENHUMAN-TAURI-6Y: ~19 849 events from direct 429s; OPENHUMAN-TAURI-2E: + // ~1 482 events carrying a `"rate_limit_error"` type in the JSON body; + // OPENHUMAN-TAURI-RQ: ~741 events from the embeddings path). + // + // Checked LAST inside `expected_error_kind` — transient HTTP status matches + // (`is_transient_upstream_http_message`) are already caught by the earlier + // arm, so this arm only adds coverage for the 500-wrapping-429 body shape + // and provider JSON envelopes that name the error type explicitly. + if is_upstream_rate_limit_message(&lower) { + return Some(ExpectedErrorKind::TransientUpstreamHttp); + } None } +/// Detect upstream rate-limit error bodies that bubble up from any provider +/// or embedding API call site. +/// +/// Covers three observed wire shapes: +/// +/// 1. **OpenAI / Anthropic JSON body** — `"rate_limit_error"` is the `"type"` +/// field in the structured error object: +/// `{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}` +/// (OPENHUMAN-TAURI-2E / -RQ). +/// +/// 2. **OpenHuman backend wrapping upstream** — `"Upstream rate limit exceeded +/// for model 'summarization-v1'. Please retry shortly."` embedded in a 500 +/// response body (OPENHUMAN-TAURI-6Y / -7H). +/// +/// 3. **Plain phrase** — `"429 rate limit exceeded, please try again later"` / +/// `"rate limit exceeded"` from any other upstream (OPENHUMAN-TAURI-S). +/// +/// The match is against the full lowercased error string (including any +/// caller wrapping prefix), so it survives `agent.run_single` / `rpc.invoke_method` +/// re-reports as well as the original call-site emit. +/// +/// **Polarity contract**: this predicate is *inclusive* — it returns `true` +/// only for messages that are unambiguously rate-limit throttle signals. It +/// must NOT match unrelated errors that incidentally mention "limit" or "rate" +/// (e.g. action-budget `"Rate limit exceeded: action budget exhausted"` +/// from `security::policy` — distinguished by the `"action budget"` anchor). +pub fn is_upstream_rate_limit_message(lower: &str) -> bool { + // `"rate_limit_error"` is the structured error type from OpenAI / Anthropic + // compatible APIs. Tight anchor — colons and underscores don't appear in + // ordinary log text. + if lower.contains("rate_limit_error") { + return true; + } + // `"upstream rate limit exceeded"` is the OpenHuman backend's own phrase + // when it wraps an upstream provider 429 as an HTTP 500. + if lower.contains("upstream rate limit exceeded") { + return true; + } + // `"429 rate limit exceeded"` is the numeric-prefix form emitted by some + // backends (e.g. OPENHUMAN-TAURI-S: `"error":"429 rate limit exceeded"`). + // Anchored on the `"429 rate limit"` substring so a plain `"rate limit + // exceeded"` mention (which could appear in the `security::policy` action- + // budget message) is NOT matched here — the next arm handles clean phrase + // matches only when scoped by a provider API error prefix. + if lower.contains("429 rate limit") { + return true; + } + // `"rate limit exceeded"` on its own is matched ONLY when it appears inside + // a canonical provider API error envelope (`"api error ("` prefix from + // `ops::api_error` / `embeddings::openai`). This keeps the security::policy + // `"Rate limit exceeded: action budget exhausted"` message from being + // silently swallowed — that phrase does not carry an API error prefix. + if lower.contains("api error (") && lower.contains("rate limit exceeded") { + return true; + } + false +} + /// Detect filesystem-out-of-space errors that bubble up from any syscall /// (`open`, `write`, `mkdir`, `rename`). Three platform-stable renderings: /// @@ -2524,6 +2601,132 @@ mod tests { ); } + // ── Upstream rate-limit suppression (OPENHUMAN-TAURI-S / -6Y / -2E / -RQ) ─ + + /// Canonical Anthropic / OpenAI body with a structured `"rate_limit_error"` + /// type — OPENHUMAN-TAURI-2E (~1 482 events) and -RQ (~741 events). + #[test] + fn classifies_rate_limit_error_type_as_transient() { + for raw in [ + // Direct 429 from the embeddings path (OPENHUMAN-TAURI-RQ): + r#"Embedding API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#, + // Via llm_provider.api_error (OPENHUMAN-TAURI-2E): + r#"[observability] llm_provider.api_error failed: OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#, + // Re-reported by agent.run_single: + r#"run_chat_task failed client_id=abc thread_id=t1 request_id=r1 error=OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#, + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::TransientUpstreamHttp), + "should classify rate_limit_error body as transient: {raw}" + ); + } + } + + /// OpenHuman backend wrapping an upstream 429 as HTTP 500 with a + /// `"upstream rate limit exceeded"` body — OPENHUMAN-TAURI-6Y (~19 849 + /// events). + #[test] + fn classifies_upstream_rate_limit_in_500_body_as_transient() { + for raw in [ + r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly."}"#, + r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi","upstreamModel":"deepseek-ai/DeepSeek-V3-0324"}}"#, + // Re-wrapped by rpc.invoke_method: + r#"rpc.invoke_method failed: LLM summarisation failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'."}"#, + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::TransientUpstreamHttp), + "should classify upstream-rate-limit-in-500 as transient: {raw}" + ); + } + } + + /// Backend returning HTTP 500 with a numeric `"429 rate limit exceeded"` + /// body — OPENHUMAN-TAURI-S (~6 984 events). + #[test] + fn classifies_429_rate_limit_in_500_body_as_transient() { + for raw in [ + r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#, + r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#, + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::TransientUpstreamHttp), + "should classify 429-in-500-body as transient: {raw}" + ); + } + } + + /// The security::policy `"Rate limit exceeded: action budget exhausted"` + /// must NOT be silenced — it's a user-facing hard stop, not a transient + /// upstream quota hit. + #[test] + fn does_not_classify_security_policy_rate_limit_as_transient() { + let msg = "Rate limit exceeded: action budget exhausted (0 actions/hour). \ + Increase the limit in Settings -> Advanced -> Agent autonomy"; + assert_eq!( + expected_error_kind(msg), + None, + "security policy action-budget error must reach Sentry: {msg}" + ); + // Wrapped by rpc.invoke_method — the prefix must not accidentally + // trigger the `api error (` anchor. + assert_eq!( + expected_error_kind(&format!("rpc.invoke_method failed: {msg}")), + None, + "wrapped security policy action-budget error must reach Sentry" + ); + } + + /// Standalone `"rate limit exceeded"` without the `"api error ("` anchor + /// must NOT be silenced — keeps loose phrases from accidentally demoting + /// unrelated errors. + #[test] + fn does_not_classify_bare_rate_limit_exceeded_as_transient() { + assert_eq!( + expected_error_kind("rate limit exceeded"), + None, + "bare 'rate limit exceeded' without API error anchor must reach Sentry" + ); + } + + /// `is_upstream_rate_limit_message` predicate unit tests — verifies the + /// polarity contract independently of `expected_error_kind`. + #[test] + fn upstream_rate_limit_predicate_matches_expected_shapes() { + for lower in [ + r#"{"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#, + "upstream rate limit exceeded for model 'summarization-v1'", + "429 rate limit exceeded, please try again later", + r#"openai api error (429 too many requests): {"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#, + ] { + assert!( + is_upstream_rate_limit_message(lower), + "should match: {lower}" + ); + } + } + + #[test] + fn upstream_rate_limit_predicate_does_not_match_unrelated() { + for lower in [ + // security::policy budget message — must not be swallowed + "rate limit exceeded: action budget exhausted (0 actions/hour)", + // bare phrase without anchor + "rate limit exceeded", + // unrelated 500 body + r#"{"success":false,"error":"internal server error"}"#, + // budget exhausted — different concept + "budget exhausted, add credits to continue", + ] { + assert!( + !is_upstream_rate_limit_message(lower), + "should not match: {lower}" + ); + } + } + #[test] fn does_not_classify_unrelated_messages_as_capability_unavailable() { // The classifier anchors on the exact "for this RAM tier" substring. diff --git a/src/main.rs b/src/main.rs index 352f710a8a..51ea57297b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -83,6 +83,43 @@ fn main() { { return None; } + // Defense-in-depth: upstream rate-limit events that slipped past + // the call-site suppressors in `ops::api_error` (primary guard) + // and `report_error_or_expected` (secondary guard via + // `expected_error_kind`). Catches the three major shapes: + // · `rate_limit_error` type in the JSON body (OPENHUMAN-TAURI-2E, + // OPENHUMAN-TAURI-RQ — ~2 223 events combined) + // · `"upstream rate limit exceeded"` in a 500 body (TAURI-6Y — + // ~19 849 events) + // · `"429 rate limit exceeded"` in a 500 body (TAURI-S — ~6 984 + // events) + // The primary per-attempt suppression lives in + // `openhuman::inference::provider::ops::api_error` (skips + // `report_error` entirely for rate-limit bodies) and in + // `embeddings::openai::embed` (uses `report_error_or_expected` with + // the canonical `"Embedding API error ({status}): …"` format so + // `is_transient_upstream_http_message` catches it). This filter is + // the last line of defense for any future call site that adds a new + // report path without routing through one of those two guards. + { + let direct = event.message.as_deref(); + let from_logentry = event.logentry.as_ref().map(|l| l.message.as_str()); + let from_exception = event.exception.last().and_then(|e| e.value.as_deref()); + let is_rate_limited = [direct, from_logentry, from_exception] + .into_iter() + .flatten() + .map(str::to_ascii_lowercase) + .any(|lower| { + openhuman_core::core::observability::is_upstream_rate_limit_message(&lower) + }); + if is_rate_limited { + log::debug!( + "[sentry-rate-limit-filter] dropping upstream rate-limit event_id={:?}", + event.event_id + ); + return None; + } + } // Defense-in-depth: 404 on PATCH/DELETE to a channel-message path // is an expected state (provider-side delete or backend GC). Primary // suppression lives in `authed_json`; this catches any future call diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs index ae07f21913..91f1b5d857 100644 --- a/src/openhuman/inference/provider/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -883,16 +883,38 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E } else if is_context_window_exceeded { log_context_window_exceeded("api_error", provider, None, status); } else if should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "api_error", - &[ - ("provider", provider), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); + // Defense-in-depth: some backends (e.g. OpenHuman) wrap an upstream + // provider 429 as an HTTP 500 with a rate-limit phrase in the body + // (`"429 rate limit exceeded"`, `"upstream rate limit exceeded"`). + // `should_report_provider_http_failure(500)` would otherwise let this + // through to Sentry — suppress it here before the report fires so the + // noise stays off Sentry (OPENHUMAN-TAURI-S: ~6 984 events). + // The `expected_error_kind` classifier in `report_error_or_expected` + // catches the same shape at re-report sites (agent / web_channel). + let lower_body = body.to_ascii_lowercase(); + let is_rate_limit_body = + crate::core::observability::is_upstream_rate_limit_message(&lower_body); + if is_rate_limit_body { + tracing::warn!( + domain = "llm_provider", + operation = "api_error", + provider = provider, + status = status_str.as_str(), + "[llm_provider] api_error: skipping Sentry report — rate-limit body in \ + non-429 response ({status})" + ); + } else { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "api_error", + &[ + ("provider", provider), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } } anyhow::anyhow!(message) } @@ -1580,6 +1602,62 @@ mod tests { } } + // Tests for the rate-limit body suppression guard added to `api_error`. + // Exercises `is_upstream_rate_limit_message` with the exact body shapes that + // produced OPENHUMAN-TAURI-S (~6 984 events from HTTP 500 wrapping a + // "429 rate limit exceeded" body) and OPENHUMAN-TAURI-6Y / -2E. + mod rate_limit_body_suppression { + use crate::core::observability::is_upstream_rate_limit_message; + + /// HTTP 500 with a `"429 rate limit exceeded"` body must be detected + /// as a rate-limit signal so the guard in `api_error` can skip the + /// Sentry report (OPENHUMAN-TAURI-S). + #[test] + fn http_500_with_429_body_phrase_is_rate_limited() { + let body = + r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"# + .to_ascii_lowercase(); + assert!( + is_upstream_rate_limit_message(&body), + "500-body with '429 rate limit exceeded' must be detected as rate-limited" + ); + } + + /// HTTP 500 with an `"upstream rate limit exceeded"` body + /// (OPENHUMAN-TAURI-6Y shape). + #[test] + fn http_500_with_upstream_rate_limit_body_is_rate_limited() { + let body = r#"{"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi"}}"# + .to_ascii_lowercase(); + assert!( + is_upstream_rate_limit_message(&body), + "500-body with 'upstream rate limit exceeded' must be detected" + ); + } + + /// OpenAI / Anthropic `"rate_limit_error"` type body. + #[test] + fn body_with_rate_limit_error_type_is_rate_limited() { + let body = r#"{"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"# + .to_ascii_lowercase(); + assert!( + is_upstream_rate_limit_message(&body), + "body with 'rate_limit_error' type must be detected" + ); + } + + /// Unrelated 500 body must NOT be detected as rate-limited. + #[test] + fn http_500_unrelated_body_is_not_rate_limited() { + let body = r#"{"success":false,"error":"internal server error: database unavailable"}"# + .to_ascii_lowercase(); + assert!( + !is_upstream_rate_limit_message(&body), + "unrelated 500 body must not be detected as rate-limited" + ); + } + } + mod provider_access_policy_suppression { use super::*;