From e941c179d8891d4f3d03bd0935898b40c7ad8ad7 Mon Sep 17 00:00:00 2001
From: "cyrus@tinyhumans.ai" <cyrus@tinyhumans.ai>
Date: Fri, 29 May 2026 10:32:52 +0530
Subject: [PATCH 1/2] fix(observability): suppress 429 / rate-limit Sentry
 noise (TAURI-RUST-3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Layered suppression for upstream rate-limit events that were
generating ~31k Sentry events — one shared predicate (1) plus three
suppression layers (2–4):

1. **`observability::is_upstream_rate_limit_message`** (new public predicate)
   Matches four message shapes:
   - `"rate_limit_error"` JSON type field (Anthropic/OpenAI envelope)
   - `"upstream rate limit exceeded"` (OpenHuman backend wrapping upstream 429)
   - `"429 rate limit exceeded"` (numeric-prefix form)
   - `"api error (" + "rate limit exceeded"` (provider envelope catch-all)
   Polarity contract: does NOT match `security::policy` action-budget message
   (`"Rate limit exceeded: action budget exhausted"`) or bare "rate limit exceeded"
   without the API error anchor.

2. **`expected_error_kind`** routes rate-limit messages to `TransientUpstreamHttp`
   — demotes re-reports from `agent.run_single` / `web_channel.run_chat_task`
   / `rpc.invoke_method` to a `warn`-level breadcrumb instead of a Sentry event.

3. **`ops::api_error`** (primary per-attempt guard): when
   `should_report_provider_http_failure(status)` would otherwise fire for a
   non-429 status (e.g. HTTP 500), detect rate-limit phrases in the body and
   skip `report_error` — logs a `warn` breadcrumb instead.
   Targets OPENHUMAN-TAURI-S (~6 984 events: HTTP 500 wrapping
   `"429 rate limit exceeded"` body).

4. **`before_send` in `main.rs`**: defense-in-depth filter that drops events from
   any future call site that bypasses (2) or (3).

Covered Sentry issues (all resolved, fixes prevent recurrence):
- OPENHUMAN-TAURI-S     ~6 984 events  (500 wrapping "429 rate limit exceeded")
- OPENHUMAN-TAURI-6Y   ~19 849 events  (500 "upstream rate limit exceeded")
- OPENHUMAN-TAURI-2E    ~1 482 events  ("rate_limit_error" type in body)
- OPENHUMAN-TAURI-RQ      ~741 events  (embeddings 429 old format)

Tests: 7 new unit tests for the predicate and `expected_error_kind` in
`observability.rs`; 4 new tests in `ops.rs::rate_limit_body_suppression`.

Closes #2898
---
 src/core/observability.rs               | 203 ++++++++++++++++++++++++
 src/main.rs                             |  33 ++++
 src/openhuman/inference/provider/ops.rs |  97 +++++++++--
 3 files changed, 323 insertions(+), 10 deletions(-)

diff --git a/src/core/observability.rs b/src/core/observability.rs
index f1a58c543c..ec087f5c31 100644
--- a/src/core/observability.rs
+++ b/src/core/observability.rs
@@ -211,9 +211,86 @@ pub fn expected_error_kind(message: &str) -> Option<ExpectedErrorKind> {
     if is_prompt_injection_blocked_message(&lower) {
         return Some(ExpectedErrorKind::PromptInjectionBlocked);
     }
+    // Upstream rate-limit responses — provider throttles the account (429) or
+    // wraps the 429 inside an HTTP 500 (`"429 rate limit exceeded"` in the
+    // body). In both cases the reliable-provider layer already retries with
+    // backoff, and the embeddings path has a proactive token-bucket limiter
+    // (`embeddings::rate_limit`). The upstream quota is an account-capacity
+    // signal, not a code bug — Sentry has no remediation path and the
+    // per-attempt events generate pure noise (OPENHUMAN-TAURI-S: ~6 984
+    // events from HTTP 500 wrapping a "429 rate limit exceeded" body;
+    // OPENHUMAN-TAURI-6Y: ~19 849 events from HTTP 500 bodies saying "upstream
+    // rate limit exceeded"; OPENHUMAN-TAURI-2E: ~1 482 events with a
+    // `"rate_limit_error"` JSON type; OPENHUMAN-TAURI-RQ: ~741 embeddings events).
+    //
+    // Checked LAST inside `expected_error_kind` — transient HTTP status matches
+    // (`is_transient_upstream_http_message`) are already caught by the earlier
+    // arm, so this arm only adds coverage for the 500-wrapping-429 body shape
+    // and provider JSON envelopes that name the error type explicitly.
+    if is_upstream_rate_limit_message(&lower) {
+        return Some(ExpectedErrorKind::TransientUpstreamHttp);
+    }
     None
 }
 
+/// Report whether an already-lowercased error string contains one of the
+/// known upstream rate-limit markers.
+///
+/// Covers three observed wire shapes:
+///
+/// 1. **OpenAI / Anthropic JSON body** — `"rate_limit_error"` is the `"type"`
+///    field in the structured error object:
+///    `{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}`
+///    (OPENHUMAN-TAURI-2E / -RQ).
+///
+/// 2. **OpenHuman backend wrapping upstream** — `"Upstream rate limit exceeded
+///    for model 'summarization-v1'. Please retry shortly."` embedded in a 500
+///    response body (OPENHUMAN-TAURI-6Y / -7H).
+///
+/// 3. **Plain phrase** — any text containing `"429 rate limit"` (OPENHUMAN-TAURI-S),
+///    or `"rate limit exceeded"` together with an `"api error ("` envelope marker.
+///
+/// `lower` must already be lowercased by the caller: the checks are plain
+/// case-sensitive substring tests, so a caller-added wrapping prefix around the
+/// provider message does not prevent a match.
+///
+/// **Polarity contract**: returns `true` only when one of the anchors above is
+/// present. A bare `"rate limit exceeded"` without the `"api error ("` marker is
+/// not matched, which is what keeps the `security::policy` action-budget message
+/// (`"Rate limit exceeded: action budget exhausted"`) from matching; no check on
+/// the `"action budget"` text itself is performed.
+pub fn is_upstream_rate_limit_message(lower: &str) -> bool {
+    // `"rate_limit_error"` is the structured error type from OpenAI / Anthropic
+    // compatible APIs. Tight anchor — an underscore-joined identifier like this
+    // is unlikely to appear in ordinary log text.
+    if lower.contains("rate_limit_error") {
+        return true;
+    }
+    // `"upstream rate limit exceeded"` is the OpenHuman backend's own phrase
+    // when it wraps an upstream provider 429 as an HTTP 500.
+    if lower.contains("upstream rate limit exceeded") {
+        return true;
+    }
+    // `"429 rate limit exceeded"` is the numeric-prefix form emitted by some
+    // backends (e.g. OPENHUMAN-TAURI-S: `"error":"429 rate limit exceeded"`).
+    // Anchored on the `"429 rate limit"` substring so a plain `"rate limit
+    // exceeded"` mention (which could appear in the `security::policy` action-
+    // budget message) is NOT matched here — the next arm handles the plain
+    // phrase only when an `"api error ("` marker is also present.
+    if lower.contains("429 rate limit") {
+        return true;
+    }
+    // `"rate limit exceeded"` on its own is matched ONLY when the message also
+    // contains the canonical provider API error envelope (`"api error ("`, as
+    // produced by `ops::api_error` / `embeddings::openai`). This keeps the
+    // security::policy `"Rate limit exceeded: action budget exhausted"` message
+    // from being silently swallowed — that phrase carries no such envelope.
+    if lower.contains("api error (") && lower.contains("rate limit exceeded") {
+        return true;
+    }
+    false
+}
+
 /// Detect **app-session-expired** boundary errors that bubble up from any
 /// backend-touching call site (agent, web channel, cron, integrations).
 ///
@@ -1493,6 +1570,132 @@ mod tests {
         );
     }
 
+    // ── Upstream rate-limit suppression (OPENHUMAN-TAURI-S / -6Y / -2E / -RQ) ─
+
+    /// Canonical Anthropic / OpenAI body with a structured `"rate_limit_error"`
+    /// type — OPENHUMAN-TAURI-2E (~1 482 events) and -RQ (~741 events).
+    #[test]
+    fn classifies_rate_limit_error_type_as_transient() {
+        for raw in [
+            // Direct 429 from the embeddings path (OPENHUMAN-TAURI-RQ):
+            r#"Embedding API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#,
+            // Via llm_provider.api_error (OPENHUMAN-TAURI-2E):
+            r#"[observability] llm_provider.api_error failed: OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#,
+            // Re-reported by agent.run_single:
+            r#"run_chat_task failed client_id=abc thread_id=t1 request_id=r1 error=OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#,
+        ] {
+            assert_eq!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::TransientUpstreamHttp),
+                "should classify rate_limit_error body as transient: {raw}"
+            );
+        }
+    }
+
+    /// OpenHuman backend wrapping an upstream 429 as HTTP 500 with a
+    /// `"upstream rate limit exceeded"` body — OPENHUMAN-TAURI-6Y (~19 849
+    /// events).
+    #[test]
+    fn classifies_upstream_rate_limit_in_500_body_as_transient() {
+        for raw in [
+            r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly."}"#,
+            r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi","upstreamModel":"deepseek-ai/DeepSeek-V3-0324"}}"#,
+            // Re-wrapped by rpc.invoke_method:
+            r#"rpc.invoke_method failed: LLM summarisation failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'."}"#,
+        ] {
+            assert_eq!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::TransientUpstreamHttp),
+                "should classify upstream-rate-limit-in-500 as transient: {raw}"
+            );
+        }
+    }
+
+    /// Backend returning HTTP 500 with a numeric `"429 rate limit exceeded"`
+    /// body — OPENHUMAN-TAURI-S (~6 984 events).
+    #[test]
+    fn classifies_429_rate_limit_in_500_body_as_transient() {
+        for raw in [
+            r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#,
+            r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#,
+        ] {
+            assert_eq!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::TransientUpstreamHttp),
+                "should classify 429-in-500-body as transient: {raw}"
+            );
+        }
+    }
+
+    /// The security::policy `"Rate limit exceeded: action budget exhausted"`
+    /// must NOT be silenced — it's a user-facing hard stop, not a transient
+    /// upstream quota hit.
+    #[test]
+    fn does_not_classify_security_policy_rate_limit_as_transient() {
+        let msg = "Rate limit exceeded: action budget exhausted (0 actions/hour). \
+                   Increase the limit in Settings -> Advanced -> Agent autonomy";
+        assert_eq!(
+            expected_error_kind(msg),
+            None,
+            "security policy action-budget error must reach Sentry: {msg}"
+        );
+        // Wrapped by rpc.invoke_method — the prefix must not accidentally
+        // trigger the `api error (` anchor.
+        assert_eq!(
+            expected_error_kind(&format!("rpc.invoke_method failed: {msg}")),
+            None,
+            "wrapped security policy action-budget error must reach Sentry"
+        );
+    }
+
+    /// Standalone `"rate limit exceeded"` without the `"api error ("` anchor
+    /// must NOT be silenced — keeps loose phrases from accidentally demoting
+    /// unrelated errors.
+    #[test]
+    fn does_not_classify_bare_rate_limit_exceeded_as_transient() {
+        assert_eq!(
+            expected_error_kind("rate limit exceeded"),
+            None,
+            "bare 'rate limit exceeded' without API error anchor must reach Sentry"
+        );
+    }
+
+    /// `is_upstream_rate_limit_message` predicate unit tests — verifies the
+    /// polarity contract independently of `expected_error_kind`.
+    #[test]
+    fn upstream_rate_limit_predicate_matches_expected_shapes() {
+        for lower in [
+            r#"{"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#,
+            "upstream rate limit exceeded for model 'summarization-v1'",
+            "429 rate limit exceeded, please try again later",
+            r#"openai api error (429 too many requests): {"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#,
+        ] {
+            assert!(
+                is_upstream_rate_limit_message(lower),
+                "should match: {lower}"
+            );
+        }
+    }
+
+    #[test]
+    fn upstream_rate_limit_predicate_does_not_match_unrelated() {
+        for lower in [
+            // security::policy budget message — must not be swallowed
+            "rate limit exceeded: action budget exhausted (0 actions/hour)",
+            // bare phrase without anchor
+            "rate limit exceeded",
+            // unrelated 500 body
+            r#"{"success":false,"error":"internal server error"}"#,
+            // budget exhausted — different concept
+            "budget exhausted, add credits to continue",
+        ] {
+            assert!(
+                !is_upstream_rate_limit_message(lower),
+                "should not match: {lower}"
+            );
+        }
+    }
+
     #[test]
     fn does_not_classify_unrelated_messages_as_capability_unavailable() {
         // The classifier anchors on the exact "for this RAM tier" substring.
diff --git a/src/main.rs b/src/main.rs
index 352f710a8a..10d79843ac 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -83,6 +83,39 @@ fn main() {
             {
                 return None;
             }
+            // Defense-in-depth: upstream rate-limit events that slipped past
+            // the call-site suppressors in `ops::api_error` (primary guard)
+            // and `report_error_or_expected` (secondary guard via
+            // `expected_error_kind`). Catches the three major shapes:
+            //   · `rate_limit_error` type in the JSON body (OPENHUMAN-TAURI-2E,
+            //     OPENHUMAN-TAURI-RQ — ~2 223 events combined)
+            //   · `"upstream rate limit exceeded"` in a 500 body (TAURI-6Y —
+            //     ~19 849 events)
+            //   · `"429 rate limit exceeded"` in a 500 body (TAURI-S — ~6 984
+            //     events)
+            // The primary per-attempt suppression lives in
+            // `openhuman::inference::provider::ops::api_error` (skips
+            // `report_error` entirely for rate-limit bodies) and in
+            // `embeddings::openai::embed` (uses `report_error_or_expected` with
+            // the canonical `"Embedding API error ({status}): …"` format so
+            // `is_transient_upstream_http_message` catches it). This filter is
+            // the last line of defense for any future call site that adds a new
+            // report path without routing through one of those two guards.
+            {
+                let event_message = event
+                    .message
+                    .as_deref()
+                    .or_else(|| event.logentry.as_ref().map(|l| l.message.as_str()))
+                    .unwrap_or("");
+                let lower = event_message.to_ascii_lowercase();
+                if openhuman_core::core::observability::is_upstream_rate_limit_message(&lower) {
+                    log::debug!(
+                        "[sentry-rate-limit-filter] dropping upstream rate-limit event_id={:?}",
+                        event.event_id
+                    );
+                    return None;
+                }
+            }
             // Defense-in-depth: 404 on PATCH/DELETE to a channel-message path
             // is an expected state (provider-side delete or backend GC). Primary
             // suppression lives in `authed_json`; this catches any future call
diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs
index 945e0a8eec..3cb8eda719 100644
--- a/src/openhuman/inference/provider/ops.rs
+++ b/src/openhuman/inference/provider/ops.rs
@@ -614,16 +614,38 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E
     } else if is_provider_config_rejection {
         log_provider_config_rejection("api_error", provider, None, status);
     } else if should_report_provider_http_failure(status) {
-        crate::core::observability::report_error(
-            message.as_str(),
-            "llm_provider",
-            "api_error",
-            &[
-                ("provider", provider),
-                ("status", status_str.as_str()),
-                ("failure", "non_2xx"),
-            ],
-        );
+        // Defense-in-depth: some backends (e.g. OpenHuman) wrap an upstream
+        // provider 429 as an HTTP 500 with a rate-limit phrase in the body
+        // (`"429 rate limit exceeded"`, `"upstream rate limit exceeded"`).
+        // `should_report_provider_http_failure(500)` would otherwise let this
+        // through to Sentry — suppress it here before the report fires so the
+        // noise stays off Sentry (OPENHUMAN-TAURI-S: ~6 984 events).
+        // The `expected_error_kind` classifier in `report_error_or_expected`
+        // catches the same shape at re-report sites (agent / web_channel).
+        let lower_body = body.to_ascii_lowercase();
+        let is_rate_limit_body =
+            crate::core::observability::is_upstream_rate_limit_message(&lower_body);
+        if is_rate_limit_body {
+            tracing::warn!(
+                domain = "llm_provider",
+                operation = "api_error",
+                provider = provider,
+                status = status_str.as_str(),
+                "[llm_provider] api_error: skipping Sentry report — rate-limit body in \
+                 non-429 response ({status})"
+            );
+        } else {
+            crate::core::observability::report_error(
+                message.as_str(),
+                "llm_provider",
+                "api_error",
+                &[
+                    ("provider", provider),
+                    ("status", status_str.as_str()),
+                    ("failure", "non_2xx"),
+                ],
+            );
+        }
     }
     anyhow::anyhow!(message)
 }
@@ -1311,6 +1333,61 @@ mod tests {
         }
     }
 
+    // Tests for the rate-limit body suppression guard added to `api_error`.
+    // Exercises `is_upstream_rate_limit_message` with the exact body shapes that
+    // produced OPENHUMAN-TAURI-S (~6 984 events from HTTP 500 wrapping a
+    // "429 rate limit exceeded" body) and OPENHUMAN-TAURI-6Y / -2E.
+    mod rate_limit_body_suppression {
+        use crate::core::observability::is_upstream_rate_limit_message;
+
+        /// HTTP 500 with a `"429 rate limit exceeded"` body must be detected
+        /// as a rate-limit signal so the guard in `api_error` can skip the
+        /// Sentry report (OPENHUMAN-TAURI-S).
+        #[test]
+        fn http_500_with_429_body_phrase_is_rate_limited() {
+            let body = r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"#
+                .to_ascii_lowercase();
+            assert!(
+                is_upstream_rate_limit_message(&body),
+                "500-body with '429 rate limit exceeded' must be detected as rate-limited"
+            );
+        }
+
+        /// HTTP 500 with an `"upstream rate limit exceeded"` body
+        /// (OPENHUMAN-TAURI-6Y shape).
+        #[test]
+        fn http_500_with_upstream_rate_limit_body_is_rate_limited() {
+            let body = r#"{"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi"}}"#
+                .to_ascii_lowercase();
+            assert!(
+                is_upstream_rate_limit_message(&body),
+                "500-body with 'upstream rate limit exceeded' must be detected"
+            );
+        }
+
+        /// OpenAI / Anthropic `"rate_limit_error"` type body.
+        #[test]
+        fn body_with_rate_limit_error_type_is_rate_limited() {
+            let body = r#"{"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#
+                .to_ascii_lowercase();
+            assert!(
+                is_upstream_rate_limit_message(&body),
+                "body with 'rate_limit_error' type must be detected"
+            );
+        }
+
+        /// Unrelated 500 body must NOT be detected as rate-limited.
+        #[test]
+        fn http_500_unrelated_body_is_not_rate_limited() {
+            let body = r#"{"success":false,"error":"internal server error: database unavailable"}"#
+                .to_ascii_lowercase();
+            assert!(
+                !is_upstream_rate_limit_message(&body),
+                "unrelated 500 body must not be detected as rate-limited"
+            );
+        }
+    }
+
     mod provider_access_policy_suppression {
         use super::*;
 

From 20ce7ef36ea87b8e5fcc753960a153867b782b8c Mon Sep 17 00:00:00 2001
From: "cyrus@tinyhumans.ai" <cyrus@tinyhumans.ai>
Date: Fri, 29 May 2026 10:33:11 +0530
Subject: [PATCH 2/2] style: cargo fmt

---
 src/openhuman/inference/provider/ops.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs
index 3cb8eda719..09f1c0630a 100644
--- a/src/openhuman/inference/provider/ops.rs
+++ b/src/openhuman/inference/provider/ops.rs
@@ -1345,8 +1345,9 @@ mod tests {
         /// Sentry report (OPENHUMAN-TAURI-S).
         #[test]
         fn http_500_with_429_body_phrase_is_rate_limited() {
-            let body = r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"#
-                .to_ascii_lowercase();
+            let body =
+                r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"#
+                    .to_ascii_lowercase();
             assert!(
                 is_upstream_rate_limit_message(&body),
                 "500-body with '429 rate limit exceeded' must be detected as rate-limited"