tinyhumansai · M3gA-Mind · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
@@ -142,6 +142,24 @@ pub enum ExpectedErrorKind {
     /// ~56 events/hour, all from `openhuman.agent_chat` via
     /// `local_ai.ops.agent_chat`).
     PromptInjectionBlocked,
+    /// The request exceeded the model's context window — the
+    /// conversation/prompt is too long for the configured model. A
+    /// deterministic user-state / usage condition; the remediation is
+    /// "start a new chat, trim the conversation, or pick a larger-context
+    /// model", which the UI surfaces. Sentry has no signal to act on.
+    ///
+    /// The provider HTTP layer (`providers::ops::api_error`) suppresses its
+    /// own per-attempt event for this condition, and
+    /// `providers::reliable` marks it non-retryable. This arm catches the
+    /// **re-report** when the same error is raised again by
+    /// `agent.run_single` / `web_channel.run_chat_task` under a different
+    /// `domain` tag (same two-emit-site shape as the empty-response and
+    /// session-expired fixes). Delegates to the single-source matcher
+    /// [`crate::openhuman::inference::provider::is_context_window_exceeded_message`]
+    /// so the retry classifier, the api_error cascade, and this arm can't
+    /// drift. Drops Sentry TAURI-RUST-501
+    /// (`Context size has been exceeded`, custom-provider 500).
+    ContextWindowExceeded,
     /// The memory-store chunk DB's per-path circuit breaker is currently open
     /// because too many consecutive SQLite init attempts failed. This is the
     /// breaker doing its job — it opened *after* the underlying transient
@@ -235,6 +253,14 @@ pub fn expected_error_kind(message: &str) -> Option<ExpectedErrorKind> {
     if is_prompt_injection_blocked_message(&lower) {
         return Some(ExpectedErrorKind::PromptInjectionBlocked);
     }
+    // Context-window-exceeded re-report from a higher layer (agent /
+    // web_channel). The provider api_error cascade suppresses its own
+    // emit; this catches the re-raise. Delegates to the single-source
+    // provider matcher so the phrasing can't drift. Runs last so a more
+    // specific matcher always wins.
+    if crate::openhuman::inference::provider::is_context_window_exceeded_message(message) {
+        return Some(ExpectedErrorKind::ContextWindowExceeded);
+    }
     if is_memory_store_breaker_open(&lower) {
         return Some(ExpectedErrorKind::MemoryStoreBreakerOpen);
     }
@@ -1037,6 +1063,21 @@ fn report_expected_message(kind: ExpectedErrorKind, message: &str, domain: &str,
                 "[observability] {domain}.{operation} skipped expected prompt-injection-blocked error"
             );
         }
+        ExpectedErrorKind::ContextWindowExceeded => {
+            // Request too long for the model's context window. The provider
+            // api_error cascade already demotes its own emit; this is the
+            // higher-layer re-report. Deterministic user-state — the UI
+            // shows the retry message and the user trims / starts a new
+            // chat. Demote to `warn!` (breadcrumb only) — same tier as the
+            // other usage-state conditions.
+            tracing::warn!(
+                domain = domain,
+                operation = operation,
+                kind = "context_window_exceeded",
+                error = %message,
+                "[observability] {domain}.{operation} skipped expected context-window-exceeded error: {message}"
+            );
+        }
         ExpectedErrorKind::DiskFull => {
             // Host filesystem out of space. The user must free space on
             // their machine — Sentry can't help. Demote at `warn!` so a
@@ -1723,6 +1764,57 @@ mod tests {
         );
     }
 
+    // ── ContextWindowExceeded (TAURI-RUST-501) ─────────────────────────────
+
+    #[test]
+    fn classifies_context_window_exceeded_rereport() {
+        // TAURI-RUST-501: the custom-provider 500 body that escapes the
+        // provider api_error cascade's own status-gated checks. When the
+        // error is re-raised by `agent.run_single` / `web_channel.
+        // run_chat_task`, `report_error_or_expected` runs the classifier on
+        // the full message — this arm must catch the new phrasing.
+        assert_eq!(
+            expected_error_kind(
+                "custom API error (500 Internal Server Error): \
+                 {\"error\":{\"code\":500,\"message\":\"Context size has been exceeded.\",\"type\":\"server_error\"}}"
+            ),
+            Some(ExpectedErrorKind::ContextWindowExceeded)
+        );
+
+        // The established phrasings the provider/reliable layer already
+        // recognized must classify here too (single-source matcher).
+        for raw in [
+            "OpenAI API error (400): This model's maximum context length is 8192 tokens",
+            "request exceeds the context window of this model",
+            "context length exceeded",
+            "prompt is too long",
+        ] {
+            assert_eq!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::ContextWindowExceeded),
+                "should classify as context-window-exceeded: {raw}"
+            );
+        }
+    }
+
+    #[test]
+    fn does_not_classify_unrelated_messages_as_context_window_exceeded() {
+        // Anchors are context-overflow specific. A generic "window" or
+        // "context" mention, or an unrelated rate-limit "exceeded", must
+        // not classify.
+        for raw in [
+            "rate limit exceeded, retry after 30s",
+            "failed to open context menu window",
+            "tool call exceeded the allowed budget",
+        ] {
+            assert_eq!(
+                expected_error_kind(raw),
+                None,
+                "must NOT classify as context-window-exceeded: {raw}"
+            );
+        }
+    }
+
     #[test]
     fn classifies_memory_store_breaker_open() {
         // TAURI-RUST-52X (~455 events on self-hosted Sentry): the chunk-store

@@ -606,6 +606,101 @@ pub(super) fn log_provider_config_rejection(
     );
 }
 
+/// Whether a provider error body indicates the request exceeded the model's
+/// context window (the conversation/prompt is too long for the configured
+/// model). This is a deterministic user-state / usage condition — the
+/// remediation is "start a new chat, trim the conversation, or pick a
+/// larger-context model" — not a product bug. Sentry has no signal to act
+/// on.
+///
+/// Single source of truth for the context-overflow phrasing, shared by:
+/// - [`super::reliable`]'s non-retryable classifier (retrying the same
+///   oversized request can't help),
+/// - the [`api_error`] Sentry-suppression cascade (below), and
+/// - the `core::observability` `ContextWindowExceeded` classifier (which
+///   catches the higher-layer re-report under `domain=agent` /
+///   `web_channel`).
+///
+/// Status-agnostic on purpose: providers disagree on the HTTP code for this
+/// condition — OpenAI / most emit `400 context_length_exceeded`, but some
+/// custom / self-hosted gateways mis-report it as `500` (Sentry
+/// TAURI-RUST-501: `"custom API error (500 …): Context size has been
+/// exceeded."`). Matching on the body keeps all of them in one bucket.
+///
+/// Anchoring is deliberately two-tier because this matcher now also feeds
+/// `core::observability::expected_error_kind` (Sentry suppression) and the
+/// `reliable` non-retryable decision, so an over-broad match would both
+/// hide a real error from Sentry *and* wrongly mark a retryable error as
+/// permanent:
+///
+/// - **Length/context phrases** ([`CONTEXT_HINTS`]) are unambiguous —
+///   "context window", "context length", "prompt is too long" only describe
+///   request-size overflow — so they match alone.
+/// - **Token-count phrases** ([`TOKEN_HINTS`]) collide with per-minute token
+///   *rate* limits ("rate limit reached … too many tokens per min"), which
+///   are transient 429s that MUST stay retryable and keep reaching Sentry.
+///   They only count as context-overflow when no rate-limit marker is
+///   present.
+pub fn is_context_window_exceeded_message(body: &str) -> bool {
+    let lower = body.to_ascii_lowercase();
+
+    // Unambiguous request-size / context phrases — match on their own.
+    const CONTEXT_HINTS: &[&str] = &[
+        "exceeds the context window",
+        "context window of this model",
+        "maximum context length",
+        "context length exceeded",
+        "context size has been exceeded",
+        "prompt is too long",
+        "input is too long",
+    ];
+    if CONTEXT_HINTS.iter().any(|hint| lower.contains(hint)) {
+        return true;
+    }
+
+    // Token-count phrases are ambiguous with token-per-minute RATE limits.
+    // Treat them as context-overflow only when the body carries no
+    // rate-limit marker — otherwise a transient TPM 429 would be silenced
+    // from Sentry and (via `reliable`) wrongly classified as non-retryable.
+    const TOKEN_HINTS: &[&str] = &["too many tokens", "token limit exceeded"];
+    if TOKEN_HINTS.iter().any(|hint| lower.contains(hint)) {
+        const RATE_LIMIT_MARKERS: &[&str] = &[
+            "per minute",
+            "per min",
+            "rate limit",
+            "rate_limit",
+            "tpm",
+            "requests per",
+            "retry after",
+            "try again in",
+        ];
+        return !RATE_LIMIT_MARKERS
+            .iter()
+            .any(|marker| lower.contains(marker));
+    }
+
+    false
+}
+
+pub(super) fn log_context_window_exceeded(
+    operation: &str,
+    provider: &str,
+    model: Option<&str>,
+    status: reqwest::StatusCode,
+) {
+    tracing::warn!(
+        domain = "llm_provider",
+        operation = operation,
+        provider = provider,
+        model = model.unwrap_or(""),
+        status = status.as_u16(),
+        failure = "non_2xx",
+        kind = "context_window_exceeded",
+        "[llm_provider] {operation} context-window exceeded ({status}) — \
+         request too long for the model, not reporting to Sentry"
+    );
+}
+
 /// Build a sanitized provider error from a failed HTTP response.
 ///
 /// Reports the failure to Sentry with `provider` and `status` tags so
@@ -647,6 +742,10 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E
         is_custom_openai_upstream_bad_request_http_400(provider, status, &body);
     let is_provider_access_policy_denied = is_provider_access_policy_denied_http_403(status, &body);
     let is_provider_config_rejection = is_provider_config_rejection_http(status, provider, &body);
+    // Context-overflow is status-agnostic: match the body directly (some
+    // custom gateways mis-report it as 500 — TAURI-RUST-501 — so a status
+    // gate would let those through to `should_report_provider_http_failure`).
+    let is_context_window_exceeded = is_context_window_exceeded_message(&body);
 
     if is_auth_failure && is_backend {
         tracing::warn!(
@@ -675,6 +774,8 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E
         log_provider_access_policy_denied_http_403("api_error", provider, None, status);
     } else if is_provider_config_rejection {
         log_provider_config_rejection("api_error", provider, None, status);
+    } else if is_context_window_exceeded {
+        log_context_window_exceeded("api_error", provider, None, status);
     } else if should_report_provider_http_failure(status) {
         crate::core::observability::report_error(
             message.as_str(),
@@ -1528,6 +1629,92 @@ mod tests {
         }
     }
 
+    mod context_window_exceeded_suppression {
+        use super::*;
+
+        #[test]
+        fn classifies_tauri_rust_501_custom_provider_500_body() {
+            // TAURI-RUST-501: the custom-provider 500 wire body. The
+            // matcher is status-agnostic, so the 500 mis-report is caught
+            // (the provider api_error cascade routes it to
+            // `log_context_window_exceeded` instead of `report_error`).
+            assert!(is_context_window_exceeded_message(
+                "{\"error\":{\"code\":500,\"message\":\"Context size has been exceeded.\",\"type\":\"server_error\"}}"
+            ));
+        }
+
+        #[test]
+        fn classifies_established_context_overflow_phrasings() {
+            // The phrasings the reliable.rs non-retryable classifier
+            // recognized before this refactor must all still match through
+            // the shared single-source matcher.
+            for body in [
+                "This model's maximum context length is 8192 tokens",
+                "request exceeds the context window of this model",
+                "context length exceeded",
+                "too many tokens in the prompt",
+                "token limit exceeded",
+                "prompt is too long for the selected model",
+                "input is too long",
+            ] {
+                assert!(
+                    is_context_window_exceeded_message(body),
+                    "should match context-overflow body: {body}"
+                );
+            }
+        }
+
+        #[test]
+        fn does_not_match_unrelated_bodies() {
+            for body in [
+                "rate limit exceeded, retry after 30s",
+                "Invalid request: model not found",
+                "Insufficient budget",
+                "tool call exceeded the allowed budget",
+            ] {
+                assert!(
+                    !is_context_window_exceeded_message(body),
+                    "must NOT match unrelated body: {body}"
+                );
+            }
+        }
+
+        #[test]
+        fn token_rate_limits_are_not_context_overflow() {
+            // Token-count phrases collide with per-minute token RATE limits.
+            // Those are transient 429s that must stay retryable and keep
+            // reaching Sentry — they must NOT be classified as context
+            // overflow (CodeRabbit review of #2820). The rate-limit marker
+            // disambiguates.
+            for body in [
+                "Rate limit reached: too many tokens per minute (TPM) for this org",
+                "rate_limit_exceeded: token limit exceeded, retry after 12s",
+                "You have hit too many tokens per min; try again in 30s",
+            ] {
+                assert!(
+                    !is_context_window_exceeded_message(body),
+                    "TPM rate-limit must NOT match as context overflow: {body}"
+                );
+            }
+            // …but a token-count overflow with NO rate marker still matches.
+            assert!(is_context_window_exceeded_message(
+                "Request rejected: too many tokens in the input for this model"
+            ));
+        }
+
+        #[test]
+        fn log_helper_runs_without_panicking() {
+            // Smoke for the demotion path taken by `api_error` — no tracing
+            // subscriber in unit tests.
+            log_context_window_exceeded(
+                "api_error",
+                "custom_openai",
+                None,
+                reqwest::StatusCode::INTERNAL_SERVER_ERROR,
+            );
+        }
+    }
+
     #[test]
     fn test_sanitize_api_error_utf8() {
         let input = "🦀".repeat(MAX_API_ERROR_CHARS + 10);

@@ -99,19 +99,11 @@ fn is_stream_error_non_retryable(err: &StreamError) -> bool {
 }
 
 fn is_context_window_exceeded(err: &anyhow::Error) -> bool {
-    let lower = err.to_string().to_lowercase();
-    let hints = [
-        "exceeds the context window",
-        "context window of this model",
-        "maximum context length",
-        "context length exceeded",
-        "too many tokens",
-        "token limit exceeded",
-        "prompt is too long",
-        "input is too long",
-    ];
-
-    hints.iter().any(|hint| lower.contains(hint))
+    // Single source of truth for the context-overflow phrasing lives in
+    // `ops::is_context_window_exceeded_message` so the non-retryable
+    // classifier here, the `api_error` Sentry-suppression cascade, and the
+    // `core::observability` `ContextWindowExceeded` arm can't drift apart.
+    super::is_context_window_exceeded_message(&err.to_string())
 }
 
 /// Detect provider-side temporary capacity/outage errors. Covers: