diff --git a/src/core/socketio.rs b/src/core/socketio.rs index fe6e899c5..51eb76476 100644 --- a/src/core/socketio.rs +++ b/src/core/socketio.rs @@ -113,6 +113,37 @@ pub struct WebChannelEvent { /// Type of error, if the event represents a failure. #[serde(skip_serializing_if = "Option::is_none")] pub error_type: Option, + /// Structured rate-limit / error metadata produced by + /// `classify_inference_error` (issue #2606). All four fields are + /// additive — older FE clients that only read `message`/`error_type` + /// keep working; new clients can read these to render countdown, + /// retry-button, and fallback-CTA UI without regexing the message. + /// + /// Where the limit originated: + /// `"provider"` | `"openhuman_budget"` | `"agent_loop"` + /// | `"openhuman_billing"` | `"transport"` | `"config"`. + #[serde(skip_serializing_if = "Option::is_none")] + pub error_source: Option, + /// Whether the same prompt can be retried in this same thread. + /// `false` for non-retryable business 429s, auth, model_unavailable, + /// context_overflow, and billing exhaustion. + #[serde(skip_serializing_if = "Option::is_none")] + pub error_retryable: Option, + /// Milliseconds to wait before retrying, as supplied by the upstream + /// `Retry-After:` / `retry_after:` header. `None` when the upstream + /// didn't supply one or the error class has no retry-after concept. + #[serde(skip_serializing_if = "Option::is_none")] + pub error_retry_after_ms: Option, + /// Provider name extracted from `" API error (...)"` + /// envelopes. `None` for non-provider errors (OpenHuman budget cap, + /// agent loop) and for transport failures without a provider prefix. + #[serde(skip_serializing_if = "Option::is_none")] + pub error_provider: Option, + /// `Some(false)` once the reliable-provider chain has exhausted + /// every configured `model_fallbacks` entry. `None` means "unknown + /// — FE should not promise a fallback". + #[serde(skip_serializing_if = "Option::is_none")] + pub error_fallback_available: Option, /// Name of the tool being called. #[serde(skip_serializing_if = "Option::is_none")] pub tool_name: Option, diff --git a/src/openhuman/channels/proactive.rs b/src/openhuman/channels/proactive.rs index fcc2cd505..22a42e1db 100644 --- a/src/openhuman/channels/proactive.rs +++ b/src/openhuman/channels/proactive.rs @@ -126,6 +126,11 @@ impl EventHandler for ProactiveMessageSubscriber { full_response: Some(message.clone()), message: None, error_type: None, + error_source: None, + error_retryable: None, + error_retry_after_ms: None, + error_provider: None, + error_fallback_available: None, tool_name: None, skill_id: None, args: None, diff --git a/src/openhuman/channels/providers/presentation.rs b/src/openhuman/channels/providers/presentation.rs index 16f57287b..1a1ae3000 100644 --- a/src/openhuman/channels/providers/presentation.rs +++ b/src/openhuman/channels/providers/presentation.rs @@ -52,6 +52,11 @@ pub async fn deliver_response( full_response: Some(full_response.to_string()), message: None, error_type: None, + error_source: None, + error_retryable: None, + error_retry_after_ms: None, + error_provider: None, + error_fallback_available: None, tool_name: None, skill_id: None, args: None, @@ -92,6 +97,11 @@ pub async fn deliver_response( full_response: Some(segment.clone()), message: None, error_type: None, + error_source: None, + error_retryable: None, + error_retry_after_ms: None, + error_provider: None, + error_fallback_available: None, tool_name: None, skill_id: None, args: None, @@ -124,6 +134,11 @@ pub async fn deliver_response( full_response: Some(full_response.to_string()), message: None, error_type: None, + error_source: None, + error_retryable: None, + error_retry_after_ms: None, + error_provider: None, + error_fallback_available: None, tool_name: None, skill_id: None, args: None, diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index 05a07b30b..5acd031e1 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -223,6 +223,98 @@ fn with_provider_detail(summary: &str, err: &str) -> String { } } +/// Structured chat-error envelope produced by [`classify_inference_error`]. +/// +/// Carries the typed metadata the frontend needs to render a recovery UI +/// (retry-after countdown, retry button, fallback CTA) without having to +/// regex the human-readable `message`. Issue #2606. +/// +/// `error_type` and `message` preserve the wire shape PR #2371 established +/// — existing FE handlers that read those fields keep working. The new +/// fields are additive and `Option`-typed where the value isn't always +/// known at the classifier layer. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct ClassifiedError { + /// Stable token: `rate_limited`, `action_budget_exceeded`, + /// `max_iterations`, `timeout`, `auth_error`, `budget_exhausted`, + /// `provider_error`, `context_overflow`, `model_unavailable`, + /// `inference`. + pub(crate) error_type: &'static str, + /// User-facing copy (already includes provider detail block and the + /// retry-after countdown sentence when available). + pub(crate) message: String, + /// Where the limit originated. One of: + /// - `"provider"` — upstream LLM provider 429 / rate limit + /// - `"openhuman_budget"` — local SecurityPolicy per-hour action cap + /// - `"agent_loop"` — agent ran out of tool iterations + /// - `"openhuman_billing"` — OpenHuman credit/quota exhaustion + /// - `"transport"` — network / DNS / TLS / timeout + /// - `"config"` — auth, model, context, generic + pub(crate) source: &'static str, + /// Can the user retry the same prompt in the same thread? `false` for + /// non-retryable business 429s, auth failures, model_unavailable, + /// context_overflow, and OpenHuman billing exhaustion. + pub(crate) retryable: bool, + /// Milliseconds the upstream asked us to wait. Surfaced verbatim from + /// `Retry-After:` / `retry_after:` headers when present; `None` when + /// the upstream didn't supply one OR the error class doesn't have a + /// concept of retry-after (auth, config, etc.). + pub(crate) retry_after_ms: Option, + /// Provider name extracted from the leading + /// `" API error (...)"` envelope emitted by + /// `inference::provider::ops::api_error`. `None` for non-provider + /// errors (OpenHuman budget cap, agent loop) and for transport + /// failures that don't carry an identifiable provider prefix. + pub(crate) provider: Option, + /// `Some(false)` once the reliable-provider chain has exhausted every + /// configured `model_fallbacks` entry (the aggregate "All + /// providers/models failed" branch). `None` means the classifier + /// can't tell from the error string alone — the FE should treat it + /// as "unknown, don't promise a fallback". + pub(crate) fallback_available: Option, +} + +/// Best-effort extraction of the provider name from an error string. +/// +/// `inference::provider::ops::api_error` formats upstream failures as +/// `" API error (): "`, e.g. +/// `"openrouter API error (429 Too Many Requests): ..."`. We pull the +/// leading word and lowercase it so the wire value is stable across +/// providers' own capitalisation. +/// +/// Returns `None` when: +/// - The error string doesn't carry the `" API error"` infix. +/// - The candidate word contains characters that wouldn't appear in a +/// provider name (slashes, colons, etc. — guards against transport +/// error prefixes that happen to be followed by " API error"). +fn extract_provider_name(err: &str) -> Option { + const INFIX: &str = " API error"; + let idx = err.find(INFIX)?; + let prefix = err[..idx].trim_end(); + let candidate = prefix + .rsplit_once(char::is_whitespace) + .map_or(prefix, |(_, last)| last); + if candidate.is_empty() + || !candidate + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-') + { + return None; + } + Some(candidate.to_ascii_lowercase()) +} + +/// Detect the reliable-provider aggregate that fires once every +/// configured `model_fallbacks` entry has been tried. +/// +/// `reliable.rs::format_failure_aggregate` always opens with +/// `"All providers/models failed. Attempts:"`. When that marker is +/// present the FE should NOT offer a fallback retry — there is none +/// left to try. +fn is_fallback_chain_exhausted(err: &str) -> bool { + err.contains("All providers/models failed") +} + /// Extract a Retry-After / retry_after seconds hint from a free-form /// error string. Mirrors the typed [`crate::openhuman::inference:: /// provider::reliable::parse_retry_after_ms`] helper but operates on @@ -303,8 +395,15 @@ fn is_action_budget_exhausted(err_lower: &str) -> bool { || err_lower.contains("action blocked: rate limit exceeded") } -fn classify_inference_error(err: &str) -> (&'static str, String) { +fn classify_inference_error(err: &str) -> ClassifiedError { let lower = err.to_lowercase(); + let provider = extract_provider_name(err); + let fallback_available = if is_fallback_chain_exhausted(err) { + Some(false) + } else { + None + }; + // Order matters: the SecurityPolicy hourly cap and the // agent-loop max-iterations error both surface as strings that // contain "rate limit" / "iteration", so they MUST be checked @@ -312,83 +411,136 @@ fn classify_inference_error(err: &str) -> (&'static str, String) { // a confusing "your AI provider is rate-limiting you" message // for limits OpenHuman itself enforced (issue #2364). if is_action_budget_exhausted(&lower) { - ( - "action_budget_exceeded", - with_provider_detail( + ClassifiedError { + error_type: "action_budget_exceeded", + message: with_provider_detail( "You've hit OpenHuman's per-hour action budget — this is a local safety cap, \ not your AI provider. The window decays gradually; you can keep chatting in \ this thread and tool-heavy steps will resume as the budget refills.", err, ), - ) + source: "openhuman_budget", + // The window decays gradually so the same thread CAN recover + // — we just can't predict the exact wait. + retryable: true, + retry_after_ms: None, + // OpenHuman's own cap — provider name (if any was in the + // surrounding error chain) is irrelevant; the limit isn't + // from a provider. + provider: None, + fallback_available: None, + } } else if crate::openhuman::agent::error::is_max_iterations_error(err) { - ( - "max_iterations", - with_provider_detail( + ClassifiedError { + error_type: "max_iterations", + message: with_provider_detail( "The agent ran the maximum number of tool steps for one turn without \ finishing. This usually means a tool kept failing (often a rate limit on a \ web fetch). You can retry the same question in this thread once the \ underlying limit clears.", err, ), - ) + source: "agent_loop", + retryable: true, + retry_after_ms: None, + provider, + fallback_available: None, + } } else if lower.contains("rate limit") || lower.contains("429") { - let retry = parse_retry_after_secs_from_str(err); + let retry_secs = parse_retry_after_secs_from_str(err); let summary = format!( "Your AI provider is rate-limiting requests. This is a transient upstream \ limit, not a thread-level block — you can retry in this thread.{}", - retry_after_hint(retry) + retry_after_hint(retry_secs) ); - ("rate_limited", with_provider_detail(summary.as_str(), err)) + // Non-retryable business 429s ("plan does not include", balance + // exhausted, known provider business codes like Z.AI 1311/1113) + // also surface here — mark them non-retryable so the FE can hide + // the "Retry" button and route the user to settings/billing. + let retryable = !is_non_retryable_rate_limit_text(&lower); + ClassifiedError { + error_type: "rate_limited", + message: with_provider_detail(summary.as_str(), err), + source: "provider", + retryable, + retry_after_ms: retry_secs.map(|s| s.saturating_mul(1000)), + provider, + fallback_available, + } } else if lower.contains("timeout") || lower.contains("timed out") { - ( - "timeout", - with_provider_detail( + ClassifiedError { + error_type: "timeout", + message: with_provider_detail( "The request timed out. Please check your connection and try again.", err, ), - ) + source: "transport", + retryable: true, + retry_after_ms: None, + provider, + fallback_available, + } } else if lower.contains("401") || lower.contains("unauthorized") || lower.contains("api key") { - ( - "auth_error", - with_provider_detail( + ClassifiedError { + error_type: "auth_error", + message: with_provider_detail( "There's an authentication issue with the AI provider. Please check your API key in settings.", err, ), - ) + source: "config", + retryable: false, + retry_after_ms: None, + provider, + fallback_available: None, + } } else if lower.contains("402") || lower.contains("payment required") || lower.contains("insufficient balance") { - ( - "budget_exhausted", - with_provider_detail("Insufficient credits. Please top up to continue.", err), - ) + ClassifiedError { + error_type: "budget_exhausted", + message: with_provider_detail("Insufficient credits. Please top up to continue.", err), + source: "openhuman_billing", + retryable: false, + retry_after_ms: None, + provider, + fallback_available: None, + } } else if lower.contains("500") || lower.contains("internal server") || lower.contains("service unavailable") || lower.contains("503") { - ( - "provider_error", - with_provider_detail( + ClassifiedError { + error_type: "provider_error", + message: with_provider_detail( "The AI provider is temporarily unavailable. Please try again later.", err, ), - ) + source: "provider", + retryable: true, + retry_after_ms: None, + provider, + fallback_available, + } } else if lower.contains("context") && (lower.contains("length") || lower.contains("limit") || lower.contains("exceed") || lower.contains("token")) { - ( - "context_overflow", - with_provider_detail( + ClassifiedError { + error_type: "context_overflow", + message: with_provider_detail( "The conversation is too long. Please start a new chat.", err, ), - ) + source: "config", + retryable: false, + retry_after_ms: None, + provider, + fallback_available: None, + } } else if crate::openhuman::inference::provider::is_provider_config_rejection_message(err) { // #2079 / #2076 / #2202: an OpenHuman abstract tier alias leaked to // a custom provider, a stale model pin, or a model-specific @@ -398,33 +550,92 @@ fn classify_inference_error(err: &str) -> (&'static str, String) { // specific "Settings → LLM" remediation instead of the generic // copy. Shared predicate keeps this in lockstep with the // Sentry-demotion classifier. - ( - "model_unavailable", - with_provider_detail( + ClassifiedError { + error_type: "model_unavailable", + message: with_provider_detail( "Your AI provider rejected the request's model or temperature setting. \ Check your model and routing in Settings → LLM.", err, ), - ) + source: "config", + retryable: false, + retry_after_ms: None, + provider, + fallback_available: None, + } } else if lower.contains("model") && (lower.contains("not found") || lower.contains("unavailable") || lower.contains("does not exist") || lower.contains("does not have access")) { - ( - "model_unavailable", - with_provider_detail( + ClassifiedError { + error_type: "model_unavailable", + message: with_provider_detail( "The selected model isn't available on your provider. Check your model settings.", err, ), - ) + source: "config", + retryable: false, + retry_after_ms: None, + provider, + fallback_available: None, + } } else { - ( - "inference", - with_provider_detail(generic_inference_error_user_message(), err), - ) + ClassifiedError { + error_type: "inference", + message: with_provider_detail(generic_inference_error_user_message(), err), + source: "provider", + retryable: true, + retry_after_ms: None, + provider, + fallback_available, + } + } +} + +/// String-flat mirror of +/// [`crate::openhuman::inference::provider::reliable::is_non_retryable_rate_limit`]. +/// +/// The reliable provider already classifies 429s into retryable vs +/// non-retryable based on business-quota markers ("plan does not +/// include", "insufficient balance", Z.AI codes 1311/1113, …) — but +/// that typed `anyhow::Error` is collapsed to a `String` at the +/// native-bus boundary before reaching this layer. We re-detect the +/// same markers in the flattened string so the FE knows whether to +/// offer a "Retry" button. +/// +/// Caller passes the already-lowercased error string to avoid double +/// allocation. +fn is_non_retryable_rate_limit_text(lower: &str) -> bool { + const BUSINESS_HINTS: &[&str] = &[ + "plan does not include", + "doesn't include", + "not include", + "insufficient balance", + "insufficient_balance", + "insufficient quota", + "insufficient_quota", + "quota exhausted", + "out of credits", + "no available package", + "package not active", + "purchase package", + "model not available for your plan", + ]; + if BUSINESS_HINTS.iter().any(|hint| lower.contains(hint)) { + return true; + } + // Known provider business codes observed for 429 where retry is + // futile (mirrors reliable.rs). Scan integer-like tokens. + for token in lower.split(|c: char| !c.is_ascii_digit()) { + if let Ok(code) = token.parse::() { + if matches!(code, 1113 | 1311) { + return true; + } + } } + false } fn prompt_guard_user_message(action: PromptEnforcementAction) -> &'static str { @@ -516,6 +727,11 @@ pub async fn start_chat( full_response: None, message: Some("Cancelled by newer request".to_string()), error_type: Some("cancelled".to_string()), + error_source: None, + error_retryable: None, + error_retry_after_ms: None, + error_provider: None, + error_fallback_available: None, tool_name: None, skill_id: None, args: None, @@ -582,7 +798,8 @@ pub async fn start_chat( "run_chat_task failed client_id={} thread_id={} request_id={} error={}", client_id_task, thread_id_task, request_id_task, err ); - let (classified_type, classified_message) = classify_inference_error(&err); + let classified = classify_inference_error(&err); + let classified_type = classified.error_type; let classified_type_string = classified_type.to_string(); // Max-tool-iterations cap is a deterministic agent-state // outcome surfaced to the user via the existing @@ -630,8 +847,13 @@ pub async fn start_chat( thread_id: thread_id_task.clone(), request_id: request_id_task.clone(), full_response: None, - message: Some(classified_message), + message: Some(classified.message), error_type: Some(classified_type_string), + error_source: Some(classified.source.to_string()), + error_retryable: Some(classified.retryable), + error_retry_after_ms: classified.retry_after_ms, + error_provider: classified.provider, + error_fallback_available: classified.fallback_available, tool_name: None, skill_id: None, args: None, @@ -739,6 +961,11 @@ pub async fn cancel_chat(client_id: &str, thread_id: &str) -> Result> = Lazy::new(|| TokioMutex::new(())); + /// Ensures the test-only forced run_chat_task failure toggle is always reset, /// even if the test panics before reaching explicit cleanup code. struct TestForcedRunChatTaskErrorGuard; @@ -77,6 +89,7 @@ async fn cancel_chat_validates_required_fields() { #[tokio::test] async fn start_chat_emits_sanitized_chat_error_on_inference_failure() { + let _serial = FORCED_ERROR_TEST_LOCK.lock().await; set_test_forced_run_chat_task_error(Some( "error sending request for url (https://internal-api.example.invalid/openai/v1/chat/completions)", )) @@ -168,7 +181,11 @@ fn classify_inference_error_quotes_model_unavailable_detail() { // while still classifying as `model_unavailable` and quoting the // upstream detail. let raw = r#"custom_openai API error (404 Not Found): {"error":{"message":"The model `gpt-5.5` does not exist or you do not have access to it.","code":"model_not_found"}}"#; - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!(category, "model_unavailable"); assert!( message.contains("Settings → LLM"), @@ -195,7 +212,11 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() { r#"custom_openai API error (400): {"error":{"message":"Model 'claude-opus-4-7' is not available. Use GET /openai/v1/models to list available models."}}"#, ]; for raw in cases { - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!( category, "model_unavailable", "config-rejection must classify as model_unavailable, not generic: {raw}" @@ -221,7 +242,11 @@ fn classify_inference_error_distinguishes_action_budget_from_provider_429() { "Rate limit exceeded: too many actions in the last hour", "Action blocked: rate limit exceeded", ] { - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!( category, "action_budget_exceeded", "action-budget signal must NOT classify as provider rate_limited: {raw}" @@ -246,7 +271,11 @@ fn classify_inference_error_max_iterations_gets_dedicated_branch() { // specific message that says retrying in the same thread is OK. let raw = "run_chat_task failed client_id=abc thread_id=t1 \ error=Agent exceeded maximum tool iterations (10)"; - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!(category, "max_iterations"); assert!( message.contains("maximum number of tool steps"), @@ -261,7 +290,11 @@ fn classify_inference_error_max_iterations_gets_dedicated_branch() { #[test] fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() { let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30"; - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!(category, "rate_limited"); assert!( message.contains("Try again in 30 seconds"), @@ -276,7 +309,11 @@ fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() { #[test] fn classify_inference_error_rate_limited_no_retry_after_omits_hint() { let raw = "openrouter API error (429 Too Many Requests)"; - let (category, message) = classify_inference_error(raw); + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error(raw); assert_eq!(category, "rate_limited"); // Generic copy must still describe the situation accurately. assert!(message.contains("transient upstream limit")); @@ -291,7 +328,7 @@ fn classify_inference_error_rate_limited_no_retry_after_omits_hint() { fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() { // Fractional seconds round up — never tell the user to retry // sooner than the upstream actually allows. - let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4"); + let message = classify_inference_error("429 Too Many Requests: retry_after: 2.4").message; assert!( message.contains("Try again in 3 seconds"), "fractional 2.4 must round up to 3: {message}" @@ -300,7 +337,7 @@ fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() // Long windows switch to a "minutes" rendering at the 90s // threshold so the user gets a less precise but more readable // hint. - let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180"); + let message = classify_inference_error("429 Too Many Requests: Retry-After: 180").message; assert!( message.contains("about 3 minutes"), "180s must render as minutes: {message}" @@ -316,19 +353,19 @@ fn classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_ // 119s (ditto). 60s lands in the seconds band; 61s is the // smallest minute-band input but still <90 so seconds; 90s is // the first true minute-band input. - let (_, m_90) = classify_inference_error("429 Too Many Requests: Retry-After: 90"); + let m_90 = classify_inference_error("429 Too Many Requests: Retry-After: 90").message; assert!( m_90.contains("about 2 minutes"), "90s must round up to 2 minutes (not floor to 1): {m_90}" ); - let (_, m_119) = classify_inference_error("429 Too Many Requests: Retry-After: 119"); + let m_119 = classify_inference_error("429 Too Many Requests: Retry-After: 119").message; assert!( m_119.contains("about 2 minutes"), "119s must round up to 2 minutes: {m_119}" ); // Exactly 60-multiple inputs above the 90s threshold render as // exact minutes with no round-up bump. - let (_, m_120) = classify_inference_error("429 Too Many Requests: Retry-After: 120"); + let m_120 = classify_inference_error("429 Too Many Requests: Retry-After: 120").message; assert!( m_120.contains("about 2 minutes"), "exact 120s must stay as 2 minutes: {m_120}" @@ -342,7 +379,11 @@ fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() { // because the quote stopped `lower.find("retry_after:")` from // matching. The parser now strips quotes so the JSON-key shape // resolves the same as the unquoted header shape. - let (category, message) = classify_inference_error( + let ClassifiedError { + error_type: category, + message, + .. + } = classify_inference_error( r#"openrouter API error (429 Too Many Requests): {"retry_after": 30, "code": "rate_limited"}"#, ); assert_eq!(category, "rate_limited"); @@ -352,6 +393,385 @@ fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() { ); } +// ── Structured rate-limit metadata (issue #2606) ────────────── +// +// The classifier MUST surface the structured fields the frontend +// needs to render a countdown / retry / fallback UI without having +// to regex the message string: +// - retry_after_ms — raw, milliseconds, machine-readable +// - source — "provider" | "openhuman_budget" | "agent_loop" +// - provider — name extracted from upstream string when present +// - retryable — same-thread retry safe? (false for non-retryable 429) +// - fallback_available — Some(false) once the reliable provider has +// exhausted its model_fallbacks chain +// +// These supplement the existing `error_type` token and `message` +// text — they do NOT replace either, and pre-#2371 consumers that +// read only the tuple shape keep working. + +#[test] +fn classify_inference_error_rate_limited_returns_structured_retry_after_ms() { + let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30"; + let classified = classify_inference_error(raw); + assert_eq!(classified.error_type, "rate_limited"); + assert_eq!( + classified.retry_after_ms, + Some(30_000), + "30s Retry-After must surface as 30000ms on the structured payload \ + so the FE can render a countdown without regexing the message: \ + got {:?}", + classified.retry_after_ms + ); + assert_eq!( + classified.source, "provider", + "upstream 429 must classify source=provider, not openhuman_budget" + ); + assert_eq!( + classified.retryable, true, + "transient upstream 429 must allow same-thread retry" + ); + assert_eq!( + classified.provider.as_deref(), + Some("openrouter"), + "provider name must be extracted from the ' API error' \ + prefix: got {:?}", + classified.provider + ); +} + +#[tokio::test] +async fn start_chat_chat_error_event_serializes_structured_fields_to_json_wire() { + let _serial = FORCED_ERROR_TEST_LOCK.lock().await; + // The JSON-RPC SSE endpoint emits chat_error by running + // `serde_json::to_value(&event)` over the WebChannelEvent struct + // (see `core/socketio.rs::emit_web_channel_event`). This pins the + // resulting JSON keys so the frontend contract stays stable: the + // FE reads exactly `error_source`, `error_retryable`, + // `error_retry_after_ms`, `error_provider`, `error_fallback_available` + // off the SSE payload — the same keys our Rust struct serializes + // to with `#[serde(rename_all = "snake_case")]`. + // + // Also asserts the additive contract: when these fields are None + // they MUST be omitted from the JSON (older FE clients that don't + // know about them keep working) — `skip_serializing_if = + // "Option::is_none"` on every new field carries this guarantee. + set_test_forced_run_chat_task_error(Some( + "openrouter API error (429 Too Many Requests): Retry-After: 7", + )) + .await; + let _forced_error_guard = TestForcedRunChatTaskErrorGuard; + + let mut rx = subscribe_web_channel_events(); + let request_id = start_chat( + "wire-shape-client", + "wire-shape-thread", + "Please summarize this in one line.", + None, + None, + None, + None, + ) + .await + .expect("start_chat should accept valid request"); + + let recv = timeout(Duration::from_secs(20), async move { + loop { + let event = rx.recv().await.expect("event stream should stay open"); + if event.event != "chat_error" { + continue; + } + if event.request_id != request_id { + continue; + } + return event; + } + }) + .await + .expect("expected chat_error event for started chat request"); + + let json = serde_json::to_value(&recv).expect("WebChannelEvent must serialize"); + assert_eq!( + json.get("error_type").and_then(|v| v.as_str()), + Some("rate_limited") + ); + assert_eq!( + json.get("error_source").and_then(|v| v.as_str()), + Some("provider") + ); + assert_eq!( + json.get("error_retryable").and_then(|v| v.as_bool()), + Some(true) + ); + assert_eq!( + json.get("error_retry_after_ms").and_then(|v| v.as_u64()), + Some(7_000) + ); + assert_eq!( + json.get("error_provider").and_then(|v| v.as_str()), + Some("openrouter") + ); + // No fallback signal in this error string → field omitted. + assert!( + json.get("error_fallback_available").is_none(), + "None fields MUST be omitted from JSON for additive wire compat: {json}" + ); + + // Pin the additive contract: serializing a default (no error) + // event must NOT introduce any of the new keys. + let empty = crate::core::socketio::WebChannelEvent { + event: "chat_done".to_string(), + ..Default::default() + }; + let empty_json = serde_json::to_value(&empty).expect("Default WebChannelEvent serializes"); + for key in [ + "error_source", + "error_retryable", + "error_retry_after_ms", + "error_provider", + "error_fallback_available", + ] { + assert!( + empty_json.get(key).is_none(), + "{key} must be omitted when None so older FE clients aren't surprised: {empty_json}" + ); + } +} + +#[tokio::test] +async fn start_chat_emits_structured_rate_limit_metadata_on_chat_error_event() { + let _serial = FORCED_ERROR_TEST_LOCK.lock().await; + // End-to-end wire check for issue #2606: when run_chat_task fails + // with a 429-shaped error string, the published `chat_error` + // WebChannelEvent on the bus MUST carry the structured fields the + // FE needs (retry_after_ms, source, provider, retryable) — not + // just the message text. This is the contract older PR #2371 + // landed in *message* form but kept off the wire as structured + // metadata. + set_test_forced_run_chat_task_error(Some( + "openrouter API error (429 Too Many Requests): Retry-After: 30", + )) + .await; + let _forced_error_guard = TestForcedRunChatTaskErrorGuard; + + let mut rx = subscribe_web_channel_events(); + let request_id = start_chat( + "rate-limit-client", + "rate-limit-thread", + "Please summarize this in one line.", + None, + None, + None, + None, + ) + .await + .expect("start_chat should accept valid request"); + + let recv = timeout(Duration::from_secs(20), async move { + loop { + let event = rx.recv().await.expect("event stream should stay open"); + if event.event != "chat_error" { + continue; + } + if event.request_id != request_id { + continue; + } + return event; + } + }) + .await + .expect("expected chat_error event for started chat request"); + + assert_eq!( + recv.error_type.as_deref(), + Some("rate_limited"), + "error_type token unchanged for backward compat" + ); + assert_eq!( + recv.error_source.as_deref(), + Some("provider"), + "upstream 429 must classify as provider source on the wire: {recv:?}" + ); + assert_eq!( + recv.error_retryable, + Some(true), + "transient upstream 429 must be retryable on the wire: {recv:?}" + ); + assert_eq!( + recv.error_retry_after_ms, + Some(30_000), + "30s Retry-After must surface as 30000ms on the wire (FE countdown): \ + {recv:?}" + ); + assert_eq!( + recv.error_provider.as_deref(), + Some("openrouter"), + "provider name must reach the wire so the FE can show \"openrouter is throttling\": \ + {recv:?}" + ); +} + +#[test] +fn classify_inference_error_action_budget_marks_source_openhuman_not_provider() { + // OpenHuman's SecurityPolicy per-hour cap is NOT a provider 429 — + // it's a local safety cap. The structured payload must reflect + // that so the FE doesn't tell the user to switch providers, and + // doesn't promise a fallback (the cap applies to OpenHuman itself). + let raw = "Rate limit exceeded: action budget exhausted"; + let classified = classify_inference_error(raw); + assert_eq!(classified.error_type, "action_budget_exceeded"); + assert_eq!( + classified.source, "openhuman_budget", + "OpenHuman's own per-hour cap must NOT be tagged as a provider source" + ); + assert!( + classified.retryable, + "the budget decays gradually so the same thread CAN retry" + ); + assert_eq!( + classified.retry_after_ms, None, + "the SecurityPolicy decay isn't expressed as a discrete Retry-After" + ); + assert_eq!( + classified.provider, None, + "no upstream provider is implicated — the limit is local" + ); +} + +#[test] +fn classify_inference_error_max_iterations_marks_source_agent_loop_retryable() { + let raw = "Agent exceeded maximum tool iterations (12)"; + let classified = classify_inference_error(raw); + assert_eq!(classified.error_type, "max_iterations"); + assert_eq!( + classified.source, "agent_loop", + "the agent's own iteration cap must be its own source category" + ); + assert!( + classified.retryable, + "user CAN re-ask in the same thread once the underlying limit clears" + ); + assert_eq!(classified.retry_after_ms, None); +} + +#[test] +fn classify_inference_error_non_retryable_429_business_quota_unsets_retry() { + // Business 429s (plan doesn't include the model, balance exhausted, + // known business codes like Z.AI 1311/1113) are 429s in shape but + // retrying is futile until the user changes plan/billing. The FE + // should hide the "Retry" button when retryable=false. + let cases: &[&str] = &[ + "openrouter API error (429): plan does not include this model", + "openai API error (429): insufficient_balance", + "zai API error (429): code 1311 quota exhausted", + "zai API error (429): error code 1113", + ]; + for raw in cases { + let classified = classify_inference_error(raw); + assert_eq!( + classified.error_type, "rate_limited", + "still a 429 by classification: {raw}" + ); + assert!( + !classified.retryable, + "business-quota 429 must NOT be marked retryable: {raw}" + ); + } +} + +#[test] +fn classify_inference_error_retryable_429_keeps_retry_flag() { + // Vanilla transient 429 — retry is the right answer. The FE should + // show the countdown + retry button. + let raw = "openai API error (429 Too Many Requests): Retry-After: 5"; + let classified = classify_inference_error(raw); + assert!( + classified.retryable, + "vanilla transient 429 must remain retryable: {classified:?}" + ); + assert_eq!(classified.retry_after_ms, Some(5_000)); + assert_eq!(classified.source, "provider"); +} + +#[test] +fn classify_inference_error_extracts_provider_name_lowercase() { + // The " API error" envelope from + // inference::provider::ops::api_error is the canonical source we + // pull the provider name from. Lowercased so the wire value is + // stable across providers' own capitalisation. + let cases: &[(&str, Option<&str>)] = &[ + ( + "openrouter API error (429): rate limited", + Some("openrouter"), + ), + ( + "Anthropic API error (429): too many requests", + Some("anthropic"), + ), + ("custom_openai API error (429): {}", Some("custom_openai")), + // No "API error" infix — no extraction (the upstream didn't + // come through the standard wrapper). + ("connect timed out after 30s", None), + ]; + for (raw, want) in cases { + let classified = classify_inference_error(raw); + assert_eq!( + classified.provider.as_deref(), + *want, + "provider extraction mismatch for {raw:?}" + ); + } +} + +#[test] +fn classify_inference_error_fallback_chain_exhausted_sets_false() { + // Once reliable.rs's `format_failure_aggregate` has emitted "All + // providers/models failed", we KNOW no fallback remains. The FE + // must not offer a "Try fallback" CTA in that case. + let raw = "openrouter API error (429): rate limited\nAll providers/models failed. Attempts:\nprovider=openai model=gpt-4 attempt 1/3: rate_limited"; + let classified = classify_inference_error(raw); + assert_eq!( + classified.fallback_available, + Some(false), + "aggregate marker must surface as fallback_available=Some(false): {classified:?}" + ); +} + +#[test] +fn classify_inference_error_no_fallback_signal_means_unknown() { + // Single-provider 429 with no aggregate marker — the classifier + // cannot tell whether a fallback exists. Must surface None + // ("unknown") so the FE doesn't promise something we can't deliver. + let raw = "openrouter API error (429): rate limited"; + let classified = classify_inference_error(raw); + assert_eq!(classified.fallback_available, None); +} + +#[test] +fn classify_inference_error_auth_marks_non_retryable_config_source() { + let raw = "openai API error (401 Unauthorized): invalid api key"; + let classified = classify_inference_error(raw); + assert_eq!(classified.error_type, "auth_error"); + assert_eq!(classified.source, "config"); + assert!( + !classified.retryable, + "401 won't recover until the user updates settings" + ); +} + +#[test] +fn classify_inference_error_billing_402_distinguished_from_provider_429() { + // Acceptance criteria for #2606: distinguish upstream provider + // throttling (429) from OpenHuman budget/billing limits (402). + let raw = "OpenHuman API error (402 Payment Required): top up to continue"; + let classified = classify_inference_error(raw); + assert_eq!(classified.error_type, "budget_exhausted"); + assert_eq!( + classified.source, "openhuman_billing", + "402 must NOT share source with provider 429" + ); + assert!(!classified.retryable); +} + #[test] fn generic_error_copy_is_sanitized_and_has_discord_report_action() { let message = generic_inference_error_user_message();