diff --git a/src/core/socketio.rs b/src/core/socketio.rs
index fe6e899c5..51eb76476 100644
--- a/src/core/socketio.rs
+++ b/src/core/socketio.rs
@@ -113,6 +113,37 @@ pub struct WebChannelEvent {
     /// Type of error, if the event represents a failure.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub error_type: Option<String>,
+    /// Structured rate-limit / error metadata produced by
+    /// `classify_inference_error` (issue #2606). All four fields are
+    /// additive — older FE clients that only read `message`/`error_type`
+    /// keep working; new clients can read these to render countdown,
+    /// retry-button, and fallback-CTA UI without regexing the message.
+    ///
+    /// Where the limit originated:
+    /// `"provider"` | `"openhuman_budget"` | `"agent_loop"`
+    /// | `"openhuman_billing"` | `"transport"` | `"config"`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error_source: Option<String>,
+    /// Whether the same prompt can be retried in this same thread.
+    /// `false` for non-retryable business 429s, auth, model_unavailable,
+    /// context_overflow, and billing exhaustion.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error_retryable: Option<bool>,
+    /// Milliseconds to wait before retrying, as supplied by the upstream
+    /// `Retry-After:` / `retry_after:` header. `None` when the upstream
+    /// didn't supply one or the error class has no retry-after concept.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error_retry_after_ms: Option<u64>,
+    /// Provider name extracted from `"<provider> API error (...)"`
+    /// envelopes. `None` for non-provider errors (OpenHuman budget cap,
+    /// agent loop) and for transport failures without a provider prefix.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error_provider: Option<String>,
+    /// `Some(false)` once the reliable-provider chain has exhausted
+    /// every configured `model_fallbacks` entry. `None` means "unknown
+    /// — FE should not promise a fallback".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error_fallback_available: Option<bool>,
     /// Name of the tool being called.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_name: Option<String>,
diff --git a/src/openhuman/channels/proactive.rs b/src/openhuman/channels/proactive.rs
index fcc2cd505..22a42e1db 100644
--- a/src/openhuman/channels/proactive.rs
+++ b/src/openhuman/channels/proactive.rs
@@ -126,6 +126,11 @@ impl EventHandler for ProactiveMessageSubscriber {
             full_response: Some(message.clone()),
             message: None,
             error_type: None,
+            error_source: None,
+            error_retryable: None,
+            error_retry_after_ms: None,
+            error_provider: None,
+            error_fallback_available: None,
             tool_name: None,
             skill_id: None,
             args: None,
diff --git a/src/openhuman/channels/providers/presentation.rs b/src/openhuman/channels/providers/presentation.rs
index 16f57287b..1a1ae3000 100644
--- a/src/openhuman/channels/providers/presentation.rs
+++ b/src/openhuman/channels/providers/presentation.rs
@@ -52,6 +52,11 @@ pub async fn deliver_response(
             full_response: Some(full_response.to_string()),
             message: None,
             error_type: None,
+            error_source: None,
+            error_retryable: None,
+            error_retry_after_ms: None,
+            error_provider: None,
+            error_fallback_available: None,
             tool_name: None,
             skill_id: None,
             args: None,
@@ -92,6 +97,11 @@ pub async fn deliver_response(
             full_response: Some(segment.clone()),
             message: None,
             error_type: None,
+            error_source: None,
+            error_retryable: None,
+            error_retry_after_ms: None,
+            error_provider: None,
+            error_fallback_available: None,
             tool_name: None,
             skill_id: None,
             args: None,
@@ -124,6 +134,11 @@ pub async fn deliver_response(
         full_response: Some(full_response.to_string()),
         message: None,
         error_type: None,
+        error_source: None,
+        error_retryable: None,
+        error_retry_after_ms: None,
+        error_provider: None,
+        error_fallback_available: None,
         tool_name: None,
         skill_id: None,
         args: None,
diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs
index 05a07b30b..5acd031e1 100644
--- a/src/openhuman/channels/providers/web.rs
+++ b/src/openhuman/channels/providers/web.rs
@@ -223,6 +223,98 @@ fn with_provider_detail(summary: &str, err: &str) -> String {
     }
 }
 
+/// Structured chat-error envelope produced by [`classify_inference_error`].
+///
+/// Carries the typed metadata the frontend needs to render a recovery UI
+/// (retry-after countdown, retry button, fallback CTA) without having to
+/// regex the human-readable `message`. Issue #2606.
+///
+/// `error_type` and `message` preserve the wire shape PR #2371 established
+/// — existing FE handlers that read those fields keep working. The new
+/// fields are additive and `Option`-typed where the value isn't always
+/// known at the classifier layer.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct ClassifiedError {
+    /// Stable token: `rate_limited`, `action_budget_exceeded`,
+    /// `max_iterations`, `timeout`, `auth_error`, `budget_exhausted`,
+    /// `provider_error`, `context_overflow`, `model_unavailable`,
+    /// `inference`.
+    pub(crate) error_type: &'static str,
+    /// User-facing copy (already includes provider detail block and the
+    /// retry-after countdown sentence when available).
+    pub(crate) message: String,
+    /// Where the limit originated. One of:
+    /// - `"provider"`         — upstream LLM provider 429 / rate limit
+    /// - `"openhuman_budget"` — local SecurityPolicy per-hour action cap
+    /// - `"agent_loop"`       — agent ran out of tool iterations
+    /// - `"openhuman_billing"` — OpenHuman credit/quota exhaustion
+    /// - `"transport"`        — network / DNS / TLS / timeout
+    /// - `"config"`           — auth, model, context, generic
+    pub(crate) source: &'static str,
+    /// Can the user retry the same prompt in the same thread? `false` for
+    /// non-retryable business 429s, auth failures, model_unavailable,
+    /// context_overflow, and OpenHuman billing exhaustion.
+    pub(crate) retryable: bool,
+    /// Milliseconds the upstream asked us to wait. Surfaced verbatim from
+    /// `Retry-After:` / `retry_after:` headers when present; `None` when
+    /// the upstream didn't supply one OR the error class doesn't have a
+    /// concept of retry-after (auth, config, etc.).
+    pub(crate) retry_after_ms: Option<u64>,
+    /// Provider name extracted from the leading
+    /// `"<provider> API error (...)"` envelope emitted by
+    /// `inference::provider::ops::api_error`. `None` for non-provider
+    /// errors (OpenHuman budget cap, agent loop) and for transport
+    /// failures that don't carry an identifiable provider prefix.
+    pub(crate) provider: Option<String>,
+    /// `Some(false)` once the reliable-provider chain has exhausted every
+    /// configured `model_fallbacks` entry (the aggregate "All
+    /// providers/models failed" branch). `None` means the classifier
+    /// can't tell from the error string alone — the FE should treat it
+    /// as "unknown, don't promise a fallback".
+    pub(crate) fallback_available: Option<bool>,
+}
+
+/// Best-effort extraction of the provider name from an error string.
+///
+/// `inference::provider::ops::api_error` formats upstream failures as
+/// `"<provider> API error (<status>): <body>"`, e.g.
+/// `"openrouter API error (429 Too Many Requests): ..."`. We pull the
+/// leading word and lowercase it so the wire value is stable across
+/// providers' own capitalisation.
+///
+/// Returns `None` when:
+/// - The error string doesn't carry the `" API error"` infix.
+/// - The candidate word contains characters that wouldn't appear in a
+///   provider name (slashes, colons, etc. — guards against transport
+///   error prefixes that happen to be followed by " API error").
+fn extract_provider_name(err: &str) -> Option<String> {
+    const INFIX: &str = " API error";
+    let idx = err.find(INFIX)?;
+    let prefix = err[..idx].trim_end();
+    let candidate = prefix
+        .rsplit_once(char::is_whitespace)
+        .map_or(prefix, |(_, last)| last);
+    if candidate.is_empty()
+        || !candidate
+            .chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
+    {
+        return None;
+    }
+    Some(candidate.to_ascii_lowercase())
+}
+
+/// Detect the reliable-provider aggregate that fires once every
+/// configured `model_fallbacks` entry has been tried.
+///
+/// `reliable.rs::format_failure_aggregate` always opens with
+/// `"All providers/models failed. Attempts:"`. When that marker is
+/// present the FE should NOT offer a fallback retry — there is none
+/// left to try.
+fn is_fallback_chain_exhausted(err: &str) -> bool {
+    err.contains("All providers/models failed")
+}
+
 /// Extract a Retry-After / retry_after seconds hint from a free-form
 /// error string. Mirrors the typed [`crate::openhuman::inference::
 /// provider::reliable::parse_retry_after_ms`] helper but operates on
@@ -303,8 +395,15 @@ fn is_action_budget_exhausted(err_lower: &str) -> bool {
         || err_lower.contains("action blocked: rate limit exceeded")
 }
 
-fn classify_inference_error(err: &str) -> (&'static str, String) {
+fn classify_inference_error(err: &str) -> ClassifiedError {
     let lower = err.to_lowercase();
+    let provider = extract_provider_name(err);
+    let fallback_available = if is_fallback_chain_exhausted(err) {
+        Some(false)
+    } else {
+        None
+    };
+
     // Order matters: the SecurityPolicy hourly cap and the
     // agent-loop max-iterations error both surface as strings that
     // contain "rate limit" / "iteration", so they MUST be checked
@@ -312,83 +411,136 @@ fn classify_inference_error(err: &str) -> (&'static str, String) {
     // a confusing "your AI provider is rate-limiting you" message
     // for limits OpenHuman itself enforced (issue #2364).
     if is_action_budget_exhausted(&lower) {
-        (
-            "action_budget_exceeded",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "action_budget_exceeded",
+            message: with_provider_detail(
                 "You've hit OpenHuman's per-hour action budget — this is a local safety cap, \
                  not your AI provider. The window decays gradually; you can keep chatting in \
                  this thread and tool-heavy steps will resume as the budget refills.",
                 err,
             ),
-        )
+            source: "openhuman_budget",
+            // The window decays gradually so the same thread CAN recover
+            // — we just can't predict the exact wait.
+            retryable: true,
+            retry_after_ms: None,
+            // OpenHuman's own cap — provider name (if any was in the
+            // surrounding error chain) is irrelevant; the limit isn't
+            // from a provider.
+            provider: None,
+            fallback_available: None,
+        }
     } else if crate::openhuman::agent::error::is_max_iterations_error(err) {
-        (
-            "max_iterations",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "max_iterations",
+            message: with_provider_detail(
                 "The agent ran the maximum number of tool steps for one turn without \
                  finishing. This usually means a tool kept failing (often a rate limit on a \
                  web fetch). You can retry the same question in this thread once the \
                  underlying limit clears.",
                 err,
             ),
-        )
+            source: "agent_loop",
+            retryable: true,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else if lower.contains("rate limit") || lower.contains("429") {
-        let retry = parse_retry_after_secs_from_str(err);
+        let retry_secs = parse_retry_after_secs_from_str(err);
         let summary = format!(
             "Your AI provider is rate-limiting requests. This is a transient upstream \
              limit, not a thread-level block — you can retry in this thread.{}",
-            retry_after_hint(retry)
+            retry_after_hint(retry_secs)
         );
-        ("rate_limited", with_provider_detail(summary.as_str(), err))
+        // Non-retryable business 429s ("plan does not include", balance
+        // exhausted, known provider business codes like Z.AI 1311/1113)
+        // also surface here — mark them non-retryable so the FE can hide
+        // the "Retry" button and route the user to settings/billing.
+        let retryable = !is_non_retryable_rate_limit_text(&lower);
+        ClassifiedError {
+            error_type: "rate_limited",
+            message: with_provider_detail(summary.as_str(), err),
+            source: "provider",
+            retryable,
+            retry_after_ms: retry_secs.map(|s| s.saturating_mul(1000)),
+            provider,
+            fallback_available,
+        }
     } else if lower.contains("timeout") || lower.contains("timed out") {
-        (
-            "timeout",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "timeout",
+            message: with_provider_detail(
                 "The request timed out. Please check your connection and try again.",
                 err,
             ),
-        )
+            source: "transport",
+            retryable: true,
+            retry_after_ms: None,
+            provider,
+            fallback_available,
+        }
     } else if lower.contains("401") || lower.contains("unauthorized") || lower.contains("api key") {
-        (
-            "auth_error",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "auth_error",
+            message: with_provider_detail(
                 "There's an authentication issue with the AI provider. Please check your API key in settings.",
                 err,
             ),
-        )
+            source: "config",
+            retryable: false,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else if lower.contains("402")
         || lower.contains("payment required")
         || lower.contains("insufficient balance")
     {
-        (
-            "budget_exhausted",
-            with_provider_detail("Insufficient credits. Please top up to continue.", err),
-        )
+        ClassifiedError {
+            error_type: "budget_exhausted",
+            message: with_provider_detail("Insufficient credits. Please top up to continue.", err),
+            source: "openhuman_billing",
+            retryable: false,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else if lower.contains("500")
         || lower.contains("internal server")
         || lower.contains("service unavailable")
         || lower.contains("503")
     {
-        (
-            "provider_error",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "provider_error",
+            message: with_provider_detail(
                 "The AI provider is temporarily unavailable. Please try again later.",
                 err,
             ),
-        )
+            source: "provider",
+            retryable: true,
+            retry_after_ms: None,
+            provider,
+            fallback_available,
+        }
     } else if lower.contains("context")
         && (lower.contains("length")
             || lower.contains("limit")
             || lower.contains("exceed")
             || lower.contains("token"))
     {
-        (
-            "context_overflow",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "context_overflow",
+            message: with_provider_detail(
                 "The conversation is too long. Please start a new chat.",
                 err,
             ),
-        )
+            source: "config",
+            retryable: false,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else if crate::openhuman::inference::provider::is_provider_config_rejection_message(err) {
         // #2079 / #2076 / #2202: an OpenHuman abstract tier alias leaked to
         // a custom provider, a stale model pin, or a model-specific
@@ -398,33 +550,92 @@ fn classify_inference_error(err: &str) -> (&'static str, String) {
         // specific "Settings → LLM" remediation instead of the generic
         // copy. Shared predicate keeps this in lockstep with the
         // Sentry-demotion classifier.
-        (
-            "model_unavailable",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "model_unavailable",
+            message: with_provider_detail(
                 "Your AI provider rejected the request's model or temperature setting. \
                  Check your model and routing in Settings → LLM.",
                 err,
             ),
-        )
+            source: "config",
+            retryable: false,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else if lower.contains("model")
         && (lower.contains("not found")
             || lower.contains("unavailable")
             || lower.contains("does not exist")
             || lower.contains("does not have access"))
     {
-        (
-            "model_unavailable",
-            with_provider_detail(
+        ClassifiedError {
+            error_type: "model_unavailable",
+            message: with_provider_detail(
                 "The selected model isn't available on your provider. Check your model settings.",
                 err,
             ),
-        )
+            source: "config",
+            retryable: false,
+            retry_after_ms: None,
+            provider,
+            fallback_available: None,
+        }
     } else {
-        (
-            "inference",
-            with_provider_detail(generic_inference_error_user_message(), err),
-        )
+        ClassifiedError {
+            error_type: "inference",
+            message: with_provider_detail(generic_inference_error_user_message(), err),
+            source: "provider",
+            retryable: true,
+            retry_after_ms: None,
+            provider,
+            fallback_available,
+        }
+    }
+}
+
+/// String-flat mirror of
+/// [`crate::openhuman::inference::provider::reliable::is_non_retryable_rate_limit`].
+///
+/// The reliable provider already classifies 429s into retryable vs
+/// non-retryable based on business-quota markers ("plan does not
+/// include", "insufficient balance", Z.AI codes 1311/1113, …) — but
+/// that typed `anyhow::Error` is collapsed to a `String` at the
+/// native-bus boundary before reaching this layer. We re-detect the
+/// same markers in the flattened string so the FE knows whether to
+/// offer a "Retry" button.
+///
+/// Caller passes the already-lowercased error string to avoid double
+/// allocation.
+fn is_non_retryable_rate_limit_text(lower: &str) -> bool {
+    const BUSINESS_HINTS: &[&str] = &[
+        "plan does not include",
+        "doesn't include",
+        "not include",
+        "insufficient balance",
+        "insufficient_balance",
+        "insufficient quota",
+        "insufficient_quota",
+        "quota exhausted",
+        "out of credits",
+        "no available package",
+        "package not active",
+        "purchase package",
+        "model not available for your plan",
+    ];
+    if BUSINESS_HINTS.iter().any(|hint| lower.contains(hint)) {
+        return true;
+    }
+    // Known provider business codes observed for 429 where retry is
+    // futile (mirrors reliable.rs). Scan integer-like tokens.
+    for token in lower.split(|c: char| !c.is_ascii_digit()) {
+        if let Ok(code) = token.parse::<u16>() {
+            if matches!(code, 1113 | 1311) {
+                return true;
+            }
+        }
     }
+    false
 }
 
 fn prompt_guard_user_message(action: PromptEnforcementAction) -> &'static str {
@@ -516,6 +727,11 @@ pub async fn start_chat(
                 full_response: None,
                 message: Some("Cancelled by newer request".to_string()),
                 error_type: Some("cancelled".to_string()),
+                error_source: None,
+                error_retryable: None,
+                error_retry_after_ms: None,
+                error_provider: None,
+                error_fallback_available: None,
                 tool_name: None,
                 skill_id: None,
                 args: None,
@@ -582,7 +798,8 @@ pub async fn start_chat(
                     "run_chat_task failed client_id={} thread_id={} request_id={} error={}",
                     client_id_task, thread_id_task, request_id_task, err
                 );
-                let (classified_type, classified_message) = classify_inference_error(&err);
+                let classified = classify_inference_error(&err);
+                let classified_type = classified.error_type;
                 let classified_type_string = classified_type.to_string();
                 // Max-tool-iterations cap is a deterministic agent-state
                 // outcome surfaced to the user via the existing
@@ -630,8 +847,13 @@ pub async fn start_chat(
                     thread_id: thread_id_task.clone(),
                     request_id: request_id_task.clone(),
                     full_response: None,
-                    message: Some(classified_message),
+                    message: Some(classified.message),
                     error_type: Some(classified_type_string),
+                    error_source: Some(classified.source.to_string()),
+                    error_retryable: Some(classified.retryable),
+                    error_retry_after_ms: classified.retry_after_ms,
+                    error_provider: classified.provider,
+                    error_fallback_available: classified.fallback_available,
                     tool_name: None,
                     skill_id: None,
                     args: None,
@@ -739,6 +961,11 @@ pub async fn cancel_chat(client_id: &str, thread_id: &str) -> Result<Option<Stri
             full_response: None,
             message: Some("Cancelled".to_string()),
             error_type: Some("cancelled".to_string()),
+            error_source: None,
+            error_retryable: None,
+            error_retry_after_ms: None,
+            error_provider: None,
+            error_fallback_available: None,
             tool_name: None,
             skill_id: None,
             args: None,
@@ -1099,6 +1326,11 @@ fn spawn_progress_bridge(
                         full_response: None,
                         message: None,
                         error_type: None,
+                        error_source: None,
+                        error_retryable: None,
+                        error_retry_after_ms: None,
+                        error_provider: None,
+                        error_fallback_available: None,
                         tool_name: None,
                         skill_id: None,
                         args: None,
@@ -1129,6 +1361,11 @@ fn spawn_progress_bridge(
                         full_response: None,
                         message: Some(format!("Iteration {iteration}/{max_iterations}")),
                         error_type: None,
+                        error_source: None,
+                        error_retryable: None,
+                        error_retry_after_ms: None,
+                        error_provider: None,
+                        error_fallback_available: None,
                         tool_name: None,
                         skill_id: None,
                         args: None,
diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs
index 4d6a25189..4bd9bdd68 100644
--- a/src/openhuman/channels/providers/web_tests.rs
+++ b/src/openhuman/channels/providers/web_tests.rs
@@ -5,11 +5,23 @@ use super::{
     inference_budget_exceeded_user_message, is_inference_budget_exceeded_error, json_output,
     key_for, locale_reply_directive, normalize_model_override, optional_f64, optional_string,
     provider_role_for_model_override, required_string, schemas,
-    set_test_forced_run_chat_task_error, start_chat, subscribe_web_channel_events,
+    set_test_forced_run_chat_task_error, start_chat, subscribe_web_channel_events, ClassifiedError,
 };
 use crate::core::TypeSchema;
+use once_cell::sync::Lazy;
+use tokio::sync::Mutex as TokioMutex;
 use tokio::time::{timeout, Duration};
 
+/// Serializes every test that drives `start_chat` with a
+/// `TEST_FORCED_RUN_CHAT_TASK_ERROR` toggle. The toggle and the
+/// per-thread session cache are process-global, so two such tests
+/// running concurrently can clobber each other's forced error before
+/// `run_chat_task` reads it — leading to flaky asserts where one test
+/// observes another test's error string. Holding this mutex for the
+/// duration of each test body restores isolation without disabling
+/// `cargo test`'s default parallelism for the rest of the suite.
+static FORCED_ERROR_TEST_LOCK: Lazy<TokioMutex<()>> = Lazy::new(|| TokioMutex::new(()));
+
 /// Ensures the test-only forced run_chat_task failure toggle is always reset,
 /// even if the test panics before reaching explicit cleanup code.
 struct TestForcedRunChatTaskErrorGuard;
@@ -77,6 +89,7 @@ async fn cancel_chat_validates_required_fields() {
 
 #[tokio::test]
 async fn start_chat_emits_sanitized_chat_error_on_inference_failure() {
+    let _serial = FORCED_ERROR_TEST_LOCK.lock().await;
     set_test_forced_run_chat_task_error(Some(
         "error sending request for url (https://internal-api.example.invalid/openai/v1/chat/completions)",
     ))
@@ -168,7 +181,11 @@ fn classify_inference_error_quotes_model_unavailable_detail() {
     // while still classifying as `model_unavailable` and quoting the
     // upstream detail.
     let raw = r#"custom_openai API error (404 Not Found): {"error":{"message":"The model `gpt-5.5` does not exist or you do not have access to it.","code":"model_not_found"}}"#;
-    let (category, message) = classify_inference_error(raw);
+    let ClassifiedError {
+        error_type: category,
+        message,
+        ..
+    } = classify_inference_error(raw);
     assert_eq!(category, "model_unavailable");
     assert!(
         message.contains("Settings → LLM"),
@@ -195,7 +212,11 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() {
         r#"custom_openai API error (400): {"error":{"message":"Model 'claude-opus-4-7' is not available. Use GET /openai/v1/models to list available models."}}"#,
     ];
     for raw in cases {
-        let (category, message) = classify_inference_error(raw);
+        let ClassifiedError {
+            error_type: category,
+            message,
+            ..
+        } = classify_inference_error(raw);
         assert_eq!(
             category, "model_unavailable",
             "config-rejection must classify as model_unavailable, not generic: {raw}"
@@ -221,7 +242,11 @@ fn classify_inference_error_distinguishes_action_budget_from_provider_429() {
         "Rate limit exceeded: too many actions in the last hour",
         "Action blocked: rate limit exceeded",
     ] {
-        let (category, message) = classify_inference_error(raw);
+        let ClassifiedError {
+            error_type: category,
+            message,
+            ..
+        } = classify_inference_error(raw);
         assert_eq!(
             category, "action_budget_exceeded",
             "action-budget signal must NOT classify as provider rate_limited: {raw}"
@@ -246,7 +271,11 @@ fn classify_inference_error_max_iterations_gets_dedicated_branch() {
     // specific message that says retrying in the same thread is OK.
     let raw = "run_chat_task failed client_id=abc thread_id=t1 \
                error=Agent exceeded maximum tool iterations (10)";
-    let (category, message) = classify_inference_error(raw);
+    let ClassifiedError {
+        error_type: category,
+        message,
+        ..
+    } = classify_inference_error(raw);
     assert_eq!(category, "max_iterations");
     assert!(
         message.contains("maximum number of tool steps"),
@@ -261,7 +290,11 @@ fn classify_inference_error_max_iterations_gets_dedicated_branch() {
 #[test]
 fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() {
     let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30";
-    let (category, message) = classify_inference_error(raw);
+    let ClassifiedError {
+        error_type: category,
+        message,
+        ..
+    } = classify_inference_error(raw);
     assert_eq!(category, "rate_limited");
     assert!(
         message.contains("Try again in 30 seconds"),
@@ -276,7 +309,11 @@ fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() {
 #[test]
 fn classify_inference_error_rate_limited_no_retry_after_omits_hint() {
     let raw = "openrouter API error (429 Too Many Requests)";
-    let (category, message) = classify_inference_error(raw);
+    let ClassifiedError {
+        error_type: category,
+        message,
+        ..
+    } = classify_inference_error(raw);
     assert_eq!(category, "rate_limited");
     // Generic copy must still describe the situation accurately.
     assert!(message.contains("transient upstream limit"));
@@ -291,7 +328,7 @@ fn classify_inference_error_rate_limited_no_retry_after_omits_hint() {
 fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() {
     // Fractional seconds round up — never tell the user to retry
     // sooner than the upstream actually allows.
-    let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4");
+    let message = classify_inference_error("429 Too Many Requests: retry_after: 2.4").message;
     assert!(
         message.contains("Try again in 3 seconds"),
         "fractional 2.4 must round up to 3: {message}"
@@ -300,7 +337,7 @@ fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows()
     // Long windows switch to a "minutes" rendering at the 90s
     // threshold so the user gets a less precise but more readable
     // hint.
-    let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180");
+    let message = classify_inference_error("429 Too Many Requests: Retry-After: 180").message;
     assert!(
         message.contains("about 3 minutes"),
         "180s must render as minutes: {message}"
@@ -316,19 +353,19 @@ fn classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_
     // 119s (ditto). 60s lands in the seconds band; 61s is the
     // smallest minute-band input but still <90 so seconds; 90s is
     // the first true minute-band input.
-    let (_, m_90) = classify_inference_error("429 Too Many Requests: Retry-After: 90");
+    let m_90 = classify_inference_error("429 Too Many Requests: Retry-After: 90").message;
     assert!(
         m_90.contains("about 2 minutes"),
         "90s must round up to 2 minutes (not floor to 1): {m_90}"
     );
-    let (_, m_119) = classify_inference_error("429 Too Many Requests: Retry-After: 119");
+    let m_119 = classify_inference_error("429 Too Many Requests: Retry-After: 119").message;
     assert!(
         m_119.contains("about 2 minutes"),
         "119s must round up to 2 minutes: {m_119}"
     );
     // Exactly 60-multiple inputs above the 90s threshold render as
     // exact minutes with no round-up bump.
-    let (_, m_120) = classify_inference_error("429 Too Many Requests: Retry-After: 120");
+    let m_120 = classify_inference_error("429 Too Many Requests: Retry-After: 120").message;
     assert!(
         m_120.contains("about 2 minutes"),
         "exact 120s must stay as 2 minutes: {m_120}"
@@ -342,7 +379,11 @@ fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() {
     // because the quote stopped `lower.find("retry_after:")` from
     // matching. The parser now strips quotes so the JSON-key shape
     // resolves the same as the unquoted header shape.
-    let (category, message) = classify_inference_error(
+    let ClassifiedError {
+        error_type: category,
+        message,
+        ..
+    } = classify_inference_error(
         r#"openrouter API error (429 Too Many Requests): {"retry_after": 30, "code": "rate_limited"}"#,
     );
     assert_eq!(category, "rate_limited");
@@ -352,6 +393,385 @@ fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() {
     );
 }
 
+// ── Structured rate-limit metadata (issue #2606) ──────────────
+//
+// The classifier MUST surface the structured fields the frontend
+// needs to render a countdown / retry / fallback UI without having
+// to regex the message string:
+//   - retry_after_ms — raw, milliseconds, machine-readable
+//   - source         — "provider" | "openhuman_budget" | "agent_loop"
+//   - provider       — name extracted from upstream string when present
+//   - retryable      — same-thread retry safe? (false for non-retryable 429)
+//   - fallback_available — Some(false) once the reliable provider has
+//                          exhausted its model_fallbacks chain
+//
+// These supplement the existing `error_type` token and `message`
+// text — they do NOT replace either, and pre-#2371 consumers that
+// read only the tuple shape keep working.
+
+#[test]
+fn classify_inference_error_rate_limited_returns_structured_retry_after_ms() {
+    let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.error_type, "rate_limited");
+    assert_eq!(
+        classified.retry_after_ms,
+        Some(30_000),
+        "30s Retry-After must surface as 30000ms on the structured payload \
+         so the FE can render a countdown without regexing the message: \
+         got {:?}",
+        classified.retry_after_ms
+    );
+    assert_eq!(
+        classified.source, "provider",
+        "upstream 429 must classify source=provider, not openhuman_budget"
+    );
+    assert_eq!(
+        classified.retryable, true,
+        "transient upstream 429 must allow same-thread retry"
+    );
+    assert_eq!(
+        classified.provider.as_deref(),
+        Some("openrouter"),
+        "provider name must be extracted from the '<provider> API error' \
+         prefix: got {:?}",
+        classified.provider
+    );
+}
+
+#[tokio::test]
+async fn start_chat_chat_error_event_serializes_structured_fields_to_json_wire() {
+    let _serial = FORCED_ERROR_TEST_LOCK.lock().await;
+    // The JSON-RPC SSE endpoint emits chat_error by running
+    // `serde_json::to_value(&event)` over the WebChannelEvent struct
+    // (see `core/socketio.rs::emit_web_channel_event`). This pins the
+    // resulting JSON keys so the frontend contract stays stable: the
+    // FE reads exactly `error_source`, `error_retryable`,
+    // `error_retry_after_ms`, `error_provider`, `error_fallback_available`
+    // off the SSE payload — the same keys our Rust struct serializes
+    // to with `#[serde(rename_all = "snake_case")]`.
+    //
+    // Also asserts the additive contract: when these fields are None
+    // they MUST be omitted from the JSON (older FE clients that don't
+    // know about them keep working) — `skip_serializing_if =
+    // "Option::is_none"` on every new field carries this guarantee.
+    set_test_forced_run_chat_task_error(Some(
+        "openrouter API error (429 Too Many Requests): Retry-After: 7",
+    ))
+    .await;
+    let _forced_error_guard = TestForcedRunChatTaskErrorGuard;
+
+    let mut rx = subscribe_web_channel_events();
+    let request_id = start_chat(
+        "wire-shape-client",
+        "wire-shape-thread",
+        "Please summarize this in one line.",
+        None,
+        None,
+        None,
+        None,
+    )
+    .await
+    .expect("start_chat should accept valid request");
+
+    let recv = timeout(Duration::from_secs(20), async move {
+        loop {
+            let event = rx.recv().await.expect("event stream should stay open");
+            if event.event != "chat_error" {
+                continue;
+            }
+            if event.request_id != request_id {
+                continue;
+            }
+            return event;
+        }
+    })
+    .await
+    .expect("expected chat_error event for started chat request");
+
+    let json = serde_json::to_value(&recv).expect("WebChannelEvent must serialize");
+    assert_eq!(
+        json.get("error_type").and_then(|v| v.as_str()),
+        Some("rate_limited")
+    );
+    assert_eq!(
+        json.get("error_source").and_then(|v| v.as_str()),
+        Some("provider")
+    );
+    assert_eq!(
+        json.get("error_retryable").and_then(|v| v.as_bool()),
+        Some(true)
+    );
+    assert_eq!(
+        json.get("error_retry_after_ms").and_then(|v| v.as_u64()),
+        Some(7_000)
+    );
+    assert_eq!(
+        json.get("error_provider").and_then(|v| v.as_str()),
+        Some("openrouter")
+    );
+    // No fallback signal in this error string → field omitted.
+    assert!(
+        json.get("error_fallback_available").is_none(),
+        "None fields MUST be omitted from JSON for additive wire compat: {json}"
+    );
+
+    // Pin the additive contract: serializing a default (no error)
+    // event must NOT introduce any of the new keys.
+    let empty = crate::core::socketio::WebChannelEvent {
+        event: "chat_done".to_string(),
+        ..Default::default()
+    };
+    let empty_json = serde_json::to_value(&empty).expect("Default WebChannelEvent serializes");
+    for key in [
+        "error_source",
+        "error_retryable",
+        "error_retry_after_ms",
+        "error_provider",
+        "error_fallback_available",
+    ] {
+        assert!(
+            empty_json.get(key).is_none(),
+            "{key} must be omitted when None so older FE clients aren't surprised: {empty_json}"
+        );
+    }
+}
+
+#[tokio::test]
+async fn start_chat_emits_structured_rate_limit_metadata_on_chat_error_event() {
+    let _serial = FORCED_ERROR_TEST_LOCK.lock().await;
+    // End-to-end wire check for issue #2606: when run_chat_task fails
+    // with a 429-shaped error string, the published `chat_error`
+    // WebChannelEvent on the bus MUST carry the structured fields the
+    // FE needs (retry_after_ms, source, provider, retryable) — not
+    // just the message text. This is the contract older PR #2371
+    // landed in *message* form but kept off the wire as structured
+    // metadata.
+    set_test_forced_run_chat_task_error(Some(
+        "openrouter API error (429 Too Many Requests): Retry-After: 30",
+    ))
+    .await;
+    let _forced_error_guard = TestForcedRunChatTaskErrorGuard;
+
+    let mut rx = subscribe_web_channel_events();
+    let request_id = start_chat(
+        "rate-limit-client",
+        "rate-limit-thread",
+        "Please summarize this in one line.",
+        None,
+        None,
+        None,
+        None,
+    )
+    .await
+    .expect("start_chat should accept valid request");
+
+    let recv = timeout(Duration::from_secs(20), async move {
+        loop {
+            let event = rx.recv().await.expect("event stream should stay open");
+            if event.event != "chat_error" {
+                continue;
+            }
+            if event.request_id != request_id {
+                continue;
+            }
+            return event;
+        }
+    })
+    .await
+    .expect("expected chat_error event for started chat request");
+
+    assert_eq!(
+        recv.error_type.as_deref(),
+        Some("rate_limited"),
+        "error_type token unchanged for backward compat"
+    );
+    assert_eq!(
+        recv.error_source.as_deref(),
+        Some("provider"),
+        "upstream 429 must classify as provider source on the wire: {recv:?}"
+    );
+    assert_eq!(
+        recv.error_retryable,
+        Some(true),
+        "transient upstream 429 must be retryable on the wire: {recv:?}"
+    );
+    assert_eq!(
+        recv.error_retry_after_ms,
+        Some(30_000),
+        "30s Retry-After must surface as 30000ms on the wire (FE countdown): \
+         {recv:?}"
+    );
+    assert_eq!(
+        recv.error_provider.as_deref(),
+        Some("openrouter"),
+        "provider name must reach the wire so the FE can show \"openrouter is throttling\": \
+         {recv:?}"
+    );
+}
+
+#[test]
+fn classify_inference_error_action_budget_marks_source_openhuman_not_provider() {
+    // OpenHuman's SecurityPolicy per-hour cap is NOT a provider 429 —
+    // it's a local safety cap. The structured payload must reflect
+    // that so the FE doesn't tell the user to switch providers, and
+    // doesn't promise a fallback (the cap applies to OpenHuman itself).
+    let raw = "Rate limit exceeded: action budget exhausted";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.error_type, "action_budget_exceeded");
+    assert_eq!(
+        classified.source, "openhuman_budget",
+        "OpenHuman's own per-hour cap must NOT be tagged as a provider source"
+    );
+    assert!(
+        classified.retryable,
+        "the budget decays gradually so the same thread CAN retry"
+    );
+    assert_eq!(
+        classified.retry_after_ms, None,
+        "the SecurityPolicy decay isn't expressed as a discrete Retry-After"
+    );
+    assert_eq!(
+        classified.provider, None,
+        "no upstream provider is implicated — the limit is local"
+    );
+}
+
+#[test]
+fn classify_inference_error_max_iterations_marks_source_agent_loop_retryable() {
+    let raw = "Agent exceeded maximum tool iterations (12)";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.error_type, "max_iterations");
+    assert_eq!(
+        classified.source, "agent_loop",
+        "the agent's own iteration cap must be its own source category"
+    );
+    assert!(
+        classified.retryable,
+        "user CAN re-ask in the same thread once the underlying limit clears"
+    );
+    assert_eq!(classified.retry_after_ms, None);
+}
+
+#[test]
+fn classify_inference_error_non_retryable_429_business_quota_unsets_retry() {
+    // Business 429s (plan doesn't include the model, balance exhausted,
+    // known business codes like Z.AI 1311/1113) are 429s in shape but
+    // retrying is futile until the user changes plan/billing. The FE
+    // should hide the "Retry" button when retryable=false.
+    let cases: &[&str] = &[
+        "openrouter API error (429): plan does not include this model",
+        "openai API error (429): insufficient_balance",
+        "zai API error (429): code 1311 quota exhausted",
+        "zai API error (429): error code 1113",
+    ];
+    for raw in cases {
+        let classified = classify_inference_error(raw);
+        assert_eq!(
+            classified.error_type, "rate_limited",
+            "still a 429 by classification: {raw}"
+        );
+        assert!(
+            !classified.retryable,
+            "business-quota 429 must NOT be marked retryable: {raw}"
+        );
+    }
+}
+
+#[test]
+fn classify_inference_error_retryable_429_keeps_retry_flag() {
+    // Vanilla transient 429 — retry is the right answer. The FE should
+    // show the countdown + retry button.
+    let raw = "openai API error (429 Too Many Requests): Retry-After: 5";
+    let classified = classify_inference_error(raw);
+    assert!(
+        classified.retryable,
+        "vanilla transient 429 must remain retryable: {classified:?}"
+    );
+    assert_eq!(classified.retry_after_ms, Some(5_000));
+    assert_eq!(classified.source, "provider");
+}
+
+#[test]
+fn classify_inference_error_extracts_provider_name_lowercase() {
+    // The "<provider> API error" envelope from
+    // inference::provider::ops::api_error is the canonical source we
+    // pull the provider name from. Lowercased so the wire value is
+    // stable across providers' own capitalisation.
+    let cases: &[(&str, Option<&str>)] = &[
+        (
+            "openrouter API error (429): rate limited",
+            Some("openrouter"),
+        ),
+        (
+            "Anthropic API error (429): too many requests",
+            Some("anthropic"),
+        ),
+        ("custom_openai API error (429): {}", Some("custom_openai")),
+        // No "API error" infix — no extraction (the upstream didn't
+        // come through the standard wrapper).
+        ("connect timed out after 30s", None),
+    ];
+    for (raw, want) in cases {
+        let classified = classify_inference_error(raw);
+        assert_eq!(
+            classified.provider.as_deref(),
+            *want,
+            "provider extraction mismatch for {raw:?}"
+        );
+    }
+}
+
+#[test]
+fn classify_inference_error_fallback_chain_exhausted_sets_false() {
+    // Once reliable.rs's `format_failure_aggregate` has emitted "All
+    // providers/models failed", we KNOW no fallback remains. The FE
+    // must not offer a "Try fallback" CTA in that case.
+    let raw = "openrouter API error (429): rate limited\nAll providers/models failed. Attempts:\nprovider=openai model=gpt-4 attempt 1/3: rate_limited";
+    let classified = classify_inference_error(raw);
+    assert_eq!(
+        classified.fallback_available,
+        Some(false),
+        "aggregate marker must surface as fallback_available=Some(false): {classified:?}"
+    );
+}
+
+#[test]
+fn classify_inference_error_no_fallback_signal_means_unknown() {
+    // Single-provider 429 with no aggregate marker — the classifier
+    // cannot tell whether a fallback exists. Must surface None
+    // ("unknown") so the FE doesn't promise something we can't deliver.
+    let raw = "openrouter API error (429): rate limited";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.fallback_available, None);
+}
+
+#[test]
+fn classify_inference_error_auth_marks_non_retryable_config_source() {
+    let raw = "openai API error (401 Unauthorized): invalid api key";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.error_type, "auth_error");
+    assert_eq!(classified.source, "config");
+    assert!(
+        !classified.retryable,
+        "401 won't recover until the user updates settings"
+    );
+}
+
+#[test]
+fn classify_inference_error_billing_402_distinguished_from_provider_429() {
+    // Acceptance criteria for #2606: distinguish upstream provider
+    // throttling (429) from OpenHuman budget/billing limits (402).
+    let raw = "OpenHuman API error (402 Payment Required): top up to continue";
+    let classified = classify_inference_error(raw);
+    assert_eq!(classified.error_type, "budget_exhausted");
+    assert_eq!(
+        classified.source, "openhuman_billing",
+        "402 must NOT share source with provider 429"
+    );
+    assert!(!classified.retryable);
+}
+
 #[test]
 fn generic_error_copy_is_sanitized_and_has_discord_report_action() {
     let message = generic_inference_error_user_message();