From 34ed335b9e26219b8e66ce5bde8d8975151aaac4 Mon Sep 17 00:00:00 2001 From: "cyrus@tinyhumans.ai" Date: Fri, 29 May 2026 22:06:09 +0530 Subject: [PATCH 1/7] revert: remove all Sentry error suppression from PRs #2813 #2850 #2899 #2906 #2915 #2923 #2924 #2928 #2930 #2934 #2937 #2939 These PRs suppressed/demoted errors instead of fixing root causes. Errors will now properly fire Sentry events again so they can be tracked and fixed with proper root-cause solutions. --- src/core/observability.rs | 848 +----------------- src/main.rs | 52 -- src/openhuman/inference/ops.rs | 42 +- src/openhuman/inference/ops_tests.rs | 70 -- .../inference/provider/config_rejection.rs | 226 +---- src/openhuman/inference/provider/ops.rs | 176 +--- src/openhuman/memory_store/factories.rs | 35 +- src/openhuman/subconscious/store.rs | 14 +- 8 files changed, 55 insertions(+), 1408 deletions(-) diff --git a/src/core/observability.rs b/src/core/observability.rs index 3bcb564e02..9d46e77203 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -181,26 +181,6 @@ pub enum ExpectedErrorKind { /// deliberately not matched because that's an `rm -rf` invariant /// violation, not user input. FilesystemUserPathInvalid, - /// A memory-store write (document upsert or KV set) was rejected because - /// the namespace or key contained what the PII guard classified as a - /// personal identifier (national ID, phone number, formatted credential, - /// etc.). The guard fires *before* the write reaches SQLite so no data - /// is persisted, and the LLM or caller that triggered the write already - /// receives the error string. Sentry has no remediation path — the fix - /// is either a less aggressive namespace/key choice from the caller or a - /// PII-guard allowlist update — and the volume is high from a single user - /// (TAURI-RUST-54T: 915 events, escalating), indicating that the guard - /// is flagging false positives on valid channel names or usernames used - /// as namespace/key identifiers. Demote to `warn` so the breadcrumb - /// survives for local diagnosis but Sentry sees no error event. - /// - /// Canonical wire shapes (from `memory_store/unified/documents.rs` and - /// `memory_store/kv.rs`): - /// - /// - `"document namespace/key cannot contain personal identifiers"` - /// - `"kv key cannot contain personal identifiers"` - /// - `"kv namespace/key cannot contain personal identifiers"` - MemoryStorePiiRejection, /// The provider/model completed a turn with a completely empty body /// (`text_chars=0 thinking_chars=0 tool_calls=0`), so the agent harness /// bailed with the user-facing `"The model returned an empty response. @@ -253,17 +233,7 @@ pub fn expected_error_kind(message: &str) -> Option { if lower.contains("local ai is disabled") { return Some(ExpectedErrorKind::LocalAiDisabled); } - // `_api_key is not configured` catches backend-reported environment variable - // phrases like `VOYAGE_API_KEY is not configured` and - // `COHERE_API_KEY is not configured` returned by the embeddings backend - // when the relevant env var is absent (TAURI-RUST-2H5, ~5 K events). - // The `_api_key` anchor (lower-cased suffix of an env-var name) keeps - // generic "X is not configured" prose from being silenced — only - // ALL_CAPS_API_KEY-style names match. - if lower.contains("api key not set") - || lower.contains("missing api key") - || lower.contains("_api_key is not configured") - { + if lower.contains("api key not set") || lower.contains("missing api key") { return Some(ExpectedErrorKind::ApiKeyMissing); } // Check `ChannelSupervisorRestart` BEFORE `is_loopback_unavailable` and @@ -370,9 +340,6 @@ pub fn expected_error_kind(message: &str) -> Option { if is_config_load_timed_out_message(&lower) { return Some(ExpectedErrorKind::ConfigLoadTimedOut); } - if is_memory_store_pii_rejection(&lower) { - return Some(ExpectedErrorKind::MemoryStorePiiRejection); - } // Empty-provider-response re-report from the web-channel layer. Runs // last so an earlier, more specific matcher always wins. See the // variant doc-comment and [`is_empty_provider_response_message`] for @@ -389,86 +356,9 @@ pub fn expected_error_kind(message: &str) -> Option { if is_filesystem_user_path_invalid_message(&lower) { return Some(ExpectedErrorKind::FilesystemUserPathInvalid); } - // Upstream rate-limit responses — provider throttles the account (429) or - // wraps the 429 inside an HTTP 500 (`"429 rate limit exceeded"` in the - // body). In both cases the reliable-provider layer already retries with - // backoff, and the embeddings path has a proactive token-bucket limiter - // (`embeddings::rate_limit`). The upstream quota is an account-capacity - // signal, not a code bug — Sentry has no remediation path and the - // per-attempt events generate pure noise (OPENHUMAN-TAURI-S: ~6 984 - // events from HTTP 500 wrapping a "429 rate limit exceeded" body; - // OPENHUMAN-TAURI-6Y: ~19 849 events from direct 429s; OPENHUMAN-TAURI-2E: - // ~1 482 events carrying a `"rate_limit_error"` type in the JSON body; - // OPENHUMAN-TAURI-RQ: ~741 events from the embeddings path). - // - // Checked LAST inside `expected_error_kind` — transient HTTP status matches - // (`is_transient_upstream_http_message`) are already caught by the earlier - // arm, so this arm only adds coverage for the 500-wrapping-429 body shape - // and provider JSON envelopes that name the error type explicitly. - if is_upstream_rate_limit_message(&lower) { - return Some(ExpectedErrorKind::TransientUpstreamHttp); - } None } -/// Detect upstream rate-limit error bodies that bubble up from any provider -/// or embedding API call site. -/// -/// Covers three observed wire shapes: -/// -/// 1. **OpenAI / Anthropic JSON body** — `"rate_limit_error"` is the `"type"` -/// field in the structured error object: -/// `{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}` -/// (OPENHUMAN-TAURI-2E / -RQ). -/// -/// 2. **OpenHuman backend wrapping upstream** — `"Upstream rate limit exceeded -/// for model 'summarization-v1'. Please retry shortly."` embedded in a 500 -/// response body (OPENHUMAN-TAURI-6Y / -7H). -/// -/// 3. **Plain phrase** — `"429 rate limit exceeded, please try again later"` / -/// `"rate limit exceeded"` from any other upstream (OPENHUMAN-TAURI-S). -/// -/// The match is against the full lowercased error string (including any -/// caller wrapping prefix), so it survives `agent.run_single` / `rpc.invoke_method` -/// re-reports as well as the original call-site emit. -/// -/// **Polarity contract**: this predicate is *inclusive* — it returns `true` -/// only for messages that are unambiguously rate-limit throttle signals. It -/// must NOT match unrelated errors that incidentally mention "limit" or "rate" -/// (e.g. action-budget `"Rate limit exceeded: action budget exhausted"` -/// from `security::policy` — distinguished by the `"action budget"` anchor). -pub fn is_upstream_rate_limit_message(lower: &str) -> bool { - // `"rate_limit_error"` is the structured error type from OpenAI / Anthropic - // compatible APIs. Tight anchor — colons and underscores don't appear in - // ordinary log text. - if lower.contains("rate_limit_error") { - return true; - } - // `"upstream rate limit exceeded"` is the OpenHuman backend's own phrase - // when it wraps an upstream provider 429 as an HTTP 500. - if lower.contains("upstream rate limit exceeded") { - return true; - } - // `"429 rate limit exceeded"` is the numeric-prefix form emitted by some - // backends (e.g. OPENHUMAN-TAURI-S: `"error":"429 rate limit exceeded"`). - // Anchored on the `"429 rate limit"` substring so a plain `"rate limit - // exceeded"` mention (which could appear in the `security::policy` action- - // budget message) is NOT matched here — the next arm handles clean phrase - // matches only when scoped by a provider API error prefix. - if lower.contains("429 rate limit") { - return true; - } - // `"rate limit exceeded"` on its own is matched ONLY when it appears inside - // a canonical provider API error envelope (`"api error ("` prefix from - // `ops::api_error` / `embeddings::openai`). This keeps the security::policy - // `"Rate limit exceeded: action budget exhausted"` message from being - // silently swallowed — that phrase does not carry an API error prefix. - if lower.contains("api error (") && lower.contains("rate limit exceeded") { - return true; - } - false -} - /// Detect filesystem-out-of-space errors that bubble up from any syscall /// (`open`, `write`, `mkdir`, `rename`). Three platform-stable renderings: /// @@ -583,18 +473,6 @@ fn is_memory_store_breaker_open(lower: &str) -> bool { /// `"session JWT required"` — local pre-flight guards that fire when the /// stored profile is empty (`#1465`-ish onboarding spam) or has been /// cleared by a previous 401 cycle. Both shapes are OpenHuman-specific. -/// - `"backend rejected session token on GET /payments/stripe/currentPlan"` and -/// all analogous `"{METHOD} {path}"` variants — the `BackendApiError::Unauthorized` -/// typed error surfaced by `api::rest::BackendOAuthClient::authed_json` when any -/// OpenHuman REST endpoint returns HTTP 401. The `get_authed_value` wrapper in -/// `billing::ops` stringifies this via `.to_string()`, producing the -/// `"backend rejected session token on …"` prefix. This is uniquely scoped to -/// the `BackendApiError::Unauthorized` variant (the phrase does not appear in -/// any third-party provider error path) so it is safe to classify as session -/// expiry without the conjunctive-anchor guard pattern needed for `"Invalid -/// token"`. Targets TAURI-RUST-E (~1 437 events from -/// `openhuman.billing_get_current_plan` polling on every background billing -/// refresh cycle after the user's JWT lapses). /// /// At the JSON-RPC dispatch boundary the same strict match controls /// `DomainEvent::SessionExpired` publication, so downstream/provider 401s stay @@ -607,14 +485,6 @@ pub fn is_session_expired_message(msg: &str) -> bool { || msg.contains("SESSION_EXPIRED") || (msg.contains("OpenHuman API error (401") && msg.contains("\"error\":\"Invalid token\"")) || (msg.contains("Embedding API error (401") && msg.contains("\"error\":\"Invalid token\"")) - // TAURI-RUST-E — billing endpoint 401s via `BackendApiError::Unauthorized` - // stringified by `billing::ops::get_authed_value(..).map_err(|e| e.to_string())`. - // The display form is `"backend rejected session token on {METHOD} {path}"`; - // the phrase is uniquely scoped to `BackendApiError::Unauthorized` so no - // conjunctive guard is needed. Covers all billing RPC methods - // (billing_get_current_plan, billing_get_balance, etc.) and any other - // `authed_json` caller that stringifies via `.to_string()`. - || lower.contains("backend rejected session token") // OPENHUMAN-TAURI-4P0 — OpenHuman backend's "Invalid token" 401 // envelope. Both anchors must be present: the OpenHuman-scoped // `"OpenHuman API error (401"` prefix (so a third-party provider's @@ -1118,28 +988,8 @@ fn is_provider_user_state_message(lower: &str) -> bool { // // Drops Sentry TAURI-RUST-X9 (~15.7 k events / ~22 h, single user, // release openhuman@0.54.0+c25fc8e5fd3e). - // - // TAURI-RUST-322 (#2929): same direct-mode path but the Composio v3 - // `/connected_accounts` API returns HTTP 403 instead of 401. This - // happens when the BYO API key exists and is syntactically valid but - // does not carry the `connected_accounts:read` permission (e.g. a - // scoped or legacy key). Wire shape: - // - // `[composio-direct] list_connections failed: Composio v3 - // connected_accounts failed: HTTP 403` - // - // 403 from Composio v3 is a user-state condition (key permissions), - // not a bug in openhuman_core. Sentry has no remediation path — the - // user must regenerate their key with the correct scopes on - // app.composio.dev. The polling layer retries every 5 s and the UI - // already surfaces the error; flooding Sentry with 1,000+ events per - // user adds no signal. - // - // Drops Sentry TAURI-RUST-322 (1,021 events, multi-release). if lower.contains("[composio-direct]") - && (lower.contains("http 401") - || lower.contains("http 403") - || lower.contains("invalid api key")) + && (lower.contains("http 401") || lower.contains("invalid api key")) { return true; } @@ -1264,31 +1114,6 @@ fn is_filesystem_user_path_invalid_message(lower: &str) -> bool { || lower.contains("hosted path is not a directory:") } -/// Detect memory-store writes rejected because the namespace or key contained -/// a personal identifier detected by the PII guard. -/// -/// The three canonical wire shapes are emitted by -/// `memory_store/unified/documents.rs` and `memory_store/kv.rs`: -/// -/// - `"document namespace/key cannot contain personal identifiers"` — -/// `upsert_document` / `upsert_document_metadata_only` -/// - `"kv key cannot contain personal identifiers"` — `kv_set_global` -/// - `"kv namespace/key cannot contain personal identifiers"` — `kv_set_namespace` -/// -/// These are expected user-content conditions: the PII guard classifies a -/// channel name, username, or LLM-generated key as a personal identifier and -/// rejects the write. The LLM or caller already receives the error message; -/// Sentry has no remediation path. Drops TAURI-RUST-54T (~915 events, -/// escalating — all from a single user hitting false positives on valid -/// namespace/key identifiers). -/// -/// Anchor on `"cannot contain personal identifiers"` — the exact string -/// shared by all three sites — so typos or future rewordings that drop the -/// anchor still reach Sentry until explicitly classified. -fn is_memory_store_pii_rejection(lower: &str) -> bool { - lower.contains("cannot contain personal identifiers") -} - /// Detect the agent harness's empty-provider-response bail. /// /// Anchored on the literal user-facing string emitted at @@ -1618,20 +1443,6 @@ fn report_expected_message(kind: ExpectedErrorKind, message: &str, domain: &str, "[observability] {domain}.{operation} skipped expected filesystem path validation error" ); } - ExpectedErrorKind::MemoryStorePiiRejection => { - // PII guard rejected a memory-store write because the namespace or - // key was classified as containing a personal identifier. The guard - // already logs a `[memory:safety]` warn at the write site; this - // match arm keeps the diagnostic breadcrumb at warn level (not - // error) so local log files retain the context without spawning a - // Sentry error event. TAURI-RUST-54T (~915 events from one user). - tracing::warn!( - domain = domain, - operation = operation, - kind = "memory_store_pii_rejection", - "[observability] {domain}.{operation} skipped expected memory-store PII rejection" - ); - } ExpectedErrorKind::EmptyProviderResponse => { // Model/user-config condition — the provider returned a // completely empty body and the agent harness bailed with the @@ -2020,83 +1831,25 @@ pub fn is_transient_message_failure(msg: &str) -> bool { /// Returns true when a Sentry event is a budget-exhausted 400 that should be /// dropped from `before_send`. /// -/// **Two-tier match — either tier fires a drop:** -/// -/// 1. **Tag-gated path** (primary suppression, added in PR #1633): -/// - tag `failure == "non_2xx"` -/// - tag `status == "400"` -/// - event message or any exception value contains one of the tight -/// budget-exhaustion phrases from -/// [`crate::openhuman::inference::provider::is_budget_exhausted_message`] -/// -/// 2. **Text-only path** (defense-in-depth, covers OPENHUMAN-CORE-N / -/// TAURI-RUST-1P — GitHub issue #2935): -/// - event message or any exception value contains the exact phrase -/// `"Insufficient budget"` (the literal wire body the OpenHuman API -/// returns in `{"success":false,"error":"Insufficient budget"}`), -/// regardless of which tags are set. -/// -/// The text-only tier is intentionally tighter than tier 1 — it only -/// matches the exact phrase the backend uses, never the looser -/// `"add credits"` / `"budget exceeded"` phrases that might appear in -/// unrelated product copy. This prevents future call sites that call -/// `report_error` without setting `failure` / `status` tags (or that -/// invoke `sentry::capture_message` directly) from leaking budget -/// exhaustion events to Sentry. -/// -/// Note: `domain` is intentionally not gated here so a future re-emitter -/// under a different domain tag still gets filtered. -pub fn is_budget_event(event: &sentry::protocol::Event<'_>) -> bool { - // Tier 1 — tag-gated primary path. - let tags = &event.tags; - if tags.get("failure").map(String::as_str) == Some("non_2xx") - && tags.get("status").map(String::as_str) == Some("400") - && event_contains_budget_exhausted_message(event) - { - return true; - } - // Tier 2 — text-only defense-in-depth: drop any event whose message or - // exception value contains the exact wire phrase the OpenHuman backend - // emits for budget exhaustion, regardless of tags. - event_contains_budget_insufficient_phrase(event) -} - -/// Defense-in-depth `before_send` filter for Sentry event CORE-RUST-EK -/// (~827 events): every call to the cloud embedding API (OpenAI -/// `text-embedding-3-large` or Voyage) that returns HTTP 401 fires a Sentry -/// error event via `report_error_or_expected` in -/// `src/openhuman/embeddings/openai.rs`. -/// -/// 401 on the embedding call path means the configured API key is stale or -/// invalid. This is the same class of condition as the VOYAGE_API_KEY-missing -/// error (PR #2915) and the billing-expired 401 (PR #2924): the user's LLM -/// session was interrupted by an auth failure that the core will retry on the -/// next turn, but the Sentry volume-per-key ratio yields zero actionable signal. -/// /// Match criteria (all required): -/// - tag `domain == "embeddings"` — pins the filter to the embeddings call -/// path and avoids silencing unrelated 401s from provider chat, billing, or -/// the backend RPC layer -/// - tag `failure == "non_2xx"` — the marker set by `embeddings::openai::embed` -/// at the non-2xx path -/// - tag `status == "401"` — narrows to auth-rejection failures (429 / 500 -/// are handled by the existing rate-limit filter) +/// - tag `failure == "non_2xx"` +/// - tag `status == "400"` +/// - the event message or any exception value contains one of the tight +/// budget-exhaustion phrases /// -/// The primary suppression for the OpenHuman-backend "Invalid token" shape -/// already lives in `expected_error_kind` / `is_session_expired_message`. This -/// filter is defense-in-depth: it catches any third-party provider 401 that -/// doesn't carry the OpenHuman-backend body (e.g. OpenAI's -/// `{"error":{"code":"invalid_api_key",...}}`), ensuring CORE-RUST-EK stays -/// off Sentry regardless of which embedding provider is configured. -pub fn is_embeddings_api_key_401_event(event: &sentry::protocol::Event<'_>) -> bool { +/// Note: `domain` is intentionally not gated here as defense-in-depth over +/// the emit-site classifier — any non_2xx/400 event that carries the +/// budget-exhausted phrasing is dropped regardless of which domain produced +/// it, so a future re-emitter under a different tag still gets filtered. +pub fn is_budget_event(event: &sentry::protocol::Event<'_>) -> bool { let tags = &event.tags; - if tags.get("domain").map(String::as_str) != Some("embeddings") { + if tags.get("failure").map(String::as_str) != Some("non_2xx") { return false; } - if tags.get("failure").map(String::as_str) != Some("non_2xx") { + if tags.get("status").map(String::as_str) != Some("400") { return false; } - tags.get("status").map(String::as_str) == Some("401") + event_contains_budget_exhausted_message(event) } /// 404 on PATCH/DELETE to a channel-message path is an expected backend state @@ -2158,30 +1911,6 @@ fn event_contains_budget_exhausted_message(event: &sentry::protocol::Event<'_>) }) } -/// Tier-2 (text-only) budget check for [`is_budget_event`]. -/// -/// Matches the exact literal phrase `"Insufficient budget"` (case-insensitive) -/// in the event message or any exception value. This is the wire body the -/// OpenHuman backend returns: `{"success":false,"error":"Insufficient budget"}`. -/// -/// Deliberately narrower than [`event_contains_budget_exhausted_message`]: -/// only the precise backend phrase is matched, never the looser -/// `"add credits"` / `"budget exceeded"` / `"insufficient balance"` phrases -/// which might appear in unrelated product copy that would otherwise produce -/// false positives. -fn event_contains_budget_insufficient_phrase(event: &sentry::protocol::Event<'_>) -> bool { - const PHRASE: &str = "insufficient budget"; - let has_phrase = |s: &str| s.to_ascii_lowercase().contains(PHRASE); - if event.message.as_deref().is_some_and(has_phrase) { - return true; - } - event - .exception - .values - .iter() - .any(|exc| exc.value.as_deref().is_some_and(has_phrase)) -} - #[cfg(test)] mod tests { use super::*; @@ -2271,40 +2000,12 @@ mod tests { ); } - #[test] - fn classifies_backend_env_api_key_not_configured() { - // TAURI-RUST-2H5 (~5 K events): backend embedding endpoint returns a - // 400 with `{"success":false,"error":"VOYAGE_API_KEY is not configured"}` - // whenever the backend env var is absent. This is a known server-side - // config state, not an app error — silence it the same way we silence - // other `ApiKeyMissing` variants. - for raw in [ - r#"Embedding API error (400 Bad Request): {"success":false,"error":"VOYAGE_API_KEY is not configured"}"#, - r#"Embedding API error 400 Bad Request: {"success":false,"error":"VOYAGE_API_KEY is not configured"}"#, - // Future-proof: same shape for any other backend-managed embedder. - r#"Embedding API error (400 Bad Request): {"success":false,"error":"COHERE_API_KEY is not configured"}"#, - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::ApiKeyMissing), - "should classify backend env api-key missing: {raw}" - ); - } - } - #[test] fn does_not_classify_unrelated_is_not_configured_messages() { - // The `_api_key` anchor must keep prose that merely says "is not - // configured" from being silenced — only env-var-style key names - // should match. assert_eq!( expected_error_kind("workspace path is not configured for this user"), None ); - assert_eq!( - expected_error_kind("embedding model is not configured"), - None - ); assert_eq!( expected_error_kind("provider 'voyage' is not configured in settings"), None @@ -2678,36 +2379,6 @@ mod tests { } } - #[test] - fn classifies_memory_store_pii_rejection_errors() { - // TAURI-RUST-54T: ~915 events from one user where the PII guard - // rejected memory-store writes on namespace/key values that look like - // personal identifiers. All three canonical wire shapes — from - // `documents.rs` (upsert_document / upsert_document_metadata_only) - // and `kv.rs` (kv_set_global / kv_set_namespace) — must classify as - // expected so they stop reaching Sentry. - for raw in [ - "document namespace/key cannot contain personal identifiers", - "kv key cannot contain personal identifiers", - "kv namespace/key cannot contain personal identifiers", - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::MemoryStorePiiRejection), - "should classify as memory-store PII rejection: {raw}" - ); - } - - // Wrapped by the RPC dispatch layer — substring match must survive the - // `rpc.invoke_method failed: ` prefix that `jsonrpc.rs` prepends. - assert_eq!( - expected_error_kind( - "rpc.invoke_method failed: document namespace/key cannot contain personal identifiers" - ), - Some(ExpectedErrorKind::MemoryStorePiiRejection) - ); - } - #[test] fn classifies_memory_store_breaker_open() { // TAURI-RUST-52X (~455 events on self-hosted Sentry): the chunk-store @@ -2833,24 +2504,6 @@ mod tests { } } - #[test] - fn does_not_classify_unrelated_messages_as_memory_pii_rejection() { - // A generic "personal identifiers" mention without the "cannot contain" - // anchor must not be silenced. - assert_eq!( - expected_error_kind("processing personal identifiers"), - None, - "must not match a bare 'personal identifiers' mention" - ); - // The secret-rejection variant uses different wording and must not be - // swallowed by the PII classifier. - assert_eq!( - expected_error_kind("document namespace/key cannot contain secrets"), - None, - "secret rejection must remain unclassified" - ); - } - #[test] fn does_not_classify_unrelated_breaker_messages() { // Generic "circuit breaker open" without the `[memory_tree]` anchor @@ -2868,132 +2521,6 @@ mod tests { ); } - // ── Upstream rate-limit suppression (OPENHUMAN-TAURI-S / -6Y / -2E / -RQ) ─ - - /// Canonical Anthropic / OpenAI body with a structured `"rate_limit_error"` - /// type — OPENHUMAN-TAURI-2E (~1 482 events) and -RQ (~741 events). - #[test] - fn classifies_rate_limit_error_type_as_transient() { - for raw in [ - // Direct 429 from the embeddings path (OPENHUMAN-TAURI-RQ): - r#"Embedding API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#, - // Via llm_provider.api_error (OPENHUMAN-TAURI-2E): - r#"[observability] llm_provider.api_error failed: OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"#, - // Re-reported by agent.run_single: - r#"run_chat_task failed client_id=abc thread_id=t1 request_id=r1 error=OpenHuman API error (429 Too Many Requests): {"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#, - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::TransientUpstreamHttp), - "should classify rate_limit_error body as transient: {raw}" - ); - } - } - - /// OpenHuman backend wrapping an upstream 429 as HTTP 500 with a - /// `"upstream rate limit exceeded"` body — OPENHUMAN-TAURI-6Y (~19 849 - /// events). - #[test] - fn classifies_upstream_rate_limit_in_500_body_as_transient() { - for raw in [ - r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly."}"#, - r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi","upstreamModel":"deepseek-ai/DeepSeek-V3-0324"}}"#, - // Re-wrapped by rpc.invoke_method: - r#"rpc.invoke_method failed: LLM summarisation failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'."}"#, - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::TransientUpstreamHttp), - "should classify upstream-rate-limit-in-500 as transient: {raw}" - ); - } - } - - /// Backend returning HTTP 500 with a numeric `"429 rate limit exceeded"` - /// body — OPENHUMAN-TAURI-S (~6 984 events). - #[test] - fn classifies_429_rate_limit_in_500_body_as_transient() { - for raw in [ - r#"OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#, - r#"[observability] llm_provider.api_error failed: OpenHuman API error (500 Internal Server Error): {"success":false,"error":"429 rate limit exceeded, please try again later"}"#, - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::TransientUpstreamHttp), - "should classify 429-in-500-body as transient: {raw}" - ); - } - } - - /// The security::policy `"Rate limit exceeded: action budget exhausted"` - /// must NOT be silenced — it's a user-facing hard stop, not a transient - /// upstream quota hit. - #[test] - fn does_not_classify_security_policy_rate_limit_as_transient() { - let msg = "Rate limit exceeded: action budget exhausted (0 actions/hour). \ - Increase the limit in Settings -> Advanced -> Agent autonomy"; - assert_eq!( - expected_error_kind(msg), - None, - "security policy action-budget error must reach Sentry: {msg}" - ); - // Wrapped by rpc.invoke_method — the prefix must not accidentally - // trigger the `api error (` anchor. - assert_eq!( - expected_error_kind(&format!("rpc.invoke_method failed: {msg}")), - None, - "wrapped security policy action-budget error must reach Sentry" - ); - } - - /// Standalone `"rate limit exceeded"` without the `"api error ("` anchor - /// must NOT be silenced — keeps loose phrases from accidentally demoting - /// unrelated errors. - #[test] - fn does_not_classify_bare_rate_limit_exceeded_as_transient() { - assert_eq!( - expected_error_kind("rate limit exceeded"), - None, - "bare 'rate limit exceeded' without API error anchor must reach Sentry" - ); - } - - /// `is_upstream_rate_limit_message` predicate unit tests — verifies the - /// polarity contract independently of `expected_error_kind`. - #[test] - fn upstream_rate_limit_predicate_matches_expected_shapes() { - for lower in [ - r#"{"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#, - "upstream rate limit exceeded for model 'summarization-v1'", - "429 rate limit exceeded, please try again later", - r#"openai api error (429 too many requests): {"error":{"message":"rate limit exceeded.","type":"rate_limit_error"}}"#, - ] { - assert!( - is_upstream_rate_limit_message(lower), - "should match: {lower}" - ); - } - } - - #[test] - fn upstream_rate_limit_predicate_does_not_match_unrelated() { - for lower in [ - // security::policy budget message — must not be swallowed - "rate limit exceeded: action budget exhausted (0 actions/hour)", - // bare phrase without anchor - "rate limit exceeded", - // unrelated 500 body - r#"{"success":false,"error":"internal server error"}"#, - // budget exhausted — different concept - "budget exhausted, add credits to continue", - ] { - assert!( - !is_upstream_rate_limit_message(lower), - "should not match: {lower}" - ); - } - } - #[test] fn does_not_classify_unrelated_messages_as_capability_unavailable() { // The classifier anchors on the exact "for this RAM tier" substring. @@ -4006,84 +3533,6 @@ mod tests { ); } - // ── TAURI-RUST-322 (#2929): composio-direct 403 (key missing perms) ─ - - #[test] - fn classifies_composio_direct_403_as_provider_user_state() { - // Canonical Sentry TAURI-RUST-322 wire shape — the verbatim - // title body from the issue (1,021 events, multi-release). The - // Composio v3 `/connected_accounts` endpoint returns HTTP 403 - // when the BYO API key exists but lacks `connected_accounts:read` - // permission. This is a user-state condition; Sentry has no - // remediation path. - let msg = "[composio-direct] list_connections failed: \ - Composio v3 connected_accounts failed: HTTP 403"; - assert_eq!( - expected_error_kind(msg), - Some(ExpectedErrorKind::ProviderUserState), - "composio-direct HTTP 403 must demote to ProviderUserState (TAURI-RUST-322)" - ); - } - - #[test] - fn classifies_composio_direct_403_for_other_ops() { - // The `[composio-direct]` + `HTTP 403` arm must cover every op - // that can hit a 403 from the Composio v3 tenant (list_tools - // prefetch, authorize, etc.) — not just list_connections. - let shapes = [ - // list_tools prefetch of connections hits the 403 wall - "[composio-direct] list_tools: prefetch connections failed: \ - Composio v3 connected_accounts failed: HTTP 403", - // list_connections itself (the primary source of the leak) - "[composio-direct] list_connections (direct) failed: \ - Composio v3 connected_accounts failed: HTTP 403", - // any future direct-mode op that hits a 403 - "[composio-direct] composio_list_connections (direct) failed: \ - Composio v3 connected_accounts failed: HTTP 403", - ]; - for msg in shapes { - assert_eq!( - expected_error_kind(msg), - Some(ExpectedErrorKind::ProviderUserState), - "every [composio-direct] op with HTTP 403 must demote to ProviderUserState: {msg}" - ); - } - } - - #[test] - fn does_not_classify_unrelated_http_403_as_composio_direct_user_state() { - // Discrimination test: a 403 that does NOT carry the - // `[composio-direct]` prefix must NOT match this arm. Backend-mode - // composio 403s and unrelated 403s must remain visible in Sentry. - let backend_403 = "[composio] list_connections failed: \ - Backend returned 403 Forbidden for GET \ - https://api.tinyhumans.ai/agent-integrations/composio/connections"; - // The backend-mode shape passes through `is_backend_user_error_message` - // (4xx matcher), not this arm. Verify it does NOT match this arm. - assert!( - !lower_contains_composio_direct_auth_wall(backend_403), - "backend-mode 403 must NOT match the composio-direct arm" - ); - - let unrelated_403 = "GitHub API error: HTTP 403: rate limit exceeded"; - assert_ne!( - expected_error_kind(unrelated_403), - Some(ExpectedErrorKind::ProviderUserState), - "unrelated 403 (no [composio-direct] anchor) must NOT match the composio-direct arm" - ); - } - - // Helper used only in the discrimination test above — mirrors the - // exact condition in `is_provider_user_state_message` without - // requiring access to the private function. - fn lower_contains_composio_direct_auth_wall(msg: &str) -> bool { - let lower = msg.to_ascii_lowercase(); - lower.contains("[composio-direct]") - && (lower.contains("http 401") - || lower.contains("http 403") - || lower.contains("invalid api key")) - } - // ── TAURI-RUST-34H: backend-wrapped Cloudflare anti-bot interstitial ─ #[test] @@ -4571,83 +4020,6 @@ mod tests { assert_eq!(expected_error_kind("session_expired lowercase"), None); } - /// TAURI-RUST-E (~1 437 events): billing poll fires `report_error_or_expected` - /// on every refresh cycle once the user's JWT lapses because the - /// `BackendApiError::Unauthorized` typed error was stringified to - /// `"backend rejected session token on GET /payments/stripe/currentPlan"` by - /// `billing::ops::get_authed_value(..).map_err(|e| e.to_string())` before the - /// phrase was added to `is_session_expired_message`. - /// - /// The phrase `"backend rejected session token"` is uniquely produced by - /// `BackendApiError::Unauthorized`'s `Display` impl in `api::rest` — no - /// third-party provider path emits it — so no conjunctive guard is needed. - #[test] - fn classifies_billing_401_as_session_expired() { - // Exact wire shape from `billing_get_current_plan` — the most common - // event in TAURI-RUST-E. - assert_eq!( - expected_error_kind( - "backend rejected session token on GET /payments/stripe/currentPlan" - ), - Some(ExpectedErrorKind::SessionExpired), - "TAURI-RUST-E: billing_get_current_plan 401 must classify as SessionExpired" - ); - - // Other billing methods share the same `BackendApiError::Unauthorized` - // display shape — pin them so a wording change in `rest.rs` would catch - // every billing call site. - for path in [ - "/payments/credits/balance", - "/payments/credits/transactions?limit=20&offset=0", - "/payments/credits/auto-recharge", - "/payments/credits/auto-recharge/cards", - "/payments/stripe/purchasePlan", - "/payments/stripe/portal", - "/coupons/me", - ] { - let msg = format!("backend rejected session token on GET {path}"); - assert_eq!( - expected_error_kind(&msg), - Some(ExpectedErrorKind::SessionExpired), - "billing 401 must classify as SessionExpired: {msg}" - ); - } - - // POST / PATCH / DELETE variants are also produced by `authed_json`. - for raw in [ - "backend rejected session token on POST /payments/credits/top-up", - "backend rejected session token on PATCH /payments/credits/auto-recharge", - "backend rejected session token on DELETE /payments/credits/auto-recharge/cards/pm_123", - ] { - assert_eq!( - expected_error_kind(raw), - Some(ExpectedErrorKind::SessionExpired), - "TAURI-RUST-E variant must classify as SessionExpired: {raw}" - ); - } - } - - /// `"backend rejected session token"` is scoped to `BackendApiError::Unauthorized` - /// in `api::rest`. Ensure unrelated messages containing the individual - /// words don't accidentally match. - #[test] - fn does_not_classify_unrelated_rejected_messages_as_session_expired() { - // Third-party provider errors that mention a token being rejected but - // do not contain the exact OpenHuman `BackendApiError::Unauthorized` - // display phrase. - for raw in [ - "Discord API error: token rejected by upstream", - "Stripe webhook signature rejected — bad secret", - "API token rejected: please regenerate", - ] { - assert_eq!( - expected_error_kind(raw), - None, - "unrelated token-rejected message must NOT suppress Sentry: {raw}" - ); - } - } - #[test] fn report_error_does_not_panic_with_many_tags() { let err = anyhow::anyhow!("multi-tag"); @@ -5110,11 +4482,7 @@ mod tests { } #[test] - fn budget_filter_requires_non_2xx_failure_and_400_status_for_loose_phrases() { - // Tier 1 requires both tags for the loose budget phrases ("budget exceeded", - // "add credits", …) that might coincidentally appear in unrelated product - // copy. Tier 2 (text-only) only fires for the exact backend phrase - // "insufficient budget" — so the loose phrases still need both tags. + fn budget_filter_requires_non_2xx_failure_and_400_status() { let message = "Budget exceeded — add credits to continue"; for tags in [ vec![("failure", "transport"), ("status", "400")], @@ -5126,192 +4494,6 @@ mod tests { } } - /// Tier-2 defense-in-depth: drop the exact "Insufficient budget" phrase - /// the OpenHuman backend returns regardless of which tags are set. - /// Regression guard for OPENHUMAN-CORE-N / TAURI-RUST-1P (GitHub #2935). - #[test] - fn budget_filter_drops_insufficient_budget_without_tags() { - // The exact JSON wire body from the OpenHuman backend. - let message = r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Insufficient budget"}"#; - - // No tags at all. - let event = event_with_message(message); - assert!( - is_budget_event(&event), - "tier-2 must drop 'Insufficient budget' even without failure/status tags" - ); - - // Wrong status tag only. - let event = event_with_tags_and_message(&[("status", "400")], message); - assert!( - is_budget_event(&event), - "tier-2 must drop 'Insufficient budget' when only status tag is set" - ); - - // Wrong failure tag only. - let event = event_with_tags_and_message(&[("failure", "transport")], message); - assert!( - is_budget_event(&event), - "tier-2 must drop 'Insufficient budget' when failure tag doesn't match" - ); - } - - /// Regression guard: tier-2 must also catch the exception/tracing path - /// (`sentry-tracing` with `attach_stacktrace=true` may populate the - /// exception list rather than `event.message`). - #[test] - fn budget_filter_drops_insufficient_budget_exception_without_tags() { - // Exact wire body from OpenHuman backend wrapped in an exception value. - let event = event_with_exception_value( - r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"INSUFFICIENT BUDGET"}"#, - ); - assert!( - is_budget_event(&event), - "tier-2 must drop exception-path 'Insufficient budget' case-insensitively" - ); - - // Mixed case variant. - let event = event_with_exception_value( - r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Insufficient Budget"}"#, - ); - assert!( - is_budget_event(&event), - "tier-2 must drop exception-path 'Insufficient Budget' case-insensitively" - ); - - // Wrong failure/status tags should not prevent tier-2 from matching. - let mut event = event_with_exception_value( - r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Insufficient budget"}"#, - ); - event - .tags - .insert("failure".to_string(), "transport".to_string()); - assert!( - is_budget_event(&event), - "tier-2 must drop exception-path even when failure tag does not match" - ); - - // Unrelated exception values must not match. - let event = event_with_exception_value("retry budget exhausted after 3 attempts"); - assert!( - !is_budget_event(&event), - "tier-2 must not match unrelated exception value" - ); - } - - /// Tier-2 only matches the tight phrase — unrelated 400 errors with "budget" - /// in an unrelated context should not be silently dropped. - #[test] - fn budget_filter_tier2_does_not_match_unrelated_messages() { - for msg in [ - "retry budget exhausted after 3 attempts", - "bad request: missing field", - "budget_id=42 not found", - "", - ] { - let event = event_with_message(msg); - assert!( - !is_budget_event(&event), - "tier-2 must not match unrelated message: {msg:?}" - ); - } - } - - /// CORE-RUST-EK (~827 events): every 401 from the embeddings call path - /// (`domain=embeddings`, `failure=non_2xx`, `status=401`) must be filtered - /// before it reaches Sentry. Covers both the OpenHuman-backend "Invalid - /// token" shape (already handled by the primary `is_session_expired_message` - /// classifier) and third-party provider body shapes (OpenAI - /// `invalid_api_key`, plain `Unauthorized`) that fall through the string - /// classifier. - #[test] - fn embeddings_401_filter_drops_domain_embeddings_status_401() { - // Canonical CORE-RUST-EK wire shape: OpenAI `text-embedding-3-large` - // key is stale. `report_error_or_expected` sets - // domain=embeddings / operation=openai_embed / failure=non_2xx / - // status=401 / model=text-embedding-3-large. - let event = event_with_tags_and_message( - &[ - ("domain", "embeddings"), - ("operation", "openai_embed"), - ("failure", "non_2xx"), - ("status", "401"), - ("model", "text-embedding-3-large"), - ], - r#"Embedding API error (401 Unauthorized): {"error":{"message":"Incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.","type":"invalid_request_error","param":null,"code":"invalid_api_key"}}"#, - ); - assert!( - is_embeddings_api_key_401_event(&event), - "CORE-RUST-EK: domain=embeddings status=401 must be filtered" - ); - } - - /// Any other embedding status (e.g. 429 rate-limit, 500 server error) - /// must not be filtered by the embeddings-401 guard — those have their - /// own handlers (rate-limit filter, general error reporting). - #[test] - fn embeddings_401_filter_passes_other_statuses() { - for status in ["429", "500", "400"] { - let event = event_with_tags_and_message( - &[ - ("domain", "embeddings"), - ("failure", "non_2xx"), - ("status", status), - ], - "Embedding API error", - ); - assert!( - !is_embeddings_api_key_401_event(&event), - "domain=embeddings status={status} must NOT be filtered by the 401 guard" - ); - } - } - - /// Non-embeddings domains must not be filtered even if status=401 — the - /// guard is scoped specifically to `domain=embeddings` so that provider-chat - /// and backend-API 401s remain subject to their own classifiers. - #[test] - fn embeddings_401_filter_passes_non_embeddings_domains() { - for domain in ["llm_provider", "backend_api", "rpc", "composio"] { - let event = event_with_tags_and_message( - &[ - ("domain", domain), - ("failure", "non_2xx"), - ("status", "401"), - ], - "API error (401 Unauthorized): some body", - ); - assert!( - !is_embeddings_api_key_401_event(&event), - "domain={domain} status=401 must NOT be swallowed by the embeddings-401 guard" - ); - } - } - - /// The guard requires both `failure=non_2xx` and `status=401` to be - /// present; missing either tag must cause the filter to pass the event - /// through. - #[test] - fn embeddings_401_filter_requires_failure_and_status_tags() { - let no_failure_tag = event_with_tags_and_message( - &[("domain", "embeddings"), ("status", "401")], - "Embedding API error (401 Unauthorized): unauthorized", - ); - assert!( - !is_embeddings_api_key_401_event(&no_failure_tag), - "missing failure tag must not trigger the guard" - ); - - let no_status_tag = event_with_tags_and_message( - &[("domain", "embeddings"), ("failure", "non_2xx")], - "Embedding API error (401 Unauthorized): unauthorized", - ); - assert!( - !is_embeddings_api_key_401_event(&no_status_tag), - "missing status tag must not trigger the guard" - ); - } - #[test] fn report_error_or_expected_does_not_panic() { report_error_or_expected( diff --git a/src/main.rs b/src/main.rs index 10fd69eca9..352f710a8a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -66,21 +66,6 @@ fn main() { if openhuman_core::core::observability::is_budget_event(&event) { return None; } - // CORE-RUST-EK (~827 events): drop all HTTP 401 responses from the - // embeddings call path (domain=embeddings, failure=non_2xx, - // status=401). The primary suppression for the OpenHuman-backend - // "Invalid token" shape lives in `expected_error_kind` / - // `is_session_expired_message`. This is defense-in-depth that also - // catches third-party provider 401s (e.g. OpenAI `invalid_api_key` - // body) that don't carry the OpenHuman envelope and therefore fall - // through the string-based classifier to Sentry. - if openhuman_core::core::observability::is_embeddings_api_key_401_event(&event) { - log::debug!( - "[sentry-embeddings-401-filter] dropping embeddings api-key 401 event_id={:?}", - event.event_id - ); - return None; - } // Defense-in-depth: drop max-tool-iterations cap events that // slipped past the call-site filters in // `agent::harness::session::runtime::run_single`, @@ -98,43 +83,6 @@ fn main() { { return None; } - // Defense-in-depth: upstream rate-limit events that slipped past - // the call-site suppressors in `ops::api_error` (primary guard) - // and `report_error_or_expected` (secondary guard via - // `expected_error_kind`). Catches the three major shapes: - // · `rate_limit_error` type in the JSON body (OPENHUMAN-TAURI-2E, - // OPENHUMAN-TAURI-RQ — ~2 223 events combined) - // · `"upstream rate limit exceeded"` in a 500 body (TAURI-6Y — - // ~19 849 events) - // · `"429 rate limit exceeded"` in a 500 body (TAURI-S — ~6 984 - // events) - // The primary per-attempt suppression lives in - // `openhuman::inference::provider::ops::api_error` (skips - // `report_error` entirely for rate-limit bodies) and in - // `embeddings::openai::embed` (uses `report_error_or_expected` with - // the canonical `"Embedding API error ({status}): …"` format so - // `is_transient_upstream_http_message` catches it). This filter is - // the last line of defense for any future call site that adds a new - // report path without routing through one of those two guards. - { - let direct = event.message.as_deref(); - let from_logentry = event.logentry.as_ref().map(|l| l.message.as_str()); - let from_exception = event.exception.last().and_then(|e| e.value.as_deref()); - let is_rate_limited = [direct, from_logentry, from_exception] - .into_iter() - .flatten() - .map(str::to_ascii_lowercase) - .any(|lower| { - openhuman_core::core::observability::is_upstream_rate_limit_message(&lower) - }); - if is_rate_limited { - log::debug!( - "[sentry-rate-limit-filter] dropping upstream rate-limit event_id={:?}", - event.event_id - ); - return None; - } - } // Defense-in-depth: 404 on PATCH/DELETE to a channel-message path // is an expected state (provider-side delete or backend GC). Primary // suppression lives in `authed_json`; this catches any future call diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs index bb882b9cd9..6c7ab72056 100644 --- a/src/openhuman/inference/ops.rs +++ b/src/openhuman/inference/ops.rs @@ -29,31 +29,6 @@ fn is_unknown_provider_user_config(err: &str) -> bool { err.contains("no cloud provider with id or slug") } -/// Returns `true` when the error from a provider chat attempt is a known, -/// expected user-state or provider-state condition that already has its own -/// Sentry report (or is deterministically expected and has no remediation -/// path): -/// -/// - **401 Unauthorized** — API key revoked / wrong key. Already reported by -/// the provider layer's `api_error` path. An ops-level duplicate adds noise -/// with no additional context. -/// - **429 Too Many Requests / rate-limit** — Quota exhaustion. Already -/// covered by the `is_upstream_rate_limit_message` classifier in -/// `expected_error_kind`; the reliable-provider layer retries with -/// backoff before propagating. -/// - **Model not found** — User selected a model that doesn't exist for -/// their key. The provider layer already classifies this as a config -/// rejection (TAURI-RUST-68, ~1309 events). -/// -/// The matcher is intentionally broad so the ops-level wrapper stays out -/// of the Sentry funnel for all provider-state failures — the underlying -/// call site (`compatible.rs` / `report_error_or_expected`) is already -/// responsible for the authoritative report. Unclassified failures (5xx, -/// unexpected payloads, network errors) are NOT matched and still escalate. -fn is_expected_chat_failure(err: &str) -> bool { - crate::core::observability::expected_error_kind(err).is_some() -} - #[derive(Debug, Clone, serde::Serialize)] pub struct InferenceTestProviderModelResult { pub reply: String, @@ -204,22 +179,7 @@ pub async fn inference_test_provider_model( output_len = outcome.value.reply.len(), "{LOG_PREFIX} test_provider_model:ok" ), - Err(err) => { - if is_expected_chat_failure(err) { - // Provider-state / user-config failure (401, 429, model not - // found, API key missing, etc.). The underlying provider - // layer already emitted its own Sentry event or classified - // this as expected. An ops-level duplicate adds noise. - // Targets TAURI-RUST-68 (~1,309 events). - warn!( - provider, - error = %err, - "{LOG_PREFIX} test_provider_model:expected-error (no Sentry)" - ); - } else { - error!(error = %err, "{LOG_PREFIX} test_provider_model:error"); - } - } + Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"), } result } diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs index 90ca5440fe..27083c8e4e 100644 --- a/src/openhuman/inference/ops_tests.rs +++ b/src/openhuman/inference/ops_tests.rs @@ -317,73 +317,3 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() { } } -// ── is_expected_chat_failure (TAURI-RUST-68) ───────────────────────────── -// -// `inference_test_provider_model` calls `simple_chat` which can fail with -// known provider-state or user-config conditions (401, 429, model not -// found). Before this fix every failure escalated to `error!`, which -// sentry-tracing shipped to Sentry as `"[inference::ops] -// test_provider_model:error"` — 1,309 events — while the same underlying -// errors already had their own report or were classified as expected by -// `expected_error_kind`. The gate demotes them to `warn!` so they stay in -// local logs but don't generate duplicate Sentry noise. -// -// Anchored on the shared `expected_error_kind` classifier so both the unit -// tests here and the production gate stay in sync with the central -// suppression logic in `core::observability`. - -#[test] -fn is_expected_chat_failure_matches_api_key_missing() { - // Provider layer emits this phrase when no API key is configured. - assert!(is_expected_chat_failure("api key not set for openai")); - assert!(is_expected_chat_failure( - "missing api key: openai_api_key is not configured" - )); -} - -#[test] -fn is_expected_chat_failure_matches_rate_limit() { - // 429-style rate-limit phrases emitted by the provider / OpenHuman backend. - assert!(is_expected_chat_failure( - "openai API error (429 Too Many Requests): You exceeded your current quota" - )); - assert!(is_expected_chat_failure( - "openai API error (500): 429 rate limit exceeded" - )); -} - -#[test] -fn is_expected_chat_failure_matches_provider_config_rejection() { - // OpenAI-style model-not-found code in error body. - assert!(is_expected_chat_failure( - r#"custom_openai API error (404 Not Found): {"error":{"message":"The model does not exist or you do not have access","code":"model_not_found"}}"# - )); - // Temperature-unsupported model (e.g. o1/o3/o4 reasoning models). - assert!(is_expected_chat_failure( - "custom_openai API error (400 Bad Request): invalid temperature: only 1 is allowed" - )); - // litellm-style not_found_error envelope. - assert!(is_expected_chat_failure( - r#"custom_openai API error (404 Not Found): {"error":{"message":"model 'gpt-99' not found","type":"not_found_error"}}"# - )); -} - -#[test] -fn is_expected_chat_failure_does_not_match_real_errors() { - // Real errors that must still reach Sentry must NOT be demoted. - for raw in [ - // Genuine 500 server error — actionable, must escalate - "openai API error (500 Internal Server Error): Something went wrong", - // Unexpected JSON from provider — potential provider bug - "openai API returned an unexpected chat-completions payload: missing field", - // Local I/O error — real infrastructure problem - "failed to open config file: permission denied", - // Completely empty string — fallthrough - "", - ] { - assert!( - !is_expected_chat_failure(raw), - "must NOT demote real error: {raw:?}" - ); - } -} diff --git a/src/openhuman/inference/provider/config_rejection.rs b/src/openhuman/inference/provider/config_rejection.rs index 5bec74693c..1d3b918084 100644 --- a/src/openhuman/inference/provider/config_rejection.rs +++ b/src/openhuman/inference/provider/config_rejection.rs @@ -32,15 +32,6 @@ //! from a user OAuth/scope gap) //! - `"not_found_error"` (J2 / J5 / J4 — litellm-compatible envelope //! `type` field carrying "model 'X' not found") -//! - `"does not support tools"` / `"function calling is not supported"` / -//! `"unknown parameter: tools"` / `"unrecognized field \`tools\`"` / -//! `"unsupported parameter: tools"` (TAURI-RUST-4K7 — Ollama models such -//! as `gemma3:1b-it-qat` and `huihui_ai/deepseek-r1-abliterated:8b` -//! reject tool-enabled requests with HTTP 400. The compatible provider -//! already retries without tools, so the initial 400 is not a -//! bug — it's expected discovery of the model's capability boundary. -//! Sentry noise suppressed here; the retry path in `compatible.rs` runs -//! unchanged.) //! //! These are **deterministic user-configuration state**, not bugs the //! maintainers can act on: the user pointed OpenHuman at a custom @@ -171,54 +162,33 @@ pub fn is_provider_config_rejection_message(body: &str) -> bool { // this is the `type` field used by litellm/Anthropic-style // envelopes for the same class of user-state error. "not_found_error", - // TAURI-RUST-4NM — nvidia-nim (and some other providers) return - // `{"error":{"message":"model field is required","type":"invalid_request_error","code":"missing_required_field"}}` - // when the `model` key is absent or empty in the request body. - // This is a user-configuration error (provider string has no model - // component, e.g. `nvidia-nim:` with empty model), not a product - // regression. Demote from Sentry; the factory now validates this - // up-front so in practice this phrase should no longer appear. - "model field is required", - // TAURI-RUST-4XK — Ollama 403 when the requested model requires a - // paid Ollama subscription. Body carries the upgrade URL. User must - // switch to a free model or upgrade their Ollama account. - "requires a subscription", - // TAURI-RUST-2G / TAURI-RUST-2F — DeepSeek / compatible providers - // that use extended thinking reject tool-call turns when the - // `reasoning_content` block from a prior assistant turn is not - // threaded back. This is user-config state (model requires the - // caller to replay the thinking block; the frontend replay logic in - // `turn.rs` handles it for subsequent turns, so the first-turn 400 - // is expected capability-discovery, not a regression). - "in the thinking mode must be passed back", - // TAURI-RUST-35 / TAURI-RUST-4K7 / TAURI-RUST-4Z0 — Ollama models - // (e.g. gemma3, phi3, deepseek-r1) that do not support function - // calling return HTTP 400 with this phrase. The compatible provider - // retries without tools on 400, so the initial rejection is expected - // capability-discovery. Sentry noise suppressed here. - "does not support tools", - // TAURI-RUST-4K7-d — alternative phrasing used by some Ollama model - // versions for the same tool-unsupported condition. - "function calling is not supported", - // TAURI-RUST-4K7-e — litellm / OpenAI-compatible proxies reject the - // `tools` key in the request body when the backing model does not - // support tool use (e.g. local Ollama via LiteLLM gateway). - "unknown parameter: tools", - // TAURI-RUST-4K7-f — Ollama native API surface rejects the field - // outright when the model has no function-calling capability. - "unrecognized field `tools`", - // TAURI-RUST-4K7-g — another litellm / proxy variant of the same - // tool-unsupported condition. - "unsupported parameter: tools", // TAURI-RUST-4NM — nvidia-nim (and compatible providers) return // `{"error":{"message":"model field is required","code":"missing_required_field"}}` // when the request body contains an empty `"model":""` field. "model field is required", // TAURI-RUST-2G (~2684 events) / TAURI-RUST-2F (~950 events) — - // thinking-mode model rejects a follow-up turn that doesn't echo - // the prior assistant's `reasoning_content` field. + // thinking-mode model (DeepSeek-R1 / Moonshot K2-thinking on + // `provider=cloud` custom_openai) rejects a follow-up turn that + // doesn't echo the prior assistant's `reasoning_content` field. + // Body shape (backtick-quoted JSON literal in the upstream body): + // `{"error":{"message":"The `reasoning_content` in the thinking + // mode must be passed back to the API.",...}}`. The + // provider-contract gap is on our side, but until the thinking- + // mode round-tripping ships in the inference layer, every affected + // turn fires a fresh Sentry event — and the UI already surfaces + // the actionable error to the user. Anchor on the unique + // `thinking mode must be passed back` substring so the match + // doesn't depend on the upstream's backtick-quoting around + // `reasoning_content` (some provider versions ship without them). "thinking mode must be passed back", // TAURI-RUST-4XK (~649 events) — Ollama Cloud subscription gate. + // Body: `{"error":"this model requires a subscription, upgrade for + // access: https://ollama.com/upgrade (ref: )"}` on a 403 + // Forbidden from `compatible::OpenAiCompatibleProvider` with + // `name = "ollama"`. User-state: the model picked in Settings is + // a paid-tier Ollama Cloud model the user's account doesn't + // cover. The UI surfaces an actionable upgrade link in the + // remediation message itself. "requires a subscription, upgrade for access", // TAURI-RUST-35 family — user picked a model that doesn't // implement tool calling, agent harness sent a tool spec @@ -402,103 +372,6 @@ mod tests { } } - /// TAURI-RUST-35 family — model picked by the user doesn't implement - /// tool calling. The agent harness tries to send a tool spec and the - /// upstream (Ollama / cloud Ollama relay / hosted OpenAI-compatible) - /// rejects with `{"error":{"message":" does not support tools", - /// "type":"invalid_request_error",...}}`. Pure user-config — the user - /// has to pick a tool-capable model (or run a non-agent flow). No - /// remediation path through Sentry, and the long tail is large: each - /// distinct model id + provider prefix combo creates a new Sentry - /// fingerprint, so the same root cause is currently split across at - /// least 10 unresolved Sentry issues (458 events total as of - /// 2026-05-28): - /// - /// | shortId | events | provider prefix | - /// |---|---|---| - /// | TAURI-RUST-35 | 307 | cloud | - /// | TAURI-RUST-DF | 83 | cloud | - /// | TAURI-RUST-123 | 25 | cloud | - /// | TAURI-RUST-4K7 | 19 | ollama | - /// | TAURI-RUST-4FS | 10 | cloud | - /// | TAURI-RUST-4F6 | 5 | cloud | - /// | TAURI-RUST-2YA | 4 | cloud | - /// | TAURI-RUST-4KR | 3 | ollama | - /// | TAURI-RUST-4KH | 1 | cloud | - /// | TAURI-RUST-4KY | 1 | ollama | - /// - /// Anchored on the exact `"does not support tools"` substring (the - /// message body's stable token — the model id varies per user). The - /// `streaming API error` / `API error` wrappers and the - /// `cloud` / `ollama` / `custom_openai` provider prefixes all share - /// this body, so a single phrase covers every variant. - #[test] - fn detects_does_not_support_tools_family() { - for (sentry_id, body) in [ - // TAURI-RUST-35 — verbatim from latest issue 168 event - // (model=`gemma3:1b-it-qat`, provider=cloud). - ( - "35", - r#"cloud streaming API error (400 Bad Request): {"error":{"message":"registry.ollama.ai/library/gemma3:1b-it-qat does not support tools","type":"invalid_request_error","param":null,"code":null}}"#, - ), - // TAURI-RUST-4K7 — ollama prefix, different upstream wrapper. - ( - "4K7", - r#"ollama streaming API error (400 Bad Request): {"error":{"message":"some-local-model does not support tools","type":"invalid_request_error","param":null,"code":null}}"#, - ), - // Non-streaming sibling — `API error` (no `streaming` token) - // for hosted providers that aren't using the streaming endpoint. - ( - "non-streaming", - r#"cloud API error (400 Bad Request): {"error":{"message":"registry.ollama.ai/library/qwen2.5:0.5b does not support tools","type":"invalid_request_error"}}"#, - ), - // Bare body (no wrapper) — what `expected_error_kind` would - // see if the body got extracted from the envelope upstream. - ( - "bare", - r#"{"error":{"message":"phi3.5:mini does not support tools","type":"invalid_request_error"}}"#, - ), - // TAURI-RUST-4Z0 — verbatim from issue 5664 (model=`deepseek-r1:8b`, - // provider=ollama). The envelope carries `"type":"api_error"` - // rather than `"invalid_request_error"` — pin it so the matcher - // can never be narrowed to require a specific `type` token; the - // `"does not support tools"` body substring is the only anchor. - ( - "4Z0", - r#"ollama streaming API error (400 Bad Request): {"error":{"message":"registry.ollama.ai/library/deepseek-r1:8b does not support tools","type":"api_error","param":null,"code":null}}"#, - ), - ] { - assert!( - is_provider_config_rejection_message(body), - "TAURI-RUST-{sentry_id} body must classify as provider config-rejection: {body:?}" - ); - } - } - - /// Polarity guard for the does-not-support-tools arm. The phrase is - /// scoped enough that no real bug-class body should accidentally - /// match — but pin a few near-miss shapes so a future loosening of - /// the matcher can't silently re-classify them. - #[test] - fn does_not_classify_unrelated_tools_phrases_as_config_rejection() { - for body in [ - // Tool-call dispatch failure (real bug) — must reach Sentry. - "tool execution failed: shell returned exit 1", - // Generic "tools" mention without the does-not-support phrase. - "agent ran with 0 tools available", - // Reversed phrasing — provider says they DO support tools but - // the call shape is wrong. Still actionable for triage. - "supports tools but received malformed tool_calls array", - // Empty body. - "", - ] { - assert!( - !is_provider_config_rejection_message(body), - "{body:?} must NOT classify as a provider config-rejection" - ); - } - } - #[test] fn detection_is_case_insensitive() { assert!(is_provider_config_rejection_message( @@ -629,63 +502,4 @@ mod tests { } } - /// TAURI-RUST-4K7 — Ollama models that don't support tool calling - /// (e.g. `gemma3:1b-it-qat`, `huihui_ai/deepseek-r1-abliterated:8b`) - /// return HTTP 400 with one of several tool-rejection phrases. - /// The compatible provider retries without tools, so the 400 is expected - /// capability-discovery rather than a product bug. These phrases must be - /// classified as config-rejections so Sentry is not flooded on every turn. - #[test] - fn detects_ollama_tool_unsupported_bodies() { - for (sentry_id, body) in [ - ( - "4K7-a", - r#"{"error":"gemma3:1b-it-qat does not support tools"}"#, - ), - ( - "4K7-b", - r#"{"error":"huihui_ai/deepseek-r1-abliterated:8b does not support tools"}"#, - ), - ( - "4K7-c", - r#"ollama streaming API error (400 Bad Request): {"error":"phi3:mini does not support tools"}"#, - ), - ( - "4K7-d", - r#"{"error":"function calling is not supported by this model"}"#, - ), - ( - "4K7-e", - r#"{"error":{"message":"unknown parameter: tools","type":"invalid_request_error"}}"#, - ), - ( - "4K7-f", - r#"{"error":"unrecognized field `tools` in request body"}"#, - ), - ( - "4K7-g", - r#"{"error":{"message":"unsupported parameter: tools","type":"invalid_request_error"}}"#, - ), - ] { - assert!( - is_provider_config_rejection_message(body), - "TAURI-RUST-{sentry_id} body must classify as provider config-rejection (tool-unsupported): {body:?}" - ); - } - } - - #[test] - fn detects_ollama_tool_unsupported_bodies_case_insensitive() { - // Ollama error messages should match regardless of casing. - for body in [ - "Model 'gemma3:1b-it-qat' DOES NOT SUPPORT TOOLS", - "Function Calling Is Not Supported By This Model", - "Unknown Parameter: Tools", - ] { - assert!( - is_provider_config_rejection_message(body), - "{body:?} must classify as config-rejection regardless of case" - ); - } - } } diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs index 6de963de2a..ae07f21913 100644 --- a/src/openhuman/inference/provider/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -116,22 +116,6 @@ async fn list_configured_models_from_config( let status = response.status(); if !status.is_success() { - // A 404 from the /models endpoint means the provider does not support model - // listing — this is expected for many OpenAI-compatible providers (e.g. DeepSeek, - // Moonshot, Kimi, custom proxies). Return an empty model list so the caller can - // proceed normally instead of surfacing a spurious error / Sentry event. - // (Sentry issue TAURI-RUST-1Z — 819 events from this path alone.) - if status == reqwest::StatusCode::NOT_FOUND { - log::debug!( - "[providers][list_models] slug={} returned 404 — provider does not support /models listing; returning empty list", - entry.slug - ); - return Ok(crate::rpc::RpcOutcome::new( - serde_json::json!({ "models": serde_json::Value::Array(vec![]), "unsupported": true }), - vec!["provider does not support model listing (404)".to_string()], - )); - } - let body = response.text().await.unwrap_or_default(); let sanitized = sanitize_api_error(&body); let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); @@ -899,38 +883,16 @@ pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::E } else if is_context_window_exceeded { log_context_window_exceeded("api_error", provider, None, status); } else if should_report_provider_http_failure(status) { - // Defense-in-depth: some backends (e.g. OpenHuman) wrap an upstream - // provider 429 as an HTTP 500 with a rate-limit phrase in the body - // (`"429 rate limit exceeded"`, `"upstream rate limit exceeded"`). - // `should_report_provider_http_failure(500)` would otherwise let this - // through to Sentry — suppress it here before the report fires so the - // noise stays off Sentry (OPENHUMAN-TAURI-S: ~6 984 events). - // The `expected_error_kind` classifier in `report_error_or_expected` - // catches the same shape at re-report sites (agent / web_channel). - let lower_body = body.to_ascii_lowercase(); - let is_rate_limit_body = - crate::core::observability::is_upstream_rate_limit_message(&lower_body); - if is_rate_limit_body { - tracing::warn!( - domain = "llm_provider", - operation = "api_error", - provider = provider, - status = status_str.as_str(), - "[llm_provider] api_error: skipping Sentry report — rate-limit body in \ - non-429 response ({status})" - ); - } else { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "api_error", - &[ - ("provider", provider), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "api_error", + &[ + ("provider", provider), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); } anyhow::anyhow!(message) } @@ -1514,68 +1476,6 @@ mod tests { ); } - /// Spawn a minimal axum server that always returns 404 for the /models endpoint. - /// Used to verify that providers without model listing return an empty list, - /// not an error (Sentry issue TAURI-RUST-1Z). - async fn spawn_models_404_server() -> String { - let listener = tokio::net::TcpListener::bind("127.0.0.1:0") - .await - .expect("bind"); - let addr = listener.local_addr().expect("local_addr"); - let app = axum::Router::new().route( - "/models", - axum::routing::get(|| async { - (axum::http::StatusCode::NOT_FOUND, "Not Found").into_response() - }), - ); - tokio::spawn(async move { - axum::serve(listener, app).await.expect("serve"); - }); - format!("http://{addr}") - } - - #[tokio::test] - async fn models_404_returns_empty_list_not_error() { - // Providers that return 404 on /models (e.g. DeepSeek, Kimi, custom proxies) - // must yield an empty model list, not an Err. Returning an Err was firing a - // Sentry error for every `inference_list_models` call (TAURI-RUST-1Z, 819 events). - let tmp = tempfile::tempdir().expect("tempdir"); - let endpoint = spawn_models_404_server().await; - - let mut config = Config { - config_path: tmp.path().join("config.toml"), - workspace_dir: tmp.path().join("workspace"), - ..Config::default() - }; - config.secrets.encrypt = false; - config.cloud_providers.push(CloudProviderCreds { - id: "p_custom_test".to_string(), - slug: "custom-no-models".to_string(), - label: "Custom (no /models)".to_string(), - endpoint, - auth_style: AuthStyle::Bearer, - legacy_type: None, - default_model: None, - }); - - let outcome = list_configured_models_from_config("custom-no-models", &config) - .await - .expect("404 from /models must succeed with an empty list"); - - let models = outcome.value["models"] - .as_array() - .expect("response must have a `models` array"); - assert!( - models.is_empty(), - "expected empty model list for a 404 /models endpoint, got: {models:?}" - ); - assert_eq!( - outcome.value["unsupported"], - serde_json::Value::Bool(true), - "unsupported flag must be set to true for 404 providers" - ); - } - #[test] fn factory_backend() { assert!(create_backend_inference_provider( @@ -1680,62 +1580,6 @@ mod tests { } } - // Tests for the rate-limit body suppression guard added to `api_error`. - // Exercises `is_upstream_rate_limit_message` with the exact body shapes that - // produced OPENHUMAN-TAURI-S (~6 984 events from HTTP 500 wrapping a - // "429 rate limit exceeded" body) and OPENHUMAN-TAURI-6Y / -2E. - mod rate_limit_body_suppression { - use crate::core::observability::is_upstream_rate_limit_message; - - /// HTTP 500 with a `"429 rate limit exceeded"` body must be detected - /// as a rate-limit signal so the guard in `api_error` can skip the - /// Sentry report (OPENHUMAN-TAURI-S). - #[test] - fn http_500_with_429_body_phrase_is_rate_limited() { - let body = - r#"{"success":false,"error":"429 rate limit exceeded, please try again later"}"# - .to_ascii_lowercase(); - assert!( - is_upstream_rate_limit_message(&body), - "500-body with '429 rate limit exceeded' must be detected as rate-limited" - ); - } - - /// HTTP 500 with an `"upstream rate limit exceeded"` body - /// (OPENHUMAN-TAURI-6Y shape). - #[test] - fn http_500_with_upstream_rate_limit_body_is_rate_limited() { - let body = r#"{"success":false,"error":"Upstream rate limit exceeded for model 'summarization-v1'. Please retry shortly.","details":{"provider":"gmi"}}"# - .to_ascii_lowercase(); - assert!( - is_upstream_rate_limit_message(&body), - "500-body with 'upstream rate limit exceeded' must be detected" - ); - } - - /// OpenAI / Anthropic `"rate_limit_error"` type body. - #[test] - fn body_with_rate_limit_error_type_is_rate_limited() { - let body = r#"{"error":{"message":"Rate limit exceeded. Please retry after a brief wait.","type":"rate_limit_error"}}"# - .to_ascii_lowercase(); - assert!( - is_upstream_rate_limit_message(&body), - "body with 'rate_limit_error' type must be detected" - ); - } - - /// Unrelated 500 body must NOT be detected as rate-limited. - #[test] - fn http_500_unrelated_body_is_not_rate_limited() { - let body = r#"{"success":false,"error":"internal server error: database unavailable"}"# - .to_ascii_lowercase(); - assert!( - !is_upstream_rate_limit_message(&body), - "unrelated 500 body must not be detected as rate-limited" - ); - } - } - mod provider_access_policy_suppression { use super::*; diff --git a/src/openhuman/memory_store/factories.rs b/src/openhuman/memory_store/factories.rs index 5b745df97d..8e5f056199 100644 --- a/src/openhuman/memory_store/factories.rs +++ b/src/openhuman/memory_store/factories.rs @@ -51,13 +51,13 @@ fn report_ollama_health_gate_once(base_url: &str, model: &str) -> bool { let sentry_message = format!( "ollama embeddings opted-in but daemon unreachable at {base_url}; falling back to cloud embeddings for this session" ); - // Route through `report_error_or_expected` so the `expected_error_kind` - // classifier runs. The wire shape `"ollama embeddings opted-in but daemon - // unreachable at …"` matches `is_ollama_user_config_rejection` → routes to - // `ExpectedErrorKind::ProviderUserState` → demoted to a warn breadcrumb, - // NOT a Sentry error event. Using `report_error_message` directly would - // bypass the classifier and fire `sentry::capture_message(…, Level::Error)` - // unconditionally — the root cause of TAURI-RUST-B (472 events). + // Route through `report_error_or_expected` so the GX arm of + // `is_ollama_user_config_rejection` in `expected_error_kind` demotes + // the message to an info breadcrumb (user-state: ollama daemon not + // running). Direct `report_error_message` here bypassed the classifier + // and produced TAURI-RUST-B (~409 events). The `&str` input avoids + // the `format!("{:#}")` round-trip that `report_error` would do on an + // anyhow chain — the wire shape stays bit-identical. crate::core::observability::report_error_or_expected( sentry_message.as_str(), "memory", @@ -766,25 +766,4 @@ mod tests { "different URL also suppressed — gate is process-scoped, not per-URL" ); } - - /// TAURI-RUST-B (issue #2921): the exact wire shape produced by - /// `report_ollama_health_gate_once` must classify as - /// `ExpectedErrorKind::ProviderUserState` so `report_error_or_expected` - /// routes it to a warn breadcrumb rather than a Sentry error event. - /// - /// Previously the gate called `report_error_message` directly, bypassing - /// the classifier and firing `sentry::capture_message(…, Level::Error)` - /// unconditionally for every process restart where Ollama was opted-in but - /// not running (472 events at time of fix). The fix routes through - /// `report_error_or_expected` which checks `expected_error_kind` first. - #[test] - fn tauri_rust_b_wire_shape_classifies_as_expected() { - // Canonical format produced by `report_ollama_health_gate_once`. - let msg = "ollama embeddings opted-in but daemon unreachable at http://localhost:11434; falling back to cloud embeddings for this session"; - assert_eq!( - crate::core::observability::expected_error_kind(msg), - Some(crate::core::observability::ExpectedErrorKind::ProviderUserState), - "TAURI-RUST-B — daemon-unreachable health-gate message must demote to ProviderUserState, not fire as Sentry error" - ); - } } diff --git a/src/openhuman/subconscious/store.rs b/src/openhuman/subconscious/store.rs index 0e2a850ae1..23b3df9c5b 100644 --- a/src/openhuman/subconscious/store.rs +++ b/src/openhuman/subconscious/store.rs @@ -2,17 +2,6 @@ //! //! Follows the cron module's `with_connection` pattern: opens the database, //! runs DDL on every connection, and provides pure functions. -//! -//! ## Init-failure noise suppression (TAURI-RUST-A) -//! -//! `with_connection` runs the schema DDL on every call. Transient -//! `SQLITE_BUSY` / `SQLITE_LOCKED` errors (e.g. concurrent in-process RPC -//! calls, antivirus hold, network-drive WAL rejection) are handled by a -//! per-connection busy timeout (5 s) plus an application-level retry loop -//! (3 retries, 100 / 300 / 900 ms backoff). Only `DatabaseBusy` / -//! `DatabaseLocked` errors are retried — schema or corruption errors fail -//! through immediately so Sentry captures a real root cause rather than a -//! transient noise event. use anyhow::{Context, Result}; use rusqlite::{Connection, OptionalExtension}; @@ -72,8 +61,9 @@ pub fn with_connection( let db_path = workspace_dir.join("subconscious").join("subconscious.db"); if let Some(parent) = db_path.parent() { std::fs::create_dir_all(parent) - .with_context(|| format!("create subconscious dir: {}", parent.display()))?; + .with_context(|| format!("failed to create subconscious dir: {}", parent.display()))?; } + let conn = open_and_initialize_with_retry(&db_path)?; f(&conn) } From 833bd2e6433d36c90f1e92c5bd0457cc8cf123d5 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Sat, 30 May 2026 09:46:22 -0700 Subject: [PATCH 2/7] style: cargo fmt after merge --- src/openhuman/inference/ops_tests.rs | 1 - src/openhuman/inference/provider/config_rejection.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/openhuman/inference/ops_tests.rs b/src/openhuman/inference/ops_tests.rs index 27083c8e4e..f3b4235b8b 100644 --- a/src/openhuman/inference/ops_tests.rs +++ b/src/openhuman/inference/ops_tests.rs @@ -316,4 +316,3 @@ fn is_unknown_provider_user_config_rejects_other_list_models_failures() { ); } } - diff --git a/src/openhuman/inference/provider/config_rejection.rs b/src/openhuman/inference/provider/config_rejection.rs index 6d6f12abfd..9f2aa689e1 100644 --- a/src/openhuman/inference/provider/config_rejection.rs +++ b/src/openhuman/inference/provider/config_rejection.rs @@ -597,5 +597,4 @@ mod tests { ); } } - } From 6147dc01108c7ae49489f8dac64b56c07c3e5313 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Sun, 31 May 2026 19:11:24 +0200 Subject: [PATCH 3/7] fix(inference): add workload/provider correlation fields to test_provider_model error log (addresses @coderabbitai on src/openhuman/inference/ops.rs:182) The Err branch logged only the error, while the :start event logs workload and provider. Add the same correlation fields to the :error event so failure logs stay queryable with the same context. Co-Authored-By: Claude --- src/openhuman/inference/ops.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/openhuman/inference/ops.rs b/src/openhuman/inference/ops.rs index bbdd837560..14084b43bb 100644 --- a/src/openhuman/inference/ops.rs +++ b/src/openhuman/inference/ops.rs @@ -179,7 +179,12 @@ pub async fn inference_test_provider_model( output_len = outcome.value.reply.len(), "{LOG_PREFIX} test_provider_model:ok" ), - Err(err) => error!(error = %err, "{LOG_PREFIX} test_provider_model:error"), + Err(err) => error!( + workload, + provider, + error = %err, + "{LOG_PREFIX} test_provider_model:error" + ), } result } From a061662572318323297c284e260e2c9bec6cdc6e Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Sun, 31 May 2026 19:49:54 +0200 Subject: [PATCH 4/7] test(inference): align provider-config-rejection coverage test with reverted classifier The merge brought in main's `inference_provider_factory_and_classifiers_cover_user_state_edges` coverage test, which asserted `"unknown parameter: tools"` classifies as a provider-config rejection. PR #2959 reverts that suppression (the pattern was removed from `config_rejection.rs`), so the shape now fires to Sentry again. Move the string out of the positive-assertion loop into a negative assertion documenting the reverted behavior. Co-Authored-By: Claude --- tests/inference_agent_raw_coverage_e2e.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/inference_agent_raw_coverage_e2e.rs b/tests/inference_agent_raw_coverage_e2e.rs index 266439bbc7..b14fa88eca 100644 --- a/tests/inference_agent_raw_coverage_e2e.rs +++ b/tests/inference_agent_raw_coverage_e2e.rs @@ -1970,7 +1970,6 @@ async fn inference_provider_factory_and_classifiers_cover_user_state_edges() { "The supported API model names are native-a or native-b", "ModelNotAllowed", "invalid_authentication_error", - "unknown parameter: tools", "requires a subscription, upgrade for access", "No active credentials for provider: openai", ] { @@ -1982,6 +1981,12 @@ async fn inference_provider_factory_and_classifiers_cover_user_state_edges() { assert!(is_openai_compatible_unknown_model_message( "Model `gpt-unknown` is not available. Use GET /openai/v1/models to list available models." )); + // PR #2959 reverted the "unknown parameter: tools" suppression: this shape + // is no longer demoted to user-config state, so it fires to Sentry again + // (root cause to be fixed separately). + assert!(!is_provider_config_rejection_message( + "unknown parameter: tools" + )); assert!(!is_provider_config_rejection_message( "internal server error while streaming tokens" )); From 981913db066285bad1da1208703b9047eb9d12a5 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Sun, 31 May 2026 21:03:50 +0200 Subject: [PATCH 5/7] fix(merge): preserve #2748 Codex OAuth model listing; keep only the intended #2939/#2899 reverts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR branch predated #2748 (Codex OAuth provider support), so merging current main auto-resolved provider/ops.rs toward the stale PR side — silently dropping #2748's OAuth routing, the `data`/`models` envelope fallback, and `model_info_from_catalog_item`. #2748 is NOT in the revert list, so this was an unintended regression. Restore main's provider/ops.rs (and its extracted ops_tests.rs) and re-apply ONLY the author's two intended Sentry-suppression reverts: - #2939: list_models 404 no longer returns a synthetic `{unsupported: true}` success — it surfaces as an error so the failure fires to Sentry. - #2899: api_error no longer suppresses rate-limit-body Sentry reports on non-429 responses. Their two suppression-specific test blocks are removed to match. Co-Authored-By: Claude --- src/openhuman/inference/provider/ops.rs | 1352 ++--------------- src/openhuman/inference/provider/ops_tests.rs | 1218 +++++++++++++++ 2 files changed, 1352 insertions(+), 1218 deletions(-) create mode 100644 src/openhuman/inference/provider/ops_tests.rs diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs index ae07f21913..aa11eeef13 100644 --- a/src/openhuman/inference/provider/ops.rs +++ b/src/openhuman/inference/provider/ops.rs @@ -3,6 +3,12 @@ use super::*; use serde::Serialize; use std::path::PathBuf; +use super::openai_codex::{ + openai_codex_client_version, openai_codex_user_agent, resolve_openai_codex_routing, + OpenAiCodexRouting, OPENAI_CODEX_ACCOUNT_HEADER, OPENAI_CODEX_MODEL_HINTS, + OPENAI_CODEX_ORIGINATOR, OPENAI_CODEX_ORIGINATOR_HEADER, +}; + const MAX_API_ERROR_CHARS: usize = 200; /// Fixed id for the single inference backend (OpenHuman API). @@ -57,20 +63,33 @@ async fn list_configured_models_from_config( .or_else(|| synthesize_local_runtime_entry(&provider_id, config)) .ok_or_else(|| format!("no cloud provider with id or slug '{}' found", provider_id))?; - let base = entry.endpoint.trim_end_matches('/'); - let models_url = format!("{}/models", base); - - log::debug!( - "[providers][list_models] fetching url={} slug={}", - models_url, - entry.slug - ); - let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(&entry.slug, config) .unwrap_or_default(); let api_key = api_key.trim().to_string(); + let routing = resolve_openai_codex_routing(config, &entry.slug, &entry.endpoint, &api_key) + .unwrap_or_else(|err| { + log::warn!( + "[providers][list_models] openai codex routing unavailable; continuing with configured endpoint: {err}" + ); + OpenAiCodexRouting::standard(&entry.endpoint) + }); + + let mut models_url = format!("{}/models", routing.endpoint); + if routing.using_oauth { + models_url = + append_query_param(&models_url, "client_version", openai_codex_client_version()); + } + + log::debug!( + "[providers][list_models] fetching url={} slug={} codex_oauth={} account_id_header={}", + models_url, + entry.slug, + routing.using_oauth, + routing.account_id.is_some() + ); + let client = crate::openhuman::config::build_runtime_proxy_client_with_timeouts( "providers.list_models", 30, @@ -79,15 +98,24 @@ async fn list_configured_models_from_config( use crate::openhuman::config::schema::cloud_providers::AuthStyle; if is_openrouter_provider(&entry) { - validate_openrouter_api_key(&client, base, &api_key).await?; + validate_openrouter_api_key(&client, &routing.endpoint, &api_key).await?; } let mut request = client.get(&models_url); + if routing.using_oauth { + request = request + .header(reqwest::header::USER_AGENT, openai_codex_user_agent()) + .header(OPENAI_CODEX_ORIGINATOR_HEADER, OPENAI_CODEX_ORIGINATOR); + } request = match entry.auth_style { AuthStyle::Bearer => { if !api_key.is_empty() { - request.header("Authorization", format!("Bearer {}", api_key)) + let mut r = request.header("Authorization", format!("Bearer {}", api_key)); + if let Some(account_id) = routing.account_id.as_deref() { + r = r.header(OPENAI_CODEX_ACCOUNT_HEADER, account_id); + } + r } else { request } @@ -172,8 +200,12 @@ async fn list_configured_models_from_config( // Parse the OpenAI-compatible `/models` envelope into typed model // entries. See `parse_models_response` for the distinct error shapes // returned for "missing field" vs "field present but wrong type" - // (TAURI-RUST-4Y). - let models = parse_models_response(&body)?; + // (TAURI-RUST-4Y). The ChatGPT Codex backend uses a sibling `models` + // array keyed by `slug`, so that shape is accepted here too. + let mut models = parse_models_response(&body)?; + if routing.using_oauth { + merge_openai_codex_model_hints(&mut models); + } log::info!( "[providers][list_models] slug={} fetched {} models", @@ -187,29 +219,25 @@ async fn list_configured_models_from_config( )) } -/// Parse the OpenAI-compatible `/models` response envelope into typed -/// [`ModelInfo`] entries. +/// Parse the OpenAI-compatible `/models` response envelope, or the ChatGPT +/// Codex backend's sibling `models` envelope, into typed [`ModelInfo`] entries. /// /// Returns distinct errors for the three failure modes the wild has /// produced in `inference_list_models` Sentry events: /// -/// 1. **Missing `data` field** — endpoint isn't `/models`-compatible +/// 1. **Missing `data`/`models` field** — endpoint isn't `/models`-compatible /// (user typo'd the base URL, pointed at a vector-DB host, etc.). -/// Original TAURI-RUST-4Y wire shape, preserved verbatim so the -/// Sentry fingerprint stays stable for that population. -/// 2. **`data` field present but wrong type** — provider returned -/// `{"object":"error","data":{…}}` or `{"data":null}` or similar -/// non-array. The pre-fix code conflated this with case (1), emitting -/// a misleading `"missing 'data' array (got keys: data, object)"` -/// message; the new shape names the actual JSON type so triage knows -/// what the provider sent. +/// 2. **`data`/`models` field present but wrong type** — provider returned +/// `{"object":"error","data":{…}}`, `{"data":null}`, or similar +/// non-array. The error names the actual JSON type so triage knows what +/// the provider sent. /// 3. **Non-object top-level body** — provider returned a bare array, /// string, etc. Caught explicitly so the parser doesn't silently /// drop into the missing-data arm with a `` keys list. /// -/// Per-entry parsing ignores entries that don't have a string `id` (lax -/// on purpose — many OpenAI-compatible servers include malformed rows -/// for capabilities they don't fully implement). +/// Per-entry parsing ignores entries that don't have a usable string id/slug +/// (lax on purpose — many OpenAI-compatible servers include malformed rows for +/// capabilities they don't fully implement). fn parse_models_response(body: &serde_json::Value) -> Result, String> { let obj = body.as_object().ok_or_else(|| { format!( @@ -218,10 +246,14 @@ fn parse_models_response(body: &serde_json::Value) -> Result, Str ) })?; - let data_value = obj.get("data").ok_or_else(|| { + let (field_name, data_value) = obj + .get("data") + .map(|value| ("data", value)) + .or_else(|| obj.get("models").map(|value| ("models", value))) + .ok_or_else(|| { let keys = obj.keys().cloned().collect::>().join(", "); format!( - "provider response missing `data` field — endpoint is not OpenAI-compatible (got keys: {})", + "provider response missing `data` or `models` field — endpoint is not OpenAI-compatible (got keys: {})", keys ) })?; @@ -236,7 +268,8 @@ fn parse_models_response(body: &serde_json::Value) -> Result, Str .map(|v| v.to_string()) .unwrap_or_else(|| "".to_string()); format!( - "provider response has `data` field but it is {}, expected array — endpoint may be returning an error envelope (\"object\" = {})", + "provider response has `{}` field but it is {}, expected array — endpoint may be returning an error envelope (\"object\" = {})", + field_name, json_value_kind(data_value), object_field, ) @@ -244,22 +277,7 @@ fn parse_models_response(body: &serde_json::Value) -> Result, Str Ok(data .iter() - .filter_map(|item| { - let id = item.get("id")?.as_str()?.to_string(); - let owned_by = item - .get("owned_by") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let context_window = item - .get("context_length") - .or_else(|| item.get("context_window")) - .and_then(|v| v.as_u64()); - Some(ModelInfo { - id, - owned_by, - context_window, - }) - }) + .filter_map(model_info_from_catalog_item) .collect()) } @@ -332,6 +350,23 @@ fn synthesize_local_runtime_entry( }) } +fn merge_openai_codex_model_hints(models: &mut Vec) { + let mut seen = models + .iter() + .map(|model| model.id.to_ascii_lowercase()) + .collect::>(); + + for id in OPENAI_CODEX_MODEL_HINTS { + if seen.insert(id.to_ascii_lowercase()) { + models.push(ModelInfo { + id: (*id).to_string(), + owned_by: Some("openai-codex".to_string()), + context_window: None, + }); + } + } +} + fn is_openrouter_provider( entry: &crate::openhuman::config::schema::cloud_providers::CloudProviderCreds, ) -> bool { @@ -345,6 +380,57 @@ fn is_openrouter_provider( .is_some_and(|host| host == "openrouter.ai" || host.ends_with(".openrouter.ai")) } +fn append_query_param(url: &str, key: &str, value: &str) -> String { + if let Ok(mut parsed) = reqwest::Url::parse(url) { + parsed.query_pairs_mut().append_pair(key, value); + return parsed.to_string(); + } + + let separator = if url.contains('?') { '&' } else { '?' }; + format!("{url}{separator}{key}={value}") +} + +fn model_items_from_body(body: &serde_json::Value) -> Option> { + body.get("data") + .and_then(|d| d.as_array()) + .or_else(|| body.get("models").and_then(|d| d.as_array())) + .cloned() +} + +fn model_info_from_catalog_item(item: &serde_json::Value) -> Option { + if let Some(id) = item.as_str().map(str::trim).filter(|id| !id.is_empty()) { + return Some(ModelInfo { + id: id.to_string(), + owned_by: None, + context_window: None, + }); + } + + let id = item + .get("id") + .or_else(|| item.get("slug")) + .or_else(|| item.get("name")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|id| !id.is_empty())? + .to_string(); + let owned_by = item + .get("owned_by") + .or_else(|| item.get("owned_by_organization")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let context_window = item + .get("context_length") + .or_else(|| item.get("context_window")) + .or_else(|| item.get("max_context_window")) + .and_then(|v| v.as_u64()); + Some(ModelInfo { + id, + owned_by, + context_window, + }) +} + async fn validate_openrouter_api_key( client: &reqwest::Client, base: &str, @@ -1200,1175 +1286,5 @@ pub fn canonical_china_provider_name(_name: &str) -> Option<&'static str> { } #[cfg(test)] -mod tests { - use super::*; - use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; - use crate::openhuman::config::Config; - use crate::openhuman::credentials::AuthService; - use axum::{ - extract::State, - http::{HeaderMap, StatusCode}, - response::{IntoResponse, Response}, - routing::get, - Json, Router, - }; - use std::collections::HashMap; - use std::sync::{ - atomic::{AtomicUsize, Ordering as AtomicOrdering}, - Arc, Mutex, - }; - use tempfile::TempDir; - - #[derive(Clone)] - struct ModelProbeState { - key_status: StatusCode, - key_calls: Arc, - model_calls: Arc, - key_authorization: Arc>>>, - model_authorization: Arc>>>, - } - - async fn openrouter_key_handler( - State(state): State, - headers: HeaderMap, - ) -> Response { - state.key_calls.fetch_add(1, AtomicOrdering::SeqCst); - state - .key_authorization - .lock() - .unwrap_or_else(|e| e.into_inner()) - .push(authorization_header(&headers)); - if state.key_status.is_success() { - Json(serde_json::json!({ - "data": { - "label": "test-key", - "usage": 0 - } - })) - .into_response() - } else { - ( - state.key_status, - Json(serde_json::json!({ - "error": { - "message": "No auth credentials found" - } - })), - ) - .into_response() - } - } - - async fn models_handler(State(state): State, headers: HeaderMap) -> Response { - state.model_calls.fetch_add(1, AtomicOrdering::SeqCst); - state - .model_authorization - .lock() - .unwrap_or_else(|e| e.into_inner()) - .push(authorization_header(&headers)); - Json(serde_json::json!({ - "data": [{ - "id": "openrouter/test-model", - "owned_by": "openrouter", - "context_length": 128000 - }] - })) - .into_response() - } - - fn authorization_header(headers: &HeaderMap) -> Option { - headers - .get("authorization") - .and_then(|value| value.to_str().ok()) - .map(|value| value.to_string()) - } - - async fn spawn_openrouter_probe_server(key_status: StatusCode) -> (String, ModelProbeState) { - let state = ModelProbeState { - key_status, - key_calls: Arc::new(AtomicUsize::new(0)), - model_calls: Arc::new(AtomicUsize::new(0)), - key_authorization: Arc::new(Mutex::new(Vec::new())), - model_authorization: Arc::new(Mutex::new(Vec::new())), - }; - let listener = tokio::net::TcpListener::bind("127.0.0.1:0") - .await - .expect("bind"); - let addr = listener.local_addr().expect("local_addr"); - let app = Router::new() - .route("/key", get(openrouter_key_handler)) - .route("/models", get(models_handler)) - .with_state(state.clone()); - tokio::spawn(async move { - axum::serve(listener, app).await.expect("serve"); - }); - (format!("http://{addr}"), state) - } - - async fn configure_openrouter_workspace( - tmp: &TempDir, - endpoint: String, - token: &str, - ) -> Config { - let mut config = Config { - config_path: tmp.path().join("config.toml"), - workspace_dir: tmp.path().join("workspace"), - ..Config::default() - }; - config.secrets.encrypt = false; - config.cloud_providers.push(CloudProviderCreds { - id: "p_openrouter_test".to_string(), - slug: "openrouter".to_string(), - label: "OpenRouter".to_string(), - endpoint, - auth_style: AuthStyle::Bearer, - legacy_type: None, - default_model: None, - }); - config.save().await.expect("save config"); - - let auth = AuthService::from_config(&config); - auth.store_provider_token( - &crate::openhuman::inference::provider::factory::auth_key_for_slug("openrouter"), - "default", - token, - HashMap::new(), - true, - ) - .expect("store provider key"); - config - } - - #[test] - fn list_configured_models_accepts_slug() { - // list_configured_models should find a provider by slug when the caller - // passes a slug instead of the opaque random id. This lets the frontend - // call the RPC before the provider config has been persisted (where only - // the slug is stable). - use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; - use crate::openhuman::config::Config; - - let mut config = Config::default(); - config.cloud_providers.push(CloudProviderCreds { - id: "p_openai_xyz99".to_string(), - slug: "openai".to_string(), - label: "OpenAI".to_string(), - endpoint: "https://api.openai.com/v1".to_string(), - auth_style: AuthStyle::Bearer, - legacy_type: None, - default_model: None, - }); - - // The find predicate must match on slug. - let found_by_slug = config - .cloud_providers - .iter() - .find(|e| e.id == "openai" || e.slug == "openai"); - assert!( - found_by_slug.is_some(), - "slug lookup must find the provider" - ); - assert_eq!(found_by_slug.unwrap().id, "p_openai_xyz99"); - - // The find predicate must still match on id. - let found_by_id = config - .cloud_providers - .iter() - .find(|e| e.id == "p_openai_xyz99" || e.slug == "p_openai_xyz99"); - assert!(found_by_id.is_some(), "id lookup must still work"); - } - - #[test] - fn openrouter_detection_matches_builtin_slug_or_host() { - let provider = |slug: &str, endpoint: &str| CloudProviderCreds { - id: format!("p_{slug}"), - slug: slug.to_string(), - label: slug.to_string(), - endpoint: endpoint.to_string(), - auth_style: AuthStyle::Bearer, - legacy_type: None, - default_model: None, - }; - - assert!(is_openrouter_provider(&provider( - "openrouter", - "http://127.0.0.1:1234" - ))); - assert!(is_openrouter_provider(&provider( - "custom-router", - "https://openrouter.ai/api/v1" - ))); - assert!(is_openrouter_provider(&provider( - "custom-router", - "https://oauth.openrouter.ai/api/v1" - ))); - assert!(!is_openrouter_provider(&provider( - "custom-openai", - "https://api.openai.com/v1" - ))); - } - - #[tokio::test] - async fn openrouter_invalid_key_fails_before_models_catalog_probe() { - let tmp = tempfile::tempdir().expect("tempdir"); - let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::UNAUTHORIZED).await; - let config = configure_openrouter_workspace(&tmp, endpoint, "bad-openrouter-key").await; - - let err = list_configured_models_from_config("openrouter", &config) - .await - .expect_err("invalid OpenRouter key must fail"); - - assert!( - err.contains("OpenRouter key validation returned 401"), - "unexpected error: {err}" - ); - assert_eq!(state.key_calls.load(AtomicOrdering::SeqCst), 1); - assert_eq!( - state.model_calls.load(AtomicOrdering::SeqCst), - 0, - "invalid OpenRouter credentials must not fall through to /models" - ); - } - - #[tokio::test] - async fn openrouter_valid_key_allows_models_catalog_probe() { - let tmp = tempfile::tempdir().expect("tempdir"); - let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::OK).await; - let config = configure_openrouter_workspace(&tmp, endpoint, "valid-openrouter-key").await; - - let outcome = list_configured_models_from_config("openrouter", &config) - .await - .expect("valid OpenRouter key should list models"); - - assert_eq!(state.key_calls.load(AtomicOrdering::SeqCst), 1); - assert_eq!(state.model_calls.load(AtomicOrdering::SeqCst), 1); - assert_eq!(outcome.value["models"][0]["id"], "openrouter/test-model"); - } - - #[tokio::test] - async fn openrouter_key_is_trimmed_for_validation_and_catalog_probe() { - let tmp = tempfile::tempdir().expect("tempdir"); - let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::OK).await; - let config = - configure_openrouter_workspace(&tmp, endpoint, " valid-openrouter-key\r\n").await; - - list_configured_models_from_config("openrouter", &config) - .await - .expect("trimmed OpenRouter key should list models"); - - let key_authorization = state - .key_authorization - .lock() - .unwrap_or_else(|e| e.into_inner()) - .clone(); - let model_authorization = state - .model_authorization - .lock() - .unwrap_or_else(|e| e.into_inner()) - .clone(); - assert_eq!( - key_authorization, - vec![Some("Bearer valid-openrouter-key".to_string())] - ); - assert_eq!( - model_authorization, - vec![Some("Bearer valid-openrouter-key".to_string())] - ); - } - - #[test] - fn factory_backend() { - assert!(create_backend_inference_provider( - None, - None, - None, - &ProviderRuntimeOptions::default() - ) - .is_ok()); - } - - #[test] - fn skips_sentry_report_for_transient_upstream_statuses() { - // Transient statuses — 429 rate-limit, 408 client timeout, and 502/503/504 - // gateway-layer failures — are retried by reliable.rs. The aggregate - // "all providers exhausted" event still fires for genuine outages. - // Reporting each attempt individually floods Sentry (OPENHUMAN-TAURI-2E - // ~1393 events, 84 ~1050 events, T ~871 events). - for transient in [ - reqwest::StatusCode::TOO_MANY_REQUESTS, - reqwest::StatusCode::REQUEST_TIMEOUT, - reqwest::StatusCode::BAD_GATEWAY, - reqwest::StatusCode::SERVICE_UNAVAILABLE, - reqwest::StatusCode::GATEWAY_TIMEOUT, - ] { - assert!( - !should_report_provider_http_failure(transient), - "transient status {transient} must not trigger per-attempt Sentry report" - ); - } - // Auth + permanent server faults remain reportable — those are - // misconfiguration or genuine bugs, not transient capacity issues. - for reportable in [ - reqwest::StatusCode::UNAUTHORIZED, - reqwest::StatusCode::FORBIDDEN, - reqwest::StatusCode::BAD_REQUEST, - reqwest::StatusCode::NOT_FOUND, - reqwest::StatusCode::INTERNAL_SERVER_ERROR, - ] { - assert!( - should_report_provider_http_failure(reportable), - "status {reportable} must still report to Sentry" - ); - } - } - - // Confirm the budget-exhausted suppression predicate is scoped correctly. - // These tests exercise the real production function, not a duplicate. - mod budget_exhausted_suppression { - use super::*; - - const BUDGET_BODY: &str = "Insufficient budget"; - const UNRELATED_BODY: &str = "Invalid request: model not found"; - - #[test] - fn budget_exhausted_400_is_suppressed() { - assert!(is_budget_exhausted_http_400( - reqwest::StatusCode::BAD_REQUEST, - BUDGET_BODY, - )); - } - - #[test] - fn budget_exhausted_400_is_case_insensitive() { - assert!(is_budget_exhausted_http_400( - reqwest::StatusCode::BAD_REQUEST, - "budget exceeded — ADD credits to continue", - )); - } - - #[test] - fn budget_exhausted_500_is_not_suppressed() { - // A 500 is a server bug, not expected user-state — keep reporting. - assert!(!is_budget_exhausted_http_400( - reqwest::StatusCode::INTERNAL_SERVER_ERROR, - BUDGET_BODY, - )); - } - - #[test] - fn budget_exhausted_400_unrelated_body_is_not_suppressed() { - assert!(!is_budget_exhausted_http_400( - reqwest::StatusCode::BAD_REQUEST, - UNRELATED_BODY, - )); - } - - #[test] - fn budget_exhausted_402_is_not_suppressed() { - assert!(!is_budget_exhausted_http_400( - reqwest::StatusCode::PAYMENT_REQUIRED, - BUDGET_BODY, - )); - } - - #[test] - fn budget_exhausted_empty_body_is_not_suppressed() { - assert!(!is_budget_exhausted_http_400( - reqwest::StatusCode::BAD_REQUEST, - "", - )); - } - } - - mod provider_access_policy_suppression { - use super::*; - - const ACCESS_TERMINATED_BODY: &str = - "{\"error\":{\"message\":\"Kimi For Coding is currently only available for Coding Agents.\",\"type\":\"access_terminated_error\"}}"; - - #[test] - fn access_terminated_403_is_suppressed() { - assert!(is_provider_access_policy_denied_http_403( - reqwest::StatusCode::FORBIDDEN, - ACCESS_TERMINATED_BODY, - )); - } - - #[test] - fn access_terminated_non_403_is_not_suppressed() { - assert!(!is_provider_access_policy_denied_http_403( - reqwest::StatusCode::BAD_REQUEST, - ACCESS_TERMINATED_BODY, - )); - } - - #[test] - fn unrelated_403_is_not_suppressed() { - assert!(!is_provider_access_policy_denied_http_403( - reqwest::StatusCode::FORBIDDEN, - "{\"error\":{\"message\":\"forbidden\"}}", - )); - } - } - - // Exercises the real `is_provider_config_rejection_http` decision used - // by `api_error`, including the inverted provider-aware polarity. - mod provider_config_rejection_suppression { - use super::*; - - // The exact #2079 Sentry body shape. - const TIER_LEAK_BODY: &str = - "The supported API model names are deepseek-v4-pro or deepseek-v4-flash, \ - but you passed reasoning-v1."; - // #2076 Moonshot Kimi K2 temperature constraint. - const TEMP_BODY: &str = "invalid temperature: only 1 is allowed for this model"; - - #[test] - fn custom_provider_4xx_config_rejection_is_suppressed() { - assert!(is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - "custom_openai", - TIER_LEAK_BODY, - )); - assert!(is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - "custom_openai", - TEMP_BODY, - )); - // 404 "model does not exist" is the same user-config class. - assert!(is_provider_config_rejection_http( - reqwest::StatusCode::NOT_FOUND, - "custom_openai", - "The model `gpt-5.5` does not exist or you do not have access to it.", - )); - } - - #[test] - fn openhuman_backend_same_body_is_not_suppressed() { - // Inverted polarity: for tier-leak / temperature / litellm / - // OpenRouter-style phrases, the OpenHuman backend never - // emits them, so the same body from our OWN backend would - // mean we sent it a bad request — a real regression that - // must still reach Sentry. (Mirror of the 401/403 backend - // rule.) - assert!(!is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - openhuman_backend::PROVIDER_LABEL, - TIER_LEAK_BODY, - )); - assert!(!is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - openhuman_backend::PROVIDER_LABEL, - TEMP_BODY, - )); - } - - #[test] - fn openhuman_backend_openai_compatible_unknown_model_is_suppressed() { - // TAURI-RUST-2Z1 — the OpenHuman backend DOES emit the - // OpenAI-compatible "Model 'X' is not available. Use GET - // /openai/v1/models …" wire body for user-configured unknown - // model ids (here `MiniMax-M2.7-highspeed` and two - // `custom:`-prefixed fallback variants from the user's own - // `model_fallbacks` config). That's user-state, not a - // regression — drop the polarity guard for this specific - // shape so the per-attempt event stops reaching Sentry. - // (The aggregate sibling TAURI-RUST-2Z2 is already covered by - // `expected_error_kind` via the broader message-only - // classifier.) - for body in [ - r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Model 'MiniMax-M2.7-highspeed' is not available. Use GET /openai/v1/models to list available models."}"#, - r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Model 'custom:MiniMax-M2.7' is not available. Use GET /openai/v1/models to list available models."}"#, - ] { - assert!( - is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - openhuman_backend::PROVIDER_LABEL, - body, - ), - "TAURI-RUST-2Z1 body must be suppressed for openhuman backend: {body:?}" - ); - } - } - - #[test] - fn server_error_is_not_suppressed() { - // A 5xx is a server bug, not user-config — keep reporting. - assert!(!is_provider_config_rejection_http( - reqwest::StatusCode::INTERNAL_SERVER_ERROR, - "custom_openai", - TIER_LEAK_BODY, - )); - } - - #[test] - fn transient_429_is_not_suppressed_here() { - // 429 is transient; handled by should_report_provider_http_failure, - // not this classifier (must not be swallowed as user-config). - assert!(!is_provider_config_rejection_http( - reqwest::StatusCode::TOO_MANY_REQUESTS, - "custom_openai", - TIER_LEAK_BODY, - )); - } - - #[test] - fn unrelated_4xx_body_is_not_suppressed() { - assert!(!is_provider_config_rejection_http( - reqwest::StatusCode::BAD_REQUEST, - "custom_openai", - "Bad request: missing required field 'messages'", - )); - } - - #[test] - fn log_helper_runs_without_panicking() { - // Covers the demotion log path taken by `api_error` when a - // custom provider rejects the user's model/param config. No - // tracing subscriber in unit tests, so this is a pure smoke. - log_provider_config_rejection( - "api_error", - "custom_openai", - Some("reasoning-v1"), - reqwest::StatusCode::BAD_REQUEST, - ); - } - } - - mod context_window_exceeded_suppression { - use super::*; - - #[test] - fn classifies_tauri_rust_501_custom_provider_500_body() { - // TAURI-RUST-501: the custom-provider 500 wire body. The - // matcher is status-agnostic, so the 500 mis-report is caught - // (the provider api_error cascade routes it to - // `log_context_window_exceeded` instead of `report_error`). - assert!(is_context_window_exceeded_message( - "{\"error\":{\"code\":500,\"message\":\"Context size has been exceeded.\",\"type\":\"server_error\"}}" - )); - } - - #[test] - fn classifies_established_context_overflow_phrasings() { - // The phrasings the reliable.rs non-retryable classifier - // recognized before this refactor must all still match through - // the shared single-source matcher. - for body in [ - "This model's maximum context length is 8192 tokens", - "request exceeds the context window of this model", - "context length exceeded", - "too many tokens in the prompt", - "token limit exceeded", - "prompt is too long for the selected model", - "input is too long", - ] { - assert!( - is_context_window_exceeded_message(body), - "should match context-overflow body: {body}" - ); - } - } - - #[test] - fn does_not_match_unrelated_bodies() { - for body in [ - "rate limit exceeded, retry after 30s", - "Invalid request: model not found", - "Insufficient budget", - "tool call exceeded the allowed budget", - ] { - assert!( - !is_context_window_exceeded_message(body), - "must NOT match unrelated body: {body}" - ); - } - } - - #[test] - fn token_rate_limits_are_not_context_overflow() { - // Token-count phrases collide with per-minute token RATE limits. - // Those are transient 429s that must stay retryable and keep - // reaching Sentry — they must NOT be classified as context - // overflow (CodeRabbit review of #2820). The rate-limit marker - // disambiguates. - for body in [ - "Rate limit reached: too many tokens per minute (TPM) for this org", - "rate_limit_exceeded: token limit exceeded, retry after 12s", - "You have hit too many tokens per min; try again in 30s", - ] { - assert!( - !is_context_window_exceeded_message(body), - "TPM rate-limit must NOT match as context overflow: {body}" - ); - } - // …but a token-count overflow with NO rate marker still matches. - assert!(is_context_window_exceeded_message( - "Request rejected: too many tokens in the input for this model" - )); - } - - #[test] - fn log_helper_runs_without_panicking() { - // Smoke for the demotion path taken by `api_error` — no tracing - // subscriber in unit tests. - log_context_window_exceeded( - "api_error", - "custom_openai", - None, - reqwest::StatusCode::INTERNAL_SERVER_ERROR, - ); - } - } - - #[test] - fn test_sanitize_api_error_utf8() { - let input = "🦀".repeat(MAX_API_ERROR_CHARS + 10); - let sanitized = sanitize_api_error(&input); - assert!(sanitized.ends_with("...")); - // Should truncate at MAX_API_ERROR_CHARS crabs - let crabs_count = sanitized.chars().filter(|c| *c == '🦀').count(); - assert_eq!(crabs_count, MAX_API_ERROR_CHARS); - } - - // ── TAURI-RUST-12: list_models JSON parse error must surface body ────── - // - // `response.json()` previously dropped the body when decoding failed, so - // Sentry saw `[providers][list_models] failed to parse JSON: error decoding - // response body` with no clue what the server actually returned. The fix - // reads the body as text first, parses with `serde_json::from_str`, and - // appends a sanitized + truncated snippet to the error string so the - // failure is diagnosable from the log line alone. - - #[derive(Clone)] - struct StaticResponse { - status: StatusCode, - body: &'static str, - } - - async fn static_models_handler(State(s): State) -> Response { - (s.status, s.body).into_response() - } - - async fn spawn_static_models_server(status: StatusCode, body: &'static str) -> String { - let listener = tokio::net::TcpListener::bind("127.0.0.1:0") - .await - .expect("bind"); - let addr = listener.local_addr().expect("local_addr"); - let app = Router::new() - .route("/models", get(static_models_handler)) - .with_state(StaticResponse { status, body }); - tokio::spawn(async move { - axum::serve(listener, app).await.expect("serve"); - }); - format!("http://{addr}") - } - - async fn configure_generic_workspace(tmp: &TempDir, endpoint: String) -> Config { - // Non-`openrouter` slug so the OpenRouter pre-validation path is - // skipped and the test hits `/models` directly. - let mut config = Config { - config_path: tmp.path().join("config.toml"), - workspace_dir: tmp.path().join("workspace"), - ..Config::default() - }; - config.secrets.encrypt = false; - config.cloud_providers.push(CloudProviderCreds { - id: "p_generic_test".to_string(), - slug: "generic-test".to_string(), - label: "Generic".to_string(), - endpoint, - auth_style: AuthStyle::None, - legacy_type: None, - default_model: None, - }); - config.save().await.expect("save config"); - config - } - - #[tokio::test] - async fn list_models_html_body_returns_diagnostic_snippet() { - // Captive-portal / proxy-login wire shape: 200 OK with HTML. - let tmp = tempfile::tempdir().expect("tempdir"); - let html = "Sign incaptive portal"; - let endpoint = spawn_static_models_server(StatusCode::OK, html).await; - let config = configure_generic_workspace(&tmp, endpoint).await; - - let err = list_configured_models_from_config("generic-test", &config) - .await - .expect_err("HTML body must not parse as JSON"); - - assert!( - err.contains("failed to parse JSON"), - "error must keep canonical prefix: {err}" - ); - assert!( - err.contains("captive portal") || err.contains("Sign in") || err.contains("html"), - "error must include a body snippet for diagnosis: {err}" - ); - } - - #[tokio::test] - async fn list_models_empty_body_returns_diagnostic_error() { - // Some misconfigured load balancers return 200 with an empty body. - let tmp = tempfile::tempdir().expect("tempdir"); - let endpoint = spawn_static_models_server(StatusCode::OK, "").await; - let config = configure_generic_workspace(&tmp, endpoint).await; - - let err = list_configured_models_from_config("generic-test", &config) - .await - .expect_err("empty body must not parse as JSON"); - - assert!( - err.contains("failed to parse JSON"), - "error must keep canonical prefix: {err}" - ); - } - - #[tokio::test] - async fn list_models_valid_json_still_succeeds() { - // Regression guard: the new text-then-parse path must still accept - // a valid `/models` JSON response. - let tmp = tempfile::tempdir().expect("tempdir"); - let body = r#"{"data":[{"id":"some-model","owned_by":"vendor","context_length":4096}]}"#; - let endpoint = spawn_static_models_server(StatusCode::OK, body).await; - let config = configure_generic_workspace(&tmp, endpoint).await; - - let outcome = list_configured_models_from_config("generic-test", &config) - .await - .expect("valid JSON must list models"); - assert_eq!(outcome.value["models"][0]["id"], "some-model"); - } - - // ── parse_models_response (TAURI-RUST-4Y) ────────────────────────────── - // - // Before this fix the `/models` parser collapsed "no `data` field" and - // "`data` field present but not an array" into a single misleading - // error string: `"provider response missing `data` array — endpoint is - // not OpenAI-compatible (got keys: data, object)"` — the keys list - // included `data`, contradicting the "missing" claim. The split - // surfaces the actual JSON-type mismatch so future Sentry events on - // this code path are triageable instead of looking like the parser - // is hallucinating. - - #[test] - fn parse_models_response_returns_models_for_well_formed_data_array() { - // Happy path — exact OpenAI `/models` shape, must yield model ids - // and `owned_by` / `context_length` projections from each entry. - let body = serde_json::json!({ - "object": "list", - "data": [ - { "id": "m1", "owned_by": "openai", "context_length": 8192 }, - { "id": "m2", "owned_by": "openai" }, - { "id": "m3", "context_window": 4096 }, - ], - }); - let models = parse_models_response(&body).expect("well-formed body must parse"); - assert_eq!(models.len(), 3); - assert_eq!(models[0].id, "m1"); - assert_eq!(models[0].owned_by.as_deref(), Some("openai")); - assert_eq!(models[0].context_window, Some(8192)); - assert_eq!(models[2].id, "m3"); - assert_eq!(models[2].owned_by, None); - assert_eq!(models[2].context_window, Some(4096)); - } - - #[test] - fn parse_models_response_distinguishes_missing_data_field_from_wrong_type() { - // (1) `data` field completely absent — original Sentry message - // shape, kept for backward fingerprint with the well-known - // "wrong endpoint" misconfiguration. - let body = serde_json::json!({ "object": "list", "models": [] }); - let err = parse_models_response(&body).expect_err("no data field must fail"); - assert!( - err.contains("missing `data` field"), - "no-data error should say `missing`: {err}" - ); - assert!( - err.contains("object, models") || err.contains("models, object"), - "no-data error should list actual keys: {err}" - ); - - // (2) `data` field present but wrong type — TAURI-RUST-4Y verbatim - // shape (`object` + `data` keys both present, but `data` isn't an - // array). The error MUST NOT say "missing" — it must surface the - // actual JSON type so triage knows what shape the provider sent. - for (label, value) in [ - ( - "object", - serde_json::json!({"object":"error","message":"boom"}), - ), - ("string", serde_json::json!("models go here")), - ("null", serde_json::Value::Null), - ("bool", serde_json::json!(true)), - ("number", serde_json::json!(42)), - ] { - let body = serde_json::json!({ "object": "list", "data": value }); - let err = parse_models_response(&body).expect_err("wrong-type data must fail"); - assert!( - !err.contains("missing"), - "wrong-type error must not say `missing` ({label}): {err}" - ); - assert!( - err.contains(label), - "wrong-type error must name the actual JSON kind ({label}): {err}" - ); - } - } - - // ── synthesize_local_runtime_entry (TAURI-RUST-28Z fallback) ──────────── - - #[test] - fn synthesize_local_runtime_entry_ollama_returns_v1_endpoint_with_no_auth() { - // Sentry TAURI-RUST-28Z fires when `inference_list_models("ollama")` - // runs against a config that has no `ollama` cloud_providers entry. - // The synth fallback must produce an entry routed to Ollama's - // OpenAI-compatible `/v1/models` surface at the resolved base URL, - // with `AuthStyle::None` so the probe runs without an Authorization - // header (loopback Ollama accepts unauthenticated requests). - let config = Config::default(); - let entry = synthesize_local_runtime_entry("ollama", &config) - .expect("ollama must produce a synthetic entry"); - assert_eq!(entry.slug, "ollama"); - assert_eq!(entry.auth_style, AuthStyle::None); - assert!( - entry.endpoint.ends_with("/v1"), - "ollama endpoint must terminate at /v1 so `/models` hits the OpenAI-compat surface; got {}", - entry.endpoint - ); - } - - #[test] - fn synthesize_local_runtime_entry_lmstudio_returns_v1_endpoint_with_no_auth() { - // LM Studio's default `lm_studio_base_url` already terminates at - // `/v1`; the synth must preserve that and select `AuthStyle::None` - // so the probe doesn't attach a bearer header (LM Studio runs - // unauthenticated on loopback). - let config = Config::default(); - let entry = synthesize_local_runtime_entry("lmstudio", &config) - .expect("lmstudio must produce a synthetic entry"); - assert_eq!(entry.slug, "lmstudio"); - assert_eq!(entry.auth_style, AuthStyle::None); - assert!( - entry.endpoint.ends_with("/v1"), - "lmstudio endpoint must terminate at /v1; got {}", - entry.endpoint - ); - } - - #[test] - fn synthesize_local_runtime_entry_returns_none_for_unknown_slug() { - // Only `ollama` and `lmstudio` are the recognized local-runtime - // aliases. Every other slug — built-in cloud providers (`openai`, - // `anthropic`), opaque ids (`p_random_xyz`), or typos — must fall - // through to the existing "no cloud provider" error. Pinning this - // rejection contract guards against the synth growing into a - // blanket "any unknown slug points at localhost" matcher. - let config = Config::default(); - for slug in ["openai", "anthropic", "openrouter", "p_random_xyz", "", " "] { - assert!( - synthesize_local_runtime_entry(slug, &config).is_none(), - "{slug:?} must NOT synthesize a local-runtime entry" - ); - } - } - - #[test] - fn parse_models_response_handles_non_object_body() { - // Provider returned a bare array / string / number at the - // top level — not an object at all. Surface as a parse failure - // (not a panic). - for body in [ - serde_json::json!([{"id": "m1"}]), - serde_json::json!("hello"), - serde_json::Value::Null, - ] { - let err = parse_models_response(&body) - .expect_err("non-object body must fail with a clear message"); - assert!( - !err.is_empty(), - "non-object body error must be non-empty: {err}" - ); - } - } - - /// `is_backend_auth_failure` is the polarity guard that decides whether a - /// 401/403 is the OpenHuman backend's expired session (silence + drive - /// reauth) or a third-party BYO-key rejection (actionable, must reach - /// Sentry). Getting this wrong in either direction is a regression: - /// over-matching silences real misconfig; under-matching is TAURI-RUST-N. - #[test] - fn is_backend_auth_failure_only_matches_openhuman_backend_401_403() { - use reqwest::StatusCode; - let backend = crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL; - - assert!(is_backend_auth_failure(backend, StatusCode::UNAUTHORIZED)); - assert!(is_backend_auth_failure(backend, StatusCode::FORBIDDEN)); - - // Non-auth backend statuses stay reportable (real server bugs / transient). - for s in [ - StatusCode::INTERNAL_SERVER_ERROR, - StatusCode::TOO_MANY_REQUESTS, - StatusCode::BAD_REQUEST, - StatusCode::NOT_FOUND, - ] { - assert!( - !is_backend_auth_failure(backend, s), - "backend {s} must not be treated as session-expiry" - ); - } - - // Third-party BYO-key 401/403 (user's own key revoked) must NOT be - // silenced — that is actionable misconfiguration for Sentry. - for provider in ["custom_openai", "OpenAI", "Anthropic", "openrouter"] { - assert!( - !is_backend_auth_failure(provider, StatusCode::UNAUTHORIZED), - "{provider} 401 must reach Sentry as actionable BYO-key error" - ); - assert!( - !is_backend_auth_failure(provider, StatusCode::FORBIDDEN), - "{provider} 403 must reach Sentry as actionable BYO-key error" - ); - } - } - - /// `publish_backend_session_expired` must emit a `SessionExpired` event on - /// the `auth` domain with the canonical source and a sanitized reason, so - /// the credentials subscriber can drive reauth. - #[tokio::test] - async fn publish_backend_session_expired_emits_sanitized_session_expired() { - use crate::core::event_bus::{global, init_global, DomainEvent}; - - init_global(1024); - let mut rx = global().expect("event bus initialized").raw_receiver(); - - // `TEST_MARKER_A` makes this event distinguishable from the sibling - // `chat_completions_backend_401_*` test's event on the shared global - // bus (both run in parallel against the same singleton). The `sk-` - // token probes that `sanitize_api_error` actually scrubs secrets out - // of the SessionExpired reason rather than just emitting the event. - let secret = "sk-LIVEA0123456789abcdefSECRET"; - let msg = format!( - r#"OpenHuman API error (401 Unauthorized): {{"success":false,"error":"TEST_MARKER_A Invalid token {secret}"}}"# - ); - publish_backend_session_expired( - "chat_completions", - crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL, - reqwest::StatusCode::UNAUTHORIZED, - &msg, - ); - - let mut reason_seen: Option = None; - loop { - match rx.try_recv() { - Ok(DomainEvent::SessionExpired { source, reason }) => { - if source == "llm_provider.openhuman_backend" - && reason.contains("TEST_MARKER_A") - { - reason_seen = Some(reason); - break; - } - } - Ok(_) => continue, - Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue, - Err(_) => break, - } - } - let reason = reason_seen.expect( - "publish_backend_session_expired must emit SessionExpired(source=llm_provider.openhuman_backend) carrying TEST_MARKER_A", - ); - assert!( - reason.contains("[REDACTED]"), - "sanitize_api_error must redact the sk- token in the reason: {reason}" - ); - assert!( - !reason.contains(secret), - "raw secret must not survive into the SessionExpired reason: {reason}" - ); - } - - /// End-to-end regression for TAURI-RUST-N: a backend `401 Invalid token` - /// on the hand-rolled `chat_completions` path must publish `SessionExpired` - /// (driving reauth) and surface the typed error — NOT spam Sentry. The - /// provider is labelled exactly like the OpenHuman backend provider, which - /// is what gates the backend-auth-failure branch. - #[tokio::test] - async fn chat_completions_backend_401_publishes_session_expired() { - use crate::core::event_bus::{global, init_global, DomainEvent}; - use axum::routing::post; - - init_global(1024); - let mut rx = global().expect("event bus initialized").raw_receiver(); - - async fn unauthorized_handler() -> Response { - // `TEST_MARKER_B` distinguishes this event from the sibling - // `publish_backend_session_expired_*` test on the shared global - // bus; the `sk-` token probes end-to-end redaction through - // `api_error` → `publish_backend_session_expired`. - ( - StatusCode::UNAUTHORIZED, - Json(serde_json::json!({ - "success": false, - "error": "TEST_MARKER_B Invalid token sk-LIVEB9876543210fedcbaSECRET" - })), - ) - .into_response() - } - - let listener = tokio::net::TcpListener::bind("127.0.0.1:0") - .await - .expect("bind"); - let addr = listener.local_addr().expect("local_addr"); - let app = Router::new().route("/chat/completions", post(unauthorized_handler)); - tokio::spawn(async move { - axum::serve(listener, app).await.expect("serve"); - }); - - let provider = - crate::openhuman::inference::provider::compatible::OpenAiCompatibleProvider::new_no_responses_fallback( - crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL, - &format!("http://{addr}"), - Some("expired-jwt"), - crate::openhuman::inference::provider::compatible::AuthStyle::Bearer, - ); - - let err = crate::openhuman::inference::provider::traits::Provider::chat_with_system( - &provider, - None, - "hi", - "reasoning-quick-v1", - 0.0, - ) - .await - .expect_err("backend 401 must surface as an error"); - let msg = err.to_string(); - assert!( - msg.contains("OpenHuman API error (401") && msg.contains("Invalid token"), - "error must carry the backend 401 envelope: {msg}" - ); - - let mut reason_seen: Option = None; - loop { - match rx.try_recv() { - Ok(DomainEvent::SessionExpired { source, reason }) => { - if source == "llm_provider.openhuman_backend" - && reason.contains("TEST_MARKER_B") - { - reason_seen = Some(reason); - break; - } - } - Ok(_) => continue, - Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue, - Err(_) => break, - } - } - let reason = reason_seen.expect( - "backend 401 on chat_completions must publish SessionExpired carrying TEST_MARKER_B, not report to Sentry", - ); - assert!( - reason.contains("[REDACTED]"), - "sanitize_api_error must redact the sk- token end-to-end: {reason}" - ); - assert!( - !reason.contains("sk-LIVEB9876543210fedcbaSECRET"), - "raw secret must not survive into the SessionExpired reason: {reason}" - ); - } - - #[test] - fn synthesize_local_runtime_entry_ollama_respects_config_base_url() { - // The synth must honor `config.local_ai.base_url` (the same - // priority `ollama_base_url_from_config` uses for chat routing). - // This is the path users hit when they point Ollama at a non-loopback - // host (e.g. a LAN box at 192.168.1.5). - let mut config = Config::default(); - config.local_ai.base_url = Some("http://192.168.1.5:11434".to_string()); - let entry = synthesize_local_runtime_entry("ollama", &config) - .expect("ollama with custom base_url must still synthesize"); - assert_eq!( - entry.endpoint, "http://192.168.1.5:11434/v1", - "synth must use config.local_ai.base_url and append /v1 once", - ); - } - - #[test] - fn cloud_providers_entry_takes_precedence_over_local_runtime_synthesis() { - // Pin the precedence: if the user has explicitly added an `ollama` - // entry to `cloud_providers` (e.g. a remote ollama box at - // https://ollama.example.com/v1), that entry MUST win — the synth - // fallback is reached only when the find returns `None`. Mirrors - // the lookup in `list_configured_models_from_config` so a future - // refactor that swaps `find().or_else(synth)` for unconditional - // synthesis fails this test loudly. - let mut config = Config::default(); - config.cloud_providers.push(CloudProviderCreds { - id: "p_ollama_explicit".to_string(), - slug: "ollama".to_string(), - label: "Remote Ollama".to_string(), - endpoint: "https://ollama.example.com/v1".to_string(), - auth_style: AuthStyle::Bearer, - legacy_type: None, - default_model: None, - }); - - let resolved = config - .cloud_providers - .iter() - .find(|e| e.id == "ollama" || e.slug == "ollama") - .cloned() - .or_else(|| synthesize_local_runtime_entry("ollama", &config)) - .expect("either explicit or synth must resolve"); - assert_eq!( - resolved.endpoint, "https://ollama.example.com/v1", - "explicit cloud_providers entry must beat local-runtime synth", - ); - assert_eq!(resolved.auth_style, AuthStyle::Bearer); - } - - #[test] - fn missing_cloud_providers_entry_falls_back_to_local_runtime_synth() { - // The TAURI-RUST-28Z regression contract: when no `ollama` entry - // exists in `cloud_providers` AND the slug is a recognized - // local-runtime alias, the find/synth chain must yield a synthetic - // entry (instead of `None`, which produces the - // "no cloud provider with id or slug 'ollama' found" Sentry error). - let config = Config::default(); - assert!( - config.cloud_providers.is_empty(), - "precondition: clean config has no providers configured", - ); - - let resolved = config - .cloud_providers - .iter() - .find(|e| e.id == "ollama" || e.slug == "ollama") - .cloned() - .or_else(|| synthesize_local_runtime_entry("ollama", &config)); - assert!( - resolved.is_some(), - "ollama must resolve via synth when cloud_providers is empty" - ); - assert_eq!(resolved.unwrap().slug, "ollama"); - } - - #[test] - fn missing_cloud_providers_entry_for_unknown_slug_still_errors() { - // The synth is intentionally narrow: only `ollama` and `lmstudio` - // get fallback routing. An unknown slug with no `cloud_providers` - // match must continue to produce `None` (which the caller surfaces - // as the "no cloud provider" error) — otherwise typos would - // silently route to localhost. - let config = Config::default(); - let resolved = config - .cloud_providers - .iter() - .find(|e| e.id == "tpyo" || e.slug == "tpyo") - .cloned() - .or_else(|| synthesize_local_runtime_entry("tpyo", &config)); - assert!( - resolved.is_none(), - "unknown slug with no cloud_providers entry must NOT synthesize", - ); - } -} +#[path = "ops_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/provider/ops_tests.rs b/src/openhuman/inference/provider/ops_tests.rs new file mode 100644 index 0000000000..f5efdc3bed --- /dev/null +++ b/src/openhuman/inference/provider/ops_tests.rs @@ -0,0 +1,1218 @@ +use super::*; +use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; +use crate::openhuman::config::Config; +use crate::openhuman::credentials::AuthService; +use axum::{ + extract::State, + http::{HeaderMap, StatusCode}, + response::{IntoResponse, Response}, + routing::get, + Json, Router, +}; +use std::collections::HashMap; +use std::sync::{ + atomic::{AtomicUsize, Ordering as AtomicOrdering}, + Arc, Mutex, +}; +use tempfile::TempDir; + +#[derive(Clone)] +struct ModelProbeState { + key_status: StatusCode, + key_calls: Arc, + model_calls: Arc, + key_authorization: Arc>>>, + model_authorization: Arc>>>, +} + +async fn openrouter_key_handler( + State(state): State, + headers: HeaderMap, +) -> Response { + state.key_calls.fetch_add(1, AtomicOrdering::SeqCst); + state + .key_authorization + .lock() + .unwrap_or_else(|e| e.into_inner()) + .push(authorization_header(&headers)); + if state.key_status.is_success() { + Json(serde_json::json!({ + "data": { + "label": "test-key", + "usage": 0 + } + })) + .into_response() + } else { + ( + state.key_status, + Json(serde_json::json!({ + "error": { + "message": "No auth credentials found" + } + })), + ) + .into_response() + } +} + +async fn models_handler(State(state): State, headers: HeaderMap) -> Response { + state.model_calls.fetch_add(1, AtomicOrdering::SeqCst); + state + .model_authorization + .lock() + .unwrap_or_else(|e| e.into_inner()) + .push(authorization_header(&headers)); + Json(serde_json::json!({ + "data": [{ + "id": "openrouter/test-model", + "owned_by": "openrouter", + "context_length": 128000 + }] + })) + .into_response() +} + +fn authorization_header(headers: &HeaderMap) -> Option { + headers + .get("authorization") + .and_then(|value| value.to_str().ok()) + .map(|value| value.to_string()) +} + +async fn spawn_openrouter_probe_server(key_status: StatusCode) -> (String, ModelProbeState) { + let state = ModelProbeState { + key_status, + key_calls: Arc::new(AtomicUsize::new(0)), + model_calls: Arc::new(AtomicUsize::new(0)), + key_authorization: Arc::new(Mutex::new(Vec::new())), + model_authorization: Arc::new(Mutex::new(Vec::new())), + }; + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind"); + let addr = listener.local_addr().expect("local_addr"); + let app = Router::new() + .route("/key", get(openrouter_key_handler)) + .route("/models", get(models_handler)) + .with_state(state.clone()); + tokio::spawn(async move { + axum::serve(listener, app).await.expect("serve"); + }); + (format!("http://{addr}"), state) +} + +async fn configure_openrouter_workspace(tmp: &TempDir, endpoint: String, token: &str) -> Config { + let mut config = Config { + config_path: tmp.path().join("config.toml"), + workspace_dir: tmp.path().join("workspace"), + ..Config::default() + }; + config.secrets.encrypt = false; + config.cloud_providers.push(CloudProviderCreds { + id: "p_openrouter_test".to_string(), + slug: "openrouter".to_string(), + label: "OpenRouter".to_string(), + endpoint, + auth_style: AuthStyle::Bearer, + legacy_type: None, + default_model: None, + }); + config.save().await.expect("save config"); + + let auth = AuthService::from_config(&config); + auth.store_provider_token( + &crate::openhuman::inference::provider::factory::auth_key_for_slug("openrouter"), + "default", + token, + HashMap::new(), + true, + ) + .expect("store provider key"); + config +} + +#[test] +fn list_configured_models_accepts_slug() { + // list_configured_models should find a provider by slug when the caller + // passes a slug instead of the opaque random id. This lets the frontend + // call the RPC before the provider config has been persisted (where only + // the slug is stable). + use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; + use crate::openhuman::config::Config; + + let mut config = Config::default(); + config.cloud_providers.push(CloudProviderCreds { + id: "p_openai_xyz99".to_string(), + slug: "openai".to_string(), + label: "OpenAI".to_string(), + endpoint: "https://api.openai.com/v1".to_string(), + auth_style: AuthStyle::Bearer, + legacy_type: None, + default_model: None, + }); + + // The find predicate must match on slug. + let found_by_slug = config + .cloud_providers + .iter() + .find(|e| e.id == "openai" || e.slug == "openai"); + assert!( + found_by_slug.is_some(), + "slug lookup must find the provider" + ); + assert_eq!(found_by_slug.unwrap().id, "p_openai_xyz99"); + + // The find predicate must still match on id. + let found_by_id = config + .cloud_providers + .iter() + .find(|e| e.id == "p_openai_xyz99" || e.slug == "p_openai_xyz99"); + assert!(found_by_id.is_some(), "id lookup must still work"); +} + +#[test] +fn openrouter_detection_matches_builtin_slug_or_host() { + let provider = |slug: &str, endpoint: &str| CloudProviderCreds { + id: format!("p_{slug}"), + slug: slug.to_string(), + label: slug.to_string(), + endpoint: endpoint.to_string(), + auth_style: AuthStyle::Bearer, + legacy_type: None, + default_model: None, + }; + + assert!(is_openrouter_provider(&provider( + "openrouter", + "http://127.0.0.1:1234" + ))); + assert!(is_openrouter_provider(&provider( + "custom-router", + "https://openrouter.ai/api/v1" + ))); + assert!(is_openrouter_provider(&provider( + "custom-router", + "https://oauth.openrouter.ai/api/v1" + ))); + assert!(!is_openrouter_provider(&provider( + "custom-openai", + "https://api.openai.com/v1" + ))); +} + +#[test] +fn openai_codex_models_url_includes_client_version_query() { + let url = append_query_param( + "https://chatgpt.com/backend-api/codex/models", + "client_version", + openai_codex_client_version(), + ); + let parsed = reqwest::Url::parse(&url).expect("url"); + + assert_eq!(parsed.path(), "/backend-api/codex/models"); + assert_eq!( + parsed + .query_pairs() + .find(|(key, _)| key == "client_version") + .map(|(_, value)| value.into_owned()), + Some(openai_codex_client_version().to_string()) + ); +} + +#[tokio::test] +async fn openrouter_invalid_key_fails_before_models_catalog_probe() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::UNAUTHORIZED).await; + let config = configure_openrouter_workspace(&tmp, endpoint, "bad-openrouter-key").await; + + let err = list_configured_models_from_config("openrouter", &config) + .await + .expect_err("invalid OpenRouter key must fail"); + + assert!( + err.contains("OpenRouter key validation returned 401"), + "unexpected error: {err}" + ); + assert_eq!(state.key_calls.load(AtomicOrdering::SeqCst), 1); + assert_eq!( + state.model_calls.load(AtomicOrdering::SeqCst), + 0, + "invalid OpenRouter credentials must not fall through to /models" + ); +} + +#[tokio::test] +async fn openrouter_valid_key_allows_models_catalog_probe() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::OK).await; + let config = configure_openrouter_workspace(&tmp, endpoint, "valid-openrouter-key").await; + + let outcome = list_configured_models_from_config("openrouter", &config) + .await + .expect("valid OpenRouter key should list models"); + + assert_eq!(state.key_calls.load(AtomicOrdering::SeqCst), 1); + assert_eq!(state.model_calls.load(AtomicOrdering::SeqCst), 1); + assert_eq!(outcome.value["models"][0]["id"], "openrouter/test-model"); +} + +#[tokio::test] +async fn openrouter_key_is_trimmed_for_validation_and_catalog_probe() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (endpoint, state) = spawn_openrouter_probe_server(StatusCode::OK).await; + let config = configure_openrouter_workspace(&tmp, endpoint, " valid-openrouter-key\r\n").await; + + list_configured_models_from_config("openrouter", &config) + .await + .expect("trimmed OpenRouter key should list models"); + + let key_authorization = state + .key_authorization + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone(); + let model_authorization = state + .model_authorization + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone(); + assert_eq!( + key_authorization, + vec![Some("Bearer valid-openrouter-key".to_string())] + ); + assert_eq!( + model_authorization, + vec![Some("Bearer valid-openrouter-key".to_string())] + ); +} + +#[test] +fn factory_backend() { + assert!(create_backend_inference_provider( + None, + None, + None, + &ProviderRuntimeOptions::default() + ) + .is_ok()); +} + +#[test] +fn skips_sentry_report_for_transient_upstream_statuses() { + // Transient statuses — 429 rate-limit, 408 client timeout, and 502/503/504 + // gateway-layer failures — are retried by reliable.rs. The aggregate + // "all providers exhausted" event still fires for genuine outages. + // Reporting each attempt individually floods Sentry (OPENHUMAN-TAURI-2E + // ~1393 events, 84 ~1050 events, T ~871 events). + for transient in [ + reqwest::StatusCode::TOO_MANY_REQUESTS, + reqwest::StatusCode::REQUEST_TIMEOUT, + reqwest::StatusCode::BAD_GATEWAY, + reqwest::StatusCode::SERVICE_UNAVAILABLE, + reqwest::StatusCode::GATEWAY_TIMEOUT, + ] { + assert!( + !should_report_provider_http_failure(transient), + "transient status {transient} must not trigger per-attempt Sentry report" + ); + } + // Auth + permanent server faults remain reportable — those are + // misconfiguration or genuine bugs, not transient capacity issues. + for reportable in [ + reqwest::StatusCode::UNAUTHORIZED, + reqwest::StatusCode::FORBIDDEN, + reqwest::StatusCode::BAD_REQUEST, + reqwest::StatusCode::NOT_FOUND, + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + ] { + assert!( + should_report_provider_http_failure(reportable), + "status {reportable} must still report to Sentry" + ); + } +} + +// Confirm the budget-exhausted suppression predicate is scoped correctly. +// These tests exercise the real production function, not a duplicate. +mod budget_exhausted_suppression { + use super::*; + + const BUDGET_BODY: &str = "Insufficient budget"; + const UNRELATED_BODY: &str = "Invalid request: model not found"; + + #[test] + fn budget_exhausted_400_is_suppressed() { + assert!(is_budget_exhausted_http_400( + reqwest::StatusCode::BAD_REQUEST, + BUDGET_BODY, + )); + } + + #[test] + fn budget_exhausted_400_is_case_insensitive() { + assert!(is_budget_exhausted_http_400( + reqwest::StatusCode::BAD_REQUEST, + "budget exceeded — ADD credits to continue", + )); + } + + #[test] + fn budget_exhausted_500_is_not_suppressed() { + // A 500 is a server bug, not expected user-state — keep reporting. + assert!(!is_budget_exhausted_http_400( + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + BUDGET_BODY, + )); + } + + #[test] + fn budget_exhausted_400_unrelated_body_is_not_suppressed() { + assert!(!is_budget_exhausted_http_400( + reqwest::StatusCode::BAD_REQUEST, + UNRELATED_BODY, + )); + } + + #[test] + fn budget_exhausted_402_is_not_suppressed() { + assert!(!is_budget_exhausted_http_400( + reqwest::StatusCode::PAYMENT_REQUIRED, + BUDGET_BODY, + )); + } + + #[test] + fn budget_exhausted_empty_body_is_not_suppressed() { + assert!(!is_budget_exhausted_http_400( + reqwest::StatusCode::BAD_REQUEST, + "", + )); + } +} + +mod provider_access_policy_suppression { + use super::*; + + const ACCESS_TERMINATED_BODY: &str = + "{\"error\":{\"message\":\"Kimi For Coding is currently only available for Coding Agents.\",\"type\":\"access_terminated_error\"}}"; + + #[test] + fn access_terminated_403_is_suppressed() { + assert!(is_provider_access_policy_denied_http_403( + reqwest::StatusCode::FORBIDDEN, + ACCESS_TERMINATED_BODY, + )); + } + + #[test] + fn access_terminated_non_403_is_not_suppressed() { + assert!(!is_provider_access_policy_denied_http_403( + reqwest::StatusCode::BAD_REQUEST, + ACCESS_TERMINATED_BODY, + )); + } + + #[test] + fn unrelated_403_is_not_suppressed() { + assert!(!is_provider_access_policy_denied_http_403( + reqwest::StatusCode::FORBIDDEN, + "{\"error\":{\"message\":\"forbidden\"}}", + )); + } +} + +// Exercises the real `is_provider_config_rejection_http` decision used +// by `api_error`, including the inverted provider-aware polarity. +mod provider_config_rejection_suppression { + use super::*; + + // The exact #2079 Sentry body shape. + const TIER_LEAK_BODY: &str = + "The supported API model names are deepseek-v4-pro or deepseek-v4-flash, \ + but you passed reasoning-v1."; + // #2076 Moonshot Kimi K2 temperature constraint. + const TEMP_BODY: &str = "invalid temperature: only 1 is allowed for this model"; + + #[test] + fn custom_provider_4xx_config_rejection_is_suppressed() { + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + TIER_LEAK_BODY, + )); + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + TEMP_BODY, + )); + // 404 "model does not exist" is the same user-config class. + assert!(is_provider_config_rejection_http( + reqwest::StatusCode::NOT_FOUND, + "custom_openai", + "The model `gpt-5.5` does not exist or you do not have access to it.", + )); + } + + #[test] + fn openhuman_backend_same_body_is_not_suppressed() { + // Inverted polarity: for tier-leak / temperature / litellm / + // OpenRouter-style phrases, the OpenHuman backend never + // emits them, so the same body from our OWN backend would + // mean we sent it a bad request — a real regression that + // must still reach Sentry. (Mirror of the 401/403 backend + // rule.) + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + openhuman_backend::PROVIDER_LABEL, + TIER_LEAK_BODY, + )); + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + openhuman_backend::PROVIDER_LABEL, + TEMP_BODY, + )); + } + + #[test] + fn openhuman_backend_openai_compatible_unknown_model_is_suppressed() { + // TAURI-RUST-2Z1 — the OpenHuman backend DOES emit the + // OpenAI-compatible "Model 'X' is not available. Use GET + // /openai/v1/models …" wire body for user-configured unknown + // model ids (here `MiniMax-M2.7-highspeed` and two + // `custom:`-prefixed fallback variants from the user's own + // `model_fallbacks` config). That's user-state, not a + // regression — drop the polarity guard for this specific + // shape so the per-attempt event stops reaching Sentry. + // (The aggregate sibling TAURI-RUST-2Z2 is already covered by + // `expected_error_kind` via the broader message-only + // classifier.) + for body in [ + r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Model 'MiniMax-M2.7-highspeed' is not available. Use GET /openai/v1/models to list available models."}"#, + r#"OpenHuman API error (400 Bad Request): {"success":false,"error":"Model 'custom:MiniMax-M2.7' is not available. Use GET /openai/v1/models to list available models."}"#, + ] { + assert!( + is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + openhuman_backend::PROVIDER_LABEL, + body, + ), + "TAURI-RUST-2Z1 body must be suppressed for openhuman backend: {body:?}" + ); + } + } + + #[test] + fn server_error_is_not_suppressed() { + // A 5xx is a server bug, not user-config — keep reporting. + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + "custom_openai", + TIER_LEAK_BODY, + )); + } + + #[test] + fn transient_429_is_not_suppressed_here() { + // 429 is transient; handled by should_report_provider_http_failure, + // not this classifier (must not be swallowed as user-config). + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::TOO_MANY_REQUESTS, + "custom_openai", + TIER_LEAK_BODY, + )); + } + + #[test] + fn unrelated_4xx_body_is_not_suppressed() { + assert!(!is_provider_config_rejection_http( + reqwest::StatusCode::BAD_REQUEST, + "custom_openai", + "Bad request: missing required field 'messages'", + )); + } + + #[test] + fn log_helper_runs_without_panicking() { + // Covers the demotion log path taken by `api_error` when a + // custom provider rejects the user's model/param config. No + // tracing subscriber in unit tests, so this is a pure smoke. + log_provider_config_rejection( + "api_error", + "custom_openai", + Some("reasoning-v1"), + reqwest::StatusCode::BAD_REQUEST, + ); + } +} + +mod context_window_exceeded_suppression { + use super::*; + + #[test] + fn classifies_tauri_rust_501_custom_provider_500_body() { + // TAURI-RUST-501: the custom-provider 500 wire body. The + // matcher is status-agnostic, so the 500 mis-report is caught + // (the provider api_error cascade routes it to + // `log_context_window_exceeded` instead of `report_error`). + assert!(is_context_window_exceeded_message( + "{\"error\":{\"code\":500,\"message\":\"Context size has been exceeded.\",\"type\":\"server_error\"}}" + )); + } + + #[test] + fn classifies_established_context_overflow_phrasings() { + // The phrasings the reliable.rs non-retryable classifier + // recognized before this refactor must all still match through + // the shared single-source matcher. + for body in [ + "This model's maximum context length is 8192 tokens", + "request exceeds the context window of this model", + "context length exceeded", + "too many tokens in the prompt", + "token limit exceeded", + "prompt is too long for the selected model", + "input is too long", + ] { + assert!( + is_context_window_exceeded_message(body), + "should match context-overflow body: {body}" + ); + } + } + + #[test] + fn does_not_match_unrelated_bodies() { + for body in [ + "rate limit exceeded, retry after 30s", + "Invalid request: model not found", + "Insufficient budget", + "tool call exceeded the allowed budget", + ] { + assert!( + !is_context_window_exceeded_message(body), + "must NOT match unrelated body: {body}" + ); + } + } + + #[test] + fn token_rate_limits_are_not_context_overflow() { + // Token-count phrases collide with per-minute token RATE limits. + // Those are transient 429s that must stay retryable and keep + // reaching Sentry — they must NOT be classified as context + // overflow (CodeRabbit review of #2820). The rate-limit marker + // disambiguates. + for body in [ + "Rate limit reached: too many tokens per minute (TPM) for this org", + "rate_limit_exceeded: token limit exceeded, retry after 12s", + "You have hit too many tokens per min; try again in 30s", + ] { + assert!( + !is_context_window_exceeded_message(body), + "TPM rate-limit must NOT match as context overflow: {body}" + ); + } + // …but a token-count overflow with NO rate marker still matches. + assert!(is_context_window_exceeded_message( + "Request rejected: too many tokens in the input for this model" + )); + } + + #[test] + fn log_helper_runs_without_panicking() { + // Smoke for the demotion path taken by `api_error` — no tracing + // subscriber in unit tests. + log_context_window_exceeded( + "api_error", + "custom_openai", + None, + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + ); + } +} + +#[test] +fn test_sanitize_api_error_utf8() { + let input = "🦀".repeat(MAX_API_ERROR_CHARS + 10); + let sanitized = sanitize_api_error(&input); + assert!(sanitized.ends_with("...")); + // Should truncate at MAX_API_ERROR_CHARS crabs + let crabs_count = sanitized.chars().filter(|c| *c == '🦀').count(); + assert_eq!(crabs_count, MAX_API_ERROR_CHARS); +} + +// ── TAURI-RUST-12: list_models JSON parse error must surface body ────── +// +// `response.json()` previously dropped the body when decoding failed, so +// Sentry saw `[providers][list_models] failed to parse JSON: error decoding +// response body` with no clue what the server actually returned. The fix +// reads the body as text first, parses with `serde_json::from_str`, and +// appends a sanitized + truncated snippet to the error string so the +// failure is diagnosable from the log line alone. + +#[derive(Clone)] +struct StaticResponse { + status: StatusCode, + body: &'static str, +} + +async fn static_models_handler(State(s): State) -> Response { + (s.status, s.body).into_response() +} + +async fn spawn_static_models_server(status: StatusCode, body: &'static str) -> String { + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind"); + let addr = listener.local_addr().expect("local_addr"); + let app = Router::new() + .route("/models", get(static_models_handler)) + .with_state(StaticResponse { status, body }); + tokio::spawn(async move { + axum::serve(listener, app).await.expect("serve"); + }); + format!("http://{addr}") +} + +async fn configure_generic_workspace(tmp: &TempDir, endpoint: String) -> Config { + // Non-`openrouter` slug so the OpenRouter pre-validation path is + // skipped and the test hits `/models` directly. + let mut config = Config { + config_path: tmp.path().join("config.toml"), + workspace_dir: tmp.path().join("workspace"), + ..Config::default() + }; + config.secrets.encrypt = false; + config.cloud_providers.push(CloudProviderCreds { + id: "p_generic_test".to_string(), + slug: "generic-test".to_string(), + label: "Generic".to_string(), + endpoint, + auth_style: AuthStyle::None, + legacy_type: None, + default_model: None, + }); + config.save().await.expect("save config"); + config +} + +#[tokio::test] +async fn list_models_html_body_returns_diagnostic_snippet() { + // Captive-portal / proxy-login wire shape: 200 OK with HTML. + let tmp = tempfile::tempdir().expect("tempdir"); + let html = "Sign incaptive portal"; + let endpoint = spawn_static_models_server(StatusCode::OK, html).await; + let config = configure_generic_workspace(&tmp, endpoint).await; + + let err = list_configured_models_from_config("generic-test", &config) + .await + .expect_err("HTML body must not parse as JSON"); + + assert!( + err.contains("failed to parse JSON"), + "error must keep canonical prefix: {err}" + ); + assert!( + err.contains("captive portal") || err.contains("Sign in") || err.contains("html"), + "error must include a body snippet for diagnosis: {err}" + ); +} + +#[tokio::test] +async fn list_models_empty_body_returns_diagnostic_error() { + // Some misconfigured load balancers return 200 with an empty body. + let tmp = tempfile::tempdir().expect("tempdir"); + let endpoint = spawn_static_models_server(StatusCode::OK, "").await; + let config = configure_generic_workspace(&tmp, endpoint).await; + + let err = list_configured_models_from_config("generic-test", &config) + .await + .expect_err("empty body must not parse as JSON"); + + assert!( + err.contains("failed to parse JSON"), + "error must keep canonical prefix: {err}" + ); +} + +#[tokio::test] +async fn list_models_valid_json_still_succeeds() { + // Regression guard: the new text-then-parse path must still accept + // a valid `/models` JSON response. + let tmp = tempfile::tempdir().expect("tempdir"); + let body = r#"{"data":[{"id":"some-model","owned_by":"vendor","context_length":4096}]}"#; + let endpoint = spawn_static_models_server(StatusCode::OK, body).await; + let config = configure_generic_workspace(&tmp, endpoint).await; + + let outcome = list_configured_models_from_config("generic-test", &config) + .await + .expect("valid JSON must list models"); + assert_eq!(outcome.value["models"][0]["id"], "some-model"); +} + +// ── parse_models_response (TAURI-RUST-4Y) ────────────────────────────── +// +// Before this fix the `/models` parser collapsed "no `data` field" and +// "`data` field present but not an array" into a single misleading +// error string: `"provider response missing `data` array — endpoint is +// not OpenAI-compatible (got keys: data, object)"` — the keys list +// included `data`, contradicting the "missing" claim. The split +// surfaces the actual JSON-type mismatch so future Sentry events on +// this code path are triageable instead of looking like the parser +// is hallucinating. + +#[test] +fn parse_models_response_returns_models_for_well_formed_data_array() { + // Happy path — exact OpenAI `/models` shape, must yield model ids + // and `owned_by` / `context_length` projections from each entry. + let body = serde_json::json!({ + "object": "list", + "data": [ + { "id": "m1", "owned_by": "openai", "context_length": 8192 }, + { "id": "m2", "owned_by": "openai" }, + { "id": "m3", "context_window": 4096 }, + ], + }); + let models = parse_models_response(&body).expect("well-formed body must parse"); + assert_eq!(models.len(), 3); + assert_eq!(models[0].id, "m1"); + assert_eq!(models[0].owned_by.as_deref(), Some("openai")); + assert_eq!(models[0].context_window, Some(8192)); + assert_eq!(models[2].id, "m3"); + assert_eq!(models[2].owned_by, None); + assert_eq!(models[2].context_window, Some(4096)); +} + +#[test] +fn parse_models_response_returns_models_for_codex_models_array() { + let body = serde_json::json!({ + "models": [ + { "slug": "gpt-5.5", "owned_by_organization": "openai", "max_context_window": 272000 }, + "gpt-5.4", + ], + }); + + let models = parse_models_response(&body).expect("Codex models body must parse"); + + assert_eq!(models.len(), 2); + assert_eq!(models[0].id, "gpt-5.5"); + assert_eq!(models[0].owned_by.as_deref(), Some("openai")); + assert_eq!(models[0].context_window, Some(272000)); + assert_eq!(models[1].id, "gpt-5.4"); +} + +#[test] +fn parse_models_response_distinguishes_missing_data_field_from_wrong_type() { + // (1) `data`/`models` fields completely absent — wrong endpoint + // misconfiguration. Codex uses `models`, so it is accepted alongside + // OpenAI-compatible `data`. + let body = serde_json::json!({ "object": "list", "items": [] }); + let err = parse_models_response(&body).expect_err("no model catalog field must fail"); + assert!( + err.contains("missing `data` or `models` field"), + "no-data error should say `missing`: {err}" + ); + assert!( + err.contains("items") && err.contains("object"), + "no-data error should list actual keys: {err}" + ); + + // (2) `data` field present but wrong type — TAURI-RUST-4Y verbatim + // shape (`object` + `data` keys both present, but `data` isn't an + // array). The error MUST NOT say "missing" — it must surface the + // actual JSON type so triage knows what shape the provider sent. + for (label, value) in [ + ( + "object", + serde_json::json!({"object":"error","message":"boom"}), + ), + ("string", serde_json::json!("models go here")), + ("null", serde_json::Value::Null), + ("bool", serde_json::json!(true)), + ("number", serde_json::json!(42)), + ] { + let body = serde_json::json!({ "object": "list", "data": value }); + let err = parse_models_response(&body).expect_err("wrong-type data must fail"); + assert!( + !err.contains("missing"), + "wrong-type error must not say `missing` ({label}): {err}" + ); + assert!( + err.contains(label), + "wrong-type error must name the actual JSON kind ({label}): {err}" + ); + } +} + +#[test] +fn openai_codex_model_hints_are_merged_without_duplicates() { + let mut models = vec![ModelInfo { + id: "gpt-5.4".to_string(), + owned_by: Some("openai-codex".to_string()), + context_window: Some(128000), + }]; + + merge_openai_codex_model_hints(&mut models); + + let ids = models + .iter() + .map(|model| model.id.as_str()) + .collect::>(); + assert_eq!( + ids, + vec!["gpt-5.4", "gpt-5.5", "gpt-5.3-codex-spark", "gpt-5.3-codex"] + ); +} + +// ── synthesize_local_runtime_entry (TAURI-RUST-28Z fallback) ──────────── + +#[test] +fn synthesize_local_runtime_entry_ollama_returns_v1_endpoint_with_no_auth() { + // Sentry TAURI-RUST-28Z fires when `inference_list_models("ollama")` + // runs against a config that has no `ollama` cloud_providers entry. + // The synth fallback must produce an entry routed to Ollama's + // OpenAI-compatible `/v1/models` surface at the resolved base URL, + // with `AuthStyle::None` so the probe runs without an Authorization + // header (loopback Ollama accepts unauthenticated requests). + let config = Config::default(); + let entry = synthesize_local_runtime_entry("ollama", &config) + .expect("ollama must produce a synthetic entry"); + assert_eq!(entry.slug, "ollama"); + assert_eq!(entry.auth_style, AuthStyle::None); + assert!( + entry.endpoint.ends_with("/v1"), + "ollama endpoint must terminate at /v1 so `/models` hits the OpenAI-compat surface; got {}", + entry.endpoint + ); +} + +#[test] +fn synthesize_local_runtime_entry_lmstudio_returns_v1_endpoint_with_no_auth() { + // LM Studio's default `lm_studio_base_url` already terminates at + // `/v1`; the synth must preserve that and select `AuthStyle::None` + // so the probe doesn't attach a bearer header (LM Studio runs + // unauthenticated on loopback). + let config = Config::default(); + let entry = synthesize_local_runtime_entry("lmstudio", &config) + .expect("lmstudio must produce a synthetic entry"); + assert_eq!(entry.slug, "lmstudio"); + assert_eq!(entry.auth_style, AuthStyle::None); + assert!( + entry.endpoint.ends_with("/v1"), + "lmstudio endpoint must terminate at /v1; got {}", + entry.endpoint + ); +} + +#[test] +fn synthesize_local_runtime_entry_returns_none_for_unknown_slug() { + // Only `ollama` and `lmstudio` are the recognized local-runtime + // aliases. Every other slug — built-in cloud providers (`openai`, + // `anthropic`), opaque ids (`p_random_xyz`), or typos — must fall + // through to the existing "no cloud provider" error. Pinning this + // rejection contract guards against the synth growing into a + // blanket "any unknown slug points at localhost" matcher. + let config = Config::default(); + for slug in ["openai", "anthropic", "openrouter", "p_random_xyz", "", " "] { + assert!( + synthesize_local_runtime_entry(slug, &config).is_none(), + "{slug:?} must NOT synthesize a local-runtime entry" + ); + } +} + +#[test] +fn parse_models_response_handles_non_object_body() { + // Provider returned a bare array / string / number at the + // top level — not an object at all. Surface as a parse failure + // (not a panic). + for body in [ + serde_json::json!([{"id": "m1"}]), + serde_json::json!("hello"), + serde_json::Value::Null, + ] { + let err = parse_models_response(&body) + .expect_err("non-object body must fail with a clear message"); + assert!( + !err.is_empty(), + "non-object body error must be non-empty: {err}" + ); + } +} + +/// `is_backend_auth_failure` is the polarity guard that decides whether a +/// 401/403 is the OpenHuman backend's expired session (silence + drive +/// reauth) or a third-party BYO-key rejection (actionable, must reach +/// Sentry). Getting this wrong in either direction is a regression: +/// over-matching silences real misconfig; under-matching is TAURI-RUST-N. +#[test] +fn is_backend_auth_failure_only_matches_openhuman_backend_401_403() { + use reqwest::StatusCode; + let backend = crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL; + + assert!(is_backend_auth_failure(backend, StatusCode::UNAUTHORIZED)); + assert!(is_backend_auth_failure(backend, StatusCode::FORBIDDEN)); + + // Non-auth backend statuses stay reportable (real server bugs / transient). + for s in [ + StatusCode::INTERNAL_SERVER_ERROR, + StatusCode::TOO_MANY_REQUESTS, + StatusCode::BAD_REQUEST, + StatusCode::NOT_FOUND, + ] { + assert!( + !is_backend_auth_failure(backend, s), + "backend {s} must not be treated as session-expiry" + ); + } + + // Third-party BYO-key 401/403 (user's own key revoked) must NOT be + // silenced — that is actionable misconfiguration for Sentry. + for provider in ["custom_openai", "OpenAI", "Anthropic", "openrouter"] { + assert!( + !is_backend_auth_failure(provider, StatusCode::UNAUTHORIZED), + "{provider} 401 must reach Sentry as actionable BYO-key error" + ); + assert!( + !is_backend_auth_failure(provider, StatusCode::FORBIDDEN), + "{provider} 403 must reach Sentry as actionable BYO-key error" + ); + } +} + +/// `publish_backend_session_expired` must emit a `SessionExpired` event on +/// the `auth` domain with the canonical source and a sanitized reason, so +/// the credentials subscriber can drive reauth. +#[tokio::test] +async fn publish_backend_session_expired_emits_sanitized_session_expired() { + use crate::core::event_bus::{global, init_global, DomainEvent}; + + init_global(1024); + let mut rx = global().expect("event bus initialized").raw_receiver(); + + // `TEST_MARKER_A` makes this event distinguishable from the sibling + // `chat_completions_backend_401_*` test's event on the shared global + // bus (both run in parallel against the same singleton). The `sk-` + // token probes that `sanitize_api_error` actually scrubs secrets out + // of the SessionExpired reason rather than just emitting the event. + let secret = "sk-LIVEA0123456789abcdefSECRET"; + let msg = format!( + r#"OpenHuman API error (401 Unauthorized): {{"success":false,"error":"TEST_MARKER_A Invalid token {secret}"}}"# + ); + publish_backend_session_expired( + "chat_completions", + crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL, + reqwest::StatusCode::UNAUTHORIZED, + &msg, + ); + + let mut reason_seen: Option = None; + loop { + match rx.try_recv() { + Ok(DomainEvent::SessionExpired { source, reason }) => { + if source == "llm_provider.openhuman_backend" && reason.contains("TEST_MARKER_A") { + reason_seen = Some(reason); + break; + } + } + Ok(_) => continue, + Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue, + Err(_) => break, + } + } + let reason = reason_seen.expect( + "publish_backend_session_expired must emit SessionExpired(source=llm_provider.openhuman_backend) carrying TEST_MARKER_A", + ); + assert!( + reason.contains("[REDACTED]"), + "sanitize_api_error must redact the sk- token in the reason: {reason}" + ); + assert!( + !reason.contains(secret), + "raw secret must not survive into the SessionExpired reason: {reason}" + ); +} + +/// End-to-end regression for TAURI-RUST-N: a backend `401 Invalid token` +/// on the hand-rolled `chat_completions` path must publish `SessionExpired` +/// (driving reauth) and surface the typed error — NOT spam Sentry. The +/// provider is labelled exactly like the OpenHuman backend provider, which +/// is what gates the backend-auth-failure branch. +#[tokio::test] +async fn chat_completions_backend_401_publishes_session_expired() { + use crate::core::event_bus::{global, init_global, DomainEvent}; + use axum::routing::post; + + init_global(1024); + let mut rx = global().expect("event bus initialized").raw_receiver(); + + async fn unauthorized_handler() -> Response { + // `TEST_MARKER_B` distinguishes this event from the sibling + // `publish_backend_session_expired_*` test on the shared global + // bus; the `sk-` token probes end-to-end redaction through + // `api_error` → `publish_backend_session_expired`. + ( + StatusCode::UNAUTHORIZED, + Json(serde_json::json!({ + "success": false, + "error": "TEST_MARKER_B Invalid token sk-LIVEB9876543210fedcbaSECRET" + })), + ) + .into_response() + } + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind"); + let addr = listener.local_addr().expect("local_addr"); + let app = Router::new().route("/chat/completions", post(unauthorized_handler)); + tokio::spawn(async move { + axum::serve(listener, app).await.expect("serve"); + }); + + let provider = + crate::openhuman::inference::provider::compatible::OpenAiCompatibleProvider::new_no_responses_fallback( + crate::openhuman::inference::provider::openhuman_backend::PROVIDER_LABEL, + &format!("http://{addr}"), + Some("expired-jwt"), + crate::openhuman::inference::provider::compatible::AuthStyle::Bearer, + ); + + let err = crate::openhuman::inference::provider::traits::Provider::chat_with_system( + &provider, + None, + "hi", + "reasoning-quick-v1", + 0.0, + ) + .await + .expect_err("backend 401 must surface as an error"); + let msg = err.to_string(); + assert!( + msg.contains("OpenHuman API error (401") && msg.contains("Invalid token"), + "error must carry the backend 401 envelope: {msg}" + ); + + let mut reason_seen: Option = None; + loop { + match rx.try_recv() { + Ok(DomainEvent::SessionExpired { source, reason }) => { + if source == "llm_provider.openhuman_backend" && reason.contains("TEST_MARKER_B") { + reason_seen = Some(reason); + break; + } + } + Ok(_) => continue, + Err(tokio::sync::broadcast::error::TryRecvError::Lagged(_)) => continue, + Err(_) => break, + } + } + let reason = reason_seen.expect( + "backend 401 on chat_completions must publish SessionExpired carrying TEST_MARKER_B, not report to Sentry", + ); + assert!( + reason.contains("[REDACTED]"), + "sanitize_api_error must redact the sk- token end-to-end: {reason}" + ); + assert!( + !reason.contains("sk-LIVEB9876543210fedcbaSECRET"), + "raw secret must not survive into the SessionExpired reason: {reason}" + ); +} + +#[test] +fn synthesize_local_runtime_entry_ollama_respects_config_base_url() { + // The synth must honor `config.local_ai.base_url` (the same + // priority `ollama_base_url_from_config` uses for chat routing). + // This is the path users hit when they point Ollama at a non-loopback + // host (e.g. a LAN box at 192.168.1.5). + let mut config = Config::default(); + config.local_ai.base_url = Some("http://192.168.1.5:11434".to_string()); + let entry = synthesize_local_runtime_entry("ollama", &config) + .expect("ollama with custom base_url must still synthesize"); + assert_eq!( + entry.endpoint, "http://192.168.1.5:11434/v1", + "synth must use config.local_ai.base_url and append /v1 once", + ); +} + +#[test] +fn cloud_providers_entry_takes_precedence_over_local_runtime_synthesis() { + // Pin the precedence: if the user has explicitly added an `ollama` + // entry to `cloud_providers` (e.g. a remote ollama box at + // https://ollama.example.com/v1), that entry MUST win — the synth + // fallback is reached only when the find returns `None`. Mirrors + // the lookup in `list_configured_models_from_config` so a future + // refactor that swaps `find().or_else(synth)` for unconditional + // synthesis fails this test loudly. + let mut config = Config::default(); + config.cloud_providers.push(CloudProviderCreds { + id: "p_ollama_explicit".to_string(), + slug: "ollama".to_string(), + label: "Remote Ollama".to_string(), + endpoint: "https://ollama.example.com/v1".to_string(), + auth_style: AuthStyle::Bearer, + legacy_type: None, + default_model: None, + }); + + let resolved = config + .cloud_providers + .iter() + .find(|e| e.id == "ollama" || e.slug == "ollama") + .cloned() + .or_else(|| synthesize_local_runtime_entry("ollama", &config)) + .expect("either explicit or synth must resolve"); + assert_eq!( + resolved.endpoint, "https://ollama.example.com/v1", + "explicit cloud_providers entry must beat local-runtime synth", + ); + assert_eq!(resolved.auth_style, AuthStyle::Bearer); +} + +#[test] +fn missing_cloud_providers_entry_falls_back_to_local_runtime_synth() { + // The TAURI-RUST-28Z regression contract: when no `ollama` entry + // exists in `cloud_providers` AND the slug is a recognized + // local-runtime alias, the find/synth chain must yield a synthetic + // entry (instead of `None`, which produces the + // "no cloud provider with id or slug 'ollama' found" Sentry error). + let config = Config::default(); + assert!( + config.cloud_providers.is_empty(), + "precondition: clean config has no providers configured", + ); + + let resolved = config + .cloud_providers + .iter() + .find(|e| e.id == "ollama" || e.slug == "ollama") + .cloned() + .or_else(|| synthesize_local_runtime_entry("ollama", &config)); + assert!( + resolved.is_some(), + "ollama must resolve via synth when cloud_providers is empty" + ); + assert_eq!(resolved.unwrap().slug, "ollama"); +} + +#[test] +fn missing_cloud_providers_entry_for_unknown_slug_still_errors() { + // The synth is intentionally narrow: only `ollama` and `lmstudio` + // get fallback routing. An unknown slug with no `cloud_providers` + // match must continue to produce `None` (which the caller surfaces + // as the "no cloud provider" error) — otherwise typos would + // silently route to localhost. + let config = Config::default(); + let resolved = config + .cloud_providers + .iter() + .find(|e| e.id == "tpyo" || e.slug == "tpyo") + .cloned() + .or_else(|| synthesize_local_runtime_entry("tpyo", &config)); + assert!( + resolved.is_none(), + "unknown slug with no cloud_providers entry must NOT synthesize", + ); +} From b49c7f33fb27380e37a9a36bca8e3ff6e65f97c6 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Sun, 31 May 2026 21:03:50 +0200 Subject: [PATCH 6/7] test(inference): align list_models 404 coverage tests with reverted behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main's raw-coverage e2e tests asserted the #2939 404→`unsupported:true` suppression. With that branch reverted, a 404 from /models now surfaces as an error ("provider returned 404: …"), so update the three affected assertions (direct calls + the inference_list_models RPC path) to expect the error. Co-Authored-By: Claude --- ..._compatible_admin_leftovers_raw_coverage_e2e.rs | 14 ++++++++++---- tests/inference_provider_raw_coverage_e2e.rs | 13 +++++++++---- tests/worker_b_raw_coverage_e2e.rs | 13 +++++++------ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/inference_compatible_admin_leftovers_raw_coverage_e2e.rs b/tests/inference_compatible_admin_leftovers_raw_coverage_e2e.rs index fe32e7143f..3d3202f254 100644 --- a/tests/inference_compatible_admin_leftovers_raw_coverage_e2e.rs +++ b/tests/inference_compatible_admin_leftovers_raw_coverage_e2e.rs @@ -340,11 +340,17 @@ async fn provider_ops_leftovers_cover_model_listing_error_shapes_and_auth_styles .expect_err("unknown provider id"); assert!(unknown.contains("no cloud provider")); - let unsupported = list_configured_models("not-found") + // PR #2959 reverted the list_models 404 suppression: a 404 from the + // /models endpoint no longer returns a synthetic `{models: [], unsupported: + // true}` success — it surfaces as a real error so the failure fires to + // Sentry and gets a root-cause fix (e.g. a wrong base URL). + let not_found_err = list_configured_models("not-found") .await - .expect("404 models unsupported"); - assert_eq!(unsupported.value["models"].as_array().unwrap().len(), 0); - assert_eq!(unsupported.value["unsupported"], true); + .expect_err("404 list_models now surfaces as an error"); + assert!( + not_found_err.contains("provider returned 404"), + "404 list_models error should surface the status: {not_found_err:?}" + ); let missing_data = list_configured_models("missing-data") .await diff --git a/tests/inference_provider_raw_coverage_e2e.rs b/tests/inference_provider_raw_coverage_e2e.rs index bd64f8b630..511a8d7f16 100644 --- a/tests/inference_provider_raw_coverage_e2e.rs +++ b/tests/inference_provider_raw_coverage_e2e.rs @@ -310,11 +310,16 @@ async fn provider_factory_and_model_listing_cover_cloud_local_and_invalid_shapes .value; assert_eq!(local_listed["models"][0]["id"], "demo-chat"); - let unsupported = list_configured_models("missing") + // PR #2959 reverted the list_models 404 suppression: a 404 from /models + // now surfaces as a real error instead of a synthetic `unsupported: true` + // success, so the failure fires to Sentry for a root-cause fix. + let missing_err = list_configured_models("missing") .await - .expect("404 unsupported") - .value; - assert_eq!(unsupported["unsupported"], true); + .expect_err("404 list_models now surfaces as an error"); + assert!( + missing_err.contains("provider returned 404"), + "404 list_models error should surface the status: {missing_err:?}" + ); let openrouter = list_configured_models("openrouter") .await diff --git a/tests/worker_b_raw_coverage_e2e.rs b/tests/worker_b_raw_coverage_e2e.rs index ed7bc226ba..10a471a85c 100644 --- a/tests/worker_b_raw_coverage_e2e.rs +++ b/tests/worker_b_raw_coverage_e2e.rs @@ -393,18 +393,19 @@ async fn inference_provider_success_paths_use_mock_models_and_chat() { "mock model should round-trip through provider /models: {models}" ); - let unsupported = rpc( + // PR #2959 reverted the list_models 404 suppression: a 404 from /models + // now surfaces as a JSON-RPC error instead of a synthetic `unsupported: + // true` success, so the failure fires to Sentry for a root-cause fix. + let models_404 = rpc( &harness.rpc_base, 102, "openhuman.inference_list_models", json!({ "provider_id": "mock-404" }), ) .await; - assert_eq!( - payload(&unsupported, "inference_list_models 404") - .get("unsupported") - .and_then(Value::as_bool), - Some(true) + assert!( + error_message(&models_404, "inference_list_models 404").contains("provider returned 404"), + "404 list_models should surface as an error: {models_404}" ); let reply = rpc( From d0bcedf1f67af790ea061ae6db2757068aff2538 Mon Sep 17 00:00:00 2001 From: Steven Enamakel Date: Mon, 1 Jun 2026 17:44:25 -0400 Subject: [PATCH 7/7] fix(observability): add missing #[test] attribute on classifies_whatsapp_data_sqlite_busy_errors The test function was missing its #[test] attribute, making it dead code that would never run. Introduced when the revert inlined tests from the external observability_tests.rs file. --- src/core/observability.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/observability.rs b/src/core/observability.rs index 51193be510..fb1c093ec8 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -2477,6 +2477,7 @@ mod tests { assert_eq!(expected_error_kind("cron job timed out after 30s"), None,); } + #[test] fn classifies_whatsapp_data_sqlite_busy_errors() { for raw in [ r#"[whatsapp_data] ingest failed: upsert wa_message chat=120363402402350155@g.us msg=false_120363402402350155@g.us_3A357F28AE74548B1507_207897942335683@lid: database is locked: Error code 5: The database file is locked"#,