From 02f7f986cf2fcd5d4996fa60d5b3d436ff8781ab Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:08:43 +0530 Subject: [PATCH 1/9] feat(observability): match socket "HTTP error: NNN" wire shape (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends `is_transient_upstream_http_message` to recognize tungstenite's `WsError::Http` Display format ("HTTP error: <status>"), which the socket reconnect loop wraps as `format!("WebSocket connect: {e}")` and routes through `report_error_or_expected` after `FAIL_ESCALATE_THRESHOLD` escalations. Without this matcher, OPENHUMAN-TAURI-5P (~110ev) and -EZ (~51ev) — backend LB returning 502/504 during the WebSocket upgrade — slip past the existing classifier (which only knew the provider-layer "api error (NNN" prefix) and fire one Sentry event per affected client. Three separator variants cover the observed shapes: trailing space, trailing newline, and trailing colon. The separator is required so a status cannot prefix-match a longer number such as "HTTP error: 5023" (port numbers / runbook IDs); as a side effect, bare `"HTTP error: NNN"` at end-of-string is not matched either. Co-Authored-By: Claude Opus 4.7 --- src/core/observability.rs | 95 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/src/core/observability.rs b/src/core/observability.rs index 704a2538d9..15ca2062e5 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -215,10 +215,33 @@ fn is_network_unreachable_message(lower: &str) -> bool { /// `"OpenHuman API error (504 Gateway Timeout): error code: 504"`. Pin the /// match to that exact `"api error ("` prefix so an unrelated message /// that merely mentions "504" (a log line, a doc URL) is not silenced. +/// +/// Also matches the second canonical wire shape: tungstenite's +/// `WsError::Http(response)` Display, which renders as `"HTTP error: <status>"` +/// (and which `socket::ws_loop::run_connection` wraps as +/// `"WebSocket connect: HTTP error: 502 Bad Gateway"`). 
Per +/// OPENHUMAN-TAURI-5P (~110 events) and -EZ (~51 events), backend +/// staging/production load balancers emit HTTP 502/504 during the WebSocket +/// upgrade handshake; tungstenite surfaces those as `WsError::Http` and the +/// socket reconnect loop already handles them via exponential backoff. Each +/// `FAIL_ESCALATE_THRESHOLD` escalation fires `report_error_or_expected` with +/// the formatted reason, which would land in Sentry as `domain=socket` +/// noise without this matcher (the existing `domain=integrations` +/// before_send filter scopes too narrowly). +/// +/// Three separator variants cover every observed shape: trailing space +/// (`"HTTP error: 502 Bad Gateway"`), trailing newline (`"HTTP error: 502\n…"` +/// from chained errors), and trailing colon (`"HTTP error: 502: …"`). A +/// separator after the status is mandatory so its three digits cannot +/// prefix-match a longer number such as `"HTTP error: 5023"` (port number, +/// runbook ID); as a side effect, bare `"HTTP error: 502"` at end-of-string is not matched. fn is_transient_upstream_http_message(lower: &str) -> bool { - TRANSIENT_PROVIDER_HTTP_STATUSES - .iter() - .any(|code| lower.contains(&format!("api error ({code}"))) + TRANSIENT_PROVIDER_HTTP_STATUSES.iter().any(|code| { + lower.contains(&format!("api error ({code}")) + || lower.contains(&format!("http error: {code} ")) + || lower.contains(&format!("http error: {code}\n")) + || lower.contains(&format!("http error: {code}:")) + }) } /// Detect non-2xx HTTP failures returned from the backend integrations / composio @@ -1065,6 +1088,72 @@ mod tests { ); } + #[test] + fn classifies_socket_transient_http_errors() { + // OPENHUMAN-TAURI-5P / -EZ: tungstenite's `WsError::Http(response)` + // surfaces during the WebSocket upgrade handshake when the backend + // load balancer returns 502 / 504. The socket reconnect loop wraps + // it as `format!("WebSocket connect: {e}")`, producing + // `"WebSocket connect: HTTP error: <status>"`. 
Each + // sustained-outage threshold escalation routes the formatted reason + // through `report_error_or_expected`, which must classify as + // transient so the per-client noise stops reaching Sentry. + for raw in [ + "WebSocket connect: HTTP error: 502 Bad Gateway", + "WebSocket connect: HTTP error: 503 Service Unavailable", + "WebSocket connect: HTTP error: 504 Gateway Timeout", + "[socket] Connection failed (sustained outage after 5 attempts): \ + WebSocket connect: HTTP error: 502 Bad Gateway", + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::TransientUpstreamHttp), + "should classify as transient upstream HTTP (socket shape): {raw}" + ); + } + + // Trailing-colon separator (chained error formatting). + // Note: avoid words like "connection refused" or "timeout" in the + // suffix — those would also match `is_network_unreachable_message` / + // `TRANSIENT_TRANSPORT_PHRASES` and the order in `expected_error_kind` + // would route through `NetworkUnreachable` first, defeating the + // assertion. Both classifications silence the event so production + // behavior is identical, but the test is anchored on the canonical + // socket shape so a future regression in `is_transient_upstream_http_message` + // surfaces here, not behind another classifier. + assert_eq!( + expected_error_kind("WebSocket connect: HTTP error: 502: upstream returned bad gateway"), + Some(ExpectedErrorKind::TransientUpstreamHttp) + ); + + // Trailing-newline separator (multi-line error chain). + assert_eq!( + expected_error_kind("WebSocket connect: HTTP error: 504\nupstream gateway"), + Some(ExpectedErrorKind::TransientUpstreamHttp) + ); + } + + #[test] + fn does_not_classify_unrelated_http_error_text_as_transient_socket() { + // Bare numeric "HTTP error: 5023" (port number, runbook ID) without + // a separator must NOT silence — pin the matcher to space/newline/colon. 
+ assert_eq!(expected_error_kind("HTTP error: 5023"), None); + // Non-transient HTTP statuses must not match — `WsError::Http` for + // a 401 / 403 / 404 is genuinely actionable (auth / routing bug). + for raw in [ + "WebSocket connect: HTTP error: 401 Unauthorized", + "WebSocket connect: HTTP error: 403 Forbidden", + "WebSocket connect: HTTP error: 404 Not Found", + "WebSocket connect: HTTP error: 500 Internal Server Error", + ] { + assert_eq!( + expected_error_kind(raw), + None, + "must NOT silence actionable socket HTTP error: {raw}" + ); + } + } + #[test] fn does_not_classify_actionable_provider_errors_as_transient_upstream() { // 4xx (other than 408/429) and non-transient 5xx must continue to From 7cf858ea947c0132d4438cab8c3ac4c527cbe6b5 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:10:17 +0530 Subject: [PATCH 2/9] feat(observability): route composio domain through transient-integrations filter (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends `is_transient_integrations_failure` to also match `domain="composio"`. Composio routes through the same `IntegrationClient` HTTP wrapper as the generic integrations layer, so the transient failure shape (timeouts, gateway 5xx) is identical — but op-level reporters in `crate::openhuman::composio::ops` re-emit those errors under their own domain tag, which the integrations-scoped filter then ignored. Closes the gap behind OPENHUMAN-TAURI-35 (~139 events) and -2H (~26 events): `[composio] list_connections failed: Backend returned 502 …` events that landed in Sentry under `domain=composio` and slipped past every existing classifier. Updates the existing `integrations_filter_keeps_non_transient_failures` assertion (which previously verified composio-tagged events were NOT silenced — the deliberate inverse, now flipped) and adds a dedicated `composio_domain_routes_through_integrations_filter` test covering both non_2xx + transport shapes. 
Co-Authored-By: Claude Opus 4.7 --- src/core/observability.rs | 76 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/src/core/observability.rs b/src/core/observability.rs index 15ca2062e5..7604150149 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -812,8 +812,20 @@ pub fn is_transient_backend_api_failure(event: &sentry::protocol::Event<'_>) -> /// Transient integrations / Composio failures (timeout, connection reset, /// gateway hiccups). +/// +/// Accepts both `domain="integrations"` (the shared +/// [`crate::openhuman::integrations::IntegrationClient`] HTTP wrapper that +/// fronts every backend-proxied integration) and `domain="composio"` (errors +/// reported from the Composio op layer in +/// [`crate::openhuman::composio::ops`]). Composio routes through the same +/// `IntegrationClient`, so the failure shape is identical — but op-level +/// reporters that wrap and re-emit those errors with their own domain tag +/// would otherwise escape the integrations-scoped filter (OPENHUMAN-TAURI-35 +/// ~139ev, -2H ~26ev: `[composio] list_connections failed: Backend returned +/// 502 …` events that landed in Sentry under `domain=composio`). pub fn is_transient_integrations_failure(event: &sentry::protocol::Event<'_>) -> bool { is_transient_domain_failure(event, "integrations") + || is_transient_domain_failure(event, "composio") } /// Transient updater failures from GitHub release probes/downloads. @@ -1826,14 +1838,19 @@ mod tests { ); } - let wrong_domain = event_with_tags(&[ - ("domain", "composio"), + // Sibling-domain check: composio op-layer events MUST be silenced + // by the integrations filter — composio shares `IntegrationClient`, so + // the failure shape is identical, but op-level reporters re-emitting + // under their own domain tag would otherwise escape (OPENHUMAN-TAURI-35 + // / -2H). The negative case below therefore uses an unrelated domain. 
+ let scheduler_domain = event_with_tags(&[ + ("domain", "scheduler"), ("failure", "non_2xx"), ("status", "503"), ]); assert!( - !is_transient_integrations_failure(&wrong_domain), - "domain scoping must keep composio-tagged events visible" + !is_transient_integrations_failure(&scheduler_domain), + "domain scoping must keep unrelated transient-shaped events visible" ); let non_matching_transport = event_with_tags_and_message( @@ -1846,6 +1863,57 @@ mod tests { ); } + #[test] + fn composio_domain_routes_through_integrations_filter() { + // OPENHUMAN-TAURI-35 (~139 events) / -2H (~26 events): + // `[composio] list_connections failed: Backend returned 502 …` — + // composio op-layer wrappers (e.g. `composio_list_connections`) emit + // errors under `domain="composio"` so the original + // `domain="integrations"` filter let them through. Routing the + // composio domain through the same transient classifier closes + // that gap; the underlying transport / non_2xx semantics are + // identical because both layers share the same `IntegrationClient`. + for status in TRANSIENT_HTTP_STATUSES { + let event = event_with_tags(&[ + ("domain", "composio"), + ("failure", "non_2xx"), + ("status", status), + ]); + assert!( + is_transient_integrations_failure(&event), + "composio status {status} must be classified as transient" + ); + } + + // Transport-phrase variant — composio also surfaces reqwest + // transport failures (timeouts, connection resets) once the op + // wrapper has tagged the event with `failure=transport`. 
+ for phrase in TRANSIENT_TRANSPORT_PHRASES { + let event = event_with_tags_and_message( + &[("domain", "composio"), ("failure", "transport")], + &format!("[composio] execute failed: {phrase}"), + ); + assert!( + is_transient_integrations_failure(&event), + "composio transport phrase {phrase} must be classified as transient" + ); + } + + // Non-transient composio statuses (404 / 500) must still surface — + // actionable bugs even when reported under the composio domain. + for status in ["404", "500"] { + let event = event_with_tags(&[ + ("domain", "composio"), + ("failure", "non_2xx"), + ("status", status), + ]); + assert!( + !is_transient_integrations_failure(&event), + "composio status {status} must stay visible" + ); + } + } + #[test] fn updater_transient_403_is_dropped() { let event = event_with_tags_and_message( From a8cedf60f0240defc4c307810d6a8a48461768e5 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:19:06 +0530 Subject: [PATCH 3/9] feat(composio): wrap ops errors through observability classifier before RPC return (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a defense-in-depth `report_composio_op_error` helper that re-routes every composio op-layer failure through `report_error_or_expected` under `domain="composio"` BEFORE the error is formatted to the RPC `Err(String)` return value. The shared `IntegrationClient` (which fronts most composio HTTP calls) already reports its own failures under `domain="integrations"` and Commit 2 of this series extended `is_transient_integrations_failure` to cover both domain values — so the additional composio-domain event is caught by the same `before_send` filter without duplicating Sentry volume. Why bother re-emitting under composio when integrations already does: 1. 
Catches future call sites that bypass `IntegrationClient` (the existing `raw_delete` path in `composio/client.rs:436` uses `domain="composio"` directly; any new bespoke HTTP added under `composio/` will follow the same convention). 2. Op-layer-specific failures — provider sync errors, profile resolution errors, history archive errors — get tagged consistently rather than reaching Sentry as bare `Err(String)` returned via RPC with no domain attribution. Tag picking: `classify_composio_failure_tag` inspects the rendered chain. Transport phrases (`operation timed out`, `connection reset`, `tls handshake eof`, "error sending request" anchor) → `transport`; everything else (the dominant `Backend returned …` shape) → `non_2xx`. Audit of `format!("[composio] ... failed:")` sites covered: composio_list_toolkits, composio_list_connections, composio_authorize, composio_delete_connection, composio_list_tools, composio_execute, composio_list_github_repos, composio_create_trigger, composio_list_available_triggers, composio_list_triggers, composio_enable_trigger, composio_disable_trigger, resolve_toolkit_for_connection, composio_get_user_profile, composio_refresh_all_identities, composio_sync. Tests: - `composio_failure_tag_is_*` x4 — pin tag-routing per error shape. - `composio_domain_502_is_dropped_by_before_send` — cross-module contract guard against accidental revert of the integrations-filter domain widening. - `composio_transport_timeout_is_dropped_by_before_send` — same for the transport-phrase branch. 
Co-Authored-By: Claude Opus 4.7 --- src/openhuman/composio/ops.rs | 162 +++++++++++++++++++++------- src/openhuman/composio/ops_tests.rs | 125 +++++++++++++++++++++ 2 files changed, 248 insertions(+), 39 deletions(-) diff --git a/src/openhuman/composio/ops.rs b/src/openhuman/composio/ops.rs index 18c045abe8..9ab414c94a 100644 --- a/src/openhuman/composio/ops.rs +++ b/src/openhuman/composio/ops.rs @@ -48,6 +48,68 @@ fn resolve_client(config: &Config) -> OpResult { }) } +/// Defense-in-depth Sentry funnel for composio op-layer errors. +/// +/// The shared [`crate::openhuman::integrations::IntegrationClient`] +/// (which fronts every `client.list_*` / `client.execute_tool` / +/// `client.authorize` call) already reports its own failures under +/// `domain="integrations"` with `failure="non_2xx" | "transport"` tags, +/// and the Sentry `before_send` filter (`is_transient_integrations_failure`) +/// drops the transient subset. This helper re-classifies the same +/// anyhow chain at the **op layer** under `domain="composio"` so: +/// +/// 1. Future call sites that bypass `IntegrationClient` (the existing +/// `raw_delete` path, or any new bespoke HTTP client added under +/// `composio/`) still funnel through the same classifier. +/// 2. Op-layer-specific failures — provider sync errors, history archive +/// errors, profile-resolution errors — get tagged consistently rather +/// than reaching Sentry as bare `Err(String)` returned via RPC. +/// +/// The classifier (`expected_error_kind`) is purely message-substring +/// based — `Backend returned 502 …`, `error sending request for url …`, +/// `operation timed out` etc. all resolve to a warn/info breadcrumb +/// without a Sentry event. Genuine bugs (404s, 500s with bug-shape +/// payloads, envelope errors) still surface. +/// +/// `failure="non_2xx"` is the default tag because that is the dominant +/// shape in the leak set (OPENHUMAN-TAURI-35 / -2H: backend 502 from +/// `Backend returned …`). 
When the message contains a recognized +/// transport phrase (`operation timed out`, `connection refused`, `tls +/// handshake eof`, …), we tag `failure="transport"` instead so the +/// `before_send` filter's transport-phrase branch fires — and keep the +/// status tag absent (transport failures don't carry a status). +fn report_composio_op_error(operation: &str, err: &E) { + // `{err:#}` renders the full anyhow chain when applicable; for plain + // `String` / `&str` errors it falls back to the Display impl. + let rendered = format!("{err:#}"); + let failure_tag = classify_composio_failure_tag(rendered.as_str()); + crate::core::observability::report_error_or_expected( + rendered.as_str(), + "composio", + operation, + &[("failure", failure_tag)], + ); +} + +/// Pick the `failure` tag for a composio op-layer error message based on +/// shape inspection. Transport-level reqwest chains (timeout, connection +/// reset, TLS handshake EOF, "error sending request for url") tag as +/// `"transport"` so the `before_send` filter's transport-phrase branch +/// fires; everything else (the dominant `Backend returned …` +/// shape from the integrations layer) tags as `"non_2xx"`. +/// +/// Extracted so tests can pin the routing without a Sentry test client. 
+fn classify_composio_failure_tag(rendered: &str) -> &'static str { + let lower = rendered.to_ascii_lowercase(); + let is_transport = crate::core::observability::contains_transient_transport_phrase(rendered) + || lower.contains("error sending request"); + if is_transport { + "transport" + } else { + "non_2xx" + } +} + // ── Toolkits ──────────────────────────────────────────────────────── pub async fn composio_list_toolkits( @@ -55,10 +117,10 @@ pub async fn composio_list_toolkits( ) -> OpResult> { tracing::debug!("[composio] rpc list_toolkits"); let client = resolve_client(config)?; - let resp = client - .list_toolkits() - .await - .map_err(|e| format!("[composio] list_toolkits failed: {e:#}"))?; + let resp = client.list_toolkits().await.map_err(|e| { + report_composio_op_error("list_toolkits", &e); + format!("[composio] list_toolkits failed: {e:#}") + })?; let count = resp.toolkits.len(); Ok(RpcOutcome::new( resp, @@ -73,10 +135,10 @@ pub async fn composio_list_connections( ) -> OpResult> { tracing::debug!("[composio] rpc list_connections"); let client = resolve_client(config)?; - let resp = client - .list_connections() - .await - .map_err(|e| format!("[composio] list_connections failed: {e:#}"))?; + let resp = client.list_connections().await.map_err(|e| { + report_composio_op_error("list_connections", &e); + format!("[composio] list_connections failed: {e:#}") + })?; let active = resp.connections.iter().filter(|c| c.is_active()).count(); let total = resp.connections.len(); // Reconcile the chat-runtime integrations cache against this fresh @@ -101,10 +163,10 @@ pub async fn composio_authorize( ) -> OpResult> { tracing::debug!(toolkit = %toolkit, has_extra_params = extra_params.is_some(), "[composio] rpc authorize"); let client = resolve_client(config)?; - let resp = client - .authorize(toolkit, extra_params) - .await - .map_err(|e| format!("[composio] authorize failed: {e:#}"))?; + let resp = client.authorize(toolkit, extra_params).await.map_err(|e| { + 
report_composio_op_error("authorize", &e); + format!("[composio] authorize failed: {e:#}") + })?; // Publish an event so any interested subscribers (e.g. UI refreshers, // analytics) can react to the new connection handoff. @@ -134,7 +196,10 @@ pub async fn composio_delete_connection( let resp = client .delete_connection(connection_id) .await - .map_err(|e| format!("[composio] delete_connection failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("delete_connection", &e); + format!("[composio] delete_connection failed: {e:#}") + })?; if let Some(toolkit) = toolkit.as_deref() { let deleted = super::providers::profile::delete_connected_identity_facets(toolkit, connection_id); @@ -212,7 +277,10 @@ pub async fn composio_list_tools( let resp = client .list_tools(toolkits.as_deref()) .await - .map_err(|e| format!("[composio] list_tools failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("list_tools", &e); + format!("[composio] list_tools failed: {e:#}") + })?; let count = resp.tools.len(); Ok(RpcOutcome::new( resp, @@ -265,6 +333,7 @@ pub async fn composio_execute( elapsed_ms, }, ); + report_composio_op_error("execute", &e); Err(format!("[composio] execute failed: {e:#}")) } } @@ -281,7 +350,10 @@ pub async fn composio_list_github_repos( let resp = client .list_github_repos(connection_id.as_deref()) .await - .map_err(|e| format!("[composio] list_github_repos failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("list_github_repos", &e); + format!("[composio] list_github_repos failed: {e:#}") + })?; let count = resp.repositories.len(); let connection_id = resp.connection_id.clone(); Ok(RpcOutcome::new( @@ -303,7 +375,10 @@ pub async fn composio_create_trigger( let resp = client .create_trigger(slug, connection_id.as_deref(), trigger_config) .await - .map_err(|e| format!("[composio] create_trigger failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("create_trigger", &e); + format!("[composio] create_trigger failed: {e:#}") + })?; 
let trigger_id = resp.trigger_id.clone(); Ok(RpcOutcome::new( resp, @@ -325,7 +400,10 @@ pub async fn composio_list_available_triggers( let resp = client .list_available_triggers(toolkit, connection_id.as_deref()) .await - .map_err(|e| format!("[composio] list_available_triggers failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("list_available_triggers", &e); + format!("[composio] list_available_triggers failed: {e:#}") + })?; let count = resp.triggers.len(); Ok(RpcOutcome::new( resp, @@ -344,7 +422,10 @@ pub async fn composio_list_triggers( let resp = client .list_active_triggers(toolkit.as_deref()) .await - .map_err(|e| format!("[composio] list_triggers failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("list_triggers", &e); + format!("[composio] list_triggers failed: {e:#}") + })?; let count = resp.triggers.len(); Ok(RpcOutcome::new( resp, @@ -363,7 +444,10 @@ pub async fn composio_enable_trigger( let resp = client .enable_trigger(connection_id, slug, trigger_config) .await - .map_err(|e| format!("[composio] enable_trigger failed: {e:#}"))?; + .map_err(|e| { + report_composio_op_error("enable_trigger", &e); + format!("[composio] enable_trigger failed: {e:#}") + })?; let trigger_id = resp.trigger_id.clone(); Ok(RpcOutcome::new( resp, @@ -377,10 +461,10 @@ pub async fn composio_disable_trigger( ) -> OpResult> { tracing::debug!(trigger_id = %trigger_id, "[composio] rpc disable_trigger"); let client = resolve_client(config)?; - let resp = client - .disable_trigger(trigger_id) - .await - .map_err(|e| format!("[composio] disable_trigger failed: {e:#}"))?; + let resp = client.disable_trigger(trigger_id).await.map_err(|e| { + report_composio_op_error("disable_trigger", &e); + format!("[composio] disable_trigger failed: {e:#}") + })?; let message = if resp.deleted { format!("composio: disabled trigger {trigger_id}") } else { @@ -445,10 +529,10 @@ async fn resolve_toolkit_for_connection( connection_id: &str, ) -> OpResult { 
tracing::debug!(connection_id = %connection_id, "[composio] resolve_toolkit_for_connection"); - let resp = client - .list_connections() - .await - .map_err(|e| format!("[composio] list_connections failed: {e:#}"))?; + let resp = client.list_connections().await.map_err(|e| { + report_composio_op_error("resolve_toolkit_for_connection", &e); + format!("[composio] list_connections failed: {e:#}") + })?; let conn = resp .connections .into_iter() @@ -479,10 +563,10 @@ pub async fn composio_get_user_profile( connection_id: Some(connection_id.to_string()), }; - let profile = provider - .fetch_user_profile(&ctx) - .await - .map_err(|e| format!("[composio] get_user_profile({toolkit}) failed: {e}"))?; + let profile = provider.fetch_user_profile(&ctx).await.map_err(|e| { + report_composio_op_error("get_user_profile", &e); + format!("[composio] get_user_profile({toolkit}) failed: {e}") + })?; // Side-effect: persist profile fields into the local user_profile // facet table so any RPC call also refreshes the local store. 
@@ -515,10 +599,10 @@ pub async fn composio_refresh_all_identities( ) -> OpResult> { tracing::info!("[composio] rpc refresh_all_identities"); let client = resolve_client(config)?; - let conns = client - .list_connections() - .await - .map_err(|e| format!("[composio] list_connections failed: {e:#}"))?; + let conns = client.list_connections().await.map_err(|e| { + report_composio_op_error("refresh_all_identities", &e); + format!("[composio] list_connections failed: {e:#}") + })?; let mut report = RefreshIdentitiesReport::default(); let mut messages: Vec = Vec::with_capacity(conns.connections.len() + 1); @@ -634,10 +718,10 @@ pub async fn composio_sync( connection_id: Some(connection_id.to_string()), }; - let outcome = provider - .sync(&ctx, reason) - .await - .map_err(|e| format!("[composio] sync({toolkit}) failed: {e}"))?; + let outcome = provider.sync(&ctx, reason).await.map_err(|e| { + report_composio_op_error("sync", &e); + format!("[composio] sync({toolkit}) failed: {e}") + })?; let summary = outcome.summary.clone(); Ok(RpcOutcome::new(outcome, vec![summary])) diff --git a/src/openhuman/composio/ops_tests.rs b/src/openhuman/composio/ops_tests.rs index 5d8f4c83af..42d0493a2a 100644 --- a/src/openhuman/composio/ops_tests.rs +++ b/src/openhuman/composio/ops_tests.rs @@ -931,3 +931,128 @@ async fn composio_disable_trigger_propagates_backend_error() { .unwrap_err(); assert!(err.contains("disable_trigger failed"), "unexpected: {err}"); } + +// ── classify_composio_failure_tag ────────────────────────────── +// +// Pin the failure-tag routing for `report_composio_op_error` so the +// `before_send` filter (`is_transient_integrations_failure` extended to +// `domain="composio"` in the same #1608 patch series) matches. 
The tag +// drives which branch of the filter fires: +// - `failure="non_2xx"` + transient `status` (set by the integrations +// wrapper) → dropped +// - `failure="transport"` + transient transport phrase in the message +// → dropped +// Any drift between the helper's classification and the filter's +// expectations would silently re-open the leak path. + +#[test] +fn composio_failure_tag_is_non_2xx_for_backend_returned_502() { + // OPENHUMAN-TAURI-35 / -2H wire shape — the dominant leak. The + // integrations layer renders this on a 5xx response; composio's op + // layer wraps the chain and re-reports under `domain=composio`. The + // tag MUST be `non_2xx` so the existing transient-status filter + // branch matches. + let rendered = "Backend returned 502 Bad Gateway for POST \ + https://api.tinyhumans.ai/agent-integrations/composio/connections: \ + upstream temporarily unavailable"; + assert_eq!(classify_composio_failure_tag(rendered), "non_2xx"); +} + +#[test] +fn composio_failure_tag_is_non_2xx_for_envelope_error() { + // Envelope errors don't carry a transport phrase or "error sending + // request" anchor; default to non_2xx. + let rendered = "Backend error for POST https://api.tinyhumans.ai/x: \ + unknown backend error"; + assert_eq!(classify_composio_failure_tag(rendered), "non_2xx"); +} + +#[test] +fn composio_failure_tag_is_transport_for_operation_timed_out() { + // OPENHUMAN-TAURI-18 / -G shape — `composio/execute` reqwest chain + // surfaces `operation timed out` (one of `TRANSIENT_TRANSPORT_PHRASES`). + // Tag MUST be `transport` so the filter's transport-phrase branch fires + // even though the report carries no `status`. 
+ let rendered = "POST https://api.tinyhumans.ai/agent-integrations/composio/execute \ + failed: error sending request for url \ + (https://api.tinyhumans.ai/agent-integrations/composio/execute) → \ + client error (SendRequest) → connection error → \ + Operation timed out (os error 60)"; + assert_eq!(classify_composio_failure_tag(rendered), "transport"); +} + +#[test] +fn composio_failure_tag_is_transport_for_dns_and_tls_phrases() { + for raw in [ + "POST /v1/foo failed: error sending request for url (https://api.example.com/x)", + "GET /agent-integrations/composio/connections failed: tls handshake eof", + "POST /agent-integrations/composio/triggers failed: connection reset by peer", + "GET /agent-integrations/composio/toolkits failed: connection forcibly closed (os 10054)", + ] { + assert_eq!( + classify_composio_failure_tag(raw), + "transport", + "should classify as transport: {raw}" + ); + } +} + +#[test] +fn composio_failure_tag_does_not_misclassify_unrelated_messages() { + // A bare error string with no transport / "error sending request" + // anchor must default to non_2xx — the safe choice for the dominant + // leak shape. + for raw in [ + "[composio] no connection with id 'abc'", + "[composio] no native provider registered for toolkit 'foo'", + "fetch_user_profile failed: invalid JSON in profile facet", + ] { + assert_eq!( + classify_composio_failure_tag(raw), + "non_2xx", + "should default to non_2xx: {raw}" + ); + } +} + +// ── before_send filter integration ───────────────────────────── +// +// Belt-and-suspenders: re-assert the cross-module contract from the +// composio side. If `is_transient_integrations_failure` ever stops +// matching `domain="composio"` (e.g. accidental revert), the +// `report_composio_op_error` events flood Sentry again with no test in +// the composio crate to catch it. These guards make the link explicit. 
+ +#[test] +fn composio_domain_502_is_dropped_by_before_send() { + let mut event = sentry::protocol::Event::default(); + let mut tags: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + tags.insert("domain".into(), "composio".into()); + tags.insert("failure".into(), "non_2xx".into()); + tags.insert("status".into(), "502".into()); + event.tags = tags; + assert!( + crate::core::observability::is_transient_integrations_failure(&event), + "composio non_2xx 502 must be dropped by integrations filter (#1608)" + ); +} + +#[test] +fn composio_transport_timeout_is_dropped_by_before_send() { + let mut event = sentry::protocol::Event::default(); + let mut tags: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + tags.insert("domain".into(), "composio".into()); + tags.insert("failure".into(), "transport".into()); + event.tags = tags; + event.message = Some( + "POST /agent-integrations/composio/execute failed: error sending request → \ + operation timed out" + .to_string(), + ); + assert!( + crate::core::observability::is_transient_integrations_failure(&event), + "composio transport timeout must be dropped by integrations filter (#1608)" + ); +} From 79fa788317308b3f44a513a85c2e2ee3195758ab Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:24:15 +0530 Subject: [PATCH 4/9] feat(channels): route dispatch llm-error re-emit through observability classifier (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switches `channels::runtime::dispatch`'s LLM-error re-emit at the chat-task funnel from raw `report_error` to `report_error_or_expected`. 
The dispatch layer was the actual leak source for OPENHUMAN-TAURI-4F (~157 events) / -1C (~87 events) / -8F (~39 events): the reliable provider layer retried 5xx, the agent re-raised, `agent.run_single` correctly demoted via the classifier — and then channels.dispatch called raw `report_error(&e, "channels", "dispatch_llm_error", …)` which fires Sentry unconditionally regardless of message content, re-creating the per-attempt event we had just suppressed. Routing through `report_error_or_expected` lets `is_transient_upstream_http_message` match the canonical `"OpenHuman API error (NNN ...)"` substring still anchored in the chain after agent + harness wrapping, demoting it to a warn breadcrumb. Genuine bugs (404 / 500 / unrelated agent failures) still surface because the classifier only matches the documented transient shapes. Mirrors the `is_max_iterations_error` short-circuit added in #1601 — same site, same file, same reasoning (don't re-emit a deterministic outcome that has already been classified upstream). Adds `channels_dispatch_re_emit_of_provider_502_classifies_as_transient` in observability tests covering three real-world wrapping shapes (bare provider error, agent.provider_chat prefix, and all-providers-exhausted prefix) so a future regression in the classifier or in the chain-rendering surfaces here. 
Co-Authored-By: Claude Opus 4.7 --- src/core/observability.rs | 32 ++++++++++++++++++++++ src/openhuman/channels/runtime/dispatch.rs | 19 ++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/core/observability.rs b/src/core/observability.rs index 7604150149..18cf709506 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -1100,6 +1100,38 @@ mod tests { ); } + #[test] + fn channels_dispatch_re_emit_of_provider_502_classifies_as_transient() { + // OPENHUMAN-TAURI-4F (~157 events) / -1C (~87 events) / -8F + // (~39 events): the reliable provider layer retried 5xx, the + // agent re-raised the error, and `channels::runtime::dispatch` + // re-emitted it under `domain="channels", operation="dispatch_llm_error"` + // via raw `report_error` (which skips classification). Switching + // that site to `report_error_or_expected` routes the chain + // through this classifier — but only works if the canonical + // `"OpenHuman API error (NNN ...)"` substring still anchors the + // match through the channels-layer wrapping. + // + // The wrapping shape at the dispatch site is the agent error + // chain rendered via `format!("{e:#}")`. For a backend 502 from + // `providers::ops::api_error`, that resolves to: + // "OpenHuman API error (502 Bad Gateway): error code: 502" + // possibly prepended with a runner / iteration prefix. Both + // shapes must classify as transient so the dispatch re-emit + // gets demoted. 
+ for raw in [ + "OpenHuman API error (502 Bad Gateway): error code: 502", + "agent.provider_chat failed: OpenHuman API error (503 Service Unavailable): retry budget exhausted", + "all providers exhausted: OpenHuman API error (504 Gateway Timeout): error code: 504", + ] { + assert_eq!( + expected_error_kind(raw), + Some(ExpectedErrorKind::TransientUpstreamHttp), + "channels.dispatch re-emit of {raw:?} must classify as transient" + ); + } + } + #[test] fn classifies_socket_transient_http_errors() { // OPENHUMAN-TAURI-5P / -EZ: tungstenite's `WsError::Http(response)` diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 8d6a663d8b..08346515b8 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -1190,7 +1190,24 @@ pub(crate) async fn process_channel_message( e ); } else { - crate::core::observability::report_error( + // Route through `report_error_or_expected` so + // transient-upstream provider HTTP errors that bubbled + // up via `agent.run_single` (`OpenHuman API error + // (502 Bad Gateway): …`) get demoted via + // `is_transient_upstream_http_message` — the agent + // re-emit at the dispatch layer was previously + // unconditionally calling `report_error`, which firehoses + // Sentry under `domain="channels"` even though the same + // chain was already classified at the provider + agent + // layers (OPENHUMAN-TAURI-4F ~157ev / -1C ~87ev / -8F + // ~39ev: provider 5xx that the reliable layer retried + // and exhausted, then the channels layer re-reported as + // a fresh per-attempt event). Genuine bugs (404 / 500 + // / unrelated agent failures) still surface — the + // classifier only demotes the canonical transient + // shapes documented in + // `crate::core::observability::expected_error_kind`. 
+ crate::core::observability::report_error_or_expected( &e, "channels", "dispatch_llm_error", From 744217940bb4452fdbb4f0f5fe7b07222cf76e44 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:25:52 +0530 Subject: [PATCH 5/9] test(observability): regression tests for composio/execute timeout shape (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pins the exact OPENHUMAN-TAURI-18 / -G wire shape produced by `crate::openhuman::integrations::client::IntegrationClient::post` through the `report_error_or_expected("integrations", "post", &[("failure", "transport")])` funnel: "error sending request for url (https://api.tinyhumans.ai/agent-integrations/composio/execute) → \ client error (SendRequest) → connection error → \ Operation timed out (os error 60)" Asserts both classification paths: 1. `is_network_unreachable_message` matches the `"error sending request for url"` URL anchor — primary suppression path. 2. `is_transient_message_failure` matches the `"operation timed out"` transport phrase — defense-in-depth for sites that lose the URL anchor (e.g. a chain-flatten helper that strips it for PII safety). Verification of `before_send` registration in `src/main.rs:48-85`: - `is_transient_provider_http_failure` ✓ - `is_budget_event` ✓ - `is_max_iterations_event` ✓ - `is_transient_backend_api_failure` ✓ - `is_transient_integrations_failure` (extended in commit 2 of #1608 to cover `domain="composio"`) ✓ - `is_updater_transient_event` ✓ No wiring bug found — the integrations layer already classifies and demotes the canonical TAURI-18 chain. This regression test pins the shape so a future refactor cannot silently re-open the leak. 
Co-Authored-By: Claude Opus 4.7 --- src/core/observability.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/core/observability.rs b/src/core/observability.rs index 18cf709506..d6ab840aee 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -1100,6 +1100,44 @@ mod tests { ); } + #[test] + fn integrations_post_composio_timeout_dropped() { + // OPENHUMAN-TAURI-18 / -G regression guard. The integrations + // client at `crate::openhuman::integrations::client::IntegrationClient::post` + // builds the reqwest error chain and routes it through + // `report_error_or_expected(.., "integrations", "post", &[("failure", + // "transport")])`. The chain text contains the + // `"error sending request for url"` anchor so + // `is_network_unreachable_message` matches first and demotes to + // `NetworkUnreachable` (functionally equivalent to + // `TransientUpstreamHttp` for Sentry suppression — both routes + // skip the report path via `report_expected_message`). + // + // Pinning this exact wire shape catches a future refactor that + // drops the URL anchor (e.g. a chain-flatten helper that strips + // it for "PII safety"), which would silently re-open the leak. + let chain = "error sending request for url \ + (https://api.tinyhumans.ai/agent-integrations/composio/execute) → \ + client error (SendRequest) → connection error → \ + Operation timed out (os error 60)"; + assert_eq!( + expected_error_kind(chain), + Some(ExpectedErrorKind::NetworkUnreachable), + "TAURI-18 chain shape must classify as NetworkUnreachable" + ); + + // If the URL anchor is ever dropped, the transport-phrase + // fallback (`operation timed out` from + // `TRANSIENT_TRANSPORT_PHRASES`) catches it via the message + // classifier helper used at upstream re-emit sites — confirm + // both paths so the regression surface is fully pinned. 
+ assert!( + is_transient_message_failure(chain), + "TAURI-18 chain must also satisfy upstream message classifier \ + (defense-in-depth for sites that lose the URL anchor)" + ); + } + #[test] fn channels_dispatch_re_emit_of_provider_502_classifies_as_transient() { // OPENHUMAN-TAURI-4F (~157 events) / -1C (~87 events) / -8F From 9d8e7ed5b6ee69313474c47dbbb0e062a80abb45 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 14:26:56 +0530 Subject: [PATCH 6/9] style: cargo fmt fixups for #1608 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure formatter pass — collapses two-line `let` declarations onto one line and reflows long arg-chains where rustfmt prefers a single call chain. Behavior unchanged. Co-Authored-By: Claude Opus 4.7 --- src/core/observability.rs | 4 +++- src/openhuman/composio/ops.rs | 22 ++++++++-------------- src/openhuman/composio/ops_tests.rs | 6 ++---- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/core/observability.rs b/src/core/observability.rs index d6ab840aee..68ad0054b4 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -1204,7 +1204,9 @@ mod tests { // socket shape so a future regression in `is_transient_upstream_http_message` // surfaces here, not behind another classifier. 
assert_eq!( - expected_error_kind("WebSocket connect: HTTP error: 502: upstream returned bad gateway"), + expected_error_kind( + "WebSocket connect: HTTP error: 502: upstream returned bad gateway" + ), Some(ExpectedErrorKind::TransientUpstreamHttp) ); diff --git a/src/openhuman/composio/ops.rs b/src/openhuman/composio/ops.rs index 9ab414c94a..50df2dbaf2 100644 --- a/src/openhuman/composio/ops.rs +++ b/src/openhuman/composio/ops.rs @@ -193,13 +193,10 @@ pub async fn composio_delete_connection( let toolkit = resolve_toolkit_for_connection(&client, connection_id) .await .ok(); - let resp = client - .delete_connection(connection_id) - .await - .map_err(|e| { - report_composio_op_error("delete_connection", &e); - format!("[composio] delete_connection failed: {e:#}") - })?; + let resp = client.delete_connection(connection_id).await.map_err(|e| { + report_composio_op_error("delete_connection", &e); + format!("[composio] delete_connection failed: {e:#}") + })?; if let Some(toolkit) = toolkit.as_deref() { let deleted = super::providers::profile::delete_connected_identity_facets(toolkit, connection_id); @@ -274,13 +271,10 @@ pub async fn composio_list_tools( ) -> OpResult> { tracing::debug!(?toolkits, "[composio] rpc list_tools"); let client = resolve_client(config)?; - let resp = client - .list_tools(toolkits.as_deref()) - .await - .map_err(|e| { - report_composio_op_error("list_tools", &e); - format!("[composio] list_tools failed: {e:#}") - })?; + let resp = client.list_tools(toolkits.as_deref()).await.map_err(|e| { + report_composio_op_error("list_tools", &e); + format!("[composio] list_tools failed: {e:#}") + })?; let count = resp.tools.len(); Ok(RpcOutcome::new( resp, diff --git a/src/openhuman/composio/ops_tests.rs b/src/openhuman/composio/ops_tests.rs index 42d0493a2a..51131e6540 100644 --- a/src/openhuman/composio/ops_tests.rs +++ b/src/openhuman/composio/ops_tests.rs @@ -1026,8 +1026,7 @@ fn composio_failure_tag_does_not_misclassify_unrelated_messages() { #[test] fn 
composio_domain_502_is_dropped_by_before_send() { let mut event = sentry::protocol::Event::default(); - let mut tags: std::collections::BTreeMap<String, String> = - std::collections::BTreeMap::new(); + let mut tags: std::collections::BTreeMap<String, String> = std::collections::BTreeMap::new(); tags.insert("domain".into(), "composio".into()); tags.insert("failure".into(), "non_2xx".into()); tags.insert("status".into(), "502".into()); @@ -1041,8 +1040,7 @@ fn composio_domain_502_is_dropped_by_before_send() { #[test] fn composio_transport_timeout_is_dropped_by_before_send() { let mut event = sentry::protocol::Event::default(); - let mut tags: std::collections::BTreeMap<String, String> = - std::collections::BTreeMap::new(); + let mut tags: std::collections::BTreeMap<String, String> = std::collections::BTreeMap::new(); tags.insert("domain".into(), "composio".into()); tags.insert("failure".into(), "transport".into()); event.tags = tags; From 6653127fe54f54aed2f85ddfcf510a58d99e8845 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 17:58:35 +0530 Subject: [PATCH 7/9] fix(composio): tag report_composio_op_error with backend HTTP status (#1608) Extract the numeric status from `Backend returned ...` renderings and emit it as a Sentry tag so the integrations before_send filter can drop the dominant 5xx leak shape without also dropping genuine 4xx bug-shape failures. Co-Authored-By: Claude Opus 4.7 --- src/openhuman/composio/ops.rs | 27 ++++++++++++++++++++ src/openhuman/composio/ops_tests.rs | 39 +++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/src/openhuman/composio/ops.rs b/src/openhuman/composio/ops.rs index 50df2dbaf2..dd520dd666 100644 --- a/src/openhuman/composio/ops.rs +++ b/src/openhuman/composio/ops.rs @@ -83,6 +83,17 @@ fn report_composio_op_error(operation: &str, err: // `String` / `&str` errors it falls back to the Display impl.
let rendered = format!("{err:#}"); let failure_tag = classify_composio_failure_tag(rendered.as_str()); + if failure_tag == "non_2xx" { + if let Some(status) = extract_backend_returned_status(&rendered) { + crate::core::observability::report_error_or_expected( + rendered.as_str(), + "composio", + operation, + &[("failure", failure_tag), ("status", status.as_str())], + ); + return; + } + } crate::core::observability::report_error_or_expected( rendered.as_str(), "composio", @@ -110,6 +121,22 @@ fn classify_composio_failure_tag(rendered: &str) -> &'static str { } } +/// Extract the HTTP status code from a `Backend returned ...` +/// rendering produced by the integrations layer. Returns `None` when no +/// numeric status follows the anchor phrase (e.g. envelope-only errors). +/// +/// Surfacing the status as a Sentry tag gives the `before_send` filter's +/// transient-status branch (`is_transient_integrations_failure`) a precise +/// signal to drop the dominant 5xx leak shape (OPENHUMAN-TAURI-35 / -2H) +/// without also dropping genuine 4xx bug-shape failures that share the +/// `failure="non_2xx"` tag. 
+fn extract_backend_returned_status(rendered: &str) -> Option<String> { + let lower = rendered.to_ascii_lowercase(); + let rest = lower.split_once("backend returned ")?.1; + let digits: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); + (!digits.is_empty()).then_some(digits) +} + // ── Toolkits ──────────────────────────────────────────────────────── pub async fn composio_list_toolkits( diff --git a/src/openhuman/composio/ops_tests.rs b/src/openhuman/composio/ops_tests.rs index 51131e6540..dfce0c5d21 100644 --- a/src/openhuman/composio/ops_tests.rs +++ b/src/openhuman/composio/ops_tests.rs @@ -1015,6 +1015,45 @@ fn composio_failure_tag_does_not_misclassify_unrelated_messages() { } } +// ── extract_backend_returned_status ─────────────────────────── +// +// Pin status extraction so the `report_composio_op_error` Sentry tag +// stays in lockstep with the `Backend returned ...` rendering +// the integrations layer produces. Without the digit anchor the +// `before_send` filter's transient-status branch can't distinguish a 502 +// from a 401, and the dominant leak shape (OPENHUMAN-TAURI-35 / -2H) +// re-opens. + +#[test] +fn extract_backend_returned_status_parses_three_digit_status() { + let rendered = "Backend returned 502 Bad Gateway for POST \ + https://api.tinyhumans.ai/agent-integrations/composio/connections: \ + upstream temporarily unavailable"; + assert_eq!( + extract_backend_returned_status(rendered), + Some("502".to_string()) + ); +} + +#[test] +fn extract_backend_returned_status_returns_none_when_no_status() { + // Envelope-style error with no HTTP status digits after the anchor. + let rendered = "Backend returned bad gateway (envelope-only error)"; + assert_eq!(extract_backend_returned_status(rendered), None); +} + +#[test] +fn extract_backend_returned_status_handles_mixed_case() { + // Some renders upper-case the prefix; the helper lowercases before + // matching so both shapes resolve to the same status string.
+ let rendered = "BACKEND RETURNED 429 Too Many Requests for GET \ + https://api.tinyhumans.ai/agent-integrations/composio/triggers"; + assert_eq!( + extract_backend_returned_status(rendered), + Some("429".to_string()) + ); +} + // ── before_send filter integration ───────────────────────────── // // Belt-and-suspenders: re-assert the cross-module contract from the From 4b33b4fbfc9fb0acda8fc6d80cf4f3621546f2f1 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 17:58:42 +0530 Subject: [PATCH 8/9] test(composio): tighten retry-hit assertion to discrete 2|4 (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the (2..=4) range bound with `matches!(hits, 2 | 4)` so an unintended 3-hit retry path can't slip through silently — only the two known layer models (single-layer = 2, compound outer × inner = 4) are accepted. Co-Authored-By: Claude Opus 4.7 --- src/openhuman/composio/auth_retry_tests.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/openhuman/composio/auth_retry_tests.rs b/src/openhuman/composio/auth_retry_tests.rs index 5b7f48357d..c4fd3d985a 100644 --- a/src/openhuman/composio/auth_retry_tests.rs +++ b/src/openhuman/composio/auth_retry_tests.rs @@ -250,10 +250,9 @@ async fn retries_once_only_even_when_second_call_still_errors() { // Once collapsed, tighten this to `assert_eq!(counter, 2)`. let hits = counter.load(Ordering::SeqCst); assert!( - (2..=4).contains(&hits), - "compound retry must be bounded: got {hits} gateway hits, expected 2-4 \ - (2 = single-layer, 4 = outer auth_retry.rs #1708 × inner execute_tool_with_post_oauth_retry #1707). \ - A count outside this range means an unintended retry loop." + matches!(hits, 2 | 4), + "compound retry must stay within known layer models: got {hits} gateway hits, \ + expected 2 (single-layer) or 4 (outer auth_retry.rs #1708 × inner execute_tool_with_post_oauth_retry #1707)." 
); } From b4f988324beb1d5d7f31a4522035cc9f99d55711 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Fri, 15 May 2026 19:14:43 +0530 Subject: [PATCH 9/9] docs(composio): clarify rendered vs lower variable contract in classify_composio_failure_tag (#1608) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @graycyrus minor inline nit on src/openhuman/composio/ops.rs:111 — `contains_transient_transport_phrase(rendered)` takes original-case while the fallback `lower.contains("error sending request")` uses the pre-lowered copy. Both work but a future contributor adding a new condition wouldn't know which variable to extend. Add a 5-line comment block above the `is_transport` block explaining the contract: `rendered` is for callee-normalised checks (the callee handles casing internally), `lower` is for literal substring matches that intentionally do their own case-folding inline. Extend whichever side matches the new check's normaliser contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/openhuman/composio/ops.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/openhuman/composio/ops.rs b/src/openhuman/composio/ops.rs index dd520dd666..69ec28a363 100644 --- a/src/openhuman/composio/ops.rs +++ b/src/openhuman/composio/ops.rs @@ -112,6 +112,12 @@ fn report_composio_op_error(operation: &str, err: /// Extracted so tests can pin the routing without a Sentry test client. fn classify_composio_failure_tag(rendered: &str) -> &'static str { let lower = rendered.to_ascii_lowercase(); + // `rendered`: pass to callee-normalised checks + // (`contains_transient_transport_phrase` handles casing internally). + // `lower`: pre-lowered copy reused for literal substring matches that + // intentionally do their own case-folding here. + // A future contributor adding a new condition should extend the side + // that matches the new check's normaliser contract. 
let is_transport = crate::core::observability::contains_transient_transport_phrase(rendered) || lower.contains("error sending request"); if is_transport {