Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions app/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,19 @@ pub fn run() {
);
return None;
}
// Defense-in-depth: drop max-tool-iterations cap events that
// slipped past the call-site filters in the core (see
// `openhuman_core::core::observability::is_max_iterations_event`
// for the rationale). The shell links the core in-process so
// any captured event for this deterministic agent-state
// outcome is filtered here too (OPENHUMAN-TAURI-99 / -98).
if openhuman_core::core::observability::is_max_iterations_event(&event) {
log::debug!(
"[sentry-max-iter-filter] dropping max-iteration cap noise event: {:?}",
event.message.as_deref().unwrap_or("<no message>")
);
return None;
}
if openhuman_core::core::observability::is_transient_backend_api_failure(&event)
|| openhuman_core::core::observability::is_transient_integrations_failure(&event)
{
Expand Down
73 changes: 73 additions & 0 deletions src/core/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,34 @@ pub fn is_transient_provider_http_failure(event: &sentry::protocol::Event<'_>) -
TRANSIENT_PROVIDER_HTTP_STATUSES.contains(&status_u16)
}

/// Returns true when a Sentry event's message/exception text contains the
/// canonical max-tool-iterations cap phrase (see
/// `openhuman::agent::error::MAX_ITERATIONS_ERROR_PREFIX`).
///
/// Defense-in-depth filter for the Sentry `before_send` hook: the primary
/// suppression lives at the call sites in `agent::harness::session::
/// runtime::run_single`, `channels::runtime::dispatch`, and
/// `channels::providers::web::run_chat_task`, all of which now skip
/// `report_error` when this variant is detected. This filter catches any
/// future call site that re-emits the message without going through those
/// funnels — e.g. a new wrapper that calls `tracing::error!` directly with
/// the typed error rendering — and keeps OPENHUMAN-TAURI-99 / -98
/// permanently off Sentry without requiring touch-ups at each new site.
///
/// Match strategy: scans `event.message` first (the path used by
/// `report_error_message` → `sentry::capture_message`) and falls back to
/// the last exception's `value` (the shape `sentry-tracing` produces when
/// stacktraces are attached). Both fields are checked for the canonical
/// prefix so the filter stays robust to future Sentry plumbing changes.
pub fn is_max_iterations_event(event: &sentry::protocol::Event<'_>) -> bool {
let direct = event.message.as_deref();
let from_exception = event.exception.last().and_then(|e| e.value.as_deref());
[direct, from_exception]
.into_iter()
.flatten()
.any(crate::openhuman::agent::error::is_max_iterations_error)
}

pub fn is_transient_http_status(status: &str) -> bool {
TRANSIENT_HTTP_STATUSES.contains(&status)
}
Expand Down Expand Up @@ -1000,4 +1028,49 @@ mod tests {
&[("provider", "ollama")],
);
}

fn event_with_message(msg: &str) -> sentry::protocol::Event<'static> {
let mut event = sentry::protocol::Event::default();
event.message = Some(msg.to_string());
event
}

fn event_with_exception_value(value: &str) -> sentry::protocol::Event<'static> {
let mut event = sentry::protocol::Event::default();
event.exception = vec![sentry::protocol::Exception {
value: Some(value.to_string()),
..Default::default()
}]
.into();
event
}

#[test]
fn max_iterations_filter_matches_message_path() {
// `report_error_message` calls `sentry::capture_message`, which
// populates `event.message`. The filter must see the canonical
// phrase on that field path.
let event = event_with_message("Agent exceeded maximum tool iterations (8)");
assert!(is_max_iterations_event(&event));
}

#[test]
fn max_iterations_filter_matches_exception_path() {
// sentry-tracing with attach_stacktrace=true populates the
// exception list instead of (or in addition to) `event.message`.
// Filter must still catch the noise.
let event = event_with_exception_value(
"agent.run_single failed: Agent exceeded maximum tool iterations (10)",
);
assert!(is_max_iterations_event(&event));
}

#[test]
fn max_iterations_filter_keeps_unrelated_events() {
assert!(!is_max_iterations_event(&event_with_message(
"provider returned 503"
)));
assert!(!is_max_iterations_event(&event_with_message("")));
assert!(!is_max_iterations_event(&sentry::protocol::Event::default()));
}
}
11 changes: 11 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@ fn main() {
if openhuman_core::core::observability::is_transient_provider_http_failure(&event) {
return None;
}
// Defense-in-depth: drop max-tool-iterations cap events that
// slipped past the call-site filters in
// `agent::harness::session::runtime::run_single`,
// `channels::runtime::dispatch`, and
// `channels::providers::web::run_chat_task`. The cap is a
// deterministic agent-state outcome surfaced to the user via
// the chat-rendered "Error: …" message — Sentry is the wrong
// surface for it (OPENHUMAN-TAURI-99 / -98).
if openhuman_core::core::observability::is_max_iterations_event(&event) {
return None;
}
if openhuman_core::core::observability::is_transient_backend_api_failure(&event)
|| openhuman_core::core::observability::is_transient_integrations_failure(&event)
{
Expand Down
46 changes: 45 additions & 1 deletion src/openhuman/agent/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl fmt::Display for AgentError {
)
}
Self::MaxIterationsExceeded { max } => {
write!(f, "Agent exceeded maximum tool iterations ({max})")
write!(f, "{MAX_ITERATIONS_ERROR_PREFIX} ({max})")
}
Self::CompactionFailed {
message,
Expand Down Expand Up @@ -121,6 +121,31 @@ impl From<anyhow::Error> for AgentError {
}
}

/// Canonical user-facing prefix for the max-tool-iterations cap.
///
/// Single source of truth for:
/// - `AgentError::MaxIterationsExceeded` `Display` (in this file)
/// - Substring detection at Sentry-emit funnels where the typed variant has
/// already been marshalled through `String` (channels dispatch path,
/// web-channel run_chat_task, optional `before_send` defense)
///
/// Keep the literal **exactly** in sync with the `Display` impl above — UI
/// surfaces and tests grep for this prefix.
pub const MAX_ITERATIONS_ERROR_PREFIX: &str = "Agent exceeded maximum tool iterations";

/// Returns true when an error rendering contains the canonical
/// max-tool-iterations cap message.
///
/// Use this at Sentry-emit sites (`channels::dispatch`, `web_channel::
/// run_chat_task`, and Sentry `before_send` filters) where the typed
/// [`AgentError::MaxIterationsExceeded`] variant has already been flattened
/// to a `String` by the native bus / handler boundary and cannot be
/// downcast directly. Sites that still hold an `anyhow::Error` should
/// prefer `err.downcast_ref::<AgentError>()` for precision.
pub fn is_max_iterations_error(error_msg: &str) -> bool {
error_msg.contains(MAX_ITERATIONS_ERROR_PREFIX)
}

/// Check if an error message indicates a context/prompt-too-long failure.
pub fn is_context_limit_error(error_msg: &str) -> bool {
let lower = error_msg.to_lowercase();
Expand Down Expand Up @@ -158,6 +183,25 @@ mod tests {
assert!(!is_context_limit_error("rate limit exceeded"));
}

#[test]
fn max_iterations_detection_matches_display() {
// The substring helper must match the variant's own Display output —
// the channels dispatch / web_channel sites flatten the typed error
// through a `String` boundary, so any drift between the constant
// and `Display` silently re-enables Sentry emission for the cap
// (OPENHUMAN-TAURI-99 / -98).
let rendered = AgentError::MaxIterationsExceeded { max: 8 }.to_string();
assert!(is_max_iterations_error(&rendered));
assert!(is_max_iterations_error(
"run_chat_task failed client_id=abc thread_id=t1 \
error=Agent exceeded maximum tool iterations (10)"
));
assert!(!is_max_iterations_error("provider returned 503"));
assert!(!is_max_iterations_error(
"Tool execution error [shell]: denied"
));
}

#[test]
fn permission_denied_display() {
let err = AgentError::PermissionDenied {
Expand Down
57 changes: 39 additions & 18 deletions src/openhuman/agent/harness/session/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,25 +502,46 @@ impl Agent {
}
Err(err) => {
let sanitized_message = Self::sanitize_event_error_message(&err);
// OPENHUMAN-TAURI-5Z: upstream transient HTTP failures
// (408/429/502/503/504) are already retried + filtered at the
// provider layer. When retries exhaust they bubble up here via
// `Result::Err`, and re-reporting under `domain=agent` escapes
// the `domain=llm_provider` filter — one Sentry event per
// failed turn for a transient infrastructure blip. Route
// through `report_error_or_expected` so the classifier demotes
// those (and other expected user-state errors) to a warn-level
// breadcrumb while preserving the AgentError publish + return.
crate::core::observability::report_error_or_expected(
&err,
"agent",
"run_single",
&[
("session_id", self.event_session_id()),
("channel", self.event_channel()),
("error_kind", sanitized_message.as_str()),
],
// The max-tool-iterations cap is a deterministic agent-state
// outcome, not a bug — the UI already surfaces the failure
// to the user via the chat-rendered "Error: Agent exceeded
// maximum tool iterations" message. Skip the Sentry funnel
// for this variant entirely and emit a structured
// `log::info!` (OPENHUMAN-TAURI-99 / -98).
//
// Other agent errors go through `report_error_or_expected`
// so OPENHUMAN-TAURI-5Z and friends — upstream transient
// HTTP that bubbles up under `domain=agent` and escapes
// the `domain=llm_provider` filter — get demoted to a
// warn-level breadcrumb without losing genuine bugs.
// `Err` propagation, the `AgentError` domain event, and
// downstream `recoverable=false` semantics are preserved.
let is_max_iter = matches!(
err.downcast_ref::<AgentError>(),
Some(AgentError::MaxIterationsExceeded { .. })
);
if is_max_iter {
log::info!(
target: "agent",
"[agent.run_single] suppressed Sentry emission for max-iteration cap \
session_id={} channel={} error_kind={} message={}",
self.event_session_id(),
self.event_channel(),
sanitized_message.as_str(),
err
);
} else {
crate::core::observability::report_error_or_expected(
&err,
"agent",
"run_single",
&[
("session_id", self.event_session_id()),
("channel", self.event_channel()),
("error_kind", sanitized_message.as_str()),
],
);
}
publish_global(DomainEvent::AgentError {
session_id: self.event_session_id().to_string(),
message: sanitized_message,
Expand Down
50 changes: 50 additions & 0 deletions src/openhuman/agent/harness/session/runtime_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,56 @@ fn sanitizers_and_tool_call_helpers_cover_fallback_paths() {
assert_eq!(Agent::count_iterations(&history), 3);
}

#[tokio::test]
async fn run_single_preserves_typed_max_iterations_error_for_sentry_skip() {
// OPENHUMAN-TAURI-99 regression guard: when the agent hits its tool
// iteration cap, `run_single` MUST surface the typed
// `AgentError::MaxIterationsExceeded` variant so the call site can
// downcast and skip `report_error`. If the error reaches the funnel as
// a plain `anyhow::Error::msg(..)` (e.g. someone reverts to
// `anyhow::bail!`), the downcast fails and Sentry re-floods with the
// exact noise this fix removes.
let _ = init_global(64);

let err_provider: Arc<dyn Provider> = Arc::new(StaticProvider {
response: Mutex::new(Some(Err(anyhow!(AgentError::MaxIterationsExceeded {
max: 8
})))),
});
let mut agent = make_agent(err_provider);
let err = agent
.run_single("hello")
.await
.expect_err("run_single should surface max-iter cap");

// The user-visible chat string MUST stay byte-identical — the UI
// (and `runtime_tool_calls.rs` channel test) reads this verbatim.
assert!(
err.to_string()
.contains("Agent exceeded maximum tool iterations"),
"canonical phrase missing: {err}"
);

// The downcast is the load-bearing condition for the Sentry skip in
// `Agent::run_single` (matches!(err.downcast_ref::<AgentError>(),
// Some(AgentError::MaxIterationsExceeded { .. }))). If this assertion
// ever fails the suppression silently regresses to error-level
// emission.
let downcast = err.downcast_ref::<AgentError>();
assert!(
matches!(downcast, Some(AgentError::MaxIterationsExceeded { max: 8 })),
"expected MaxIterationsExceeded {{ max: 8 }}, got {downcast:?}"
);

// Sanitized event message round-trips to the stable kind tag so the
// structured `log::info!` we emit instead of `report_error` carries
// the right `error_kind` for log-side filtering.
assert_eq!(
Agent::sanitize_event_error_message(&err),
"max_iterations_exceeded"
);
}

#[tokio::test]
async fn run_single_publishes_completed_and_error_events() {
let _ = init_global(64);
Expand Down
16 changes: 12 additions & 4 deletions src/openhuman/agent/harness/session/turn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -859,10 +859,18 @@ impl Agent {
"[agent_loop] exceeded maximum tool iterations max={}",
self.config.max_tool_iterations
);
anyhow::bail!(
"Agent exceeded maximum tool iterations ({})",
self.config.max_tool_iterations
)
// Return the typed `AgentError::MaxIterationsExceeded` variant
// (boxed through `anyhow::Error`) so `Agent::run_single` can
// downcast at the Sentry funnel and suppress emission — this is
// a deterministic agent-state outcome, not a bug (OPENHUMAN-
// TAURI-99 / -98). The `Display` text is preserved verbatim so
// the user-visible chat-rendered "Error: Agent exceeded
// maximum tool iterations" string is unchanged.
Err(anyhow::Error::new(
crate::openhuman::agent::error::AgentError::MaxIterationsExceeded {
max: self.config.max_tool_iterations,
},
))
}; // end of `turn_body` async block

// Run the turn body inside the parent-execution-context scope so
Expand Down
13 changes: 12 additions & 1 deletion src/openhuman/agent/harness/tool_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,18 @@ pub(crate) async fn run_tool_call_loop(
}
}

anyhow::bail!("Agent exceeded maximum tool iterations ({max_iterations})")
// Return the typed `AgentError::MaxIterationsExceeded` variant (boxed
// through `anyhow::Error`) so downstream wrappers — notably
// `Agent::run_single` in `harness/session/runtime.rs` — can downcast and
// suppress Sentry emission for this deterministic agent-state outcome
// (OPENHUMAN-TAURI-99 / -98). The `Display` text is preserved verbatim so
// any caller that already inspects the string (UI chat surface, tests)
// continues to work.
Err(anyhow::Error::new(
crate::openhuman::agent::error::AgentError::MaxIterationsExceeded {
max: max_iterations,
},
))
}

#[cfg(test)]
Expand Down
Loading
Loading