Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,7 @@ pub fn run() {
}
if openhuman_core::core::observability::is_transient_backend_api_failure(&event)
|| openhuman_core::core::observability::is_transient_integrations_failure(&event)
|| openhuman_core::core::observability::is_updater_transient_event(&event)
{
return None;
}
Expand Down
126 changes: 125 additions & 1 deletion src/core/observability.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Centralised error reporting for the core, plus Sentry
//! `before_send` filters that drop deterministic provider noise:
//! per-attempt transient-upstream failures and budget-exhausted user-state.
//! per-attempt transient-upstream failures, budget-exhausted user-state,
//! and transient updater failures.
//!
//! Wraps `tracing::error!` (which the global subscriber forwards to Sentry via
//! `sentry-tracing`) inside a `sentry::with_scope` so each captured event
Expand Down Expand Up @@ -52,6 +53,21 @@ pub const TRANSIENT_TRANSPORT_PHRASES: &[&str] = &[
"error sending request",
];

/// HTTP status codes that updater probes treat as expected GitHub/network
/// noise rather than actionable failures: 403 (unauthenticated GitHub API
/// rate-limit / policy responses) plus the gateway/server hiccups 500, 502,
/// 503 and 504.
///
/// Consulted through [`is_updater_transient_http_status`]; the Sentry filter
/// [`is_updater_transient_event`] only applies this table to events that are
/// tagged with an updater domain.
const UPDATER_TRANSIENT_HTTP_STATUSES: &[u16] = &[403, 500, 502, 503, 504];

/// Message fragments observed from Tauri/core updater transient failures.
/// Keep these updater-specific so unrelated GitHub or generic transport
/// failures still reach Sentry.
///
/// Every entry must be lowercase: [`is_updater_transient_message`] lowercases
/// the incoming message (ASCII only) and then does a plain substring search
/// for each phrase.
const UPDATER_TRANSIENT_MESSAGE_PHRASES: &[&str] = &[
// Updater "check" failure whose cause is a request that could not be sent.
"failed to check for updates: error sending request",
// GitHub API 403 response, mirroring the 403 entry in the status table.
"github api error: 403",
// Prefix match: hits every message whose status starts with `5`.
// NOTE(review): this is wider than the 500/502/503/504 entries in
// `UPDATER_TRANSIENT_HTTP_STATUSES` (e.g. 501 would match here but not
// there) — confirm that dropping all 5xx by message is intended.
"github api error: 5",
// Request failure against this project's GitHub release URLs.
"error sending request for url (https://github.com/tinyhumansai/openhuman/releases/",
];

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExpectedErrorKind {
LocalAiDisabled,
Expand Down Expand Up @@ -473,6 +489,17 @@ pub fn contains_transient_transport_phrase(message: &str) -> bool {
.any(|phrase| lower.contains(phrase))
}

/// Returns `true` when `status` is one of the HTTP status codes listed in
/// `UPDATER_TRANSIENT_HTTP_STATUSES`, i.e. a response the updater treats as
/// expected noise instead of an actionable failure.
pub fn is_updater_transient_http_status(status: u16) -> bool {
    UPDATER_TRANSIENT_HTTP_STATUSES
        .iter()
        .any(|&known| known == status)
}

/// Returns `true` when `message` contains any entry of
/// `UPDATER_TRANSIENT_MESSAGE_PHRASES`.
///
/// The comparison is ASCII case-insensitive: the message is lowercased once
/// and each phrase is then searched for as a plain substring.
pub fn is_updater_transient_message(message: &str) -> bool {
    let normalized = message.to_ascii_lowercase();
    for needle in UPDATER_TRANSIENT_MESSAGE_PHRASES {
        if normalized.contains(*needle) {
            return true;
        }
    }
    false
}

fn event_has_transient_transport_phrase(event: &sentry::protocol::Event<'_>) -> bool {
event
.message
Expand All @@ -490,6 +517,30 @@ fn event_has_transient_transport_phrase(event: &sentry::protocol::Event<'_>) ->
})
}

/// Checks every textual slot of a Sentry event for an updater-transient
/// phrase: the top-level `message`, the `logentry` message, and the `value`
/// of each attached exception. Returns `true` on the first slot that matches
/// [`is_updater_transient_message`].
fn event_has_updater_transient_message(event: &sentry::protocol::Event<'_>) -> bool {
    // Top-level message, when present.
    if let Some(text) = event.message.as_deref() {
        if is_updater_transient_message(text) {
            return true;
        }
    }

    // Structured log entry, when present.
    if let Some(entry) = event.logentry.as_ref() {
        if is_updater_transient_message(&entry.message) {
            return true;
        }
    }

    // Exception values; exceptions without a value are skipped.
    event
        .exception
        .values
        .iter()
        .filter_map(|exc| exc.value.as_deref())
        .any(is_updater_transient_message)
}

/// Returns `true` when the event's `domain` tag is exactly one of the updater
/// domains: `update`, `update.check_releases`, or `updater`. Events without a
/// `domain` tag never match.
fn event_has_updater_domain(event: &sentry::protocol::Event<'_>) -> bool {
    const UPDATER_DOMAINS: [&str; 3] = ["update", "update.check_releases", "updater"];

    event
        .tags
        .get("domain")
        .is_some_and(|domain| UPDATER_DOMAINS.contains(&domain.as_str()))
}

fn is_transient_domain_failure(event: &sentry::protocol::Event<'_>, domain: &str) -> bool {
let tags = &event.tags;
if tags.get("domain").map(String::as_str) != Some(domain) {
Expand Down Expand Up @@ -517,6 +568,34 @@ pub fn is_transient_integrations_failure(event: &sentry::protocol::Event<'_>) ->
is_transient_domain_failure(event, "integrations")
}

/// Decides whether a Sentry event is a transient updater failure (GitHub
/// release probe/download noise) that should be dropped.
///
/// Two event shapes are recognised:
///
/// * **Message-only** — e.g. Tauri's updater plugin emitting
///   `"failed to check for updates: error sending request for url (...latest.json)"`.
///   Any event whose message, log entry, or exception value contains an
///   updater-transient phrase matches, regardless of its tags.
/// * **Structured tags** — core-side reports tagged with an updater domain
///   (`domain=update`, often `operation=check_releases`) plus a `failure` tag:
///   - `failure=non_2xx` matches when the `status` tag parses as a `u16` that
///     [`is_updater_transient_http_status`] accepts;
///   - `failure=transport` matches when the event carries a transient
///     transport phrase.
///
/// An update-domain event with neither marker is never dropped, so genuine
/// updater bugs still reach Sentry.
pub fn is_updater_transient_event(event: &sentry::protocol::Event<'_>) -> bool {
    // Shape 1: the message alone identifies updater noise.
    if event_has_updater_transient_message(event) {
        return true;
    }

    // Shape 2 needs an updater domain tag before any other tag is trusted.
    if !event_has_updater_domain(event) {
        return false;
    }

    let failure_kind = event.tags.get("failure").map(String::as_str);

    if failure_kind == Some("non_2xx") {
        // A missing or unparsable status is not treated as transient.
        let Some(raw_status) = event.tags.get("status") else {
            return false;
        };
        return raw_status
            .parse::<u16>()
            .is_ok_and(is_updater_transient_http_status);
    }

    failure_kind == Some("transport") && event_has_transient_transport_phrase(event)
}

/// String tokens that mark a formatted error message as a transient HTTP
/// failure. Used at upstream emit sites (`rpc.invoke_method`,
/// `web_channel.run_chat_task`) where the error has already been stringified
Expand Down Expand Up @@ -1165,6 +1244,51 @@ mod tests {
);
}

/// A core-side update check that hit a GitHub 403 must be classified as
/// transient updater noise.
#[test]
fn updater_transient_403_is_dropped() {
    let rate_limited = event_with_tags_and_message(
        &[
            ("domain", "update"),
            ("operation", "check_releases"),
            ("failure", "non_2xx"),
            ("status", "403"),
        ],
        "[observability] update.check_releases failed: GitHub API error: 403 Forbidden",
    );

    let dropped = is_updater_transient_event(&rate_limited);

    assert!(
        dropped,
        "GitHub 403 updater checks are unactionable transient/rate-limit noise"
    );
}

/// A 502 from the release check, tagged with the dotted
/// `update.check_releases` domain, must be classified as transient.
#[test]
fn updater_transient_502_is_dropped() {
    let bad_gateway = event_with_tags_and_message(
        &[
            ("domain", "update.check_releases"),
            ("failure", "non_2xx"),
            ("status", "502"),
        ],
        "GitHub API error: 502 Bad Gateway",
    );

    let dropped = is_updater_transient_event(&bad_gateway);

    assert!(
        dropped,
        "GitHub 5xx updater checks must be filtered as transient"
    );
}

/// An update-domain event with no `failure` tag and no transient phrase (here
/// a panic message) must NOT be classified as transient.
#[test]
fn updater_real_panic_still_reported() {
    let panic_event = event_with_tags_and_message(
        &[("domain", "update"), ("operation", "check_releases")],
        "thread 'main' panicked at src/openhuman/update/core.rs: index out of bounds",
    );

    let dropped = is_updater_transient_event(&panic_event);

    assert!(
        !dropped,
        "update-domain events without a transient updater shape must still reach Sentry"
    );
}

#[test]
fn message_failure_classifier_matches_canonical_status_phrases() {
for msg in [
Expand Down
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ fn main() {
}
if openhuman_core::core::observability::is_transient_backend_api_failure(&event)
|| openhuman_core::core::observability::is_transient_integrations_failure(&event)
|| openhuman_core::core::observability::is_updater_transient_event(&event)
{
return None;
}
Expand Down
64 changes: 45 additions & 19 deletions src/openhuman/update/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,21 @@ pub async fn check_available() -> Result<UpdateInfo, String> {
.await
.map_err(|e| {
let msg = format!("failed to fetch latest release: {e}");
if is_transport_network_failure(&e) {
if is_transport_network_failure(&e)
|| crate::core::observability::is_updater_transient_message(&msg)
{
// OPENHUMAN-TAURI-2F: reqwest's transport-level failure fires
// before any HTTP status when DNS / TCP / TLS handshake fails,
// or the user's ISP / firewall blocks api.github.com. No
// status, no trace, no payload — Sentry has no signal to act
// on, and every scheduled poll generates another noisy event.
// Log a warn so it shows up in local diagnostics and the next
// tick can retry, without paging.
log::warn!(
"[update] check_releases skipped transport-level failure (will retry next poll): {msg}"
tracing::warn!(
domain = "update",
operation = "check_releases",
failure = "transport",
"[observability] update.check_releases skipped transient updater transport failure: {msg}"
);
} else {
crate::core::observability::report_error(
Expand All @@ -137,12 +142,22 @@ pub async fn check_available() -> Result<UpdateInfo, String> {
&body[..body.len().min(200)]
);
let msg = format!("GitHub API error: {status}");
crate::core::observability::report_error(
msg.as_str(),
"update",
"check_releases",
&[("status", status_str.as_str()), ("failure", "non_2xx")],
);
if crate::core::observability::is_updater_transient_http_status(status.as_u16()) {
tracing::warn!(
domain = "update",
operation = "check_releases",
failure = "non_2xx",
status = status_str.as_str(),
"[observability] update.check_releases skipped transient updater HTTP response: {msg}"
);
} else {
crate::core::observability::report_error(
msg.as_str(),
"update",
"check_releases",
&[("status", status_str.as_str()), ("failure", "non_2xx")],
);
}
return Err(msg);
}

Expand Down Expand Up @@ -239,16 +254,27 @@ pub async fn download_and_stage_with_version(
let status = response.status();
let status_str = status.as_u16().to_string();
let msg = format!("download failed with status {}", status);
crate::core::observability::report_error(
msg.as_str(),
"update",
"download",
&[
("asset", asset_name),
("status", status_str.as_str()),
("failure", "non_2xx"),
],
);
if crate::core::observability::is_updater_transient_http_status(status.as_u16()) {
tracing::warn!(
domain = "update",
operation = "download",
failure = "non_2xx",
status = status_str.as_str(),
asset = asset_name,
"[observability] update.download skipped transient updater HTTP response: {msg}"
);
} else {
crate::core::observability::report_error(
msg.as_str(),
"update",
"download",
&[
("asset", asset_name),
("status", status_str.as_str()),
("failure", "non_2xx"),
],
);
}
return Err(msg);
}

Expand Down
21 changes: 18 additions & 3 deletions tests/observability_smoke.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Runtime smoke for the Sentry `before_send` filters that drop per-attempt
//! transient-upstream provider, backend_api, and integrations failures plus
//! budget-exhausted user-state 400s (OPENHUMAN-TAURI-3M / 12 / 13).
//! transient-upstream provider, backend_api, integrations, and updater
//! failures plus budget-exhausted user-state 400s (OPENHUMAN-TAURI-3M / 12 / 13).
//!
//! Unit tests in `src/core/observability.rs` exercise the pure filter
//! function. This integration test wires the actual `sentry::init` →
Expand All @@ -10,7 +10,7 @@

use openhuman_core::core::observability::{
is_budget_event, is_transient_backend_api_failure, is_transient_integrations_failure,
is_transient_provider_http_failure,
is_transient_provider_http_failure, is_updater_transient_event,
};
use sentry::protocol::Event;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -60,6 +60,7 @@ fn count_captured(events: Vec<Event<'static>>) -> usize {
|| is_transient_backend_api_failure(&event)
|| is_transient_integrations_failure(&event)
|| is_budget_event(&event)
|| is_updater_transient_event(&event)
{
None
} else {
Expand All @@ -82,6 +83,20 @@ fn count_captured(events: Vec<Event<'static>>) -> usize {
transport.fetch_and_clear_envelopes().len()
}

/// A tag-less, message-only updater check failure must be dropped by the
/// `before_send` filter chain, so nothing is captured.
#[test]
fn drops_updater_transient_check_failure() {
    let message_only = event_with_tags_and_message(
        &[],
        "failed to check for updates: error sending request for url \
         (https://github.com/tinyhumansai/openhuman/releases/latest/download/latest.json)",
    );

    let captured = count_captured(vec![message_only]);

    assert_eq!(
        captured, 0,
        "transient updater check failures must be filtered in before_send"
    );
}

#[test]
fn drops_backend_api_transient_statuses() {
let events = ["408", "429", "502", "503", "504", "520"]
Expand Down
Loading