diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 2a276d5c3e..5f87693bc1 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -1342,6 +1342,7 @@ pub fn run() { } if openhuman_core::core::observability::is_transient_backend_api_failure(&event) || openhuman_core::core::observability::is_transient_integrations_failure(&event) + || openhuman_core::core::observability::is_updater_transient_event(&event) { return None; } diff --git a/src/core/observability.rs b/src/core/observability.rs index 22928b7ee1..af72280ae5 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -1,6 +1,7 @@ //! Centralised error reporting for the core, plus a Sentry //! `before_send` filters that drop deterministic provider noise: -//! per-attempt transient-upstream failures and budget-exhausted user-state. +//! per-attempt transient-upstream failures, budget-exhausted user-state, +//! and transient updater failures. //! //! Wraps `tracing::error!` (which the global subscriber forwards to Sentry via //! `sentry-tracing`) inside a `sentry::with_scope` so each captured event @@ -52,6 +53,21 @@ pub const TRANSIENT_TRANSPORT_PHRASES: &[&str] = &[ "error sending request", ]; +/// HTTP statuses from updater probes that are expected GitHub/network noise: +/// unauthenticated GitHub API rate-limit / policy 403s plus gateway/server +/// hiccups. Scoped to updater domains/messages by [`is_updater_transient_event`]. +const UPDATER_TRANSIENT_HTTP_STATUSES: &[u16] = &[403, 500, 502, 503, 504]; + +/// Message fragments observed from Tauri/core updater transient failures. +/// Keep these updater-specific so unrelated GitHub or generic transport +/// failures still reach Sentry. 
+const UPDATER_TRANSIENT_MESSAGE_PHRASES: &[&str] = &[ + "failed to check for updates: error sending request", + "github api error: 403", + "github api error: 5", + "error sending request for url (https://github.com/tinyhumansai/openhuman/releases/", +]; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ExpectedErrorKind { LocalAiDisabled, @@ -473,6 +489,17 @@ pub fn contains_transient_transport_phrase(message: &str) -> bool { .any(|phrase| lower.contains(phrase)) } +pub fn is_updater_transient_http_status(status: u16) -> bool { + UPDATER_TRANSIENT_HTTP_STATUSES.contains(&status) +} + +pub fn is_updater_transient_message(message: &str) -> bool { + let lower = message.to_ascii_lowercase(); + UPDATER_TRANSIENT_MESSAGE_PHRASES + .iter() + .any(|phrase| lower.contains(phrase)) +} + fn event_has_transient_transport_phrase(event: &sentry::protocol::Event<'_>) -> bool { event .message @@ -490,6 +517,30 @@ fn event_has_transient_transport_phrase(event: &sentry::protocol::Event<'_>) -> }) } +fn event_has_updater_transient_message(event: &sentry::protocol::Event<'_>) -> bool { + event + .message + .as_deref() + .is_some_and(is_updater_transient_message) + || event + .logentry + .as_ref() + .is_some_and(|log| is_updater_transient_message(&log.message)) + || event.exception.values.iter().any(|exception| { + exception + .value + .as_deref() + .is_some_and(is_updater_transient_message) + }) +} + +fn event_has_updater_domain(event: &sentry::protocol::Event<'_>) -> bool { + matches!( + event.tags.get("domain").map(String::as_str), + Some("update") | Some("update.check_releases") | Some("updater") + ) +} + fn is_transient_domain_failure(event: &sentry::protocol::Event<'_>, domain: &str) -> bool { let tags = &event.tags; if tags.get("domain").map(String::as_str) != Some(domain) { @@ -517,6 +568,34 @@ pub fn is_transient_integrations_failure(event: &sentry::protocol::Event<'_>) -> is_transient_domain_failure(event, "integrations") } +/// Transient updater failures from GitHub 
release probes/downloads. +/// +/// Core-side reports carry structured tags (`domain=update`, often +/// `operation=check_releases`, plus `failure/status`). Tauri's updater plugin +/// can also emit message-only events such as +/// `"failed to check for updates: error sending request for url (...latest.json)"`. +/// Match both shapes, but never drop an arbitrary update-domain event unless +/// it also has a transient status/transport marker. +pub fn is_updater_transient_event(event: &sentry::protocol::Event<'_>) -> bool { + if event_has_updater_transient_message(event) { + return true; + } + + if !event_has_updater_domain(event) { + return false; + } + + match event.tags.get("failure").map(String::as_str) { + Some("non_2xx") => event + .tags + .get("status") + .and_then(|status| status.parse::<u16>().ok()) + .is_some_and(is_updater_transient_http_status), + Some("transport") => event_has_transient_transport_phrase(event), + _ => false, + } +} + /// String tokens that mark a formatted error message as a transient HTTP /// failure.
Used at upstream emit sites (`rpc.invoke_method`, /// `web_channel.run_chat_task`) where the error has already been stringified @@ -1165,6 +1244,51 @@ mod tests { ); } + #[test] + fn updater_transient_403_is_dropped() { + let event = event_with_tags_and_message( + &[ + ("domain", "update"), + ("operation", "check_releases"), + ("failure", "non_2xx"), + ("status", "403"), + ], + "[observability] update.check_releases failed: GitHub API error: 403 Forbidden", + ); + assert!( + is_updater_transient_event(&event), + "GitHub 403 updater checks are unactionable transient/rate-limit noise" + ); + } + + #[test] + fn updater_transient_502_is_dropped() { + let event = event_with_tags_and_message( + &[ + ("domain", "update.check_releases"), + ("failure", "non_2xx"), + ("status", "502"), + ], + "GitHub API error: 502 Bad Gateway", + ); + assert!( + is_updater_transient_event(&event), + "GitHub 5xx updater checks must be filtered as transient" + ); + } + + #[test] + fn updater_real_panic_still_reported() { + let event = event_with_tags_and_message( + &[("domain", "update"), ("operation", "check_releases")], + "thread 'main' panicked at src/openhuman/update/core.rs: index out of bounds", + ); + assert!( + !is_updater_transient_event(&event), + "update-domain events without a transient updater shape must still reach Sentry" + ); + } + #[test] fn message_failure_classifier_matches_canonical_status_phrases() { for msg in [ diff --git a/src/main.rs b/src/main.rs index 1dc8617e77..7a1ec19588 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,6 +79,7 @@ fn main() { } if openhuman_core::core::observability::is_transient_backend_api_failure(&event) || openhuman_core::core::observability::is_transient_integrations_failure(&event) + || openhuman_core::core::observability::is_updater_transient_event(&event) { return None; } diff --git a/src/openhuman/update/core.rs b/src/openhuman/update/core.rs index e6dadf04ab..aa29d7c018 100644 --- a/src/openhuman/update/core.rs +++ 
b/src/openhuman/update/core.rs @@ -105,7 +105,9 @@ pub async fn check_available() -> Result { .await .map_err(|e| { let msg = format!("failed to fetch latest release: {e}"); - if is_transport_network_failure(&e) { + if is_transport_network_failure(&e) + || crate::core::observability::is_updater_transient_message(&msg) + { // OPENHUMAN-TAURI-2F: reqwest's transport-level failure fires // before any HTTP status when DNS / TCP / TLS handshake fails, // or the user's ISP / firewall blocks api.github.com. No @@ -113,8 +115,11 @@ pub async fn check_available() -> Result { // on, and every scheduled poll generates another noisy event. // Log a warn so it shows up in local diagnostics and the next // tick can retry, without paging. - log::warn!( - "[update] check_releases skipped transport-level failure (will retry next poll): {msg}" + tracing::warn!( + domain = "update", + operation = "check_releases", + failure = "transport", + "[observability] update.check_releases skipped transient updater transport failure: {msg}" ); } else { crate::core::observability::report_error( @@ -137,12 +142,22 @@ pub async fn check_available() -> Result { &body[..body.len().min(200)] ); let msg = format!("GitHub API error: {status}"); - crate::core::observability::report_error( - msg.as_str(), - "update", - "check_releases", - &[("status", status_str.as_str()), ("failure", "non_2xx")], - ); + if crate::core::observability::is_updater_transient_http_status(status.as_u16()) { + tracing::warn!( + domain = "update", + operation = "check_releases", + failure = "non_2xx", + status = status_str.as_str(), + "[observability] update.check_releases skipped transient updater HTTP response: {msg}" + ); + } else { + crate::core::observability::report_error( + msg.as_str(), + "update", + "check_releases", + &[("status", status_str.as_str()), ("failure", "non_2xx")], + ); + } return Err(msg); } @@ -239,16 +254,27 @@ pub async fn download_and_stage_with_version( let status = response.status(); let status_str 
= status.as_u16().to_string(); let msg = format!("download failed with status {}", status); - crate::core::observability::report_error( - msg.as_str(), - "update", - "download", - &[ - ("asset", asset_name), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); + if crate::core::observability::is_updater_transient_http_status(status.as_u16()) { + tracing::warn!( + domain = "update", + operation = "download", + failure = "non_2xx", + status = status_str.as_str(), + asset = asset_name, + "[observability] update.download skipped transient updater HTTP response: {msg}" + ); + } else { + crate::core::observability::report_error( + msg.as_str(), + "update", + "download", + &[ + ("asset", asset_name), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } return Err(msg); } diff --git a/tests/observability_smoke.rs b/tests/observability_smoke.rs index b7a2c57188..66a17dd4d3 100644 --- a/tests/observability_smoke.rs +++ b/tests/observability_smoke.rs @@ -1,6 +1,6 @@ //! Runtime smoke for the Sentry `before_send` filters that drop per-attempt -//! transient-upstream provider, backend_api, and integrations failures plus -//! budget-exhausted user-state 400s (OPENHUMAN-TAURI-3M / 12 / 13). +//! transient-upstream provider, backend_api, integrations, and updater +//! failures plus budget-exhausted user-state 400s (OPENHUMAN-TAURI-3M / 12 / 13). //! //! Unit tests in `src/core/observability.rs` exercise the pure filter //! function. 
This integration test wires the actual `sentry::init` → @@ -10,7 +10,7 @@ use openhuman_core::core::observability::{ is_budget_event, is_transient_backend_api_failure, is_transient_integrations_failure, - is_transient_provider_http_failure, + is_transient_provider_http_failure, is_updater_transient_event, }; use sentry::protocol::Event; use std::collections::BTreeMap; @@ -60,6 +60,7 @@ fn count_captured(events: Vec>) -> usize { || is_transient_backend_api_failure(&event) || is_transient_integrations_failure(&event) || is_budget_event(&event) + || is_updater_transient_event(&event) { None } else { @@ -82,6 +83,20 @@ fn count_captured(events: Vec>) -> usize { transport.fetch_and_clear_envelopes().len() } +#[test] +fn drops_updater_transient_check_failure() { + let event = event_with_tags_and_message( + &[], + "failed to check for updates: error sending request for url \ + (https://github.com/tinyhumansai/openhuman/releases/latest/download/latest.json)", + ); + assert_eq!( + count_captured(vec![event]), + 0, + "transient updater check failures must be filtered in before_send" + ); +} + #[test] fn drops_backend_api_transient_statuses() { let events = ["408", "429", "502", "503", "504", "520"]