diff --git a/src/openhuman/embeddings/openai.rs b/src/openhuman/embeddings/openai.rs index 1b7eb99359..369ca25b79 100644 --- a/src/openhuman/embeddings/openai.rs +++ b/src/openhuman/embeddings/openai.rs @@ -128,11 +128,11 @@ impl EmbeddingProvider for OpenAiEmbedding { target: "openai::embed", "[openai] embed error: status={status}, body={text}" ); - let message = format!("Embedding API error {status}: {text}"); - // Route through the expected-error classifier so user-state - // conditions (budget exhausted / insufficient credits, missing - // API key, transient upstream HTTP) are demoted to info/warn - // breadcrumbs instead of spawning Sentry error events. + let message = format!("Embedding API error ({status}): {text}"); + // Use `report_error_or_expected` so transient upstream HTTP failures + // (e.g. 429 Too Many Requests, which the memory_tree job runner + // already retries with backoff) log a warning breadcrumb instead of + // firing a Sentry error event per attempt. crate::core::observability::report_error_or_expected( message.as_str(), "embeddings", diff --git a/src/openhuman/embeddings/openai_tests.rs b/src/openhuman/embeddings/openai_tests.rs index 501c120ca3..41d633a795 100644 --- a/src/openhuman/embeddings/openai_tests.rs +++ b/src/openhuman/embeddings/openai_tests.rs @@ -224,6 +224,46 @@ async fn embed_server_error() { assert!(msg.contains("rate limited"), "body: {msg}"); } +/// 429 rate-limit responses must format their message in the canonical +/// `"... API error ({status}): {text}"` shape so the shared +/// `is_transient_upstream_http_message` classifier in `core::observability` +/// demotes them to a warning breadcrumb instead of a Sentry error event. 
+#[tokio::test] +async fn embed_429_uses_canonical_transient_format() { + let app = Router::new().route( + "/v1/embeddings", + post(|| async { + ( + StatusCode::TOO_MANY_REQUESTS, + r#"{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#, + ) + }), + ); + let url = start_mock(app).await; + let p = OpenAiEmbedding::new(&url, "k", "m", 1); + + let err = p.embed(&["hi"]).await.unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("(429 Too Many Requests)"), + "expected canonical transient HTTP shape, got: {msg}" + ); + // Pin the shape to the exact substring `is_transient_upstream_http_message` + // matches on (`"api error ({code} "`). The broader + // `is_transient_message_failure` classifier below also passes for the *old* + // `"Embedding API error 429 …"` format, so without this assertion a future + // refactor could silently revert the format and the test would still go + // green. + assert!( + msg.to_ascii_lowercase().contains("api error (429 "), + "message must match is_transient_upstream_http_message classifier arm: {msg}" + ); + assert!( + crate::core::observability::is_transient_message_failure(&msg), + "message should classify as transient: {msg}" + ); +} + #[tokio::test] async fn embed_budget_exhausted_400_still_errors() { // OPENHUMAN-TAURI-JM: the backend returns HTTP 400 with a budget-exhausted