Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/openhuman/embeddings/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ impl EmbeddingProvider for OpenAiEmbedding {
target: "openai::embed",
"[openai] embed error: status={status}, body={text}"
);
let message = format!("Embedding API error {status}: {text}");
// Route through the expected-error classifier so user-state
// conditions (budget exhausted / insufficient credits, missing
// API key, transient upstream HTTP) are demoted to info/warn
// breadcrumbs instead of spawning Sentry error events.
let message = format!("Embedding API error ({status}): {text}");
// Use `report_error_or_expected` so transient upstream HTTP failures
// (e.g. 429 Too Many Requests, which the memory_tree job runner
// already retries with backoff) log a warning breadcrumb instead of
// firing a Sentry error event per attempt.
crate::core::observability::report_error_or_expected(
message.as_str(),
"embeddings",
Expand Down
40 changes: 40 additions & 0 deletions src/openhuman/embeddings/openai_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,46 @@ async fn embed_server_error() {
assert!(msg.contains("rate limited"), "body: {msg}");
}

/// 429 rate-limit responses must format their message in the canonical
/// `"... API error (<status>): <body>"` shape so the shared
/// `is_transient_upstream_http_message` classifier in `core::observability`
/// demotes them to a warning breadcrumb instead of a Sentry error event.
#[tokio::test]
async fn embed_429_uses_canonical_transient_format() {
    // Mock upstream that answers every embeddings request with HTTP 429 and a
    // JSON rate-limit error body.
    let app = Router::new().route(
        "/v1/embeddings",
        post(|| async {
            (
                StatusCode::TOO_MANY_REQUESTS,
                r#"{"error":{"message":"Rate limit exceeded.","type":"rate_limit_error"}}"#,
            )
        }),
    );
    let url = start_mock(app).await;
    let p = OpenAiEmbedding::new(&url, "k", "m", 1);

    // The request must fail; everything below inspects the rendered error text.
    let err = p.embed(&["hi"]).await.unwrap_err();
    let msg = err.to_string();

    // The status must be wrapped in parentheses (the canonical shape).
    assert!(
        msg.contains("(429 Too Many Requests)"),
        "expected canonical transient HTTP shape, got: {msg}"
    );

    // Pin the shape to the exact substring `is_transient_upstream_http_message`
    // matches on (`"api error (<status> "`). The broader
    // `is_transient_message_failure` classifier below also passes for the *old*
    // `"Embedding API error 429 …"` format, so without this assertion a future
    // refactor could silently revert the format and the test would still go
    // green.
    assert!(
        msg.to_ascii_lowercase().contains("api error (429 "),
        "message must match is_transient_upstream_http_message classifier arm: {msg}"
    );

    // End-to-end check: the shared classifier treats the message as transient.
    assert!(
        crate::core::observability::is_transient_message_failure(&msg),
        "message should classify as transient: {msg}"
    );
}

#[tokio::test]
async fn embed_budget_exhausted_400_still_errors() {
// OPENHUMAN-TAURI-JM: the backend returns HTTP 400 with a budget-exhausted
Expand Down
Loading