Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions rust/crates/api/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ const CONTEXT_WINDOW_ERROR_MARKERS: &[&str] = &[
"too many tokens",
"prompt is too long",
"input is too long",
"input tokens exceed",
"configured limit",
"messages resulted in",
"completion tokens",
"prompt tokens",
"request is too large",
];

Expand Down Expand Up @@ -542,6 +547,26 @@ mod tests {
assert_eq!(error.request_id(), Some("req_ctx_123"));
}

#[test]
fn classifies_openai_configured_limit_errors_as_context_window_failures() {
let error = ApiError::Api {
status: reqwest::StatusCode::BAD_REQUEST,
error_type: Some("invalid_request_error".to_string()),
message: Some(
"Input tokens exceed the configured limit of 922000 tokens. Your messages resulted in 1860900 tokens. Please reduce the length of the messages."
.to_string(),
),
request_id: Some("req_ctx_openai_123".to_string()),
body: String::new(),
retryable: false,
suggested_action: None,
};

assert!(error.is_context_window_failure());
assert_eq!(error.safe_failure_class(), "context_window");
assert_eq!(error.request_id(), Some("req_ctx_openai_123"));
}

#[test]
fn missing_credentials_without_hint_renders_the_canonical_message() {
// given
Expand Down
5 changes: 3 additions & 2 deletions rust/crates/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ pub use prompt_cache::{
pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource};
pub use providers::openai_compat::{
build_chat_completion_request, flatten_tool_result_content, is_reasoning_model,
model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig,
model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message,
OpenAiCompatClient, OpenAiCompatConfig,
};
pub use providers::{
detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override,
resolve_model_alias, ProviderKind,
model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind,
};
pub use sse::{parse_frame, SseParser};
pub use types::{
Expand Down
148 changes: 134 additions & 14 deletions rust/crates/api/src/providers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,19 +250,31 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
ProviderKind::Anthropic
}

#[must_use]
pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
match kind {
ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
}
}

#[must_use]
pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity {
model_family_identity_for_kind(detect_provider_kind(model))
}

#[must_use]
pub fn max_tokens_for_model(model: &str) -> u32 {
model_token_limit(model).map_or_else(
|| {
let canonical = resolve_model_alias(model);
if canonical.contains("opus") {
32_000
} else {
64_000
}
},
|limit| limit.max_output_tokens,
)
let canonical = resolve_model_alias(model);
let heuristic = if canonical.contains("opus") {
32_000
} else {
64_000
};

model_token_limit(model)
.map(|limit| heuristic.min(limit.max_output_tokens))
.unwrap_or(heuristic)
}

/// Returns the effective max output tokens for a model, preferring a plugin
Expand All @@ -276,7 +288,8 @@ pub fn max_tokens_for_model_with_override(model: &str, plugin_override: Option<u
#[must_use]
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
let canonical = resolve_model_alias(model);
match canonical.as_str() {
let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
match base_model {
"claude-opus-4-6" => Some(ModelTokenLimit {
max_output_tokens: 32_000,
context_window_tokens: 200_000,
Expand All @@ -289,6 +302,20 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
max_output_tokens: 64_000,
context_window_tokens: 131_072,
}),
// GPT-4.1 family via the OpenAI API.
"gpt-4.1" | "gpt-4.1-mini" | "gpt-4.1-nano" => Some(ModelTokenLimit {
max_output_tokens: 32_768,
context_window_tokens: 1_047_576,
}),
// GPT-5.4 family via the OpenAI API.
"gpt-5.4" => Some(ModelTokenLimit {
max_output_tokens: 128_000,
context_window_tokens: 1_000_000,
}),
"gpt-5.4-mini" | "gpt-5.4-nano" => Some(ModelTokenLimit {
max_output_tokens: 128_000,
context_window_tokens: 400_000,
}),
// Kimi models via DashScope (Moonshot AI)
// Source: https://platform.moonshot.cn/docs/intro
"kimi-k2.5" | "kimi-k1.5" => Some(ModelTokenLimit {
Expand Down Expand Up @@ -470,8 +497,8 @@ mod tests {
use super::{
anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind,
load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override,
model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias,
ProviderKind,
model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv,
preflight_message_request, resolve_model_alias, ProviderKind,
};

/// Serializes every test in this module that mutates process-wide
Expand Down Expand Up @@ -530,6 +557,42 @@ mod tests {
);
}

#[test]
fn maps_provider_kind_to_model_family_identity() {
// given: each supported provider kind
let anthropic = ProviderKind::Anthropic;
let openai = ProviderKind::OpenAi;
let xai = ProviderKind::Xai;

// when: converting provider kinds to prompt model family identities
let anthropic_identity = model_family_identity_for_kind(anthropic);
let openai_identity = model_family_identity_for_kind(openai);
let xai_identity = model_family_identity_for_kind(xai);

// then: Anthropic stays Claude and OpenAI-compatible providers are generic
assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude);
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
}

#[test]
fn maps_model_name_to_model_family_identity() {
// given: Anthropic, OpenAI-compatible, and xAI model names
let claude_model = "claude-opus-4-6";
let openai_model = "openai/gpt-4.1-mini";
let xai_model = "grok-3";

// when: detecting prompt model family identities from model names
let claude_identity = model_family_identity_for(claude_model);
let openai_identity = model_family_identity_for(openai_model);
let xai_identity = model_family_identity_for(xai_model);

// then: Anthropic stays Claude and OpenAI-compatible providers are generic
assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude);
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
}

#[test]
fn openai_namespaced_model_routes_to_openai_not_anthropic() {
// Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when
Expand Down Expand Up @@ -614,6 +677,15 @@ mod tests {
fn keeps_existing_max_token_heuristic() {
assert_eq!(max_tokens_for_model("opus"), 32_000);
assert_eq!(max_tokens_for_model("grok-3"), 64_000);
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
}

#[test]
fn caps_default_max_tokens_to_openai_model_limits() {
assert_eq!(max_tokens_for_model("gpt-4.1-mini"), 32_768);
assert_eq!(max_tokens_for_model("openai/gpt-4.1-mini"), 32_768);
assert_eq!(max_tokens_for_model("gpt-5.4"), 64_000);
assert_eq!(max_tokens_for_model("openai/gpt-5.4"), 64_000);
}

#[test]
Expand Down Expand Up @@ -680,6 +752,18 @@ mod tests {
.context_window_tokens,
131_072
);
assert_eq!(
model_token_limit("openai/gpt-4.1-mini")
.expect("openai/gpt-4.1-mini should be registered")
.context_window_tokens,
1_047_576
);
assert_eq!(
model_token_limit("gpt-5.4")
.expect("gpt-5.4 should be registered")
.context_window_tokens,
1_000_000
);
}

#[test]
Expand Down Expand Up @@ -728,6 +812,42 @@ mod tests {
}
}

#[test]
fn preflight_blocks_oversized_requests_for_gpt_5_4() {
let request = MessageRequest {
model: "gpt-5.4".to_string(),
max_tokens: 64_000,
messages: vec![InputMessage {
role: "user".to_string(),
content: vec![InputContentBlock::Text {
text: "x".repeat(3_900_000),
}],
}],
system: Some("Keep the answer short.".to_string()),
tools: None,
tool_choice: None,
stream: true,
..Default::default()
};

let error = preflight_message_request(&request)
.expect_err("oversized gpt-5.4 request should be rejected before the provider call");

match error {
ApiError::ContextWindowExceeded {
model,
requested_output_tokens,
context_window_tokens,
..
} => {
assert_eq!(model, "gpt-5.4");
assert_eq!(requested_output_tokens, 64_000);
assert_eq!(context_window_tokens, 1_000_000);
}
other => panic!("expected context-window preflight failure, got {other:?}"),
}
}

#[test]
fn preflight_skips_unknown_models() {
let request = MessageRequest {
Expand Down
Loading