diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 272da265a..a1a55d445 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -70,6 +70,22 @@ enum StreamingBody { /// See: const VERTEX_ANTHROPIC_VERSION: &str = "vertex-2023-10-16"; +/// Request body fields that Vertex AI rawPredict endpoints reject: the standard +/// `model` field (carried in the URL path instead) and Anthropic SDK extension +/// fields. Each causes HTTP 400 "Extra inputs are not permitted" if forwarded. +/// +/// Add new fields here as Claude Code and other Anthropic SDK clients +/// introduce new extension-only body fields. +const VERTEX_INCOMPATIBLE_BODY_FIELDS: &[&str] = &[ + // Vertex AI rawPredict encodes the model in the URL path, not the request body. + // Clients using the standard Anthropic API always send "model" in the body; strip it. + "model", + // Claude Code 2.1.x context management feature (USE_API_CONTEXT_MANAGEMENT). + // Disabled in clients by CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1. + // Vertex AI schema does not include this field; it causes HTTP 400. + "context_management", +]; + const COMMON_INFERENCE_REQUEST_HEADERS: [&str; 4] = ["content-type", "accept", "accept-encoding", "user-agent"]; @@ -237,12 +253,13 @@ fn prepare_backend_request( // Anthropic publisher endpoints, not for arbitrary model-in-path routes. let needs_vertex_anthropic_version = is_vertex_anthropic_rawpredict_route(route); if needs_vertex_anthropic_version { - // Vertex AI rawPredict encodes the model in the URL path, not - // the request body. Clients using the standard Anthropic API - // (e.g. Claude Code via inference.local) always send "model" - // in the body; strip it so Vertex AI does not reject the - // request with "Extra inputs are not permitted". - obj.remove("model"); + // Strip body fields not supported by Vertex AI rawPredict.
+ // Vertex AI schema-validates the request body and rejects unknown fields with + // HTTP 400 "Extra inputs are not permitted". Header-level betas are already + // stripped above (`strip_anthropic_beta`); this handles the body-field equivalent. + for field in VERTEX_INCOMPATIBLE_BODY_FIELDS { + obj.remove(*field); + } } else { obj.insert( "model".to_string(), @@ -1618,4 +1635,139 @@ mod tests { "Vertex Gemini route must still rewrite the model field, got: {received_body}" ); } + + #[tokio::test] + async fn vertex_ai_body_strips_context_management_field() { + // Claude Code 2.1.x sends "context_management" in the request body when + // USE_API_CONTEXT_MANAGEMENT is active. Vertex AI rawPredict rejects this + // with HTTP 400 "Extra inputs are not permitted". The router must strip it. + let mock_server = MockServer::start().await; + + let base_path = "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models"; + let route = ResolvedRoute { + name: "vertex-anthropic".to_string(), + endpoint: format!("{}{base_path}", mock_server.uri()), + model: "claude-sonnet-4-6".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + Mock::given(method("POST")) + .and(path(format!("{base_path}/claude-sonnet-4-6:rawPredict"))) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_1"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "model": "claude-sonnet-4-6", + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + "context_management": { + "enabled": true, + "strategy": "auto" + } + })) + .unwrap(), + ); + let headers = 
vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + let response = builder.send().await.unwrap(); + assert_eq!(response.status().as_u16(), 200); + let received = mock_server.received_requests().await.unwrap(); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + !received_body + .as_object() + .unwrap() + .contains_key("context_management"), + "context_management must be stripped from Vertex AI rawPredict body, got: {received_body}" + ); + assert!( + !received_body.as_object().unwrap().contains_key("model"), + "model must also be stripped from Vertex AI rawPredict body, got: {received_body}" + ); + } + + #[tokio::test] + async fn direct_anthropic_preserves_context_management_field() { + // context_management must only be stripped for Vertex AI rawPredict routes. + // Direct Anthropic API routes must forward it intact. 
+ let mock_server = MockServer::start().await; + + let route = ResolvedRoute { + name: "direct-anthropic".to_string(), + endpoint: mock_server.uri(), + model: "claude-sonnet-4-6".to_string(), + api_key: "sk-ant-test".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Custom("x-api-key"), + default_headers: vec![("anthropic-version".to_string(), "2023-06-01".to_string())], + passthrough_headers: vec!["anthropic-beta".to_string()], + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, + }; + + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_1"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "model": "claude-sonnet-4-6", + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + "context_management": {"enabled": true} + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + let _ = builder.send().await.unwrap(); + let received = mock_server.received_requests().await.unwrap(); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + received_body + .as_object() + .unwrap() + .contains_key("context_management"), + "context_management must be preserved for direct Anthropic API routes, got: {received_body}" + ); + } } diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 7591bdd6b..1ef30e8aa 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -507,6 +507,12 @@ pub(super) async fn resolve_provider_environment( // Project ID 
derived vars. if !project_id.is_empty() { + warn!( + provider = %provider.metadata.as_ref().map_or("?", |m| m.name.as_str()), + "injecting ANTHROPIC_VERTEX_PROJECT_ID into sandbox; if using inference.local, \ + ensure ANTHROPIC_BASE_URL=https://inference.local is set to prevent Anthropic \ + SDK from routing directly to Vertex AI" + ); env.entry("ANTHROPIC_VERTEX_PROJECT_ID".to_string()) .or_insert_with(|| project_id.to_string()); env.entry("GCP_PROJECT_ID".to_string())