From 6b09fca3f0867d794835f6d8e3f2fbc11561155e Mon Sep 17 00:00:00 2001 From: shudonglin Date: Sun, 28 Jun 2026 00:26:55 +0800 Subject: [PATCH 1/4] feat: inject anthropic prompt cache control --- dto/claude_prompt_cache.go | 109 ++++++++++++++++++ relay/channel/api_request.go | 5 + relay/channel/api_request_test.go | 30 +++++ relay/channel/claude/adaptor.go | 19 +++- relay/channel/claude/relay_claude_test.go | 43 +++++++ relay/claude_handler.go | 6 + relay/claude_prompt_cache.go | 54 +++++++++ relay/claude_prompt_cache_test.go | 133 ++++++++++++++++++++++ 8 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 dto/claude_prompt_cache.go create mode 100644 relay/claude_prompt_cache.go create mode 100644 relay/claude_prompt_cache_test.go diff --git a/dto/claude_prompt_cache.go b/dto/claude_prompt_cache.go new file mode 100644 index 00000000000..2b1a1f2b2bd --- /dev/null +++ b/dto/claude_prompt_cache.go @@ -0,0 +1,109 @@ +package dto + +import ( + "encoding/json" + "strings" + + "github.com/tidwall/sjson" +) + +const ( + AnthropicPromptCacheTTLEnv = "ANTHROPIC_PROMPT_CACHE_TTL" + AnthropicPromptCacheTTLHeader = "x-anthropic-prompt-cache-ttl" + AnthropicPromptCacheWorkloadHeader = "x-anthropic-prompt-cache-workload" + anthropicPromptCacheControlType = "ephemeral" +) + +func ApplyAnthropicPromptCacheControlToClaudeRequest(req *ClaudeRequest, ttl string) bool { + if req == nil || ttl == "" || ClaudeRequestHasCacheControl(req) { + return false + } + req.CacheControl = newAnthropicPromptCacheControlRaw(ttl) + return true +} + +func ApplyAnthropicPromptCacheControlToRawClaudeBody(body []byte, ttl string) ([]byte, bool, error) { + if ttl == "" || RawClaudeBodyHasCacheControl(body) { + return body, false, nil + } + cc := newAnthropicPromptCacheControlRaw(ttl) + out, err := sjson.SetRawBytes(body, "cache_control", cc) + if err != nil { + return body, false, err + } + return out, true, nil +} + +func NormalizeAnthropicPromptCacheTTL(value, workload string) string { + switch strings.ToLower(strings.TrimSpace(value)) { + case "", "off", "false", "none", "disabled": + return "" + case "5m": + return "5m" + case "1h": + return "1h" + case "auto": + if IsLongRunningAnthropicWorkload(workload) { + return "1h" + } + return "5m" + default: + return "" + } +} + +func IsLongRunningAnthropicWorkload(workload string) bool { + switch strings.ToLower(strings.TrimSpace(workload)) { + case "eval", "evaluation", "benchmark", "bench", "batch", "pipeline", "long", "long-running": + return true + default: + return false + } +} + +func ClaudeRequestHasCacheControl(req *ClaudeRequest) bool { + if req == nil { + return false + } + if len(req.CacheControl) > 0 { + return true + } + data, err := json.Marshal(req) + if err != nil { + return false + } + return RawClaudeBodyHasCacheControl(data) +} + +func RawClaudeBodyHasCacheControl(body []byte) bool { + var value any + if err := json.Unmarshal(body, &value); err != nil { + return false + } + return hasCacheControlKey(value) +} + +func hasCacheControlKey(value any) bool { + switch v := value.(type) { + case map[string]any: + for key, child := range v { + if key == "cache_control" && child != nil { + return true + } + if hasCacheControlKey(child) { + return true + } + } + case []any: + for _, child := range v { + if hasCacheControlKey(child) { + return true + } + } + } + return false +} + +func newAnthropicPromptCacheControlRaw(ttl string) json.RawMessage { + return json.RawMessage(`{"type":"` + anthropicPromptCacheControlType + `","ttl":"` + ttl + `"}`) +} diff --git a/relay/channel/api_request.go b/relay/channel/api_request.go index f945a838382..9046ffa2bd7 100644 --- a/relay/channel/api_request.go +++ b/relay/channel/api_request.go @@ -87,6 +87,11 @@ var passthroughSkipHeaderNamesLower = map[string]struct{}{ "x-api-key": {}, "x-goog-api-key": {}, + // Gateway-only Anthropic prompt-cache policy headers. These control New API's + // request mutation and should never be forwarded to upstream providers. + "x-anthropic-prompt-cache-ttl": {}, + "x-anthropic-prompt-cache-workload": {}, + // WebSocket handshake headers are generated by the client/dialer. "sec-websocket-key": {}, "sec-websocket-version": {}, diff --git a/relay/channel/api_request_test.go b/relay/channel/api_request_test.go index f697f855569..ea701919477 100644 --- a/relay/channel/api_request_test.go +++ b/relay/channel/api_request_test.go @@ -138,6 +138,36 @@ func TestProcessHeaderOverride_PassthroughSkipsAcceptEncoding(t *testing.T) { require.False(t, hasAcceptEncoding) } +func TestProcessHeaderOverride_PassthroughSkipsAnthropicPromptCachePolicyHeaders(t *testing.T) { + t.Parallel() + + gin.SetMode(gin.TestMode) + recorder := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(recorder) + ctx.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil) + ctx.Request.Header.Set("X-Trace-Id", "trace-123") + ctx.Request.Header.Set("X-Anthropic-Prompt-Cache-Ttl", "1h") + ctx.Request.Header.Set("X-Anthropic-Prompt-Cache-Workload", "eval") + + info := &relaycommon.RelayInfo{ + IsChannelTest: false, + ChannelMeta: &relaycommon.ChannelMeta{ + HeadersOverride: map[string]any{ + "*": "", + }, + }, + } + + headers, err := processHeaderOverride(info, ctx) + require.NoError(t, err) + require.Equal(t, "trace-123", headers["x-trace-id"]) + + _, hasTTL := headers["x-anthropic-prompt-cache-ttl"] + require.False(t, hasTTL) + _, hasWorkload := headers["x-anthropic-prompt-cache-workload"] + require.False(t, hasWorkload) +} + func TestProcessHeaderOverride_PassHeadersTemplateSetsRuntimeHeaders(t *testing.T) { t.Parallel() diff --git a/relay/channel/claude/adaptor.go b/relay/channel/claude/adaptor.go index 6daf5b6f245..1ccfd915fa7 100644 --- a/relay/channel/claude/adaptor.go +++ b/relay/channel/claude/adaptor.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "net/url" + "os" "github.com/QuantumNous/new-api/dto" "github.com/QuantumNous/new-api/relay/channel" @@ -95,7 +96,23 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if request == nil { return nil, errors.New("request is nil") } - return RequestOpenAI2ClaudeMessage(c, *request) + claudeReq, err := RequestOpenAI2ClaudeMessage(c, *request) + if err != nil { + return nil, err + } + ttl := os.Getenv(dto.AnthropicPromptCacheTTLEnv) + workload := "" + if c != nil { + if headerTTL := c.GetHeader(dto.AnthropicPromptCacheTTLHeader); headerTTL != "" { + ttl = headerTTL + } + workload = c.GetHeader(dto.AnthropicPromptCacheWorkloadHeader) + } + dto.ApplyAnthropicPromptCacheControlToClaudeRequest( + claudeReq, + dto.NormalizeAnthropicPromptCacheTTL(ttl, workload), + ) + return claudeReq, nil } func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) { diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go index 3bf6b35c055..19b00494513 100644 --- a/relay/channel/claude/relay_claude_test.go +++ b/relay/channel/claude/relay_claude_test.go @@ -1,10 +1,14 @@ package claude import ( + "net/http" + "net/http/httptest" "strings" "testing" "github.com/QuantumNous/new-api/dto" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/gin-gonic/gin" "github.com/stretchr/testify/require" ) @@ -12,6 +16,45 @@ func commonPointer[T any](value T) *T { return &value } +func TestConvertOpenAIRequestInjectsAnthropicPromptCacheControl(t *testing.T) { + t.Setenv(dto.AnthropicPromptCacheTTLEnv, "1h") + gin.SetMode(gin.TestMode) + c, _ := gin.CreateTestContext(httptest.NewRecorder()) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + converted, err := (&Adaptor{}).ConvertOpenAIRequest(c, &relaycommon.RelayInfo{}, &dto.GeneralOpenAIRequest{ + Model: "claude-sonnet-4-20250514", + Messages: []dto.Message{{ + Role: "user", + Content: "hello", + }}, + }) + + require.NoError(t, err) + claudeReq := converted.(*dto.ClaudeRequest) + require.JSONEq(t, `{"type":"ephemeral","ttl":"1h"}`, string(claudeReq.CacheControl)) +} + +func TestConvertOpenAIRequestPromptCacheControlHeaderCanDisableEnvDefault(t *testing.T) { + t.Setenv(dto.AnthropicPromptCacheTTLEnv, "1h") + gin.SetMode(gin.TestMode) + c, _ := gin.CreateTestContext(httptest.NewRecorder()) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + c.Request.Header.Set(dto.AnthropicPromptCacheTTLHeader, "off") + + converted, err := (&Adaptor{}).ConvertOpenAIRequest(c, &relaycommon.RelayInfo{}, &dto.GeneralOpenAIRequest{ + Model: "claude-sonnet-4-20250514", + Messages: []dto.Message{{ + Role: "user", + Content: "hello", + }}, + }) + + require.NoError(t, err) + claudeReq := converted.(*dto.ClaudeRequest) + require.Empty(t, claudeReq.CacheControl) +} + func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) { claudeInfo := &ClaudeResponseInfo{ Usage: &dto.Usage{}, diff --git a/relay/claude_handler.go b/relay/claude_handler.go index 527363205a1..0e56969a16c 100644 --- a/relay/claude_handler.go +++ b/relay/claude_handler.go @@ -132,6 +132,8 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ } } + applyAnthropicPromptCacheControlToClaudeRequest(c, request) + if !model_setting.GetGlobalSettings().PassThroughRequestEnabled && !info.ChannelSetting.PassThroughBodyEnabled && service.ShouldChatCompletionsUseResponsesGlobal(info.ChannelId, info.ChannelType, info.OriginModelName) { @@ -155,6 +157,10 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ if err != nil { return types.NewErrorWithStatusCode(err, types.ErrorCodeReadRequestBodyFailed, http.StatusBadRequest, types.ErrOptionWithSkipRetry()) } + storage, _, err = applyAnthropicPromptCacheControlToBodyStorage(c, storage) + if err != nil { + return types.NewError(err, types.ErrorCodeConvertRequestFailed, types.ErrOptionWithSkipRetry()) + } info.UpstreamRequestBodySize = storage.Size() requestBody = common.ReaderOnly(storage) } else { diff --git a/relay/claude_prompt_cache.go b/relay/claude_prompt_cache.go new file mode 100644 index 00000000000..76e6d3134ba --- /dev/null +++ b/relay/claude_prompt_cache.go @@ -0,0 +1,54 @@ +package relay + +import ( + "os" + + "github.com/QuantumNous/new-api/common" + "github.com/QuantumNous/new-api/dto" + "github.com/gin-gonic/gin" +) + +const ( + anthropicPromptCacheTTLEnv = dto.AnthropicPromptCacheTTLEnv + anthropicPromptCacheTTLHeader = dto.AnthropicPromptCacheTTLHeader + anthropicPromptCacheWorkloadHeader = dto.AnthropicPromptCacheWorkloadHeader +) + +func applyAnthropicPromptCacheControlToClaudeRequest(c *gin.Context, req *dto.ClaudeRequest) bool { + return dto.ApplyAnthropicPromptCacheControlToClaudeRequest(req, resolveAnthropicPromptCacheTTL(c)) +} + +func applyAnthropicPromptCacheControlToRawClaudeBody(c *gin.Context, body []byte) ([]byte, bool, error) { + return dto.ApplyAnthropicPromptCacheControlToRawClaudeBody(body, resolveAnthropicPromptCacheTTL(c)) +} + +func applyAnthropicPromptCacheControlToBodyStorage(c *gin.Context, storage common.BodyStorage) (common.BodyStorage, bool, error) { + body, err := storage.Bytes() + if err != nil { + return storage, false, err + } + out, changed, err := applyAnthropicPromptCacheControlToRawClaudeBody(c, body) + if err != nil || !changed { + return storage, changed, err + } + newStorage, err := common.CreateBodyStorage(out) + if err != nil { + return storage, false, err + } + _ = storage.Close() + c.Set(common.KeyBodyStorage, newStorage) + return newStorage, true, nil +} + +func resolveAnthropicPromptCacheTTL(c *gin.Context) string { + headerTTL := "" + workload := "" + if c != nil && c.Request != nil { + headerTTL = c.GetHeader(anthropicPromptCacheTTLHeader) + workload = c.GetHeader(anthropicPromptCacheWorkloadHeader) + } + if headerTTL == "" { + headerTTL = os.Getenv(anthropicPromptCacheTTLEnv) + } + return dto.NormalizeAnthropicPromptCacheTTL(headerTTL, workload) +} diff --git a/relay/claude_prompt_cache_test.go b/relay/claude_prompt_cache_test.go new file mode 100644 index 00000000000..9bae7c49d94 --- /dev/null +++ b/relay/claude_prompt_cache_test.go @@ -0,0 +1,133 @@ +package relay + +import ( + "encoding/json" + "net/http" + "testing" + + "github.com/QuantumNous/new-api/dto" + "github.com/gin-gonic/gin" +) + +func TestApplyAnthropicPromptCacheControlToClaudeRequestInjectsOneHour(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "1h") + c := testClaudePromptCacheContext(nil) + req := &dto.ClaudeRequest{ + Model: "claude-sonnet-4-20250514", + MaxTokens: uintPtr(1024), + Messages: []dto.ClaudeMessage{{Role: "user", Content: "hello"}}, + } + + changed := applyAnthropicPromptCacheControlToClaudeRequest(c, req) + + if !changed { + t.Fatal("expected cache_control to be injected") + } + var got map[string]string + if err := json.Unmarshal(req.CacheControl, &got); err != nil { + t.Fatalf("unmarshal cache_control: %v", err) + } + if got["type"] != "ephemeral" || got["ttl"] != "1h" { + t.Fatalf("cache_control = %#v, want ephemeral 1h", got) + } +} + +func TestApplyAnthropicPromptCacheControlToClaudeRequestPreservesClientControl(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "1h") + c := testClaudePromptCacheContext(nil) + req := &dto.ClaudeRequest{ + Model: "claude-sonnet-4-20250514", + MaxTokens: uintPtr(1024), + CacheControl: json.RawMessage(`{"type":"ephemeral","ttl":"5m"}`), + Messages: []dto.ClaudeMessage{{Role: "user", Content: "hello"}}, + } + + changed := applyAnthropicPromptCacheControlToClaudeRequest(c, req) + + if changed { + t.Fatal("expected existing cache_control to be preserved") + } + if string(req.CacheControl) != `{"type":"ephemeral","ttl":"5m"}` { + t.Fatalf("cache_control = %s, want client value", string(req.CacheControl)) + } +} + +func TestApplyAnthropicPromptCacheControlToClaudeRequestHeaderDisablesEnvDefault(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "1h") + c := testClaudePromptCacheContext(map[string]string{anthropicPromptCacheTTLHeader: "off"}) + req := &dto.ClaudeRequest{ + Model: "claude-sonnet-4-20250514", + MaxTokens: uintPtr(1024), + Messages: []dto.ClaudeMessage{{Role: "user", Content: "hello"}}, + } + + changed := applyAnthropicPromptCacheControlToClaudeRequest(c, req) + + if changed { + t.Fatal("expected header off override to disable injection") + } + if len(req.CacheControl) != 0 { + t.Fatalf("cache_control = %s, want empty", string(req.CacheControl)) + } +} + +func TestApplyAnthropicPromptCacheControlToClaudeRequestAutoUsesWorkloadHeader(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "auto") + c := testClaudePromptCacheContext(map[string]string{anthropicPromptCacheWorkloadHeader: "benchmark"}) + req := &dto.ClaudeRequest{ + Model: "claude-sonnet-4-20250514", + MaxTokens: uintPtr(1024), + Messages: []dto.ClaudeMessage{{Role: "user", Content: "hello"}}, + } + + changed := applyAnthropicPromptCacheControlToClaudeRequest(c, req) + + if !changed { + t.Fatal("expected cache_control to be injected") + } + var got map[string]string + if err := json.Unmarshal(req.CacheControl, &got); err != nil { + t.Fatalf("unmarshal cache_control: %v", err) + } + if got["ttl"] != "1h" { + t.Fatalf("cache_control = %#v, want 1h for benchmark workload", got) + } +} + +func TestApplyAnthropicPromptCacheControlToRawClaudeBody(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "1h") + c := testClaudePromptCacheContext(nil) + body := []byte(`{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"hello"}]}`) + + out, changed, err := applyAnthropicPromptCacheControlToRawClaudeBody(c, body) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Fatal("expected raw body to be changed") + } + var got struct { + CacheControl map[string]string `json:"cache_control"` + } + if err := json.Unmarshal(out, &got); err != nil { + t.Fatalf("unmarshal raw body: %v", err) + } + if got.CacheControl["type"] != "ephemeral" || got.CacheControl["ttl"] != "1h" { + t.Fatalf("cache_control = %#v, want ephemeral 1h", got.CacheControl) + } +} + +func testClaudePromptCacheContext(headers map[string]string) *gin.Context { + gin.SetMode(gin.TestMode) + c := &gin.Context{} + req, _ := http.NewRequest(http.MethodPost, "/v1/messages", nil) + for k, v := range headers { + req.Header.Set(k, v) + } + c.Request = req + return c +} + +func uintPtr(v uint) *uint { + return &v +} From 719b7ef0964b93b5f7a9a37835495c132a8ff497 Mon Sep 17 00:00:00 2001 From: shudonglin Date: Sun, 28 Jun 2026 00:36:39 +0800 Subject: [PATCH 2/4] fix: use json wrapper for anthropic cache control --- dto/claude_prompt_cache.go | 5 +++-- relay/claude_prompt_cache_test.go | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dto/claude_prompt_cache.go b/dto/claude_prompt_cache.go index 2b1a1f2b2bd..c7a794916fa 100644 --- a/dto/claude_prompt_cache.go +++ b/dto/claude_prompt_cache.go @@ -4,6 +4,7 @@ import ( "encoding/json" "strings" + "github.com/QuantumNous/new-api/common" "github.com/tidwall/sjson" ) @@ -68,7 +69,7 @@ func ClaudeRequestHasCacheControl(req *ClaudeRequest) bool { if len(req.CacheControl) > 0 { return true } - data, err := json.Marshal(req) + data, err := common.Marshal(req) if err != nil { return false } @@ -77,7 +78,7 @@ func ClaudeRequestHasCacheControl(req *ClaudeRequest) bool { func RawClaudeBodyHasCacheControl(body []byte) bool { var value any - if err := json.Unmarshal(body, &value); err != nil { + if err := common.Unmarshal(body, &value); err != nil { return false } return hasCacheControlKey(value) diff --git a/relay/claude_prompt_cache_test.go b/relay/claude_prompt_cache_test.go index 9bae7c49d94..f3e7dfc513f 100644 --- a/relay/claude_prompt_cache_test.go +++ b/relay/claude_prompt_cache_test.go @@ -117,6 +117,23 @@ func TestApplyAnthropicPromptCacheControlToRawClaudeBody(t *testing.T) { } } +func TestApplyAnthropicPromptCacheControlToRawClaudeBodyPreservesNestedClientControl(t *testing.T) { + t.Setenv(anthropicPromptCacheTTLEnv, "1h") + c := testClaudePromptCacheContext(nil) + body := []byte(`{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":[{"type":"text","text":"hello","cache_control":{"type":"ephemeral"}}]}]}`) + + out, changed, err := applyAnthropicPromptCacheControlToRawClaudeBody(c, body) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if changed { + t.Fatal("expected nested client cache_control to prevent gateway injection") + } + if string(out) != string(body) { + t.Fatalf("body changed unexpectedly: %s", string(out)) + } +} + func testClaudePromptCacheContext(headers map[string]string) *gin.Context { gin.SetMode(gin.TestMode) c := &gin.Context{} From 2dd1f40524c4fe816ddaf2dee638f729d35efc12 Mon Sep 17 00:00:00 2001 From: shudonglin Date: Sun, 28 Jun 2026 00:46:56 +0800 Subject: [PATCH 3/4] fix: preserve nested prompt cache controls --- dto/openai_request.go | 30 +++++++++++++++++----- relay/channel/claude/relay-claude.go | 11 +++++--- relay/channel/claude/relay_claude_test.go | 31 +++++++++++++++++++++++ 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/dto/openai_request.go b/dto/openai_request.go index fd0bed0ea4c..ca54d39b055 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -572,8 +572,9 @@ func (m *Message) ParseContent() []MediaContent { case ContentTypeText: if text, ok := contentItem["text"].(string); ok { contentList = append(contentList, MediaContent{ - Type: ContentTypeText, - Text: text, + Type: ContentTypeText, + Text: text, + CacheControl: mediaContentCacheControl(contentItem), }) } @@ -596,8 +597,9 @@ func (m *Message) ParseContent() []MediaContent { } } contentList = append(contentList, MediaContent{ - Type: ContentTypeImageURL, - ImageUrl: temp, + Type: ContentTypeImageURL, + ImageUrl: temp, + CacheControl: mediaContentCacheControl(contentItem), }) case ContentTypeInputAudio: @@ -610,8 +612,9 @@ func (m *Message) ParseContent() []MediaContent { Format: format, } contentList = append(contentList, MediaContent{ - Type: ContentTypeInputAudio, - InputAudio: temp, + Type: ContentTypeInputAudio, + InputAudio: temp, + CacheControl: mediaContentCacheControl(contentItem), }) } } @@ -624,6 +627,7 @@ func (m *Message) ParseContent() []MediaContent { File: &MessageFile{ FileId: fileId, }, + CacheControl: mediaContentCacheControl(contentItem), }) } else { fileName, ok1 := fileData["filename"].(string) @@ -635,6 +639,7 @@ func (m *Message) ParseContent() []MediaContent { FileName: fileName, FileData: fileDataStr, }, + CacheControl: mediaContentCacheControl(contentItem), }) } } @@ -646,6 +651,7 @@ func (m *Message) ParseContent() []MediaContent { VideoUrl: &MessageVideoUrl{ Url: videoUrl, }, + CacheControl: mediaContentCacheControl(contentItem), }) } } @@ -657,6 +663,18 @@ func (m *Message) ParseContent() []MediaContent { return contentList } +func mediaContentCacheControl(contentItem map[string]any) json.RawMessage { + cacheControl, ok := contentItem["cache_control"] + if !ok || cacheControl == nil { + return nil + } + data, err := common.Marshal(cacheControl) + if err != nil { + return nil + } + return data +} + // old code /*func (m *Message) StringContent() string { if m.parsedStringContent != nil { diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index 0ba31b1b9ba..71e7be14762 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -308,8 +308,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe for _, ctx := range message.ParseContent() { if ctx.Type == "text" && ctx.Text != "" { systemMessages = append(systemMessages, dto.ClaudeMediaMessage{ - Type: "text", - Text: common.GetPointer[string](ctx.Text), + Type: "text", + Text: common.GetPointer[string](ctx.Text), + CacheControl: ctx.CacheControl, }) } // 未来可以在这里扩展对图片等其他类型的支持 @@ -376,8 +377,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe case "text": if mediaMessage.Text != "" { claudeMediaMessages = append(claudeMediaMessages, dto.ClaudeMediaMessage{ - Type: "text", - Text: common.GetPointer[string](mediaMessage.Text), + Type: "text", + Text: common.GetPointer[string](mediaMessage.Text), + CacheControl: mediaMessage.CacheControl, }) } default: @@ -390,6 +392,7 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe return nil, fmt.Errorf("get file data failed: %s", err.Error()) } claudeMediaMessage := dto.ClaudeMediaMessage{ + CacheControl: mediaMessage.CacheControl, Source: &dto.ClaudeMessageSource{ Type: "base64", }, diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go index 19b00494513..331871d7de8 100644 --- a/relay/channel/claude/relay_claude_test.go +++ b/relay/channel/claude/relay_claude_test.go @@ -1,6 +1,7 @@ package claude import ( + "encoding/json" "net/http" "net/http/httptest" "strings" @@ -55,6 +56,36 @@ func TestConvertOpenAIRequestPromptCacheControlHeaderCanDisableEnvDefault(t *tes require.Empty(t, claudeReq.CacheControl) } +func TestConvertOpenAIRequestPreservesNestedPromptCacheControl(t *testing.T) { + t.Setenv(dto.AnthropicPromptCacheTTLEnv, "1h") + gin.SetMode(gin.TestMode) + c, _ := gin.CreateTestContext(httptest.NewRecorder()) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + clientCacheControl := map[string]any{"type": "ephemeral", "ttl": "5m"} + + converted, err := (&Adaptor{}).ConvertOpenAIRequest(c, &relaycommon.RelayInfo{}, &dto.GeneralOpenAIRequest{ + Model: "claude-sonnet-4-20250514", + Messages: []dto.Message{{ + Role: "user", + Content: []any{map[string]any{ + "type": dto.ContentTypeText, + "text": "hello", + "cache_control": clientCacheControl, + }}, + }}, + }) + + require.NoError(t, err) + claudeReq := converted.(*dto.ClaudeRequest) + require.Empty(t, claudeReq.CacheControl) + blocks, ok := claudeReq.Messages[0].Content.([]dto.ClaudeMediaMessage) + require.True(t, ok) + require.Len(t, blocks, 1) + expected, err := json.Marshal(clientCacheControl) + require.NoError(t, err) + require.JSONEq(t, string(expected), string(blocks[0].CacheControl)) +} + func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) { claudeInfo := &ClaudeResponseInfo{ Usage: &dto.Usage{}, From 07c8d273209a2879b4811019e5eb213a6ad40990 Mon Sep 17 00:00:00 2001 From: shudonglin Date: Sun, 28 Jun 2026 01:08:36 +0800 Subject: [PATCH 4/4] test: add anthropic prompt cache e2e coverage --- relay/channel/claude/relay_claude_test.go | 90 +++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go index 331871d7de8..1c5c4640be7 100644 --- a/relay/channel/claude/relay_claude_test.go +++ b/relay/channel/claude/relay_claude_test.go @@ -1,7 +1,9 @@ package claude import ( + "bytes" "encoding/json" + "io" "net/http" "net/http/httptest" "strings" @@ -9,6 +11,8 @@ import ( "github.com/QuantumNous/new-api/dto" relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/types" "github.com/gin-gonic/gin" "github.com/stretchr/testify/require" ) @@ -86,6 +90,92 @@ func TestConvertOpenAIRequestPreservesNestedPromptCacheControl(t *testing.T) { require.JSONEq(t, string(expected), string(blocks[0].CacheControl)) } +func TestClaudeAdaptorE2EInjectsPromptCacheControlAndForwardsUsage(t *testing.T) { + t.Setenv(dto.AnthropicPromptCacheTTLEnv, "auto") + gin.SetMode(gin.TestMode) + service.InitHttpClient() + + var capturedBody []byte + var capturedHeaders http.Header + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/messages" { + t.Fatalf("upstream path = %q, want /v1/messages", r.URL.Path) + } + capturedHeaders = r.Header.Clone() + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("read upstream body: %v", err) + } + capturedBody = body + + w.Header().Set("content-type", "application/json") + _, _ = w.Write([]byte(`{ + "id":"msg_cache_e2e", + "type":"message", + "role":"assistant", + "model":"claude-sonnet-4-20250514", + "content":[{"type":"text","text":"ok"}], + "stop_reason":"end_turn", + "usage":{ + "input_tokens":11, + "cache_creation_input_tokens":64, + "cache_read_input_tokens":32, + "cache_creation":{"ephemeral_1h_input_tokens":64}, + "output_tokens":7 + } + }`)) + })) + defer upstream.Close() + + recorder := httptest.NewRecorder() + c, _ := gin.CreateTestContext(recorder) + c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + c.Request.Header.Set("content-type", "application/json") + c.Request.Header.Set(dto.AnthropicPromptCacheTTLHeader, "auto") + c.Request.Header.Set(dto.AnthropicPromptCacheWorkloadHeader, "benchmark") + c.Request.Header.Set("x-trace-id", "trace-123") + info := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + OriginModelName: "claude-sonnet-4-20250514", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelBaseUrl: upstream.URL, + ApiKey: "test-key", + HeadersOverride: map[string]any{"*": ""}, + }, + } + adaptor := &Adaptor{} + converted, err := adaptor.ConvertOpenAIRequest(c, info, &dto.GeneralOpenAIRequest{ + Model: "claude-sonnet-4-20250514", + Messages: []dto.Message{{ + Role: "user", + Content: "stable evaluation prefix", + }}, + MaxTokens: commonPointer(uint(16)), + }) + require.NoError(t, err) + jsonData, err := json.Marshal(converted) + require.NoError(t, err) + + resp, err := adaptor.DoRequest(c, info, bytes.NewReader(jsonData)) + require.NoError(t, err) + usage, newAPIErr := adaptor.DoResponse(c, resp.(*http.Response), info) + require.Nil(t, newAPIErr) + + var upstreamBody struct { + CacheControl map[string]string `json:"cache_control"` + } + require.NoError(t, json.Unmarshal(capturedBody, &upstreamBody)) + require.Equal(t, map[string]string{"type": "ephemeral", "ttl": "1h"}, upstreamBody.CacheControl) + require.Empty(t, capturedHeaders.Get(dto.AnthropicPromptCacheTTLHeader)) + require.Empty(t, capturedHeaders.Get(dto.AnthropicPromptCacheWorkloadHeader)) + require.Equal(t, "trace-123", capturedHeaders.Get("x-trace-id")) + + typedUsage := usage.(*dto.Usage) + require.Equal(t, 32, typedUsage.PromptTokensDetails.CachedTokens) + require.Equal(t, 64, typedUsage.PromptTokensDetails.CachedCreationTokens) + require.Equal(t, 64, typedUsage.ClaudeCacheCreation1hTokens) +} + func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) { claudeInfo := &ClaudeResponseInfo{ Usage: &dto.Usage{},