Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions internal/runtime/executor/claude_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,8 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
log.Errorf("response body close error: %v", errClose)
}
}()
// Ensure every stream path, including Claude passthrough, records at least one usage entry.
defer reporter.ensurePublished(ctx)

// If from == to (Claude → Claude), directly forward the SSE stream without translation
if from == to {
Expand Down
105 changes: 105 additions & 0 deletions internal/runtime/executor/claude_executor_usage_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package executor

import (
"context"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"

"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
)

// authScopedUsagePlugin is a usage plugin that captures only the records
// emitted for one specific auth ID, buffering them on a channel for the test
// to consume.
type authScopedUsagePlugin struct {
	authID  string
	records chan usage.Record
}

// HandleUsage buffers a record when its AuthID matches the plugin's auth ID.
// The send is non-blocking: if the buffer is full the record is dropped, so a
// slow test reader can never stall the usage pipeline.
func (p *authScopedUsagePlugin) HandleUsage(_ context.Context, record usage.Record) {
	if p == nil {
		return
	}
	if record.AuthID != p.authID {
		return
	}
	// Best-effort delivery: drop instead of blocking when the buffer is full.
	select {
	case p.records <- record:
	default:
	}
}

var (
	// claudePassthroughUsagePluginOnce guards the one-time global plugin
	// registration; usage.RegisterPlugin is process-wide, so registering on
	// every test run must be avoided.
	claudePassthroughUsagePluginOnce sync.Once
	// claudePassthroughUsagePlugin captures usage records for the fixed test
	// auth ID below. The buffered channel keeps HandleUsage non-blocking.
	claudePassthroughUsagePlugin = &authScopedUsagePlugin{
		authID:  "claude-passthrough-no-usage",
		records: make(chan usage.Record, 8),
	}
)

// waitForUsageRecord blocks until a usage record arrives on records or two
// seconds elapse, failing the test on timeout. The zero-value return after
// t.Fatal only satisfies the compiler; Fatal stops the test goroutine.
func waitForUsageRecord(t *testing.T, records <-chan usage.Record) usage.Record {
	t.Helper()
	timeout := time.After(2 * time.Second)
	select {
	case rec := <-records:
		return rec
	case <-timeout:
		t.Fatal("timed out waiting for usage record")
		return usage.Record{}
	}
}

// TestClaudeExecutorExecuteStream_PassthroughPublishesFallbackUsageWithoutUsageChunk
// verifies that the Claude→Claude passthrough streaming path still publishes a
// fallback usage record even when the upstream SSE stream never emits a usage
// payload.
func TestClaudeExecutorExecuteStream_PassthroughPublishesFallbackUsageWithoutUsageChunk(t *testing.T) {
	// Register the capturing plugin exactly once; registration is global.
	claudePassthroughUsagePluginOnce.Do(func() {
		usage.RegisterPlugin(claudePassthroughUsagePlugin)
	})

	// Upstream stub: a Claude SSE stream with message_start, one text delta,
	// and message_stop — deliberately no usage chunk anywhere.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		_, _ = w.Write([]byte("data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_1\"}}\n\n"))
		_, _ = w.Write([]byte("data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"hi\"}}\n\n"))
		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n\n"))
	}))
	defer server.Close()

	executor := NewClaudeExecutor(&config.Config{})
	// Auth ID matches the plugin's authID so the fallback record is captured;
	// base_url points the executor at the local stub.
	auth := &cliproxyauth.Auth{
		ID: "claude-passthrough-no-usage",
		Attributes: map[string]string{
			"api_key":  "key-123",
			"base_url": server.URL,
		},
	}
	payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`)

	// SourceFormat "claude" with a Claude model exercises the from == to
	// passthrough branch (SSE forwarded without translation).
	result, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: payload,
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("claude"),
		Stream:       true,
	})
	if err != nil {
		t.Fatalf("ExecuteStream error: %v", err)
	}

	// Drain the stream fully; any chunk-level error fails the test.
	for chunk := range result.Chunks {
		if chunk.Err != nil {
			t.Fatalf("unexpected stream chunk error: %v", chunk.Err)
		}
	}

	// The fallback record must identify the auth, name the claude provider,
	// be marked successful, and carry a zero-value (empty) usage detail.
	record := waitForUsageRecord(t, claudePassthroughUsagePlugin.records)
	if record.AuthID != auth.ID {
		t.Fatalf("usage record auth_id = %q, want %q", record.AuthID, auth.ID)
	}
	if record.Provider != "claude" {
		t.Fatalf("usage record provider = %q, want %q", record.Provider, "claude")
	}
	if record.Failed {
		t.Fatal("usage fallback should mark request as successful")
	}
	if record.Detail != (usage.Detail{}) {
		t.Fatalf("usage fallback detail = %+v, want zero-value detail", record.Detail)
	}
}
2 changes: 2 additions & 0 deletions internal/runtime/executor/gemini_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
reporter.publishFailure(ctx)
out <- cliproxyexecutor.StreamChunk{Err: errScan}
}
// Ensure we record the request if no usage chunk was ever seen
reporter.ensurePublished(ctx)
}()
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
}
Expand Down
2 changes: 2 additions & 0 deletions internal/runtime/executor/kimi_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
reporter.publishFailure(ctx)
out <- cliproxyexecutor.StreamChunk{Err: errScan}
}
// Ensure we record the request if no usage chunk was ever seen
reporter.ensurePublished(ctx)
}()
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
}
Expand Down
112 changes: 109 additions & 3 deletions internal/runtime/executor/openai_compat_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@ import (
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)

const openAICompatRetryErrorBodyLimit = 1 << 20

// OpenAICompatExecutor implements a stateless executor for OpenAI-compatible providers.
// It performs request/response translation and executes against the provider base URL
// using per-auth credentials (API key) and per-auth HTTP transport (proxy) from context.
Expand Down Expand Up @@ -199,15 +202,22 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
requestedModel := payloadRequestedModel(opts, req.Model)
translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
// Preserve historical behavior: if include_usage is omitted or explicitly
// sent as false/null, still force it on so upstreams can emit real usage
// chunks. Only an explicit true counts as caller-enabled.
autoInjectedStreamUsage := !gjson.GetBytes(translated, "stream_options.include_usage").Bool()
Comment on lines 204 to +208
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Derive retry eligibility from the caller payload, not payload rules

autoInjectedStreamUsage is computed after applyPayloadConfigWithRoot has already applied cfg.Payload defaults/overrides, so a proxy-configured rule that injects stream_options.include_usage=true is treated as if the client had supplied it. In that case retryStreamWithoutInjectedUsage is never allowed to run, and OpenAI-compatible backends that reject include_usage still fail with 400/422 even though the unsupported field was added by this executor stack rather than by the caller.

Useful? React with 👍 / 👎.


translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
if err != nil {
return nil, err
}

// Request usage data in the final streaming chunk so that token statistics
// are captured even when the upstream is an OpenAI-compatible provider.
translated, _ = sjson.SetBytes(translated, "stream_options.include_usage", true)
if autoInjectedStreamUsage {
translated, err = sjson.SetBytes(translated, "stream_options.include_usage", true)
if err != nil {
return nil, fmt.Errorf("openai compat executor: failed to set stream_options in payload: %w", err)
}
}

url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
Expand Down Expand Up @@ -250,6 +260,13 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
recordAPIResponseError(ctx, e.cfg, err)
return nil, err
}
if retryResp, retryErr := e.retryStreamWithoutInjectedUsage(ctx, auth, httpClient, httpReq, translated, httpResp, autoInjectedStreamUsage); retryResp != nil || retryErr != nil {
httpResp = retryResp
if retryErr != nil {
recordAPIResponseError(ctx, e.cfg, retryErr)
return nil, retryErr
}
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
b, _ := io.ReadAll(httpResp.Body)
Expand Down Expand Up @@ -304,6 +321,95 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
}

// retryStreamWithoutInjectedUsage inspects a failed streaming response and,
// when the failure looks like the upstream rejecting the auto-injected
// stream_options.include_usage field, re-sends the request once with that
// field removed. Returns (nil, nil) when no retry applies (caller keeps the
// original response), (httpResp, nil) with the body restored when the error
// is unrelated or too large to inspect, or the retry attempt's response and
// error otherwise.
func (e *OpenAICompatExecutor) retryStreamWithoutInjectedUsage(ctx context.Context, auth *cliproxyauth.Auth, httpClient *http.Client, httpReq *http.Request, translated []byte, httpResp *http.Response, autoInjected bool) (*http.Response, error) {
	// Only retry when this executor injected include_usage itself; a
	// caller-supplied value is the caller's responsibility.
	if !autoInjected || httpResp == nil {
		return nil, nil
	}
	// Unsupported request fields surface as 400 or 422 upstream.
	if httpResp.StatusCode != http.StatusBadRequest && httpResp.StatusCode != http.StatusUnprocessableEntity {
		return nil, nil
	}
	// Read a bounded copy of the error body (limit+1 so overflow is
	// detectable) and always close the original body.
	body, err := io.ReadAll(io.LimitReader(httpResp.Body, openAICompatRetryErrorBodyLimit+1))
	if err != nil {
		if errClose := httpResp.Body.Close(); errClose != nil {
			log.Warnf("openai compat executor: failed to close body after read error: %v", errClose)
		}
		return nil, err
	}
	if errClose := httpResp.Body.Close(); errClose != nil {
		log.Warnf("openai compat executor: close fallback response body error: %v", errClose)
	}
	// Oversized error bodies are not inspected; hand the (truncated) response
	// back to the caller unchanged.
	if len(body) > openAICompatRetryErrorBodyLimit {
		log.Warnf("openai compat executor: fallback response body exceeded %d bytes; skip retry without include_usage", openAICompatRetryErrorBodyLimit)
		httpResp.Body = io.NopCloser(bytes.NewReader(body[:openAICompatRetryErrorBodyLimit]))
		return httpResp, nil
	}
	// Unrelated validation error: restore the body so the caller can report it.
	if !isUnsupportedInjectedUsageError(body) {
		httpResp.Body = io.NopCloser(bytes.NewReader(body))
		return httpResp, nil
	}
	// Strip the injected field; drop stream_options entirely when removing
	// include_usage leaves it empty.
	trimmed, err := sjson.DeleteBytes(translated, "stream_options.include_usage")
	if err != nil {
		return nil, fmt.Errorf("openai compat executor: failed to remove unsupported stream_options in payload: %w", err)
	}
	if streamOptions := gjson.GetBytes(trimmed, "stream_options"); streamOptions.Exists() && len(streamOptions.Map()) == 0 {
		trimmed, err = sjson.DeleteBytes(trimmed, "stream_options")
		if err != nil {
			return nil, fmt.Errorf("openai compat executor: failed to remove empty stream_options in payload: %w", err)
		}
	}
	// Rebuild the request with the original method, URL, and headers.
	retryReq, err := http.NewRequestWithContext(ctx, httpReq.Method, httpReq.URL.String(), bytes.NewReader(trimmed))
	if err != nil {
		return nil, err
	}
	retryReq.Header = httpReq.Header.Clone()
	var authID, authLabel, authType, authValue string
	if auth != nil {
		authID = auth.ID
		authLabel = auth.Label
		authType, authValue = auth.AccountInfo()
	}
	// Log the retry upstream request the same way as the primary attempt.
	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
		URL:       retryReq.URL.String(),
		Method:    retryReq.Method,
		Headers:   retryReq.Header.Clone(),
		Body:      trimmed,
		Provider:  e.Identifier(),
		AuthID:    authID,
		AuthLabel: authLabel,
		AuthType:  authType,
		AuthValue: authValue,
	})
	return httpClient.Do(retryReq)
}
Comment on lines +324 to +383
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This function is quite long and handles multiple responsibilities: checking for retry conditions, reading the response body, preparing the new request, and executing it. This increases its complexity and makes it harder to understand and maintain.

Consider refactoring this function by extracting some of its logic into smaller, more focused helper functions. For example, you could have:

  1. A function that determines if a retry is necessary. This function would encapsulate reading the response body and checking its content. It could return a boolean, the body bytes, and an error.
  2. The main retryStreamWithoutInjectedUsage function would then use this helper to decide whether to proceed with preparing and sending the retry request.

This would improve readability and separation of concerns.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I kept this follow-up intentionally focused on the blocking streaming accounting gap and the retry false-positive path, so I did not split retryStreamWithoutInjectedUsage further in this patch. The new predicate helper was the smallest safe extraction for this round; I can revisit a larger decomposition in a later cleanup PR if we touch this executor again.


// isUnsupportedInjectedUsageError reports whether an upstream error body looks
// like a rejection of the stream_options / include_usage field. It requires
// both a mention of one of those field names and a generic
// unknown/unsupported-field validation marker, all matched case-insensitively.
func isUnsupportedInjectedUsageError(body []byte) bool {
	if len(body) == 0 {
		return false
	}
	text := strings.ToLower(string(body))
	// Must reference the field this executor injects; otherwise the error is
	// about something else entirely.
	mentionsUsageField := strings.Contains(text, "stream_options") || strings.Contains(text, "include_usage")
	if !mentionsUsageField {
		return false
	}
	// Validation-style markers providers commonly use for rejected fields.
	for _, marker := range []string{
		"unknown field",
		"unknown parameter",
		"unknown argument",
		"unrecognized field",
		"unrecognized parameter",
		"unsupported field",
		"unsupported parameter",
		"not allowed",
		"not permitted",
		"extra inputs are not permitted",
	} {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}

func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName

Expand Down
Loading
Loading