From bc7f5a52cfdce2a56c81a60382418daeb149d171 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Wed, 18 Mar 2026 21:34:34 +0100 Subject: [PATCH] Remove /think command and session-level thinking toggle Thinking is now solely driven by the model's thinking_budget config. If a model has thinking_budget set or requires thinking (e.g. o-series), it uses thinking. There is no longer a per-session toggle. Removed: - /think slash command from TUI - Session.Thinking field and WithThinking option - options.WithThinking model option and applyOverrides - Thinking toggle from sidebar, server API, session state - ThinkingConfigured from agent and teamloader - All related handlers, messages, and tests Assisted-By: docker-agent --- agent-schema.json | 19 +- cmd/root/run.go | 9 +- docs/configuration/models/index.md | 7 - docs/features/api-server/index.md | 1 - docs/features/tui/index.md | 1 - e2e/debug_title_test.go | 4 +- e2e/exec_test.go | 2 +- .../cassettes/TestA2AServer_MultiAgent.yaml | 42 +- .../cassettes/TestDebug_Title/Anthropic.yaml | 17 +- .../TestDebug_Title/Anthropic_Opus46.yaml | 21 +- .../TestDebug_Title/Anthropic_Sonnet45.yaml | 15 +- .../Google_Gemini25FlashLite.yaml | 6 +- .../cassettes/TestDebug_Title/OpenAI.yaml | 18 +- .../cassettes/TestExec_OpenAI_V3Config.yaml | 26 +- .../TestExec_OpenAI_WithThinkingBudget.yaml | 248 +++++++- .../cassettes/TestExec_OpenAI_gpt5.yaml | 263 +++++++- .../cassettes/TestExec_OpenAI_gpt5_1.yaml | 36 +- .../cassettes/TestExec_OpenAI_gpt5_codex.yaml | 340 +++++++++- .../TestExec_ToolCallsNeedAcceptance.yaml | 601 +++++++++++++++++- .../cassettes/TestMCP_MultiAgent.yaml | 335 +++++++++- pkg/acp/agent.go | 1 - pkg/agent/agent.go | 8 - pkg/agent/opts.go | 8 - pkg/api/types.go | 1 - pkg/app/app.go | 3 +- pkg/app/app_test.go | 43 +- pkg/app/app_working_dir_test.go | 2 - pkg/config/latest/types.go | 42 +- pkg/config/latest/types_test.go | 80 ++- pkg/evaluation/eval.go | 1 - pkg/model/provider/anthropic/beta_client.go | 46 +- 
pkg/model/provider/anthropic/client.go | 78 ++- pkg/model/provider/bedrock/client.go | 56 +- pkg/model/provider/bedrock/client_test.go | 8 +- pkg/model/provider/clone.go | 3 + pkg/model/provider/clone_test.go | 45 +- pkg/model/provider/gemini/client.go | 123 +--- pkg/model/provider/gemini/client_test.go | 124 +--- pkg/model/provider/model_defaults_test.go | 26 + pkg/model/provider/openai/client.go | 77 +-- .../provider/openai/thinking_budget_test.go | 265 +------- pkg/model/provider/options/options.go | 24 +- pkg/model/provider/override_test.go | 267 -------- pkg/model/provider/provider.go | 78 --- pkg/runtime/agent_delegation.go | 1 - pkg/runtime/agent_delegation_test.go | 36 -- pkg/runtime/fallback.go | 10 +- pkg/runtime/fallback_test.go | 90 --- pkg/runtime/loop.go | 10 - pkg/runtime/skill_runner.go | 5 - pkg/server/server.go | 10 - pkg/server/session_manager.go | 17 - pkg/session/branch.go | 1 - pkg/session/migrations.go | 6 +- pkg/session/session.go | 13 - pkg/session/session_options_test.go | 6 - pkg/session/store.go | 16 +- pkg/session/store_test.go | 84 --- pkg/sessiontitle/generator.go | 2 +- pkg/teamloader/teamloader.go | 21 +- pkg/tui/commands/commands.go | 24 +- pkg/tui/components/sidebar/sidebar.go | 54 +- pkg/tui/handlers.go | 38 -- pkg/tui/messages/messages.go | 2 +- pkg/tui/messages/toggle.go | 9 - pkg/tui/page/chat/chat.go | 5 - pkg/tui/service/sessionstate.go | 11 - pkg/tui/tui.go | 25 +- pkg/tui/tui_exit_test.go | 15 +- 69 files changed, 2156 insertions(+), 1775 deletions(-) delete mode 100644 pkg/model/provider/override_test.go diff --git a/agent-schema.json b/agent-schema.json index 6a0f9c558..b1ec92903 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -554,20 +554,12 @@ "description": "Whether to track usage" }, "thinking_budget": { - "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'). 
Anthropic: integer token budget (1024-32768), 'adaptive' (lets the model decide), or effort levels ('low','medium','high','max') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured.", + "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high','xhigh'). Anthropic: integer token budget (1024-32768), 'adaptive' (adaptive thinking with high effort by default), 'adaptive/<effort>' where effort is low/medium/high/max (adaptive thinking with specified effort), or effort levels ('low','medium','high','max') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured.", "oneOf": [ { "type": "string", - "enum": [ - "none", - "minimal", - "low", - "medium", - "high", - "max", - "adaptive" - ], - "description": "Reasoning effort level. 'adaptive'/'max' are Anthropic-specific. Use 'none' to disable thinking." + "pattern": "^(none|minimal|low|medium|high|xhigh|max|adaptive(/low|/medium|/high|/max)?)$", + "description": "Reasoning effort level. 'adaptive' uses adaptive thinking with high effort (default). 'adaptive/<effort>' specifies the effort level (low/medium/high/max). Use 'none' to disable thinking." 
}, { "type": "integer", @@ -583,8 +575,13 @@ "low", "medium", "high", + "xhigh", "max", "adaptive", + "adaptive/low", + "adaptive/medium", + "adaptive/high", + "adaptive/max", -1, 1024, 8192, diff --git a/cmd/root/run.go b/cmd/root/run.go index 52eef0751..ab8ac2096 100644 --- a/cmd/root/run.go +++ b/cmd/root/run.go @@ -404,10 +404,10 @@ func (f *runExecFlags) createLocalRuntimeAndSession(ctx context.Context, loadRes slog.Debug("Loaded existing session", "session_id", resolvedID, "session_ref", f.sessionID, "agent", f.agentName) } else { wd, _ := os.Getwd() - sess = session.New(f.buildSessionOpts(agent.MaxIterations(), agent.ThinkingConfigured(), wd)...) + sess = session.New(f.buildSessionOpts(agent.MaxIterations(), wd)...) // Session is stored lazily on first UpdateSession call (when content is added) // This avoids creating empty sessions in the database - slog.Debug("Using local runtime", "agent", f.agentName, "thinking", agent.ThinkingConfigured()) + slog.Debug("Using local runtime", "agent", f.agentName) } return localRt, sess, nil @@ -494,12 +494,11 @@ func (f *runExecFlags) buildAppOpts(args []string) ([]app.Opt, error) { // buildSessionOpts returns the canonical set of session options derived from // CLI flags and agent configuration. Both the initial session and spawned // sessions use this method so their options never drift apart. 
-func (f *runExecFlags) buildSessionOpts(maxIterations int, thinking bool, workingDir string) []session.Opt { +func (f *runExecFlags) buildSessionOpts(maxIterations int, workingDir string) []session.Opt { return []session.Opt{ session.WithMaxIterations(maxIterations), session.WithToolsApproved(f.autoApprove), session.WithHideToolResults(f.hideToolResults), - session.WithThinking(thinking), session.WithWorkingDir(workingDir), } } @@ -544,7 +543,7 @@ func (f *runExecFlags) createSessionSpawner(agentSource config.Source, sessStore } // Create a new session - newSess := session.New(f.buildSessionOpts(agent.MaxIterations(), agent.ThinkingConfigured(), workingDir)...) + newSess := session.New(f.buildSessionOpts(agent.MaxIterations(), workingDir)...) // Create cleanup function cleanup := func() { diff --git a/docs/configuration/models/index.md b/docs/configuration/models/index.md index 36d698f5e..da3ffc2d5 100644 --- a/docs/configuration/models/index.md +++ b/docs/configuration/models/index.md @@ -110,13 +110,6 @@ Works for all providers: thinking_budget: none # or 0 ``` -
-
ℹ️ Runtime Toggle -
-

Even when thinking is disabled in config, you can enable it during a session using the /think command in the TUI.

- -
- ## Interleaved Thinking For Anthropic and Bedrock Claude models, interleaved thinking allows tool calls during model reasoning. This is enabled by default: diff --git a/docs/features/api-server/index.md b/docs/features/api-server/index.md index 41c78dc14..ff69f4f00 100644 --- a/docs/features/api-server/index.md +++ b/docs/features/api-server/index.md @@ -49,7 +49,6 @@ All endpoints are under the `/api` prefix. | `PATCH` | `/api/sessions/:id/permissions` | Update session permissions | | `POST` | `/api/sessions/:id/resume` | Resume a paused session (after tool confirmation) | | `POST` | `/api/sessions/:id/tools/toggle` | Toggle auto-approve (YOLO) mode | -| `POST` | `/api/sessions/:id/thinking/toggle` | Toggle thinking/reasoning mode | | `POST` | `/api/sessions/:id/elicitation` | Respond to an MCP tool elicitation request | ### Agent Execution diff --git a/docs/features/tui/index.md b/docs/features/tui/index.md index 9c134d89d..3d12cae9e 100644 --- a/docs/features/tui/index.md +++ b/docs/features/tui/index.md @@ -41,7 +41,6 @@ Type `/` during a session to see available commands, or press Ctrl+" where effort is low/medium/high/max (Anthropic adaptive with specified effort) // - Integer: token count (for Anthropic, range 1024-32768) type ThinkingBudget struct { // Effort stores string-based reasoning effort levels @@ -684,15 +685,25 @@ type ThinkingBudget struct { } // validThinkingEfforts lists all accepted string values for thinking_budget. -const validThinkingEfforts = "none, minimal, low, medium, high, max, adaptive" +const validThinkingEfforts = "none, minimal, low, medium, high, xhigh, max, adaptive, adaptive/" + +// validAdaptiveEfforts lists the accepted effort levels for adaptive thinking. +var validAdaptiveEfforts = map[string]bool{ + "low": true, "medium": true, "high": true, "max": true, +} // isValidThinkingEffort reports whether s (case-insensitive, trimmed) is a // recognised thinking_budget effort level. 
func isValidThinkingEffort(s string) bool { - switch strings.ToLower(strings.TrimSpace(s)) { - case "none", "minimal", "low", "medium", "high", "max", "adaptive": + norm := strings.ToLower(strings.TrimSpace(s)) + switch norm { + case "none", "minimal", "low", "medium", "high", "xhigh", "max", "adaptive": return true default: + // Support "adaptive/<effort>" format (e.g. "adaptive/high") + if after, ok := strings.CutPrefix(norm, "adaptive/"); ok { + return validAdaptiveEfforts[after] + } return false } } @@ -752,11 +763,28 @@ func (t *ThinkingBudget) IsDisabled() bool { // IsAdaptive returns true if the thinking budget is set to adaptive mode. // Adaptive thinking lets the model decide how much thinking to do. +// Matches both "adaptive" and "adaptive/<effort>" formats. func (t *ThinkingBudget) IsAdaptive() bool { if t == nil { return false } - return strings.EqualFold(t.Effort, "adaptive") + norm := strings.ToLower(strings.TrimSpace(t.Effort)) + return norm == "adaptive" || strings.HasPrefix(norm, "adaptive/") +} + +// AdaptiveEffort returns the effort level for adaptive thinking. +// For "adaptive" it returns the default ("high"). +// For "adaptive/<effort>" it returns the specified effort. +// Returns ("", false) if the budget is not adaptive.
+func (t *ThinkingBudget) AdaptiveEffort() (string, bool) { + if !t.IsAdaptive() { + return "", false + } + norm := strings.ToLower(strings.TrimSpace(t.Effort)) + if after, ok := strings.CutPrefix(norm, "adaptive/"); ok && after != "" { + return after, true + } + return "high", true } // EffortTokens maps a string effort level to a token budget for providers diff --git a/pkg/config/latest/types_test.go b/pkg/config/latest/types_test.go index c69e87100..2dbf67edb 100644 --- a/pkg/config/latest/types_test.go +++ b/pkg/config/latest/types_test.go @@ -148,14 +148,51 @@ func TestThinkingBudget_IsDisabled(t *testing.T) { func TestThinkingBudget_UnmarshalYAML_InvalidEffort(t *testing.T) { t.Parallel() - input := []byte(`thinking_budget: adaptative`) - var config struct { - ThinkingBudget *ThinkingBudget `yaml:"thinking_budget"` + for _, tt := range []struct { + name string + input string + }{ + {"typo", `thinking_budget: adaptative`}, + {"invalid adaptive effort", `thinking_budget: adaptive/ultra`}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + var config struct { + ThinkingBudget *ThinkingBudget `yaml:"thinking_budget"` + } + err := yaml.Unmarshal([]byte(tt.input), &config) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid thinking_budget effort") + }) } +} - err := yaml.Unmarshal(input, &config) - require.Error(t, err) - require.Contains(t, err.Error(), `invalid thinking_budget effort "adaptative"`) +func TestThinkingBudget_UnmarshalYAML_AdaptiveWithEffort(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + input string + wantEffort string + }{ + {"adaptive", `thinking_budget: adaptive`, "adaptive"}, + {"adaptive/low", `thinking_budget: adaptive/low`, "adaptive/low"}, + {"adaptive/medium", `thinking_budget: adaptive/medium`, "adaptive/medium"}, + {"adaptive/high", `thinking_budget: adaptive/high`, "adaptive/high"}, + {"adaptive/max", `thinking_budget: adaptive/max`, "adaptive/max"}, + } { + t.Run(tt.name, 
func(t *testing.T) { + t.Parallel() + var config struct { + ThinkingBudget *ThinkingBudget `yaml:"thinking_budget"` + } + err := yaml.Unmarshal([]byte(tt.input), &config) + require.NoError(t, err) + require.NotNil(t, config.ThinkingBudget) + require.Equal(t, tt.wantEffort, config.ThinkingBudget.Effort) + require.True(t, config.ThinkingBudget.IsAdaptive()) + }) + } } func TestThinkingBudget_UnmarshalJSON_InvalidEffort(t *testing.T) { @@ -181,6 +218,10 @@ func TestThinkingBudget_IsAdaptive(t *testing.T) { }{ {"nil", nil, false}, {"adaptive", &ThinkingBudget{Effort: "adaptive"}, true}, + {"adaptive/high", &ThinkingBudget{Effort: "adaptive/high"}, true}, + {"adaptive/low", &ThinkingBudget{Effort: "adaptive/low"}, true}, + {"adaptive/medium", &ThinkingBudget{Effort: "adaptive/medium"}, true}, + {"adaptive/max", &ThinkingBudget{Effort: "adaptive/max"}, true}, {"medium", &ThinkingBudget{Effort: "medium"}, false}, {"tokens", &ThinkingBudget{Tokens: 8192}, false}, } { @@ -191,6 +232,33 @@ func TestThinkingBudget_IsAdaptive(t *testing.T) { } } +func TestThinkingBudget_AdaptiveEffort(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + b *ThinkingBudget + wantEffort string + wantOK bool + }{ + {"nil", nil, "", false}, + {"adaptive defaults to high", &ThinkingBudget{Effort: "adaptive"}, "high", true}, + {"adaptive/low", &ThinkingBudget{Effort: "adaptive/low"}, "low", true}, + {"adaptive/medium", &ThinkingBudget{Effort: "adaptive/medium"}, "medium", true}, + {"adaptive/high", &ThinkingBudget{Effort: "adaptive/high"}, "high", true}, + {"adaptive/max", &ThinkingBudget{Effort: "adaptive/max"}, "max", true}, + {"not adaptive", &ThinkingBudget{Effort: "medium"}, "", false}, + {"tokens", &ThinkingBudget{Tokens: 8192}, "", false}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + effort, ok := tt.b.AdaptiveEffort() + require.Equal(t, tt.wantOK, ok) + require.Equal(t, tt.wantEffort, effort) + }) + } +} + func TestThinkingBudget_EffortTokens(t 
*testing.T) { t.Parallel() diff --git a/pkg/evaluation/eval.go b/pkg/evaluation/eval.go index fd8030c62..a55bc301c 100644 --- a/pkg/evaluation/eval.go +++ b/pkg/evaluation/eval.go @@ -625,7 +625,6 @@ func createJudgeModel(ctx context.Context, judgeModel string, runConfig *config. } opts := []options.Opt{ - options.WithThinking(false), options.WithStructuredOutput(judgeResponseSchema), } if runConfig.ModelsGateway != "" { diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index ae9286ec9..8be726620 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -91,45 +91,19 @@ func (c *Client) createBetaStream( } } - // Configure thinking if not explicitly disabled via /think command - // For interleaved thinking to make sense, we use a default of 16384 tokens for the thinking budget - thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking() - if thinkingEnabled { - if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { - // Adaptive thinking: let the model decide how much thinking to do + // Configure thinking if a thinking budget is set in the model config. + // The beta client is also used for structured output and file attachments, + // which don't require thinking. 
+ if budget := c.ModelConfig.ThinkingBudget; budget != nil { + if effort, ok := anthropicThinkingEffort(budget); ok { adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() - params.Thinking = anthropic.BetaThinkingConfigParamUnion{ - OfAdaptive: &adaptive, - } - slog.Debug("Anthropic Beta API using adaptive thinking") - } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { - // Effort level: use adaptive thinking + output_config.effort - adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam() - params.Thinking = anthropic.BetaThinkingConfigParamUnion{ - OfAdaptive: &adaptive, - } + params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: &adaptive} params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort) - slog.Debug("Anthropic Beta API using adaptive thinking with effort", - "effort", effort) - } else { - thinkingTokens := int64(16384) - if c.ModelConfig.ThinkingBudget != nil { - thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens) - } else { - slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - } - switch { - case thinkingTokens >= 1024 && thinkingTokens < maxTokens: - params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens) - slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens) - case thinkingTokens >= maxTokens: - slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) - default: - slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) - } + slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effort) + } else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok { + params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens) + slog.Debug("Anthropic Beta API using 
thinking_budget", "budget_tokens", tokens) } - } else { - slog.Debug("Anthropic Beta API: Thinking disabled via /think command") } if len(requestTools) > 0 { diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index 8678d8437..05554a5cb 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -51,14 +51,14 @@ func (c *Client) getResponseTrailer() http.Header { // Anthropic's max_tokens represents the combined budget for thinking + output tokens. // Returns the adjusted maxTokens value and an error if user-set max_tokens is too low. // -// This only applies to fixed token budgets. Adaptive thinking and effort-based -// budgets don't need adjustment since the model manages its own thinking allocation. +// Only fixed token budgets need adjustment. Adaptive and effort-based budgets +// don't need it since the model manages its own thinking allocation. func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) { - if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.IsAdaptive() { + if c.ModelConfig.ThinkingBudget == nil { return maxTokens, nil } - // Effort-based budgets use adaptive thinking — no token adjustment needed. - if _, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { + // Adaptive and effort-based budgets: no token adjustment needed. 
+ if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok { return maxTokens, nil } @@ -308,35 +308,17 @@ func (c *Client) CreateChatCompletionStream( // Apply thinking budget first, as it affects whether we can set temperature thinkingEnabled := false - if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() { - // Adaptive thinking: let the model decide how much thinking to do - adaptive := anthropic.NewThinkingConfigAdaptiveParam() - params.Thinking = anthropic.ThinkingConfigParamUnion{ - OfAdaptive: &adaptive, - } - thinkingEnabled = true - slog.Debug("Anthropic API using adaptive thinking (standard messages)") - } else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok { - // Effort level: use adaptive thinking + output_config.effort - adaptive := anthropic.NewThinkingConfigAdaptiveParam() - params.Thinking = anthropic.ThinkingConfigParamUnion{ - OfAdaptive: &adaptive, - } - params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort) - thinkingEnabled = true - slog.Debug("Anthropic API using adaptive thinking with effort", - "effort", effort) - } else if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 { - thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) - switch { - case thinkingTokens >= 1024 && thinkingTokens < maxTokens: - params.Thinking = anthropic.ThinkingConfigParamOfEnabled(thinkingTokens) + if budget := c.ModelConfig.ThinkingBudget; budget != nil { + if effort, ok := anthropicThinkingEffort(budget); ok { + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive} + params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort) + thinkingEnabled = true + slog.Debug("Anthropic API using adaptive thinking", "effort", effort) + } else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok { + params.Thinking = 
anthropic.ThinkingConfigParamOfEnabled(tokens) thinkingEnabled = true - slog.Debug("Anthropic API using thinking_budget (standard messages)", "budget_tokens", thinkingTokens) - case thinkingTokens >= maxTokens: - slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens) - default: - slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens) + slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens) } } @@ -924,6 +906,36 @@ func differenceIDs(a, b map[string]struct{}) []string { return missing } +// validThinkingTokens validates that the token budget is within the +// acceptable range for Anthropic (>= 1024 and < maxTokens). +// Returns (tokens, true) if valid, or (0, false) with a warning log if not. +func validThinkingTokens(tokens, maxTokens int64) (int64, bool) { + if tokens < 1024 { + slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens) + return 0, false + } + if tokens >= maxTokens { + slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens) + return 0, false + } + return tokens, true +} + +// anthropicThinkingEffort returns the Anthropic API effort level for the given +// ThinkingBudget. It covers both explicit adaptive mode ("adaptive", +// "adaptive/<effort>") and string effort levels ("low", "medium", "high", "max") +// that Anthropic maps to adaptive thinking. Returns ("", false) when the +// budget uses token counts or is nil. +func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) { + if b == nil { + return "", false + } + if effort, ok := b.AdaptiveEffort(); ok { + return effort, true + } + return anthropicEffort(b) +} + +// anthropicEffort maps a ThinkingBudget effort string to an Anthropic API +// effort level ("low", "medium", "high", "max").
Returns ("", false) when // the budget uses token counts, adaptive mode, or an unrecognised string. diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index 135659049..a083f1d47 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -236,23 +236,24 @@ func (c *Client) buildConverseStreamInput(messages []chat.Message, requestTools // Convert and set messages (excluding system) input.Messages, input.System = convertMessages(messages, enableCaching) - // Set inference configuration - input.InferenceConfig = c.buildInferenceConfig() + // Compute thinking fields first — its presence drives the inference config. + additionalFields := c.buildAdditionalModelRequestFields() + if additionalFields != nil { + input.AdditionalModelRequestFields = additionalFields + } + + // Set inference configuration (temp/topP are suppressed when thinking is on). + input.InferenceConfig = c.buildInferenceConfig(additionalFields != nil) // Convert and set tools if len(requestTools) > 0 { input.ToolConfig = convertToolConfig(requestTools, enableCaching) } - // Set extended thinking configuration for Claude models - if additionalFields := c.buildAdditionalModelRequestFields(); additionalFields != nil { - input.AdditionalModelRequestFields = additionalFields - } - return input } -func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { +func (c *Client) buildInferenceConfig(thinkingEnabled bool) *types.InferenceConfiguration { cfg := &types.InferenceConfiguration{} if c.ModelConfig.MaxTokens != nil && *c.ModelConfig.MaxTokens > 0 { @@ -261,7 +262,7 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { // Temperature and TopP cannot be set when extended thinking is enabled // (Claude requires temperature=1.0 which is the default when thinking is on) - if !c.isThinkingEnabled() { + if !thinkingEnabled { if c.ModelConfig.Temperature != nil { cfg.Temperature = 
aws.Float32(float32(*c.ModelConfig.Temperature)) } @@ -275,35 +276,6 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration { return cfg } -// resolveThinkingTokens returns the effective token budget for thinking. -// It handles both explicit token counts and effort-level strings. -// Returns 0 if no valid thinking budget is configured. -func (c *Client) resolveThinkingTokens() int { - if c.ModelConfig.ThinkingBudget == nil { - return 0 - } - if tokens, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok { - return tokens - } - return c.ModelConfig.ThinkingBudget.Tokens -} - -// isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields -// to determine if thinking params will affect inference config (temp/topP constraints). -func (c *Client) isThinkingEnabled() bool { - tokens := c.resolveThinkingTokens() - if tokens < 1024 { - return false - } - - // Check against max_tokens - if c.ModelConfig.MaxTokens != nil && tokens >= int(*c.ModelConfig.MaxTokens) { - return false - } - - return true -} - func (c *Client) interleavedThinkingEnabled() bool { return getProviderOpt[bool](c.ModelConfig.ProviderOpts, "interleaved_thinking") } @@ -317,7 +289,13 @@ func (c *Client) promptCachingEnabled() bool { // buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode. 
func (c *Client) buildAdditionalModelRequestFields() document.Interface { - tokens := c.resolveThinkingTokens() + if c.ModelConfig.ThinkingBudget == nil { + return nil + } + tokens := c.ModelConfig.ThinkingBudget.Tokens + if t, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok { + tokens = t + } if tokens <= 0 { return nil } diff --git a/pkg/model/provider/bedrock/client_test.go b/pkg/model/provider/bedrock/client_test.go index dd3bb462d..9c7e0e2bf 100644 --- a/pkg/model/provider/bedrock/client_test.go +++ b/pkg/model/provider/bedrock/client_test.go @@ -740,9 +740,7 @@ func TestBuildInferenceConfig_DisablesTempTopPWhenThinkingEnabled(t *testing.T) }, } - cfg := client.buildInferenceConfig() - - // Claude requires temperature=1.0 when thinking is on, so we don't set it + cfg := client.buildInferenceConfig(true) assert.Nil(t, cfg.Temperature, "temperature should be nil when thinking is enabled") assert.Nil(t, cfg.TopP, "topP should be nil when thinking is enabled") assert.NotNil(t, cfg.MaxTokens) @@ -769,7 +767,7 @@ func TestBuildInferenceConfig_SetsTempTopPWhenThinkingNotConfigured(t *testing.T }, } - cfg := client.buildInferenceConfig() + cfg := client.buildInferenceConfig(false) require.NotNil(t, cfg.Temperature) assert.InDelta(t, 0.7, *cfg.Temperature, 0.01) @@ -799,7 +797,7 @@ func TestBuildInferenceConfig_SetsTempTopPWhenThinkingBudgetInvalid(t *testing.T }, } - cfg := client.buildInferenceConfig() + cfg := client.buildInferenceConfig(false) // thinking not enabled (budget below minimum) require.NotNil(t, cfg.Temperature) assert.InDelta(t, 0.7, *cfg.Temperature, 0.01) diff --git a/pkg/model/provider/clone.go b/pkg/model/provider/clone.go index 1e0809435..2b8a5219d 100644 --- a/pkg/model/provider/clone.go +++ b/pkg/model/provider/clone.go @@ -27,6 +27,9 @@ func CloneWithOptions(ctx context.Context, base Provider, opts ...options.Opt) P if mt := tempOpts.MaxTokens(); mt != 0 { modelConfig.MaxTokens = &mt } + if tempOpts.NoThinking() { + 
modelConfig.ThinkingBudget = nil + } } // Use NewWithModels to support cloning routers that reference other models. diff --git a/pkg/model/provider/clone_test.go b/pkg/model/provider/clone_test.go index 80facc51c..e5cf49bc4 100644 --- a/pkg/model/provider/clone_test.go +++ b/pkg/model/provider/clone_test.go @@ -33,8 +33,7 @@ func TestCloneWithOptions_RouterWithModelReferences(t *testing.T) { // This test verifies that cloning a router with model references works correctly. // Previously, CloneWithOptions would fail silently because it called New() instead // of NewWithModels(), which meant the models map was nil and model references - // like "fast" couldn't be resolved. The clone would fail and return the original - // provider, causing options like WithThinking(false) to have no effect. + // like "fast" couldn't be resolved. // Create a mock server that returns a minimal valid response server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { @@ -85,19 +84,14 @@ func TestCloneWithOptions_RouterWithModelReferences(t *testing.T) { baseConfig := router.BaseConfig() require.NotNil(t, baseConfig.Models, "Router should store models map in base config") - // Clone with thinking disabled - this should succeed and not fall back to original - cloned := CloneWithOptions(t.Context(), router, options.WithThinking(false)) + // Clone with max tokens option - this should succeed and not fall back to original + newMaxTokens := int64(4096) + cloned := CloneWithOptions(t.Context(), router, options.WithMaxTokens(newMaxTokens)) - // The clone should have the thinking option applied - // We verify this by checking that the clone's base config has nil ThinkingBudget - // (since WithThinking(false) clears thinking configuration) + // The clone should have the option applied clonedConfig := cloned.BaseConfig() - - // The key assertion: ThinkingBudget should be nil because WithThinking(false) was applied - // If the clone failed silently, this would 
still have the default "medium" from applyOpenAIDefaults - assert.Nil(t, clonedConfig.ModelConfig.ThinkingBudget, - "ThinkingBudget should be nil after cloning with WithThinking(false); "+ - "if it's not nil, the clone may have failed silently and returned the original provider") + require.NotNil(t, clonedConfig.ModelConfig.MaxTokens) + assert.Equal(t, newMaxTokens, *clonedConfig.ModelConfig.MaxTokens) // Also verify the models map is preserved in the clone assert.NotNil(t, clonedConfig.Models, "Cloned router should preserve models map") @@ -115,12 +109,11 @@ func TestCloneWithOptions_DirectProvider(t *testing.T) { })) defer server.Close() - // Test that cloning a non-router provider still works correctly + // Test that cloning a non-router provider works correctly cfg := &latest.ModelConfig{ - Provider: "openai", - Model: "gpt-4o", - BaseURL: server.URL, - ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, + Provider: "openai", + Model: "gpt-4o", + BaseURL: server.URL, } env := newCloneTestEnv(map[string]string{ @@ -130,20 +123,20 @@ func TestCloneWithOptions_DirectProvider(t *testing.T) { provider, err := New(t.Context(), cfg, env) require.NoError(t, err) - // Clone with thinking disabled - cloned := CloneWithOptions(t.Context(), provider, options.WithThinking(false)) + // Clone with max tokens + newMaxTokens := int64(2048) + cloned := CloneWithOptions(t.Context(), provider, options.WithMaxTokens(newMaxTokens)) clonedConfig := cloned.BaseConfig() - assert.Nil(t, clonedConfig.ModelConfig.ThinkingBudget, - "ThinkingBudget should be nil after cloning with WithThinking(false)") + require.NotNil(t, clonedConfig.ModelConfig.MaxTokens) + assert.Equal(t, newMaxTokens, *clonedConfig.ModelConfig.MaxTokens) } func TestCloneWithOptions_PreservesMaxTokens(t *testing.T) { t.Parallel() // This test verifies that max_tokens is preserved when cloning a provider - // with options that don't explicitly set max_tokens. 
Previously, options - // that didn't set max_tokens would accidentally clear it to 0. + // with options that don't explicitly set max_tokens. server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Content-Type", "text/event-stream") @@ -167,8 +160,8 @@ func TestCloneWithOptions_PreservesMaxTokens(t *testing.T) { provider, err := New(t.Context(), cfg, env, options.WithMaxTokens(maxTokens)) require.NoError(t, err) - // Clone with an option that doesn't affect max_tokens (e.g., WithThinking) - cloned := CloneWithOptions(t.Context(), provider, options.WithThinking(false)) + // Clone with an option that doesn't affect max_tokens (e.g., WithGeneratingTitle) + cloned := CloneWithOptions(t.Context(), provider, options.WithGeneratingTitle()) clonedConfig := cloned.BaseConfig() diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 5ea920614..1cf2f2713 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -351,30 +351,14 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig { } // Apply thinking configuration for Gemini models. - // Per official docs: https://ai.google.dev/gemini-api/docs/thinking - // - // Gemini 2.5 models use token-based configuration (thinkingBudget): - // - Set thinkingBudget to 0 to disable thinking - // - Set thinkingBudget to -1 for dynamic thinking (model decides) - // - Set to a specific value for a fixed token budget - // (max 24576 for most models, 32768 for Gemini 2.5 Pro) - // - // Gemini 3 models use level-based configuration (thinkingLevel): - // - Gemini 3 Pro: "low", "high" - // - Gemini 3 Flash: "minimal", "low", "medium", "high" - // - // When thinking is explicitly disabled via ModelOptions (e.g., for title generation), - // we set ThinkingBudget to 0 to disable thinking completely. 
This is required for - // operations where max_tokens is very low and thinking would cause the request to - // hang or fail. IncludeThoughts=false is also set to ensure no thinking content - // is returned. - if thinking := c.ModelOptions.Thinking(); thinking != nil && !*thinking { + // See https://ai.google.dev/gemini-api/docs/thinking + if c.ModelOptions.NoThinking() { + // NoThinking requested (e.g. title generation). For Gemini 3+ models + // that always think, use the lowest level and bump MaxOutputTokens so + // internal reasoning doesn't consume the entire budget. Gemini 2.5 and + // older can fully disable thinking with ThinkingBudget=0. model := strings.ToLower(c.ModelConfig.Model) if isGemini3PlusModel(model) { - // Gemini 3 models require thinking — they reject ThinkingBudget=0. - // Use the lowest level instead and bump MaxOutputTokens so that - // even a tiny caller budget (e.g. 20 for title generation) leaves - // room for the model's internal reasoning. config.ThinkingConfig = &genai.ThinkingConfig{ IncludeThoughts: false, ThinkingLevel: genai.ThinkingLevelLow, @@ -383,29 +367,19 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig { if config.MaxOutputTokens < minOutputTokens { config.MaxOutputTokens = minOutputTokens } - slog.Debug("Gemini 3 thinking reduced to low (cannot be fully disabled)", - "model", c.ModelConfig.Model, - "max_output_tokens", config.MaxOutputTokens, - ) } else { - // Gemini 2.5 and older: ThinkingBudget=0 disables thinking. 
config.ThinkingConfig = &genai.ThinkingConfig{ IncludeThoughts: false, ThinkingBudget: new(int32(0)), } - slog.Debug("Gemini thinking explicitly disabled via ModelOptions", - "model", c.ModelConfig.Model, - "max_output_tokens", config.MaxOutputTokens, - ) } } else if c.ModelConfig.ThinkingBudget != nil { - c.applyThinkingConfig(config) - } else { - slog.Debug("Gemini buildConfig: no thinking configuration applied", - "model", c.ModelConfig.Model, - "thinking_option", c.ModelOptions.Thinking(), - "thinking_budget", c.ModelConfig.ThinkingBudget, - ) + config.ThinkingConfig = &genai.ThinkingConfig{IncludeThoughts: true} + if isGemini3PlusModel(strings.ToLower(c.ModelConfig.Model)) { + c.applyGemini3ThinkingLevel(config) + } else { + c.applyGemini25ThinkingBudget(config) + } } if structuredOutput := c.ModelOptions.StructuredOutput(); structuredOutput != nil { @@ -416,41 +390,11 @@ func (c *Client) buildConfig() *genai.GenerateContentConfig { return config } -// applyThinkingConfig applies the appropriate thinking configuration based on model type. -func (c *Client) applyThinkingConfig(config *genai.GenerateContentConfig) { - if config.ThinkingConfig == nil { - config.ThinkingConfig = &genai.ThinkingConfig{} - } - config.ThinkingConfig.IncludeThoughts = true - - model := strings.ToLower(c.ModelConfig.Model) - - // Gemini 3+ models use ThinkingLevel (effort-based) - if isGemini3PlusModel(model) { - c.applyGemini3ThinkingLevel(config) - return - } - - // Gemini 2.5 and other models use ThinkingBudget (token-based) - c.applyGemini25ThinkingBudget(config) -} - // applyGemini3ThinkingLevel applies level-based thinking for Gemini 3 models. 
func (c *Client) applyGemini3ThinkingLevel(config *genai.GenerateContentConfig) { - effort := strings.ToLower(c.ModelConfig.ThinkingBudget.Effort) - - var level genai.ThinkingLevel - switch effort { - case "minimal": - level = genai.ThinkingLevelMinimal - case "low": - level = genai.ThinkingLevelLow - case "medium": - level = genai.ThinkingLevelMedium - case "high": - level = genai.ThinkingLevelHigh - default: - // If effort is not set but tokens are, fall back to token-based config + level, ok := gemini3ThinkingLevel(c.ModelConfig.ThinkingBudget.Effort) + if !ok { + // No recognized effort string — fall back to token-based if tokens are set. if c.ModelConfig.ThinkingBudget.Tokens != 0 { slog.Warn("Gemini 3 models use thinkingLevel, not thinkingBudget tokens; falling back to token-based config", "model", c.ModelConfig.Model, @@ -459,36 +403,37 @@ func (c *Client) applyGemini3ThinkingLevel(config *genai.GenerateContentConfig) c.applyGemini25ThinkingBudget(config) return } - // Default to high if no valid effort specified - level = genai.ThinkingLevelHigh - slog.Debug("Gemini 3 using default thinking level", - "model", c.ModelConfig.Model, - "level", "high", - ) - config.ThinkingConfig.ThinkingLevel = level - return + level = genai.ThinkingLevelHigh // default } config.ThinkingConfig.ThinkingLevel = level slog.Debug("Gemini 3 request using thinkingLevel", "model", c.ModelConfig.Model, - "level", effort, + "level", level, ) } +// gemini3ThinkingLevel maps an effort string to a Gemini 3 ThinkingLevel. 
+func gemini3ThinkingLevel(effort string) (genai.ThinkingLevel, bool) { + switch strings.ToLower(strings.TrimSpace(effort)) { + case "minimal": + return genai.ThinkingLevelMinimal, true + case "low": + return genai.ThinkingLevelLow, true + case "medium": + return genai.ThinkingLevelMedium, true + case "high": + return genai.ThinkingLevelHigh, true + default: + return "", false + } +} + // applyGemini25ThinkingBudget applies token-based thinking for Gemini 2.5 and other models. func (c *Client) applyGemini25ThinkingBudget(config *genai.GenerateContentConfig) { tokens := c.ModelConfig.ThinkingBudget.Tokens config.ThinkingConfig.ThinkingBudget = new(int32(tokens)) - - switch tokens { - case 0: - slog.Debug("Gemini request with thinking disabled", "budget_tokens", tokens) - case -1: - slog.Debug("Gemini request with dynamic thinking", "budget_tokens", tokens) - default: - slog.Debug("Gemini request using thinking_budget", "budget_tokens", tokens) - } + slog.Debug("Gemini request using thinking_budget", "budget_tokens", tokens) } // convertToolsToGemini converts tools to Gemini format diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index 03e42df97..2159bbcbf 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -10,7 +10,6 @@ import ( "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/config/latest" "github.com/docker/docker-agent/pkg/model/provider/base" - "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/tools" ) @@ -294,124 +293,6 @@ func TestBuildConfig_CaseInsensitiveModel(t *testing.T) { } } -func TestBuildConfig_ThinkingExplicitlyDisabled(t *testing.T) { - t.Parallel() - - // Test that when ModelOptions.Thinking() returns false, thinking is explicitly disabled. - // This is important for operations like title generation where max_tokens is very low. 
- tests := []struct { - name string - model string - thinkingBudget *latest.ThinkingBudget // Would normally enable thinking - expectBudgetZero bool // Gemini 2.5: ThinkingBudget=0 - expectLevelLow bool // Gemini 3: ThinkingLevelLow (cannot fully disable) - expectMinMaxTokens int32 // Gemini 3: bumped MaxOutputTokens - }{ - { - name: "gemini-3-flash-preview with thinking budget but disabled via options", - model: "gemini-3-flash-preview", - thinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - expectLevelLow: true, - expectMinMaxTokens: 200, - }, - { - name: "gemini-2.5-flash with thinking budget but disabled via options", - model: "gemini-2.5-flash", - thinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - expectBudgetZero: true, - }, - { - name: "gemini-3-pro with nil thinking budget but disabled via options", - model: "gemini-3-pro", - thinkingBudget: nil, // Even without explicit budget, Gemini 3 may use thinking by default - expectLevelLow: true, - expectMinMaxTokens: 200, - }, - { - name: "gemini-3.1-pro-preview with thinking budget but disabled via options", - model: "gemini-3.1-pro-preview", - thinkingBudget: &latest.ThinkingBudget{Effort: "high"}, - expectLevelLow: true, - expectMinMaxTokens: 200, - }, - { - name: "gemini-3.1-flash-preview with thinking budget but disabled via options", - model: "gemini-3.1-flash-preview", - thinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - expectLevelLow: true, - expectMinMaxTokens: 200, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - // Create ModelOptions with thinking explicitly disabled - var modelOpts options.ModelOptions - options.WithThinking(false)(&modelOpts) - - client := &Client{ - Config: base.Config{ - ModelConfig: latest.ModelConfig{ - Provider: "google", - Model: tt.model, - ThinkingBudget: tt.thinkingBudget, - }, - ModelOptions: modelOpts, - }, - } - - config := client.buildConfig() - - require.NotNil(t, config.ThinkingConfig, 
"ThinkingConfig should be explicitly set when thinking is disabled") - - if tt.expectBudgetZero { - // Gemini 2.5: fully disabled via ThinkingBudget=0 - assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false") - require.NotNil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be set to 0") - assert.Equal(t, int32(0), *config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should be 0") - assert.Empty(t, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be empty") - } - - if tt.expectLevelLow { - // Gemini 3: cannot fully disable, use lowest level - assert.False(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be false") - assert.Equal(t, genai.ThinkingLevelLow, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be low") - assert.Nil(t, config.ThinkingConfig.ThinkingBudget, "ThinkingBudget should not be set for Gemini 3") - assert.GreaterOrEqual(t, config.MaxOutputTokens, tt.expectMinMaxTokens, "MaxOutputTokens should be bumped") - } - }) - } -} - -func TestBuildConfig_ThinkingExplicitlyEnabled(t *testing.T) { - t.Parallel() - - // Test that when ModelOptions.Thinking() returns true, thinking is NOT overridden - // and the ThinkingBudget from ModelConfig is used. 
- var modelOpts options.ModelOptions - options.WithThinking(true)(&modelOpts) - - client := &Client{ - Config: base.Config{ - ModelConfig: latest.ModelConfig{ - Provider: "google", - Model: "gemini-3-flash-preview", - ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - ModelOptions: modelOpts, - }, - } - - config := client.buildConfig() - - // ThinkingConfig should be set with IncludeThoughts=true from applyThinkingConfig - require.NotNil(t, config.ThinkingConfig, "ThinkingConfig should be set") - assert.True(t, config.ThinkingConfig.IncludeThoughts, "IncludeThoughts should be true when thinking is enabled") - assert.Equal(t, genai.ThinkingLevelMedium, config.ThinkingConfig.ThinkingLevel, "ThinkingLevel should be set from ThinkingBudget") -} - func TestConvertMessagesToGemini_ThoughtSignature(t *testing.T) { t.Parallel() @@ -498,10 +379,10 @@ func TestConvertMessagesToGemini_ThoughtSignature(t *testing.T) { } } -func TestBuildConfig_ThinkingNotSet(t *testing.T) { +func TestBuildConfig_ThinkingFromBudget(t *testing.T) { t.Parallel() - // Test that when ModelOptions.Thinking() is nil (not set), behavior falls back to ThinkingBudget + // Test that thinking configuration is driven by ThinkingBudget in the model config client := &Client{ Config: base.Config{ ModelConfig: latest.ModelConfig{ @@ -509,7 +390,6 @@ func TestBuildConfig_ThinkingNotSet(t *testing.T) { Model: "gemini-3-flash", ThinkingBudget: &latest.ThinkingBudget{Effort: "high"}, }, - // ModelOptions.Thinking() is nil by default }, } diff --git a/pkg/model/provider/model_defaults_test.go b/pkg/model/provider/model_defaults_test.go index f55f0bf2e..fd2e5d268 100644 --- a/pkg/model/provider/model_defaults_test.go +++ b/pkg/model/provider/model_defaults_test.go @@ -263,3 +263,29 @@ func TestIsOpenAIThinkingOnlyModel(t *testing.T) { }) } } + +// TestApplyProviderDefaults_DoesNotModifyOriginal verifies that applyProviderDefaults +// does not mutate the input config's ProviderOpts map. 
+func TestApplyProviderDefaults_DoesNotModifyOriginal(t *testing.T) { + t.Parallel() + + original := &latest.ModelConfig{ + Provider: "anthropic", + Model: "claude-sonnet-4-0", + ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, + ProviderOpts: map[string]any{"custom_key": "original_value"}, + } + + result := applyProviderDefaults(original, nil) + + // Result should have interleaved_thinking set (because thinking_budget is set). + require.NotNil(t, result.ProviderOpts) + assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) + + // Original must NOT have interleaved_thinking added. + _, exists := original.ProviderOpts["interleaved_thinking"] + assert.False(t, exists, "original ProviderOpts must not be mutated by applyProviderDefaults") + + // Original custom key must still be there. + assert.Equal(t, "original_value", original.ProviderOpts["custom_key"]) +} diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index 3d10f775f..3537441fd 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -249,9 +249,9 @@ func (c *Client) CreateChatCompletionStream( } } - // Apply thinking budget: set reasoning_effort parameter - if c.ModelConfig.ThinkingBudget != nil { - effort, err := getOpenAIReasoningEffort(&c.ModelConfig) + // Apply thinking budget: set reasoning_effort for reasoning models (o-series, gpt-5) + if c.ModelConfig.ThinkingBudget != nil && isOpenAIReasoningModel(c.ModelConfig.Model) { + effort, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget) if err != nil { slog.Error("OpenAI request using thinking_budget failed", "error", err) return nil, err @@ -327,17 +327,6 @@ func (c *Client) CreateResponseStream( if maxToken := c.ModelConfig.MaxTokens; maxToken != nil && *maxToken > 0 { maxTokens := *maxToken - - // Reasoning models consume output tokens on internal reasoning even when - // thinking is explicitly disabled. 
Bump a small budget so the model has - // headroom for both reasoning and actual text output. - thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking() - if isOpenAIReasoningModel(c.ModelConfig.Model) && !thinkingEnabled && maxTokens < 200 { - slog.Debug("Bumping max_output_tokens for reasoning model with thinking disabled", - "model", c.ModelConfig.Model, "original", maxTokens, "adjusted", 200) - maxTokens = 200 - } - params.MaxOutputTokens = param.NewOpt(maxTokens) slog.Debug("OpenAI responses request configured with max output tokens", "max_output_tokens", maxTokens) } @@ -370,22 +359,15 @@ func (c *Client) CreateResponseStream( } } - // Configure reasoning for models that support it (o-series, gpt-5) - // Request detailed reasoning summary to get thinking traces for reasoning models - // Skip reasoning configuration entirely if thinking is explicitly disabled (via /think command) - thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking() - if isOpenAIReasoningModel(c.ModelConfig.Model) && thinkingEnabled { - // Only set reasoning.summary for models that support it. - // Some reasoning models (e.g. o1-pro) reject this parameter. - if supportsReasoningSummary(c.ModelConfig.Model) { - params.Reasoning = shared.ReasoningParam{ - Summary: shared.ReasoningSummaryDetailed, - } - slog.Debug("OpenAI responses request configured with reasoning summary", "model", c.ModelConfig.Model, "summary", "detailed") + // Configure reasoning for models that support it (o-series, gpt-5). + // Skip reasoning entirely when NoThinking is set (e.g. title generation) + // to avoid wasting output tokens on internal reasoning. 
+ if isOpenAIReasoningModel(c.ModelConfig.Model) && !c.ModelOptions.NoThinking() { + params.Reasoning = shared.ReasoningParam{ + Summary: shared.ReasoningSummaryDetailed, } - // Apply thinking budget as reasoning effort if configured if c.ModelConfig.ThinkingBudget != nil { - effort, err := getOpenAIReasoningEffort(&c.ModelConfig) + effort, err := openAIReasoningEffort(c.ModelConfig.ThinkingBudget) if err != nil { slog.Error("OpenAI responses request using thinking_budget failed", "error", err) return nil, err @@ -919,39 +901,16 @@ func isOpenAIReasoningModel(model string) bool { strings.HasPrefix(m, "gpt-5") } -// supportsReasoningSummary returns true for reasoning models that support the -// reasoning.summary parameter. Some reasoning models (e.g. o1-pro) do not -// support it and will reject the request if it is set. -func supportsReasoningSummary(model string) bool { - if !isOpenAIReasoningModel(model) { - return false - } - m := strings.ToLower(model) - // o1-pro does not support reasoning.summary. - if strings.HasPrefix(m, "o1-pro") { - return false - } - return true -} - -// getOpenAIReasoningEffort resolves the reasoning effort value from the -// model configuration's ThinkingBudget. Returns the effort (minimal|low|medium|high) or an error -func getOpenAIReasoningEffort(cfg *latest.ModelConfig) (effort string, err error) { - if cfg == nil || cfg.ThinkingBudget == nil { - return "", nil - } - - if !isOpenAIReasoningModel(cfg.Model) { - slog.Warn("OpenAI reasoning effort is not supported for this model, ignoring thinking_budget", "model", cfg.Model) - return "", nil - } - - effort = strings.TrimSpace(strings.ToLower(cfg.ThinkingBudget.Effort)) - if effort == "minimal" || effort == "low" || effort == "medium" || effort == "high" { +// openAIReasoningEffort validates a ThinkingBudget effort string for the +// OpenAI API. Returns the effort (minimal|low|medium|high|xhigh) or an error. 
+func openAIReasoningEffort(b *latest.ThinkingBudget) (string, error) { + effort := strings.TrimSpace(strings.ToLower(b.Effort)) + switch effort { + case "minimal", "low", "medium", "high", "xhigh": return effort, nil + default: + return "", fmt.Errorf("OpenAI requests only support 'minimal', 'low', 'medium', 'high', 'xhigh' as values for thinking_budget effort, got effort: '%s', tokens: '%d'", effort, b.Tokens) } - - return "", fmt.Errorf("OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: '%s', tokens: '%d'", effort, cfg.ThinkingBudget.Tokens) } // jsonSchema is a helper type that implements json.Marshaler for map[string]any diff --git a/pkg/model/provider/openai/thinking_budget_test.go b/pkg/model/provider/openai/thinking_budget_test.go index bcc026190..1d6d99460 100644 --- a/pkg/model/provider/openai/thinking_budget_test.go +++ b/pkg/model/provider/openai/thinking_budget_test.go @@ -80,200 +80,67 @@ func TestIsOpenAIReasoningModel(t *testing.T) { } } -func TestSupportsReasoningSummary(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - model string - expected bool - }{ - // Standard reasoning models - should support summary - {"o1-preview", "o1-preview", true}, - {"o1-mini", "o1-mini", true}, - {"o3-mini", "o3-mini", true}, - {"o4-preview", "o4-preview", true}, - {"gpt-5", "gpt-5", true}, - {"gpt-5-mini", "gpt-5-mini", true}, - - // o1-pro models - do NOT support summary - {"o1-pro", "o1-pro", false}, - {"o1-pro-2025-03-19", "o1-pro-2025-03-19", false}, - {"O1-PRO uppercase", "O1-PRO", false}, - - // Non-reasoning models - do NOT support summary - {"gpt-4o", "gpt-4o", false}, - {"gpt-4-turbo", "gpt-4-turbo", false}, - {"gpt-5-chat-latest", "gpt-5-chat-latest", false}, - {"empty string", "", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - result := supportsReasoningSummary(tt.model) - assert.Equal(t, tt.expected, result, "Model %s 
should return %v", tt.model, tt.expected) - }) - } -} - -func TestGetOpenAIReasoningEffort_Success(t *testing.T) { +func TestOpenAIReasoningEffort_Success(t *testing.T) { t.Parallel() tests := []struct { name string - model string - thinkingBudget *latest.ThinkingBudget + budget *latest.ThinkingBudget expectedEffort string }{ - { - name: "minimal effort for o1 model", - model: "o1-preview", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "minimal", - }, - expectedEffort: "minimal", - }, - { - name: "low effort for gpt-5 model", - model: "gpt-5-mini", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "low", - }, - expectedEffort: "low", - }, - { - name: "medium effort for o3 model", - model: "o3-preview", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "medium", - }, - expectedEffort: "medium", - }, - { - name: "high effort for o4 model", - model: "o4", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "high", - }, - expectedEffort: "high", - }, - { - name: "uppercase effort level", - model: "o1-mini", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "HIGH", - }, - expectedEffort: "high", - }, - { - name: "effort with whitespace", - model: "gpt-5", - thinkingBudget: &latest.ThinkingBudget{ - Effort: " medium ", - }, - expectedEffort: "medium", - }, - { - name: "nil thinking budget", - model: "o1-preview", - thinkingBudget: nil, - expectedEffort: "", - }, + {"minimal", &latest.ThinkingBudget{Effort: "minimal"}, "minimal"}, + {"low", &latest.ThinkingBudget{Effort: "low"}, "low"}, + {"medium", &latest.ThinkingBudget{Effort: "medium"}, "medium"}, + {"high", &latest.ThinkingBudget{Effort: "high"}, "high"}, + {"xhigh", &latest.ThinkingBudget{Effort: "xhigh"}, "xhigh"}, + {"xhigh uppercase", &latest.ThinkingBudget{Effort: "XHIGH"}, "xhigh"}, + {"uppercase", &latest.ThinkingBudget{Effort: "HIGH"}, "high"}, + {"whitespace", &latest.ThinkingBudget{Effort: " medium "}, "medium"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Parallel() - 
config := &latest.ModelConfig{ - Model: tt.model, - ThinkingBudget: tt.thinkingBudget, - } - - effort, err := getOpenAIReasoningEffort(config) + effort, err := openAIReasoningEffort(tt.budget) require.NoError(t, err) assert.Equal(t, tt.expectedEffort, effort) }) } } -func TestGetOpenAIReasoningEffort_UnsupportedModel(t *testing.T) { +func TestOpenAIReasoningEffort_InvalidEffort(t *testing.T) { t.Parallel() tests := []struct { - name string - model string - }{ - {"gpt-4o", "gpt-4o"}, - {"gpt-4-turbo", "gpt-4-turbo"}, - {"gpt-3.5-turbo", "gpt-3.5-turbo"}, - {"claude-3", "claude-3"}, - {"gemini-pro", "gemini-pro"}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - config := &latest.ModelConfig{ - Model: tt.model, - ThinkingBudget: &latest.ThinkingBudget{ - Effort: "high", - }, - } - - effort, err := getOpenAIReasoningEffort(config) - require.NoError(t, err) - assert.Empty(t, effort, "Unsupported model should return empty effort") - }) - } -} - -func TestGetOpenAIReasoningEffort_InvalidEffort(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - thinkingBudget *latest.ThinkingBudget - expectedError string + name string + budget *latest.ThinkingBudget + expectedError string }{ { - name: "invalid effort level", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "invalid", - }, - expectedError: "OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: 'invalid', tokens: '0'", + name: "invalid effort level", + budget: &latest.ThinkingBudget{Effort: "invalid"}, + expectedError: "got effort: 'invalid', tokens: '0'", }, { - name: "numeric string as effort", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "2048", - }, - expectedError: "OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: '2048', tokens: '0'", + name: "numeric string", + budget: &latest.ThinkingBudget{Effort: "2048"}, + 
expectedError: "got effort: '2048', tokens: '0'", }, { - name: "tokens set but effort invalid", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "super-high", - Tokens: 4096, - }, - expectedError: "OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: 'super-high', tokens: '4096'", + name: "tokens set but effort invalid", + budget: &latest.ThinkingBudget{Effort: "super-high", Tokens: 4096}, + expectedError: "got effort: 'super-high', tokens: '4096'", }, { - name: "tokens only (no effort) - should fail", - thinkingBudget: &latest.ThinkingBudget{ - Tokens: 2048, - }, - expectedError: "OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: '', tokens: '2048'", + name: "tokens only", + budget: &latest.ThinkingBudget{Tokens: 2048}, + expectedError: "got effort: '', tokens: '2048'", }, { - name: "empty effort string - should fail", - thinkingBudget: &latest.ThinkingBudget{ - Effort: "", - }, - expectedError: "OpenAI requests only support 'minimal', 'low', 'medium', 'high' as values for thinking_budget effort, got effort: '', tokens: '0'", + name: "empty effort", + budget: &latest.ThinkingBudget{Effort: ""}, + expectedError: "got effort: '', tokens: '0'", }, } @@ -281,80 +148,10 @@ func TestGetOpenAIReasoningEffort_InvalidEffort(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - config := &latest.ModelConfig{ - Model: "o1-preview", - ThinkingBudget: tt.thinkingBudget, - } - - effort, err := getOpenAIReasoningEffort(config) + effort, err := openAIReasoningEffort(tt.budget) require.Error(t, err) assert.Empty(t, effort) assert.Contains(t, err.Error(), tt.expectedError) }) } } - -func TestGetOpenAIReasoningEffort_NilConfig(t *testing.T) { - t.Parallel() - - effort, err := getOpenAIReasoningEffort(nil) - require.NoError(t, err) - assert.Empty(t, effort) -} - -func TestGetOpenAIReasoningEffort_EdgeCases(t *testing.T) { - t.Parallel() - - 
tests := []struct { - name string - config *latest.ModelConfig - expectedEffort string - expectedError bool - }{ - { - name: "config with nil thinking budget", - config: &latest.ModelConfig{ - Model: "o1-preview", - ThinkingBudget: nil, - }, - expectedEffort: "", - expectedError: false, - }, - { - name: "empty model string with thinking budget", - config: &latest.ModelConfig{ - Model: "", - ThinkingBudget: &latest.ThinkingBudget{ - Effort: "high", - }, - }, - expectedEffort: "", - expectedError: false, - }, - { - name: "case sensitivity test", - config: &latest.ModelConfig{ - Model: "O1-PREVIEW", - ThinkingBudget: &latest.ThinkingBudget{ - Effort: "MINIMAL", - }, - }, - expectedEffort: "minimal", - expectedError: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - effort, err := getOpenAIReasoningEffort(tt.config) - if tt.expectedError { - require.Error(t, err) - } else { - require.NoError(t, err) - } - require.Equal(t, tt.expectedEffort, effort) - }) - } -} diff --git a/pkg/model/provider/options/options.go b/pkg/model/provider/options/options.go index f907ce22e..8ac2866bc 100644 --- a/pkg/model/provider/options/options.go +++ b/pkg/model/provider/options/options.go @@ -8,9 +8,9 @@ type ModelOptions struct { gateway string structuredOutput *latest.StructuredOutput generatingTitle bool + noThinking bool maxTokens int64 providers map[string]latest.ProviderConfig - thinking *bool } func (c *ModelOptions) Gateway() string { @@ -29,12 +29,12 @@ func (c *ModelOptions) MaxTokens() int64 { return c.maxTokens } -func (c *ModelOptions) Providers() map[string]latest.ProviderConfig { - return c.providers +func (c *ModelOptions) NoThinking() bool { + return c.noThinking } -func (c *ModelOptions) Thinking() *bool { - return c.thinking +func (c *ModelOptions) Providers() map[string]latest.ProviderConfig { + return c.providers } type Opt func(*ModelOptions) @@ -63,15 +63,15 @@ func WithMaxTokens(maxTokens int64) Opt { } } -func 
WithProviders(providers map[string]latest.ProviderConfig) Opt { +func WithNoThinking() Opt { return func(cfg *ModelOptions) { - cfg.providers = providers + cfg.noThinking = true } } -func WithThinking(enabled bool) Opt { +func WithProviders(providers map[string]latest.ProviderConfig) Opt { return func(cfg *ModelOptions) { - cfg.thinking = &enabled + cfg.providers = providers } } @@ -88,14 +88,14 @@ func FromModelOptions(m ModelOptions) []Opt { if m.generatingTitle { out = append(out, WithGeneratingTitle()) } + if m.noThinking { + out = append(out, WithNoThinking()) + } if m.maxTokens != 0 { out = append(out, WithMaxTokens(m.maxTokens)) } if len(m.providers) > 0 { out = append(out, WithProviders(m.providers)) } - if m.thinking != nil { - out = append(out, WithThinking(*m.thinking)) - } return out } diff --git a/pkg/model/provider/override_test.go b/pkg/model/provider/override_test.go deleted file mode 100644 index 5839f8e67..000000000 --- a/pkg/model/provider/override_test.go +++ /dev/null @@ -1,267 +0,0 @@ -package provider - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/docker/docker-agent/pkg/config/latest" - "github.com/docker/docker-agent/pkg/model/provider/options" -) - -func TestApplyOverrides(t *testing.T) { - t.Parallel() - - boolPtr := func(v bool) *bool { return &v } - - tests := []struct { - name string - config *latest.ModelConfig - thinking *bool // nil = no override - wantBudget *latest.ThinkingBudget - wantInterleaved *bool // nil = key must not exist - }{ - // --- Disable clears everything --- - { - name: "disable: clears thinking_budget", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, - thinking: boolPtr(false), - }, - { - name: "disable: clears interleaved_thinking", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: 
&latest.ThinkingBudget{Tokens: 16384}, ProviderOpts: map[string]any{"interleaved_thinking": true}}, - thinking: boolPtr(false), - }, - - // --- Enable preserves existing budget --- - { - name: "enable: preserves existing budget", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - { - name: "enable: preserves existing budget + interleaved", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, ProviderOpts: map[string]any{"interleaved_thinking": true}}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Tokens: 8192}, - wantInterleaved: boolPtr(true), - }, - - // --- Enable applies defaults when no budget --- - { - name: "enable: OpenAI gets medium default", - config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "enable: Anthropic gets 8192 + interleaved", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Tokens: 8192}, - wantInterleaved: boolPtr(true), - }, - { - name: "enable: restores from tokens=0", - config: &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Effort: "medium"}, - }, - { - name: "enable: restores from effort=none", - config: &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, - thinking: boolPtr(true), - wantBudget: &latest.ThinkingBudget{Tokens: 8192}, - wantInterleaved: boolPtr(true), - }, - - // --- No override = no-op --- - { - name: "nil opts: config unchanged", - config: 
&latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}}, - thinking: nil, - wantBudget: &latest.ThinkingBudget{Tokens: 8192}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - var opts *options.ModelOptions - if tt.thinking != nil { - o := options.ModelOptions{} - options.WithThinking(*tt.thinking)(&o) - opts = &o - } - - result := applyOverrides(tt.config, opts) - - // Budget - if tt.wantBudget == nil { - assert.Nil(t, result.ThinkingBudget) - } else { - require.NotNil(t, result.ThinkingBudget) - assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) - } - - // interleaved_thinking - if tt.wantInterleaved == nil && tt.thinking != nil && !*tt.thinking { - if result.ProviderOpts != nil { - _, exists := result.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should be removed") - } - } else if tt.wantInterleaved != nil { - require.NotNil(t, result.ProviderOpts) - assert.Equal(t, *tt.wantInterleaved, result.ProviderOpts["interleaved_thinking"]) - } - }) - } -} - -// TestApplyOverrides_DoesNotModifyOriginal verifies that applyOverrides creates -// a proper copy: neither the struct fields, the ProviderOpts map, nor the -// ThinkingBudget pointer of the original config are mutated. -func TestApplyOverrides_DoesNotModifyOriginal(t *testing.T) { - t.Parallel() - - original := &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true, "other": "value"}, - } - - o := options.ModelOptions{} - options.WithThinking(false)(&o) - result := applyOverrides(original, &o) - - // Result should have thinking cleared. - assert.Nil(t, result.ThinkingBudget, "result ThinkingBudget should be nil") - - // Original ThinkingBudget must be untouched. 
- require.NotNil(t, original.ThinkingBudget, "original ThinkingBudget must survive") - assert.Equal(t, 8192, original.ThinkingBudget.Tokens) - - // Original ProviderOpts map must still have interleaved_thinking. - val, exists := original.ProviderOpts["interleaved_thinking"] - require.True(t, exists, "original ProviderOpts must still contain interleaved_thinking") - assert.Equal(t, true, val) - - // Other keys must survive in both original and result. - assert.Equal(t, "value", original.ProviderOpts["other"]) - require.NotNil(t, result.ProviderOpts) - assert.Equal(t, "value", result.ProviderOpts["other"]) -} - -// TestApplyOverrides_DisablePreservesOtherProviderOpts verifies that disabling -// thinking only removes "interleaved_thinking" and leaves other keys intact. -func TestApplyOverrides_DisablePreservesOtherProviderOpts(t *testing.T) { - t.Parallel() - - config := &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"interleaved_thinking": true, "custom_key": "preserved"}, - } - - o := options.ModelOptions{} - options.WithThinking(false)(&o) - result := applyOverrides(config, &o) - - // Thinking should be cleared. - assert.Nil(t, result.ThinkingBudget) - - // interleaved_thinking should be removed. - _, exists := result.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "interleaved_thinking should be removed from result") - - // Other keys must survive. - assert.Equal(t, "preserved", result.ProviderOpts["custom_key"]) -} - -// TestDefaultsThenOverrides tests the complete flow: provider defaults → overrides. -func TestDefaultsThenOverrides(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - config *latest.ModelConfig - thinking bool - wantBudget *latest.ThinkingBudget - }{ - // Disable on models without defaults — already nil, stays nil. 
- {"gpt-4o /think off", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, false, nil}, - {"anthropic /think off", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, false, nil}, - - // Enable on models without defaults — applies provider defaults. - {"gpt-4o /think on", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o"}, true, &latest.ThinkingBudget{Effort: "medium"}}, - {"anthropic /think on", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0"}, true, &latest.ThinkingBudget{Tokens: 8192}}, - {"gemini-2.5 /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-2.5-flash"}, true, &latest.ThinkingBudget{Tokens: -1}}, - {"gemini-3-pro /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-pro"}, true, &latest.ThinkingBudget{Effort: "high"}}, - {"gemini-3-flash /think on", &latest.ModelConfig{Provider: "google", Model: "gemini-3-flash"}, true, &latest.ThinkingBudget{Effort: "medium"}}, - {"bedrock claude /think on", &latest.ModelConfig{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"}, true, &latest.ThinkingBudget{Tokens: 8192}}, - - // Old Gemini model that doesn't support thinking — /think should be a no-op. - {"gemini-2.0 /think on (no thinking support)", &latest.ModelConfig{Provider: "google", Model: "gemini-2.0-flash"}, true, nil}, - - // Thinking-only model defaults preserved when enabled, cleared when disabled. - {"o3-mini /think on", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, true, &latest.ThinkingBudget{Effort: "medium"}}, - {"o3-mini /think off", &latest.ModelConfig{Provider: "openai", Model: "o3-mini"}, false, nil}, - - // Explicit budget cleared by disable. - {"explicit cleared", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Tokens: 32000}}, false, nil}, - - // Restore from disabled (thinking_budget: 0) via /think on. 
- {"restore from 0", &latest.ModelConfig{Provider: "openai", Model: "gpt-4o", ThinkingBudget: &latest.ThinkingBudget{Tokens: 0}}, true, &latest.ThinkingBudget{Effort: "medium"}}, - {"restore from none", &latest.ModelConfig{Provider: "anthropic", Model: "claude-sonnet-4-0", ThinkingBudget: &latest.ThinkingBudget{Effort: "none"}}, true, &latest.ThinkingBudget{Tokens: 8192}}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - result := applyProviderDefaults(tt.config, nil) - - o := options.ModelOptions{} - options.WithThinking(tt.thinking)(&o) - result = applyOverrides(result, &o) - - if tt.wantBudget == nil { - assert.Nil(t, result.ThinkingBudget) - } else { - require.NotNil(t, result.ThinkingBudget) - assert.Equal(t, *tt.wantBudget, *result.ThinkingBudget) - } - }) - } -} - -// TestApplyProviderDefaults_DoesNotModifyOriginal verifies that applyProviderDefaults -// does not mutate the input config's ProviderOpts map. -func TestApplyProviderDefaults_DoesNotModifyOriginal(t *testing.T) { - t.Parallel() - - original := &latest.ModelConfig{ - Provider: "anthropic", - Model: "claude-sonnet-4-0", - ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192}, - ProviderOpts: map[string]any{"custom_key": "original_value"}, - } - - result := applyProviderDefaults(original, nil) - - // Result should have interleaved_thinking set (because thinking_budget is set). - require.NotNil(t, result.ProviderOpts) - assert.Equal(t, true, result.ProviderOpts["interleaved_thinking"]) - - // Original must NOT have interleaved_thinking added. - _, exists := original.ProviderOpts["interleaved_thinking"] - assert.False(t, exists, "original ProviderOpts must not be mutated by applyProviderDefaults") - - // Original custom key must still be there. 
- assert.Equal(t, "original_value", original.ProviderOpts["custom_key"]) -} diff --git a/pkg/model/provider/provider.go b/pkg/model/provider/provider.go index 50c1ff00c..25530765a 100644 --- a/pkg/model/provider/provider.go +++ b/pkg/model/provider/provider.go @@ -229,9 +229,6 @@ func createDirectProvider(ctx context.Context, cfg *latest.ModelConfig, env envi // Apply defaults from custom providers (from config) or built-in aliases enhancedCfg := applyProviderDefaults(cfg, globalOptions.Providers()) - // Apply overrides (e.g., disable/enable thinking via /think command) - enhancedCfg = applyOverrides(enhancedCfg, &globalOptions) - providerType := resolveProviderType(enhancedCfg) switch providerType { @@ -376,81 +373,6 @@ func applyModelDefaults(cfg *latest.ModelConfig) { } } -// applyOverrides applies session-level overrides to the configuration (e.g. /think toggle). -// The returned config never shares mutable state with the input. -func applyOverrides(cfg *latest.ModelConfig, opts *options.ModelOptions) *latest.ModelConfig { - if opts == nil { - return cfg - } - - t := opts.Thinking() - if t == nil { - return cfg - } - - enhancedCfg := cloneModelConfig(cfg) - - // /think OFF — clear everything. - if !*t { - enhancedCfg.ThinkingBudget = nil - delete(enhancedCfg.ProviderOpts, "interleaved_thinking") - slog.Debug("Override: thinking disabled", - "provider", cfg.Provider, "model", cfg.Model) - return enhancedCfg - } - - // /think ON — make sure there is a sensible budget. - if enhancedCfg.ThinkingBudget == nil || enhancedCfg.ThinkingBudget.IsDisabled() { - enhancedCfg.ThinkingBudget = nil - setThinkingDefaults(enhancedCfg) - slog.Debug("Override: thinking enabled with defaults", - "provider", cfg.Provider, "model", cfg.Model, - "thinking_budget", enhancedCfg.ThinkingBudget) - } - - return enhancedCfg -} - -// setThinkingDefaults assigns a sensible default thinking budget for /think ON. 
-// Unlike applyModelDefaults this applies to every provider (not just thinking-only models) -// because the user explicitly asked for thinking. -func setThinkingDefaults(cfg *latest.ModelConfig) { - providerType := resolveProviderType(cfg) - - switch providerType { - case "openai", "openai_chatcompletions", "openai_responses": - cfg.ThinkingBudget = &latest.ThinkingBudget{Effort: "medium"} - case "anthropic": - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - ensureInterleavedThinking(cfg, providerType) - case "google": - cfg.ThinkingBudget = defaultGoogleThinkingBudget(cfg.Model) - case "amazon-bedrock": - if isBedrockClaudeModel(cfg.Model) { - cfg.ThinkingBudget = &latest.ThinkingBudget{Tokens: 8192} - ensureInterleavedThinking(cfg, providerType) - } - } -} - -// defaultGoogleThinkingBudget returns a sensible thinking budget for a Google model. -// Returns nil for models that don't have a known thinking capability. -func defaultGoogleThinkingBudget(model string) *latest.ThinkingBudget { - m := strings.ToLower(model) - switch { - case strings.HasPrefix(m, "gemini-2.5-"): - return &latest.ThinkingBudget{Tokens: -1} - case isGeminiProModel(m): - return &latest.ThinkingBudget{Effort: "high"} - case isGeminiFlashModel(m): - return &latest.ThinkingBudget{Effort: "medium"} - default: - // Unknown or older Gemini models (e.g. gemini-2.0-*): don't enable - // thinking since the API may reject it. 
- return nil - } -} - // --------------------------------------------------------------------------- // Shared helpers // --------------------------------------------------------------------------- diff --git a/pkg/runtime/agent_delegation.go b/pkg/runtime/agent_delegation.go index eb3b4f58a..3e1a75212 100644 --- a/pkg/runtime/agent_delegation.go +++ b/pkg/runtime/agent_delegation.go @@ -108,7 +108,6 @@ func newSubSession(parent *session.Session, cfg SubSessionConfig, childAgent *ag session.WithMaxConsecutiveToolCalls(childAgent.MaxConsecutiveToolCalls()), session.WithTitle(cfg.Title), session.WithToolsApproved(cfg.ToolsApproved), - session.WithThinking(childAgent.ThinkingConfigured()), session.WithSendUserMessage(false), session.WithParentID(parent.ID), } diff --git a/pkg/runtime/agent_delegation_test.go b/pkg/runtime/agent_delegation_test.go index bde424fc9..ea5891cee 100644 --- a/pkg/runtime/agent_delegation_test.go +++ b/pkg/runtime/agent_delegation_test.go @@ -80,7 +80,6 @@ func TestNewSubSession(t *testing.T) { assert.Equal(t, parent.ID, s.ParentID) assert.Equal(t, "Test task", s.Title) assert.True(t, s.ToolsApproved) - assert.False(t, s.Thinking) // childAgent has no ThinkingConfigured assert.False(t, s.SendUserMessage) assert.Equal(t, 10, s.MaxIterations) // AgentName should NOT be set when PinAgent is false @@ -156,45 +155,10 @@ func TestSubSessionConfig_DefaultValues(t *testing.T) { s := newSubSession(parent, cfg, childAgent) assert.False(t, s.ToolsApproved) - assert.False(t, s.Thinking) assert.False(t, s.SendUserMessage) assert.Empty(t, s.AgentName) } -func TestSubSessionConfig_ThinkingFromChildAgent(t *testing.T) { - parent := session.New(session.WithUserMessage("hello")) - - t.Run("child agent without thinking configured gets thinking=false even if parent has thinking=true", func(t *testing.T) { - parent.Thinking = true - - childAgent := agent.New("haiku-worker", "") - - cfg := SubSessionConfig{ - Task: "simple task", - AgentName: "haiku-worker", - 
Title: "Transferred task", - } - - s := newSubSession(parent, cfg, childAgent) - assert.False(t, s.Thinking, "sub-session should NOT inherit parent's thinking when child agent has no thinking_budget") - }) - - t.Run("child agent with thinking configured gets thinking=true", func(t *testing.T) { - parent.Thinking = false - - childAgent := agent.New("opus-worker", "", agent.WithThinkingConfigured(true)) - - cfg := SubSessionConfig{ - Task: "complex task", - AgentName: "opus-worker", - Title: "Transferred task", - } - - s := newSubSession(parent, cfg, childAgent) - assert.True(t, s.Thinking, "sub-session should have thinking enabled when child agent has thinking_budget") - }) -} - func TestSubSessionConfig_InheritsAgentLimits(t *testing.T) { parent := session.New(session.WithUserMessage("hello")) diff --git a/pkg/runtime/fallback.go b/pkg/runtime/fallback.go index 024665363..546b3e32f 100644 --- a/pkg/runtime/fallback.go +++ b/pkg/runtime/fallback.go @@ -10,7 +10,6 @@ import ( "github.com/docker/docker-agent/pkg/agent" "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/model/provider" - "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/modelerrors" "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/session" @@ -182,14 +181,7 @@ func (r *LocalRuntime) tryModelWithFallback( m *modelsdev.Model, events chan Event, ) (streamResult, provider.Provider, error) { - // Clone fallback models with the same thinking override as the primary model. - // The primary model was already cloned with options.WithThinking(sess.Thinking) - // in the main runtime loop, so we apply the same to fallbacks for consistency. 
- rawFallbacks := a.FallbackModels() - fallbackModels := make([]provider.Provider, len(rawFallbacks)) - for i, fb := range rawFallbacks { - fallbackModels[i] = provider.CloneWithOptions(ctx, fb, options.WithThinking(sess.Thinking)) - } + fallbackModels := a.FallbackModels() fallbackRetries := getEffectiveRetries(a) diff --git a/pkg/runtime/fallback_test.go b/pkg/runtime/fallback_test.go index 614642ca4..bcc312602 100644 --- a/pkg/runtime/fallback_test.go +++ b/pkg/runtime/fallback_test.go @@ -53,31 +53,11 @@ func (p *countingProvider) CreateChatCompletionStream(context.Context, []chat.Me func (p *countingProvider) BaseConfig() base.Config { return base.Config{} } func (p *countingProvider) MaxTokens() int { return 0 } -// trackingConfigProvider tracks how many times BaseConfig() is called. -// This is used to verify that fallback providers are cloned (via CloneWithOptions) -// which calls BaseConfig() to get the config to clone from. -type trackingConfigProvider struct { - id string - stream chat.MessageStream - baseConfigCalls int -} - -func (p *trackingConfigProvider) ID() string { return p.id } -func (p *trackingConfigProvider) CreateChatCompletionStream(context.Context, []chat.Message, []tools.Tool) (chat.MessageStream, error) { - return p.stream, nil -} - -func (p *trackingConfigProvider) BaseConfig() base.Config { - p.baseConfigCalls++ - return base.Config{} -} - // Verify interface compliance var ( _ provider.Provider = (*mockProvider)(nil) _ provider.Provider = (*failingProvider)(nil) _ provider.Provider = (*countingProvider)(nil) - _ provider.Provider = (*trackingConfigProvider)(nil) ) func TestBuildModelChain(t *testing.T) { @@ -399,76 +379,6 @@ func TestGetEffectiveRetries(t *testing.T) { assert.Equal(t, 0, getEffectiveRetries(agentNoRetries), "retries=-1 should return 0 (no retries)") } -func TestFallbackModelsAreClonedWithThinkingOverride(t *testing.T) { - synctest.Test(t, func(t *testing.T) { - primary := &failingProvider{id: "primary/fail", err: 
errors.New("401 unauthorized")} - fallbackStream := newStreamBuilder(). - AddContent("Success from cloned fallback"). - AddStopWithUsage(10, 5). - Build() - fallback := &trackingConfigProvider{id: "fallback/tracked", stream: fallbackStream} - - root := agent.New("root", "test", - agent.WithModel(primary), - agent.WithFallbackModel(fallback), - agent.WithFallbackRetries(0), - ) - - tm := team.New(team.WithAgents(root)) - rt, err := NewLocalRuntime(tm, WithSessionCompaction(false), WithModelStore(mockModelStore{})) - require.NoError(t, err) - - sess := session.New(session.WithUserMessage("test")) - sess.Title = "Fallback Cloning Test" - sess.Thinking = false - - var gotContent bool - for ev := range rt.RunStream(t.Context(), sess) { - if choice, ok := ev.(*AgentChoiceEvent); ok && choice.Content == "Success from cloned fallback" { - gotContent = true - } - } - assert.True(t, gotContent, "should receive content from fallback") - assert.GreaterOrEqual(t, fallback.baseConfigCalls, 1, - "BaseConfig() should be called on fallback provider (proves cloning occurred)") - }) -} - -func TestFallbackModelsClonedWithThinkingEnabled(t *testing.T) { - synctest.Test(t, func(t *testing.T) { - primary := &failingProvider{id: "primary/fail", err: errors.New("401 unauthorized")} - fallbackStream := newStreamBuilder(). - AddContent("Success with thinking enabled"). - AddStopWithUsage(10, 5). 
- Build() - fallback := &trackingConfigProvider{id: "fallback/tracked", stream: fallbackStream} - - root := agent.New("root", "test", - agent.WithModel(primary), - agent.WithFallbackModel(fallback), - agent.WithFallbackRetries(0), - ) - - tm := team.New(team.WithAgents(root)) - rt, err := NewLocalRuntime(tm, WithSessionCompaction(false), WithModelStore(mockModelStore{})) - require.NoError(t, err) - - sess := session.New(session.WithUserMessage("test")) - sess.Title = "Fallback Cloning Test (Thinking Enabled)" - sess.Thinking = true - - var gotContent bool - for ev := range rt.RunStream(t.Context(), sess) { - if choice, ok := ev.(*AgentChoiceEvent); ok && choice.Content == "Success with thinking enabled" { - gotContent = true - } - } - assert.True(t, gotContent, "should receive content from fallback") - assert.GreaterOrEqual(t, fallback.baseConfigCalls, 1, - "BaseConfig() should be called on fallback provider when thinking is enabled") - }) -} - func TestFallback429WithFallbacksSkipsToNextModel(t *testing.T) { synctest.Test(t, func(t *testing.T) { // Primary gets rate limited; with fallbacks configured it should skip immediately. diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go index 6bbb787a1..412b4b11c 100644 --- a/pkg/runtime/loop.go +++ b/pkg/runtime/loop.go @@ -16,8 +16,6 @@ import ( "github.com/docker/docker-agent/pkg/agent" "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/compaction" - "github.com/docker/docker-agent/pkg/model/provider" - "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/modelerrors" "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/session" @@ -241,14 +239,6 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c toolModelOverride = "" } - // Apply thinking setting based on session state. - // When thinking is disabled: clone with thinking=false to clear any thinking config. 
- // When thinking is enabled: clone with thinking=true to ensure defaults are applied - // (this handles models with no thinking config, explicitly disabled thinking, or - // models that already have thinking configured). - model = provider.CloneWithOptions(ctx, model, options.WithThinking(sess.Thinking)) - slog.Debug("Cloned provider with thinking setting", "agent", a.Name(), "model", model.ID(), "thinking", sess.Thinking) - modelID := model.ID() // Notify sidebar of the model for this turn. For rule-based diff --git a/pkg/runtime/skill_runner.go b/pkg/runtime/skill_runner.go index 2f4ca3d9b..7c9690f6c 100644 --- a/pkg/runtime/skill_runner.go +++ b/pkg/runtime/skill_runner.go @@ -77,10 +77,5 @@ func (r *LocalRuntime) handleRunSkill(ctx context.Context, sess *session.Session } s := newSubSession(sess, cfg, a) - // Skills run as the same agent, so they inherit the session's current - // thinking state (which may have been toggled by the user via /think) - // rather than the agent's static config default. 
- s.Thinking = sess.Thinking - return r.runSubSessionForwarding(ctx, sess, s, span, evts, ca) } diff --git a/pkg/server/server.go b/pkg/server/server.go index 1107bab48..b9cf3b626 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -50,8 +50,6 @@ func New(ctx context.Context, sessionStore session.Store, runConfig *config.Runt group.POST("/sessions/:id/resume", s.resumeSession) // Toggle YOLO mode for a session group.POST("/sessions/:id/tools/toggle", s.toggleSessionYolo) - // Toggle thinking mode for a session - group.POST("/sessions/:id/thinking/toggle", s.toggleSessionThinking) // Update session permissions group.PATCH("/sessions/:id/permissions", s.updateSessionPermissions) // Update session title @@ -196,7 +194,6 @@ func (s *Server) getSession(c echo.Context) error { CreatedAt: sess.CreatedAt, Messages: sess.GetAllMessages(), ToolsApproved: sess.ToolsApproved, - Thinking: sess.Thinking, InputTokens: sess.InputTokens, OutputTokens: sess.OutputTokens, WorkingDir: sess.WorkingDir, @@ -233,13 +230,6 @@ func (s *Server) getAgentToolCount(c echo.Context) error { return c.JSON(http.StatusOK, map[string]int{"available_tools": count}) } -func (s *Server) toggleSessionThinking(c echo.Context) error { - if err := s.sm.ToggleThinking(c.Request().Context(), c.Param("id")); err != nil { - return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to toggle session thinking mode: %v", err)) - } - return c.JSON(http.StatusOK, nil) -} - func (s *Server) updateSessionPermissions(c echo.Context) error { sessionID := c.Param("id") var req api.UpdateSessionPermissionsRequest diff --git a/pkg/server/session_manager.go b/pkg/server/session_manager.go index 0a81d2554..85b60432e 100644 --- a/pkg/server/session_manager.go +++ b/pkg/server/session_manager.go @@ -255,20 +255,6 @@ func (sm *SessionManager) ToggleToolApproval(ctx context.Context, sessionID stri return sm.sessionStore.UpdateSession(ctx, sess) } -// ToggleThinking toggles the thinking mode for a 
session. -func (sm *SessionManager) ToggleThinking(ctx context.Context, sessionID string) error { - sm.mux.Lock() - defer sm.mux.Unlock() - sess, err := sm.sessionStore.GetSession(ctx, sessionID) - if err != nil { - return err - } - - sess.Thinking = !sess.Thinking - - return sm.sessionStore.UpdateSession(ctx, sess) -} - // UpdateSessionPermissions updates the permissions for a session. func (sm *SessionManager) UpdateSessionPermissions(ctx context.Context, sessionID string, perms *session.PermissionsConfig) error { sm.mux.Lock() @@ -361,9 +347,6 @@ func (sm *SessionManager) runtimeForSession(ctx context.Context, sess *session.S } sess.MaxIterations = agent.MaxIterations() sess.MaxConsecutiveToolCalls = agent.MaxConsecutiveToolCalls() - // Initialize thinking state based on whether thinking_budget was explicitly configured - // in the agent's YAML config. Only enable thinking by default when explicitly configured. - sess.Thinking = agent.ThinkingConfigured() opts := []runtime.Opt{ runtime.WithCurrentAgent(currentAgent), diff --git a/pkg/session/branch.go b/pkg/session/branch.go index 2baf7ea83..08acc023f 100644 --- a/pkg/session/branch.go +++ b/pkg/session/branch.go @@ -82,7 +82,6 @@ func copySessionMetadata(dst, src *Session, title string) { } dst.Title = title dst.ToolsApproved = src.ToolsApproved - dst.Thinking = src.Thinking dst.HideToolResults = src.HideToolResults dst.WorkingDir = src.WorkingDir dst.SendUserMessage = src.SendUserMessage diff --git a/pkg/session/migrations.go b/pkg/session/migrations.go index 7c64a5822..c76352c77 100644 --- a/pkg/session/migrations.go +++ b/pkg/session/migrations.go @@ -460,13 +460,13 @@ func migrateItem(ctx context.Context, db *sql.DB, sessionID string, position int _, execErr := db.ExecContext(ctx, `INSERT INTO sessions (id, messages, tools_approved, input_tokens, output_tokens, title, cost, send_user_message, max_iterations, working_dir, created_at, starred, permissions, - agent_model_overrides, custom_models_used, 
thinking, parent_id) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + agent_model_overrides, custom_models_used, parent_id) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, subSessionID, string(subMessagesJSON), item.SubSession.ToolsApproved, item.SubSession.InputTokens, item.SubSession.OutputTokens, item.SubSession.Title, item.SubSession.Cost, item.SubSession.SendUserMessage, item.SubSession.MaxIterations, item.SubSession.WorkingDir, item.SubSession.CreatedAt.Format(time.RFC3339), - item.SubSession.Starred, "", "{}", "[]", item.SubSession.Thinking, sessionID) + item.SubSession.Starred, "", "{}", "[]", sessionID) if execErr != nil { return fmt.Errorf("inserting sub-session: %w", execErr) } diff --git a/pkg/session/session.go b/pkg/session/session.go index ff52efd29..262785f23 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -76,12 +76,6 @@ type Session struct { // ToolsApproved is a flag to indicate if the tools have been approved ToolsApproved bool `json:"tools_approved"` - // Thinking is a session-level flag to enable thinking/interleaved thinking - // defaults for all providers. When false, providers will not apply auto-thinking budgets - // or interleaved thinking, regardless of model config. This is controlled by the /think - // command in the TUI. Defaults to true (thinking enabled). 
- Thinking bool `json:"thinking"` - // HideToolResults is a flag to indicate if tool results should be hidden HideToolResults bool `json:"hide_tool_results"` @@ -466,12 +460,6 @@ func WithToolsApproved(toolsApproved bool) Opt { } } -func WithThinking(thinking bool) Opt { - return func(s *Session) { - s.Thinking = thinking - } -} - func WithHideToolResults(hideToolResults bool) Opt { return func(s *Session) { s.HideToolResults = hideToolResults @@ -573,7 +561,6 @@ func New(opts ...Opt) *Session { ID: sessionID, CreatedAt: time.Now(), SendUserMessage: true, - Thinking: false, } for _, opt := range opts { diff --git a/pkg/session/session_options_test.go b/pkg/session/session_options_test.go index 4da2af387..db4d20925 100644 --- a/pkg/session/session_options_test.go +++ b/pkg/session/session_options_test.go @@ -25,14 +25,12 @@ func TestNewSession_AllOptionsApplied(t *testing.T) { WithMaxIterations(10), WithToolsApproved(true), WithHideToolResults(true), - WithThinking(true), WithWorkingDir("/work"), ) assert.Equal(t, 10, s.MaxIterations) assert.True(t, s.ToolsApproved) assert.True(t, s.HideToolResults) - assert.True(t, s.Thinking) assert.Equal(t, "/work", s.WorkingDir) assert.Equal(t, []string{"/work"}, s.AllowedDirectories()) } @@ -45,7 +43,6 @@ func TestNewSession_ConsistencyBetweenInitialAndSpawned(t *testing.T) { workingDir := "/projects/app" autoApprove := true hideToolResults := true - thinking := true maxIterations := 25 // Simulate what createLocalRuntimeAndSession builds (initial session). 
@@ -53,7 +50,6 @@ func TestNewSession_ConsistencyBetweenInitialAndSpawned(t *testing.T) { WithMaxIterations(maxIterations), WithToolsApproved(autoApprove), WithHideToolResults(hideToolResults), - WithThinking(thinking), WithWorkingDir(workingDir), ) @@ -62,14 +58,12 @@ func TestNewSession_ConsistencyBetweenInitialAndSpawned(t *testing.T) { WithMaxIterations(maxIterations), WithToolsApproved(autoApprove), WithHideToolResults(hideToolResults), - WithThinking(thinking), WithWorkingDir(workingDir), ) assert.Equal(t, initial.MaxIterations, spawned.MaxIterations) assert.Equal(t, initial.ToolsApproved, spawned.ToolsApproved) assert.Equal(t, initial.HideToolResults, spawned.HideToolResults) - assert.Equal(t, initial.Thinking, spawned.Thinking) assert.Equal(t, initial.WorkingDir, spawned.WorkingDir) assert.Equal(t, initial.AllowedDirectories(), spawned.AllowedDirectories()) } diff --git a/pkg/session/store.go b/pkg/session/store.go index 1b08838ca..7ac06618d 100644 --- a/pkg/session/store.go +++ b/pkg/session/store.go @@ -202,7 +202,6 @@ func (s *InMemorySessionStore) UpdateSession(_ context.Context, session *Session Evals: session.Evals, CreatedAt: session.CreatedAt, ToolsApproved: session.ToolsApproved, - Thinking: session.Thinking, HideToolResults: session.HideToolResults, WorkingDir: session.WorkingDir, SendUserMessage: session.SendUserMessage, @@ -538,7 +537,7 @@ func (s *SQLiteSessionStore) AddSession(ctx context.Context, session *Session) e session.ID, session.ToolsApproved, session.InputTokens, session.OutputTokens, session.Title, session.Cost, session.SendUserMessage, session.MaxIterations, session.WorkingDir, session.CreatedAt.Format(time.RFC3339), permissionsJSON, agentModelOverridesJSON, - customModelsUsedJSON, session.Thinking, parentID) + customModelsUsedJSON, false, parentID) if err != nil { return err } @@ -559,7 +558,8 @@ func scanSession(scanner interface { Scan(dest ...any) error }, ) (*Session, error) { - var toolsApprovedStr, inputTokensStr, 
outputTokensStr, titleStr, costStr, sendUserMessageStr, maxIterationsStr, createdAtStr, starredStr, agentModelOverridesJSON, customModelsUsedJSON, thinkingStr string + var toolsApprovedStr, inputTokensStr, outputTokensStr, titleStr, costStr, sendUserMessageStr, maxIterationsStr, createdAtStr, starredStr, agentModelOverridesJSON, customModelsUsedJSON string + var thinkingStr string // read from DB but not used (kept for backward compatibility) var sessionID string var workingDir sql.NullString var permissionsJSON sql.NullString @@ -609,10 +609,7 @@ func scanSession(scanner interface { return nil, err } - thinking, err := strconv.ParseBool(thinkingStr) - if err != nil { - return nil, err - } + // thinkingStr is read from the DB but ignored (column kept for backward compatibility). // Parse permissions if present var permissions *PermissionsConfig @@ -644,7 +641,6 @@ func scanSession(scanner interface { Title: titleStr, Messages: nil, // Loaded separately from session_items ToolsApproved: toolsApproved, - Thinking: thinking, InputTokens: inputTokens, OutputTokens: outputTokens, Cost: cost, @@ -1012,7 +1008,7 @@ func (s *SQLiteSessionStore) UpdateSession(ctx context.Context, session *Session session.ID, session.ToolsApproved, session.InputTokens, session.OutputTokens, session.Title, session.Cost, session.SendUserMessage, session.MaxIterations, session.WorkingDir, session.CreatedAt.Format(time.RFC3339), session.Starred, permissionsJSON, agentModelOverridesJSON, - customModelsUsedJSON, session.Thinking, parentID) + customModelsUsedJSON, false, parentID) if err != nil { return err } @@ -1214,7 +1210,7 @@ func (s *SQLiteSessionStore) addSessionTx(ctx context.Context, tx *sql.Tx, sessi session.ID, session.ToolsApproved, session.InputTokens, session.OutputTokens, session.Title, session.Cost, session.SendUserMessage, session.MaxIterations, session.WorkingDir, session.CreatedAt.Format(time.RFC3339), session.Starred, - permissionsJSON, agentModelOverridesJSON, 
customModelsUsedJSON, session.Thinking, + permissionsJSON, agentModelOverridesJSON, customModelsUsedJSON, false, parentID) return err } diff --git a/pkg/session/store_test.go b/pkg/session/store_test.go index a82ca2a1a..276390c9d 100644 --- a/pkg/session/store_test.go +++ b/pkg/session/store_test.go @@ -629,90 +629,6 @@ func TestAgentModelOverrides_EmptyMap(t *testing.T) { assert.Empty(t, retrieved.AgentModelOverrides) } -func TestThinking_Persistence(t *testing.T) { - t.Parallel() - - t.Run("default is true (thinking enabled)", func(t *testing.T) { - t.Parallel() - - store, err := NewSQLiteSessionStore(filepath.Join(t.TempDir(), "test.db")) - require.NoError(t, err) - defer store.(*SQLiteSessionStore).Close() - - session := &Session{ - ID: "thinking-default-session", - Title: "Test Session", - CreatedAt: time.Now(), - Thinking: true, // Default value for new sessions - } - - err = store.AddSession(t.Context(), session) - require.NoError(t, err) - - retrieved, err := store.GetSession(t.Context(), "thinking-default-session") - require.NoError(t, err) - assert.True(t, retrieved.Thinking) - }) - - t.Run("persists when set to false (thinking disabled)", func(t *testing.T) { - t.Parallel() - - store, err := NewSQLiteSessionStore(filepath.Join(t.TempDir(), "test.db")) - require.NoError(t, err) - defer store.(*SQLiteSessionStore).Close() - - session := &Session{ - ID: "thinking-disabled-session", - Title: "Test Session", - CreatedAt: time.Now(), - Thinking: false, - } - - err = store.AddSession(t.Context(), session) - require.NoError(t, err) - - retrieved, err := store.GetSession(t.Context(), "thinking-disabled-session") - require.NoError(t, err) - assert.False(t, retrieved.Thinking) - }) - - t.Run("updates correctly via toggle", func(t *testing.T) { - t.Parallel() - - store, err := NewSQLiteSessionStore(filepath.Join(t.TempDir(), "test.db")) - require.NoError(t, err) - defer store.(*SQLiteSessionStore).Close() - - session := &Session{ - ID: "thinking-toggle-session", - 
Title: "Test Session", - CreatedAt: time.Now(), - Thinking: true, - } - - err = store.AddSession(t.Context(), session) - require.NoError(t, err) - - // Simulate toggle: true -> false - session.Thinking = !session.Thinking - err = store.UpdateSession(t.Context(), session) - require.NoError(t, err) - - retrieved, err := store.GetSession(t.Context(), "thinking-toggle-session") - require.NoError(t, err) - assert.False(t, retrieved.Thinking) - - // Toggle again: false -> true - session.Thinking = !session.Thinking - err = store.UpdateSession(t.Context(), session) - require.NoError(t, err) - - retrieved, err = store.GetSession(t.Context(), "thinking-toggle-session") - require.NoError(t, err) - assert.True(t, retrieved.Thinking) - }) -} - func TestNewSQLiteSessionStore_MigrationFailureRecovery(t *testing.T) { tempDir := t.TempDir() dbPath := filepath.Join(tempDir, "test_migration_recovery.db") diff --git a/pkg/sessiontitle/generator.go b/pkg/sessiontitle/generator.go index fc1cb3e90..0ebccc30d 100644 --- a/pkg/sessiontitle/generator.go +++ b/pkg/sessiontitle/generator.go @@ -104,8 +104,8 @@ func (g *Generator) Generate(ctx context.Context, sessionID string, userMessages baseModel, options.WithStructuredOutput(nil), options.WithMaxTokens(20), + options.WithNoThinking(), options.WithGeneratingTitle(), - options.WithThinking(false), // Disable thinking to avoid max_tokens < thinking_budget errors ) // Call the provider directly (no tools needed for title generation) diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go index fee1bfc69..4a9b16b99 100644 --- a/pkg/teamloader/teamloader.go +++ b/pkg/teamloader/teamloader.go @@ -176,7 +176,7 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c agent.WithHooks(config.MergeHooks(agentConfig.Hooks, cliHooks)), } - models, thinkingConfigured, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, runConfig) + models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, 
runConfig) if err != nil { // Return auto model fallback errors and DMR not installed errors directly // without wrapping to provide cleaner messages @@ -188,7 +188,6 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c for _, model := range models { opts = append(opts, agent.WithModel(model)) } - opts = append(opts, agent.WithThinkingConfigured(thinkingConfigured)) // Load fallback models if configured fallbackModelRefs := agentConfig.GetFallbackModels() @@ -284,9 +283,8 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c }, nil } -func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentConfig, autoModelFn func() latest.ModelConfig, runConfig *config.RuntimeConfig) ([]provider.Provider, bool, error) { +func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentConfig, autoModelFn func() latest.ModelConfig, runConfig *config.RuntimeConfig) ([]provider.Provider, error) { var models []provider.Provider - thinkingConfigured := false // Obtain the singleton store once, outside the loop. modelsStore, modelsStoreErr := modelsdev.NewStore() @@ -299,18 +297,11 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC modelCfg = autoModelFn() isAutoModel = true } else { - return nil, false, fmt.Errorf("model '%s' not found in configuration", name) + return nil, fmt.Errorf("model '%s' not found in configuration", name) } } modelCfg.Name = name - // Check if thinking_budget was explicitly configured BEFORE provider defaults are applied. - // This is used to initialize session thinking state - thinking is only enabled by default - // when the user explicitly configured it in their YAML. 
- if modelCfg.ThinkingBudget != nil && !modelCfg.ThinkingBudget.IsDisabled() { - thinkingConfigured = true - } - // Use max_tokens from config if specified, otherwise look up from models.dev maxTokens := &defaultMaxTokens if modelCfg.MaxTokens != nil { @@ -341,14 +332,14 @@ func getModelsForAgent(ctx context.Context, cfg *latest.Config, a *latest.AgentC if err != nil { // Return a cleaner error message for auto model selection failures if isAutoModel { - return nil, false, &config.AutoModelFallbackError{} + return nil, &config.AutoModelFallbackError{} } - return nil, false, err + return nil, err } models = append(models, model) } - return models, thinkingConfigured, nil + return models, nil } // getFallbackModelsForAgent returns fallback providers for an agent based on its fallback configuration. diff --git a/pkg/tui/commands/commands.go b/pkg/tui/commands/commands.go index 1deacef21..77d0d9d73 100644 --- a/pkg/tui/commands/commands.go +++ b/pkg/tui/commands/commands.go @@ -10,7 +10,6 @@ import ( "github.com/docker/docker-agent/pkg/app" "github.com/docker/docker-agent/pkg/feedback" - "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tui/components/toolcommon" "github.com/docker/docker-agent/pkg/tui/core" "github.com/docker/docker-agent/pkg/tui/messages" @@ -197,16 +196,7 @@ func builtInSessionCommands() []Item { return core.CmdHandler(messages.ToggleSessionStarMsg{}) }, }, - { - ID: "session.think", - Label: "Think", - SlashCommand: "/think", - Description: "Toggle thinking/reasoning mode", - Category: "Session", - Execute: func(string) tea.Cmd { - return core.CmdHandler(messages.ToggleThinkingMsg{}) - }, - }, + { ID: "session.title", Label: "Title", @@ -304,18 +294,6 @@ func BuildCommandCategories(ctx context.Context, application *app.App) []Categor // Get session commands and filter based on model capabilities sessionCommands := builtInSessionCommands() - // Check if the current model supports reasoning; hide /think if not - 
currentModel := application.CurrentAgentModel() - if !modelsdev.ModelSupportsReasoning(ctx, currentModel) { - filtered := make([]Item, 0, len(sessionCommands)) - for _, cmd := range sessionCommands { - if cmd.ID != "session.think" { - filtered = append(filtered, cmd) - } - } - sessionCommands = filtered - } - // Hide /permissions if no permissions are configured if !application.HasPermissions() { filtered := make([]Item, 0, len(sessionCommands)) diff --git a/pkg/tui/components/sidebar/sidebar.go b/pkg/tui/components/sidebar/sidebar.go index e01327c14..3579d933c 100644 --- a/pkg/tui/components/sidebar/sidebar.go +++ b/pkg/tui/components/sidebar/sidebar.go @@ -1,7 +1,6 @@ package sidebar import ( - "context" "fmt" "log/slog" "maps" @@ -15,7 +14,6 @@ import ( tea "charm.land/bubbletea/v2" "charm.land/lipgloss/v2" - "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/runtime" "github.com/docker/docker-agent/pkg/session" @@ -94,8 +92,6 @@ type Model interface { IsScrollbarDragging() bool // WorkingDirectory returns the working directory path displayed in the sidebar. WorkingDirectory() string - // Cleanup cancels any in-flight async operations. 
- Cleanup() } // ragIndexingState tracks per-strategy indexing progress @@ -138,7 +134,6 @@ type model struct { workingDirectory string queuedMessages []string // Truncated preview of queued messages streamCancelled bool // true after ESC cancel until next StreamStartedEvent - reasoningSupported bool // true if current model supports reasoning (default: true / fail-open) collapsed bool // true when sidebar is collapsed titleRegenerating bool // true when title is being regenerated by AI titleGenerated bool // true once a title has been generated or set (hides pencil until then) @@ -147,8 +142,6 @@ type model struct { titleInput textinput.Model lastTitleClickTime time.Time // for double-click detection on title - cancelReasoningCheck context.CancelFunc // cancels the in-flight ModelSupportsReasoning call - // Render cache to avoid re-rendering sections on every frame during scroll cachedLines []string // Cached rendered lines cachedWidth int // Width used for cached render @@ -185,11 +178,10 @@ func New(sessionState *service.SessionState, opts ...Option) Model { scrollview.WithWheelStep(1), scrollview.WithKeyMap(nil), // Sidebar has no keyboard scroll — only mouse ), - workingDirectory: getCurrentWorkingDirectory(), - reasoningSupported: true, - preferredWidth: DefaultWidth, - titleInput: ti, - cacheDirty: true, // Initial render needed + workingDirectory: getCurrentWorkingDirectory(), + preferredWidth: DefaultWidth, + titleInput: ti, + cacheDirty: true, // Initial render needed } for _, opt := range opts { opt(m) @@ -254,20 +246,6 @@ func (m *model) SetTodos(result *tools.ToolCallResult) error { return m.todoComp.SetTodos(result) } -// reasoningSupportResultMsg carries the async result of a ModelSupportsReasoning check. -type reasoningSupportResultMsg struct { - modelID string - supported bool -} - -// checkReasoningSupportCmd returns a tea.Cmd that checks reasoning support asynchronously. 
-func checkReasoningSupportCmd(ctx context.Context, modelID string) tea.Cmd { - return func() tea.Msg { - supported := modelsdev.ModelSupportsReasoning(ctx, modelID) - return reasoningSupportResultMsg{modelID: modelID, supported: supported} - } -} - // SetAgentInfo sets the current agent information and updates the model in availableAgents. // It no-ops when the values are unchanged to avoid unnecessary cache invalidation and re-renders. func (m *model) SetAgentInfo(agentName, modelID, description string) tea.Cmd { @@ -279,12 +257,6 @@ func (m *model) SetAgentInfo(agentName, modelID, description string) tea.Cmd { m.agentModel = modelID m.agentDescription = description - if m.cancelReasoningCheck != nil { - m.cancelReasoningCheck() - } - ctx, cancel := context.WithCancel(context.Background()) - m.cancelReasoningCheck = cancel - // Update the provider and model in availableAgents for the current agent. // This is important when fallback models from different providers are used. // Parse "provider/model" format using first slash to handle model names containing slashes @@ -302,7 +274,7 @@ func (m *model) SetAgentInfo(agentName, modelID, description string) tea.Cmd { } } m.invalidateCache() - return checkReasoningSupportCmd(ctx, modelID) + return nil } // SetTeamInfo sets the available agents in the team @@ -694,12 +666,6 @@ func (m *model) Update(msg tea.Msg) (layout.Model, tea.Cmd) { m.invalidateCache() m.stopSpinner() // Will only stop if no other state needs it return m, nil - case reasoningSupportResultMsg: - if msg.modelID == m.agentModel { - m.reasoningSupported = msg.supported - m.invalidateCache() - } - return m, nil case *runtime.AgentInfoEvent: cmd := m.SetAgentInfo(msg.AgentName, msg.Model, msg.Description) return m, cmd @@ -1246,14 +1212,12 @@ func (m *model) toolsetInfo(contentWidth int) string { } // Toggle indicators with shortcuts - // Only show "Thinking enabled" if the model supports reasoning toggles := []struct { enabled bool label string shortcut 
string }{ {m.sessionState.YoloMode(), "YOLO mode enabled", "^y"}, - {m.sessionState.Thinking() && m.reasoningSupported, "Thinking enabled", "/think"}, {m.sessionState.HideToolResults(), "Tool output hidden", "^o"}, {m.sessionState.SplitDiffView(), "Split Diff View", "/split-diff"}, } @@ -1464,11 +1428,3 @@ func (m *model) UpdateTitleInput(msg tea.Msg) tea.Cmd { m.invalidateCache() // Input changes affect rendering return cmd } - -// Cleanup cancels any in-flight async operations. -func (m *model) Cleanup() { - if m.cancelReasoningCheck != nil { - m.cancelReasoningCheck() - m.cancelReasoningCheck = nil - } -} diff --git a/pkg/tui/handlers.go b/pkg/tui/handlers.go index 3ad181bf3..50ab0dddf 100644 --- a/pkg/tui/handlers.go +++ b/pkg/tui/handlers.go @@ -15,7 +15,6 @@ import ( "github.com/docker/docker-agent/pkg/app" "github.com/docker/docker-agent/pkg/browser" "github.com/docker/docker-agent/pkg/evaluation" - "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/session" "github.com/docker/docker-agent/pkg/shellpath" "github.com/docker/docker-agent/pkg/tools" @@ -258,43 +257,6 @@ func (m *appModel) handleToggleYolo() (tea.Model, tea.Cmd) { return m, cmd } -func (m *appModel) handleToggleThinking() (tea.Model, tea.Cmd) { - if m.cancelThinkingCheck != nil { - m.cancelThinkingCheck() - } - ctx, cancel := context.WithCancel(context.Background()) - m.cancelThinkingCheck = cancel - - currentModel := m.application.CurrentAgentModel() - return m, func() tea.Msg { - supported := modelsdev.ModelSupportsReasoning(ctx, currentModel) - return messages.ToggleThinkingResultMsg{Supported: supported} - } -} - -func (m *appModel) handleToggleThinkingResult(msg messages.ToggleThinkingResultMsg) (tea.Model, tea.Cmd) { - if !msg.Supported { - return m, notification.InfoCmd("Thinking/reasoning is not supported for the current model") - } - sess := m.application.Session() - sess.Thinking = !sess.Thinking - m.sessionState.SetThinking(sess.Thinking) - if store := 
m.application.SessionStore(); store != nil { - if err := store.UpdateSession(context.Background(), sess); err != nil { - return m, notification.ErrorCmd(fmt.Sprintf("Failed to save session: %v", err)) - } - } - var infoMsg string - if sess.Thinking { - infoMsg = "Thinking/reasoning enabled for this session" - } else { - infoMsg = "Thinking/reasoning disabled for this session" - } - updated, cmd := m.chatPage.Update(messages.SessionToggleChangedMsg{}) - m.chatPage = updated.(chat.Page) - return m, tea.Batch(cmd, notification.InfoCmd(infoMsg)) -} - func (m *appModel) handleToggleHideToolResults() (tea.Model, tea.Cmd) { updated, cmd := m.chatPage.Update(messages.ToggleHideToolResultsMsg{}) m.chatPage = updated.(chat.Page) diff --git a/pkg/tui/messages/messages.go b/pkg/tui/messages/messages.go index d19e7997e..b93dbdaed 100644 --- a/pkg/tui/messages/messages.go +++ b/pkg/tui/messages/messages.go @@ -4,7 +4,7 @@ // - session.go: Session lifecycle (new, exit, save, load, etc.) // - theme.go: Theme selection and preview // - agent.go: Agent switching and model selection -// - toggle.go: UI state toggles (YOLO, thinking, sidebar) +// - toggle.go: UI state toggles (YOLO, sidebar) // - input.go: Editor input, attachments, and speech // - mcp.go: MCP prompt interactions // diff --git a/pkg/tui/messages/toggle.go b/pkg/tui/messages/toggle.go index 4c87322c4..968b8fef2 100644 --- a/pkg/tui/messages/toggle.go +++ b/pkg/tui/messages/toggle.go @@ -5,15 +5,6 @@ type ( // ToggleYoloMsg toggles YOLO mode (auto-approve tools). ToggleYoloMsg struct{} - // ToggleThinkingMsg toggles extended thinking mode. - ToggleThinkingMsg struct{} - - // ToggleThinkingResultMsg carries the async result of reasoning support check. - // If Supported is true, thinking is toggled; otherwise a notification is shown. - ToggleThinkingResultMsg struct { - Supported bool - } - // ToggleHideToolResultsMsg toggles hiding of tool results. 
ToggleHideToolResultsMsg struct{} diff --git a/pkg/tui/page/chat/chat.go b/pkg/tui/page/chat/chat.go index 7eb86530f..f067dbc18 100644 --- a/pkg/tui/page/chat/chat.go +++ b/pkg/tui/page/chat/chat.go @@ -95,7 +95,6 @@ type Page interface { layout.Sizeable layout.Help CompactSession(additionalPrompt string) tea.Cmd - Cleanup() // SetSessionStarred updates the sidebar star indicator SetSessionStarred(starred bool) // SetTitleRegenerating sets the title regenerating state on the sidebar @@ -899,10 +898,6 @@ func (p *chatPage) CompactSession(additionalPrompt string) tea.Cmd { ) } -func (p *chatPage) Cleanup() { - p.sidebar.Cleanup() -} - // SetSessionStarred updates the sidebar star indicator func (p *chatPage) SetSessionStarred(starred bool) { p.sidebar.SetSessionStarred(starred) diff --git a/pkg/tui/service/sessionstate.go b/pkg/tui/service/sessionstate.go index ef3a4c158..77d4e1d95 100644 --- a/pkg/tui/service/sessionstate.go +++ b/pkg/tui/service/sessionstate.go @@ -14,7 +14,6 @@ import ( type SessionStateReader interface { SplitDiffView() bool YoloMode() bool - Thinking() bool HideToolResults() bool CurrentAgentName() string PreviousMessage() *types.Message @@ -32,7 +31,6 @@ var _ SessionStateReader = (*SessionState)(nil) type SessionState struct { splitDiffView bool yoloMode bool - thinking bool hideToolResults bool sessionTitle string @@ -45,7 +43,6 @@ func NewSessionState(s *session.Session) *SessionState { return &SessionState{ splitDiffView: userconfig.Get().GetSplitDiffView(), yoloMode: s.ToolsApproved, - thinking: s.Thinking, hideToolResults: s.HideToolResults, sessionTitle: s.Title, } @@ -67,14 +64,6 @@ func (s *SessionState) SetYoloMode(yoloMode bool) { s.yoloMode = yoloMode } -func (s *SessionState) Thinking() bool { - return s.thinking -} - -func (s *SessionState) SetThinking(thinking bool) { - s.thinking = thinking -} - func (s *SessionState) HideToolResults() bool { return s.hideToolResults } diff --git a/pkg/tui/tui.go b/pkg/tui/tui.go index 
c464d4ca9..09f7522a1 100644 --- a/pkg/tui/tui.go +++ b/pkg/tui/tui.go @@ -103,8 +103,6 @@ type appModel struct { wWidth, wHeight int width, height int - cancelThinkingCheck context.CancelFunc // cancels the in-flight thinking toggle check - // Content area height (height minus editor, tab bar, resize handle, status bar) contentHeight int @@ -755,12 +753,6 @@ func (m *appModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { case messages.ToggleYoloMsg: return m.handleToggleYolo() - case messages.ToggleThinkingMsg: - return m.handleToggleThinking() - - case messages.ToggleThinkingResultMsg: - return m.handleToggleThinkingResult(msg) - case messages.ToggleHideToolResultsMsg: return m.handleToggleHideToolResults() @@ -1088,10 +1080,7 @@ func (m *appModel) replaceActiveSession(ctx context.Context, sess *session.Sessi slog.Debug("Replacing empty session in-place", "tab_id", activeID, "loaded_session", sess.ID) - // Cleanup old chat page and editor for the active session - if cp, ok := m.chatPages[activeID]; ok { - cp.Cleanup() - } + // Cleanup old editor for the active session if ed, ok := m.editors[activeID]; ok { ed.Cleanup() } @@ -1361,10 +1350,7 @@ func (m *appModel) handleCloseTab(sessionID string) (tea.Model, tea.Cmd) { nextActiveID := m.supervisor.CloseSession(sessionID) // Clean up per-session state - if cp, ok := m.chatPages[sessionID]; ok { - cp.Cleanup() - delete(m.chatPages, sessionID) - } + delete(m.chatPages, sessionID) if ed, ok := m.editors[sessionID]; ok { ed.Cleanup() delete(m.editors, sessionID) @@ -2058,15 +2044,8 @@ func (m *appModel) windowTitle() string { // cleanupAll cleans up all sessions, editors, and resources. 
func (m *appModel) cleanupAll() { - if m.cancelThinkingCheck != nil { - m.cancelThinkingCheck() - m.cancelThinkingCheck = nil - } m.transcriber.Stop() m.closeTranscriptCh() - for _, cp := range m.chatPages { - cp.Cleanup() - } for _, ed := range m.editors { ed.Cleanup() } diff --git a/pkg/tui/tui_exit_test.go b/pkg/tui/tui_exit_test.go index 21b0fe919..0cad9d12d 100644 --- a/pkg/tui/tui_exit_test.go +++ b/pkg/tui/tui_exit_test.go @@ -22,16 +22,13 @@ import ( ) // mockChatPage implements chat.Page for testing. -type mockChatPage struct { - cleanupCalled bool -} +type mockChatPage struct{} func (m *mockChatPage) Init() tea.Cmd { return nil } func (m *mockChatPage) Update(tea.Msg) (layout.Model, tea.Cmd) { return m, nil } func (m *mockChatPage) View() string { return "" } func (m *mockChatPage) SetSize(int, int) tea.Cmd { return nil } func (m *mockChatPage) CompactSession(string) tea.Cmd { return nil } -func (m *mockChatPage) Cleanup() { m.cleanupCalled = true } func (m *mockChatPage) SetSessionStarred(bool) {} func (m *mockChatPage) SetTitleRegenerating(bool) tea.Cmd { return nil } func (m *mockChatPage) ScrollToBottom() tea.Cmd { return nil } @@ -126,7 +123,7 @@ func hasMsg[T any](msgs []tea.Msg) bool { return false } -func newTestModel() (*appModel, *mockChatPage, *mockEditor) { +func newTestModel() (*appModel, *mockEditor) { page := &mockChatPage{} ed := &mockEditor{} @@ -143,17 +140,16 @@ func newTestModel() (*appModel, *mockChatPage, *mockEditor) { dialogMgr: dialog.New(), completions: completion.New(), } - return m, page, ed + return m, ed } func TestExitSessionMsg_ExitsImmediately(t *testing.T) { t.Parallel() - m, page, ed := newTestModel() + m, ed := newTestModel() _, cmd := m.Update(messages.ExitSessionMsg{}) - assert.True(t, page.cleanupCalled, "Cleanup() should be called on chat page") assert.True(t, ed.cleanupCalled, "Cleanup() should be called on editor") require.NotNil(t, cmd, "cmd should not be nil") msgs := collectMsgs(cmd) @@ -163,11 +159,10 @@ func 
TestExitSessionMsg_ExitsImmediately(t *testing.T) { func TestExitConfirmedMsg_ExitsImmediately(t *testing.T) { t.Parallel() - m, page, ed := newTestModel() + m, ed := newTestModel() _, cmd := m.Update(dialog.ExitConfirmedMsg{}) - assert.True(t, page.cleanupCalled, "Cleanup() should be called on chat page") assert.True(t, ed.cleanupCalled, "Cleanup() should be called on editor") require.NotNil(t, cmd, "cmd should not be nil") msgs := collectMsgs(cmd)