From aa46935106c529792c019f47beaeeb61d22a858c Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Tue, 17 Feb 2026 17:32:40 +0100
Subject: [PATCH 1/7] llmclient: add agentic client for code reviews

---
 pkg/llmclient/README.md                       |  43 +++
 pkg/llmclient/agentic_client.go               | 320 +++++++++++++++++
 .../agentic_client_integration_test.go        | 170 +++++++++
 pkg/llmclient/agentic_debug.go                |  53 +++
 pkg/llmclient/agentic_tools.go                | 339 ++++++++++++++++++
 pkg/llmclient/agentic_tools_test.go           | 217 +++++++++++
 pkg/llmclient/testdata/fs_access/config.go    |  42 +++
 pkg/llmclient/testdata/fs_access/logger.go    |  40 +++
 pkg/llmclient/testdata/fs_access/main.go      |  15 +
 pkg/llmclient/testdata/fs_access/scanner.go   |  51 +++
 .../testdata/fs_access/storage/cache.go       |  49 +++
 .../testdata/fs_access/storage/disk.go        |  66 ++++
 pkg/llmclient/testdata/fs_access/validator.go |  66 ++++
 .../testdata/no_fs_access/handlers/data.go    |  49 +++
 .../testdata/no_fs_access/handlers/health.go  |  25 ++
 pkg/llmclient/testdata/no_fs_access/main.go   |   7 +
 pkg/llmclient/testdata/no_fs_access/models.go |  55 +++
 pkg/llmclient/testdata/no_fs_access/server.go |  69 ++++
 pkg/llmclient/testdata/no_fs_access/utils.go  |  50 +++
 19 files changed, 1726 insertions(+)
 create mode 100644 pkg/llmclient/README.md
 create mode 100644 pkg/llmclient/agentic_client.go
 create mode 100644 pkg/llmclient/agentic_client_integration_test.go
 create mode 100644 pkg/llmclient/agentic_debug.go
 create mode 100644 pkg/llmclient/agentic_tools.go
 create mode 100644 pkg/llmclient/agentic_tools_test.go
 create mode 100644 pkg/llmclient/testdata/fs_access/config.go
 create mode 100644 pkg/llmclient/testdata/fs_access/logger.go
 create mode 100644 pkg/llmclient/testdata/fs_access/main.go
 create mode 100644 pkg/llmclient/testdata/fs_access/scanner.go
 create mode 100644 pkg/llmclient/testdata/fs_access/storage/cache.go
 create mode 100644 pkg/llmclient/testdata/fs_access/storage/disk.go
 create mode 100644 pkg/llmclient/testdata/fs_access/validator.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/handlers/data.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/handlers/health.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/main.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/models.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/server.go
 create mode 100644 pkg/llmclient/testdata/no_fs_access/utils.go

diff --git a/pkg/llmclient/README.md b/pkg/llmclient/README.md
new file mode 100644
index 00000000..41cb7a84
--- /dev/null
+++ b/pkg/llmclient/README.md
@@ -0,0 +1,43 @@
+# llmclient
+
+LLM client package for code analysis.
+
+- **AgenticClient**: Provider-agnostic agentic client using [langchaingo](https://github.com/tmc/langchaingo). Gives the LLM tools to explore a repository and answer questions about code.
+
+## AgenticClient
+
+The agentic client runs a tool-calling loop where the LLM can explore code using read-only tools, then submits structured answers via `submit_answer`.
+
+**Tools**: `list_directory`, `read_file`, `grep`, `git` (allowlisted subcommands), `submit_answer`
+
+**Providers**: Google (Gemini), Anthropic (Claude), OpenAI
+
+```mermaid
+sequenceDiagram
+    participant Caller
+    participant AgenticClient
+    participant LLM
+    participant Tools
+
+    Caller->>AgenticClient: CallLLM(prompt, repoPath, opts)
+    AgenticClient->>LLM: system prompt + user prompt + tool definitions
+
+    loop until submit_answer or 100 tool calls
+        LLM-->>AgenticClient: tool call(s)
+        alt submit_answer
+            AgenticClient->>AgenticClient: collect answer
+            AgenticClient-->>LLM: "Answer recorded"
+        else read_file / list_directory / grep / git
+            AgenticClient->>Tools: execute tool (sandboxed to repoPath)
+            Tools-->>AgenticClient: result
+            AgenticClient-->>LLM: tool result
+        end
+    end
+
+    Note over LLM,AgenticClient: LLM sends message with no tool calls → done
+    AgenticClient-->>Caller: []AnswerSchema
+```
+
+## Debug logging
+
+Set `DEBUG=1` to write detailed logs to `/tmp/validator-agentic-<timestamp>.log`.
diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
new file mode 100644
index 00000000..a9ca0963
--- /dev/null
+++ b/pkg/llmclient/agentic_client.go
@@ -0,0 +1,320 @@
+package llmclient
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/tmc/langchaingo/llms"
+	"github.com/tmc/langchaingo/llms/anthropic"
+	"github.com/tmc/langchaingo/llms/googleai"
+	"github.com/tmc/langchaingo/llms/openai"
+)
+
+const (
+	maxToolCalls            = 100
+	maxLLMRetries           = 3
+	maxConsecutiveNoTools   = 5
+	retryDelay              = 2 * time.Second
+)
+
+// AnswerSchema represents the structured response from the agentic client
+type AnswerSchema struct {
+	Answer      string   `json:"answer"`
+	ShortAnswer bool     `json:"short_answer"`
+	Files       []string `json:"files,omitempty"`
+	CodeSnippet string   `json:"code_snippet,omitempty"`
+}
+
+// AgenticCallOptions contains configuration for the agentic LLM call
+type AgenticCallOptions struct {
+	Model    string // e.g. "gemini-2.0-flash"
+	Provider string // "google", "anthropic", "openai"
+	APIKey   string
+}
+
+// AgenticClient is an interface for agentic LLM interactions
+type AgenticClient interface {
+	CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error)
+}
+
+// agenticClientImpl implements AgenticClient
+type agenticClientImpl struct{}
+
+// NewAgenticClient creates a new AgenticClient
+func NewAgenticClient() AgenticClient {
+	return &agenticClientImpl{}
+}
+
+// CallLLM executes an agentic loop with tools to answer questions about code.
+// The prompt may contain multiple questions, in which case the agent will call
+// submit_answer multiple times. All answers are collected and returned.
+func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) {
+	if opts == nil {
+		return nil, fmt.Errorf("options are required")
+	}
+
+	if opts.APIKey == "" {
+		return nil, fmt.Errorf("API key is required")
+	}
+
+	if opts.Model == "" {
+		return nil, fmt.Errorf("model is required")
+	}
+
+	if opts.Provider == "" {
+		return nil, fmt.Errorf("provider is required")
+	}
+
+	// Initialize LLM based on provider
+	llm, err := initLLM(ctx, opts)
+	if err != nil {
+		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
+	}
+
+	// Build tools
+	tools := buildAgenticTools()
+
+	// Create tool executor
+	executor := newToolExecutor(repositoryPath)
+
+	// System prompt
+	systemPrompt := `You are a code analysis assistant. You have tools to explore code in a repository.
+
+AVAILABLE TOOLS:
+- list_directory: List files at a path. Use "." for root.
+- read_file: Read a file's contents. This is your primary tool for understanding code.
+- grep: Search for a pattern across files.
+- git: Run read-only git commands (log, show, diff, status, etc.)
+- submit_answer: Submit your final answer.
+
+STRATEGY:
+1. Use list_directory to see what files exist
+2. Use read_file to read the source code files
+3. Analyze the code to answer the question
+
+You can only use one tool at a time.
+IMPORTANT: You are in non-interactive mode. Start working and using your tools immediately.
+When ready, use submit_answer. For multiple questions, call submit_answer once per question.`
+
+	// Build initial messages
+	messages := []llms.MessageContent{
+		llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
+		llms.TextParts(llms.ChatMessageTypeHuman, prompt),
+	}
+
+	// Collect answers
+	var answers []AnswerSchema
+
+	// Agentic loop
+	toolCallsRemaining := maxToolCalls
+
+	// Print debug log file path before starting the loop
+	printDebugLogPath()
+	debugLog("\n\n\n")
+	debugLog("################################################################")
+	debugLog("# NEW CallLLM - provider=%s model=%s", opts.Provider, opts.Model)
+	debugLog("# repo=%s", repositoryPath)
+	debugLog("# prompt=%s", truncateString(prompt, 200))
+	debugLog("################################################################")
+
+	iteration := 0
+	consecutiveNoTools := 0
+	for toolCallsRemaining > 0 {
+		iteration++
+		debugLog("========== AgenticClient: iteration %d ==========", iteration)
+		debugLog("AgenticClient: %d tool calls remaining, %d answers collected", toolCallsRemaining, len(answers))
+
+		// Call LLM with retry logic
+		debugLog("AgenticClient: calling LLM...")
+		resp, err := callLLMWithRetry(ctx, llm, messages, tools)
+		if err != nil {
+			debugLog("AgenticClient: LLM call failed: %v", err)
+			return nil, fmt.Errorf("LLM call failed after %d retries: %w", maxLLMRetries, err)
+		}
+
+		// resp.Choices contains the LLM's response options. Each choice has Content (text)
+		// and/or ToolCalls (function calls the model wants to make). Typically there's
+		// only one choice unless you request multiple completions.
+		if len(resp.Choices) == 0 {
+			debugLog("AgenticClient: no choices in response")
+			return nil, fmt.Errorf("no response from LLM")
+		}
+
+		// Use first choice. Google puts all tool calls in choices[0].ToolCalls.
+		// Anthropic creates a separate choice per content block (text or tool_use),
+		// but langchaingo's handleAIMessage only supports Parts[0] as either
+		// TextContent or ToolCall, so we process one choice at a time.
+		choice := resp.Choices[0]
+		debugLog("AgenticClient: received response with %d tool calls", len(choice.ToolCalls))
+		if choice.Content != "" {
+			debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200))
+		}
+
+		// If no tool calls, check if we have answers
+		if len(choice.ToolCalls) == 0 {
+			debugLog("AgenticClient: no tool calls in response")
+
+			// If we have collected answers, the agent is done
+			if len(answers) > 0 {
+				debugLog("AgenticClient: agent finished with %d answers", len(answers))
+				return answers, nil
+			}
+
+			consecutiveNoTools++
+			debugLog("AgenticClient: consecutive no-tool responses: %d/%d", consecutiveNoTools, maxConsecutiveNoTools)
+			if consecutiveNoTools >= maxConsecutiveNoTools {
+				return nil, fmt.Errorf("agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools)
+			}
+
+			// No answers yet - add the AI response and remind to use tools
+			if choice.Content != "" {
+				messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content))
+			}
+			debugLog("AgenticClient: no answers yet, reminding agent to use tools")
+			messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman,
+				"You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer."))
+			toolCallsRemaining--
+			continue
+		}
+
+		// Reset consecutive no-tool counter when tools are used
+		consecutiveNoTools = 0
+
+		// Build AI message with tool calls
+		aiMessage := llms.MessageContent{
+			Role: llms.ChatMessageTypeAI,
+		}
+		if choice.Content != "" {
+			aiMessage.Parts = append(aiMessage.Parts, llms.TextContent{Text: choice.Content})
+		}
+		for _, toolCall := range choice.ToolCalls {
+			aiMessage.Parts = append(aiMessage.Parts, toolCall)
+		}
+		messages = append(messages, aiMessage)
+
+		// Process tool calls
+		for i, toolCall := range choice.ToolCalls {
+			toolCallsRemaining--
+			response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), len(answers), executor)
+			messages = append(messages, response)
+			if answer != nil {
+				answers = append(answers, *answer)
+			}
+		}
+	}
+
+	// If we collected some answers but ran out of tool calls, return what we have
+	if len(answers) > 0 {
+		debugLog("AgenticClient: ran out of tool calls, returning %d answers", len(answers))
+		return answers, nil
+	}
+
+	return nil, fmt.Errorf("exceeded maximum tool calls (%d), agent did not complete", maxToolCalls)
+}
+
+// processToolCall processes a single tool call and returns the response message and optional answer
+func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount int, executor *toolExecutor) (llms.MessageContent, *AnswerSchema) {
+	debugLog("AgenticClient: [%d/%d] executing tool: %s", index+1, total, toolCall.FunctionCall.Name)
+	debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500))
+
+	// Check for submit_answer
+	if toolCall.FunctionCall.Name == "submit_answer" {
+		var answer AnswerSchema
+		if err := json.Unmarshal([]byte(toolCall.FunctionCall.Arguments), &answer); err != nil {
+			debugLog("AgenticClient: failed to parse submit_answer: %v", err)
+			// Report parse error back to agent so it can retry
+			return llms.MessageContent{
+				Role: llms.ChatMessageTypeTool,
+				Parts: []llms.ContentPart{
+					llms.ToolCallResponse{
+						ToolCallID: toolCall.ID,
+						Name:       toolCall.FunctionCall.Name,
+						Content:    fmt.Sprintf("Error parsing answer: %v. Please try again with valid JSON.", err),
+					},
+				},
+			}, nil
+		}
+		debugLog("AgenticClient: received answer #%d: short_answer=%v, answer=%s",
+			currentAnswerCount+1, answer.ShortAnswer, truncateString(answer.Answer, 100))
+
+		// Return success response and the answer
+		return llms.MessageContent{
+			Role: llms.ChatMessageTypeTool,
+			Parts: []llms.ContentPart{
+				llms.ToolCallResponse{
+					ToolCallID: toolCall.ID,
+					Name:       toolCall.FunctionCall.Name,
+					Content:    "Answer recorded successfully. If you have answered all questions, respond with a plain text message saying 'I am finished'. Otherwise, continue with the next question.",
+				},
+			},
+		}, &answer
+	}
+
+	// Execute other tools
+	result := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	debugLog("AgenticClient: tool result: %s", truncateString(result, 300))
+
+	return llms.MessageContent{
+		Role: llms.ChatMessageTypeTool,
+		Parts: []llms.ContentPart{
+			llms.ToolCallResponse{
+				ToolCallID: toolCall.ID,
+				Name:       toolCall.FunctionCall.Name,
+				Content:    result,
+			},
+		},
+	}, nil
+}
+
+// callLLMWithRetry calls the LLM with retry logic for transient errors
+func callLLMWithRetry(ctx context.Context, llm llms.Model, messages []llms.MessageContent, tools []llms.Tool) (*llms.ContentResponse, error) {
+	var lastErr error
+	for attempt := 1; attempt <= maxLLMRetries; attempt++ {
+		resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools))
+		if err == nil {
+			return resp, nil
+		}
+		lastErr = err
+		debugLog("AgenticClient: LLM call failed (attempt %d/%d): %v", attempt, maxLLMRetries, err)
+
+		if attempt < maxLLMRetries {
+			debugLog("AgenticClient: retrying in %v...", retryDelay)
+			time.Sleep(retryDelay)
+		}
+	}
+	return nil, lastErr
+}
+
+// truncateString truncates a string to maxLen characters, adding "..." if truncated
+func truncateString(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	return s[:maxLen] + "..."
+}
+
+// initLLM initializes the appropriate LLM based on provider
+func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) {
+	switch opts.Provider {
+	case "google":
+		return googleai.New(
+			ctx,
+			googleai.WithAPIKey(opts.APIKey),
+			googleai.WithDefaultModel(opts.Model),
+		)
+	case "anthropic":
+		return anthropic.New(
+			anthropic.WithToken(opts.APIKey),
+			anthropic.WithModel(opts.Model),
+		)
+	case "openai":
+		return openai.New(
+			openai.WithToken(opts.APIKey),
+			openai.WithModel(opts.Model),
+		)
+	default:
+		return nil, fmt.Errorf("unsupported provider: %s (supported: google, anthropic, openai)", opts.Provider)
+	}
+}
diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
new file mode 100644
index 00000000..3c8035ac
--- /dev/null
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -0,0 +1,170 @@
+package llmclient
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/grafana/plugin-validator/pkg/logme"
+	"github.com/grafana/plugin-validator/pkg/prettyprint"
+	"github.com/stretchr/testify/require"
+)
+
+func hasGeminiAPIKey() bool {
+	return os.Getenv("GEMINI_API_KEY") != ""
+}
+
+func hasAnthropicAPIKey() bool {
+	return os.Getenv("ANTHROPIC_API_KEY") != ""
+}
+
+// TestAgenticClient_NoFilesystemAccess tests that the agent correctly identifies
+// when an application does NOT access the filesystem
+func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
+	if !hasGeminiAPIKey() {
+		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
+	}
+
+	client := NewAgenticClient()
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+	require.NoError(t, err)
+
+	opts := &AgenticCallOptions{
+		Provider: "google",
+		Model:    "gemini-2.0-flash",
+		APIKey:   os.Getenv("GEMINI_API_KEY"),
+	}
+
+	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	logme.DebugFln("Agent answers:")
+	prettyprint.Print(answers)
+
+	require.NoError(t, err, "CallLLM should not return error")
+	require.Len(t, answers, 1, "Should return exactly one answer")
+
+	answer := answers[0]
+	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
+
+	t.Logf("Agent Answer: %s", answer.Answer)
+	t.Logf("Short Answer: %v", answer.ShortAnswer)
+	if len(answer.Files) > 0 {
+		t.Logf("Files: %v", answer.Files)
+	}
+}
+
+// TestAgenticClient_FilesystemAccess tests that the agent correctly identifies
+// when an application DOES access the filesystem
+func TestAgenticClient_FilesystemAccess(t *testing.T) {
+	if !hasGeminiAPIKey() {
+		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
+	}
+
+	client := NewAgenticClient()
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+	require.NoError(t, err)
+
+	opts := &AgenticCallOptions{
+		Provider: "google",
+		Model:    "gemini-2.0-flash",
+		APIKey:   os.Getenv("GEMINI_API_KEY"),
+	}
+
+	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	logme.DebugFln("Agent answers:")
+	prettyprint.Print(answers)
+
+	require.NoError(t, err, "CallLLM should not return error")
+	require.Len(t, answers, 1, "Should return exactly one answer")
+
+	answer := answers[0]
+	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
+
+	t.Logf("Agent Answer: %s", answer.Answer)
+	t.Logf("Short Answer: %v", answer.ShortAnswer)
+	if len(answer.Files) > 0 {
+		t.Logf("Files: %v", answer.Files)
+	}
+}
+
+// TestAgenticClient_NoFilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
+func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) {
+	if !hasAnthropicAPIKey() {
+		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
+	}
+
+	client := NewAgenticClient()
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+	require.NoError(t, err)
+
+	opts := &AgenticCallOptions{
+		Provider: "anthropic",
+		Model:    "claude-sonnet-4-5",
+		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
+	}
+
+	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	logme.DebugFln("Agent answers:")
+	prettyprint.Print(answers)
+
+	require.NoError(t, err, "CallLLM should not return error")
+	require.Len(t, answers, 1, "Should return exactly one answer")
+
+	answer := answers[0]
+	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+	require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem")
+
+	t.Logf("Agent Answer: %s", answer.Answer)
+	t.Logf("Short Answer: %v", answer.ShortAnswer)
+	if len(answer.Files) > 0 {
+		t.Logf("Files: %v", answer.Files)
+	}
+}
+
+// TestAgenticClient_FilesystemAccess_Anthropic tests the same scenario using Anthropic Claude
+func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) {
+	if !hasAnthropicAPIKey() {
+		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
+	}
+
+	client := NewAgenticClient()
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+	require.NoError(t, err)
+
+	opts := &AgenticCallOptions{
+		Provider: "anthropic",
+		Model:    "claude-sonnet-4-5",
+		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
+	}
+
+	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
+
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	logme.DebugFln("Agent answers:")
+	prettyprint.Print(answers)
+
+	require.NoError(t, err, "CallLLM should not return error")
+	require.Len(t, answers, 1, "Should return exactly one answer")
+
+	answer := answers[0]
+	require.NotEmpty(t, answer.Answer, "Answer field should be populated")
+	require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile")
+
+	t.Logf("Agent Answer: %s", answer.Answer)
+	t.Logf("Short Answer: %v", answer.ShortAnswer)
+	if len(answer.Files) > 0 {
+		t.Logf("Files: %v", answer.Files)
+	}
+}
diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go
new file mode 100644
index 00000000..bf6f5050
--- /dev/null
+++ b/pkg/llmclient/agentic_debug.go
@@ -0,0 +1,53 @@
+package llmclient
+
+import (
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+)
+
+var (
+	debugLogger *log.Logger
+	debugOnce   sync.Once
+	debugPath   string
+)
+
+func initDebugLogger() {
+	debugOnce.Do(func() {
+		debugVal := os.Getenv("DEBUG")
+		if debugVal != "1" && debugVal != "true" {
+			debugLogger = log.New(io.Discard, "", 0)
+			return
+		}
+
+		timestamp := time.Now().Format("20060102-150405")
+		debugPath = filepath.Join(os.TempDir(), fmt.Sprintf("validator-agentic-%s.log", timestamp))
+
+		f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "AgenticClient: failed to create debug log file: %v\n", err)
+			debugLogger = log.New(io.Discard, "", 0)
+			return
+		}
+
+		debugLogger = log.New(f, "", log.Ltime|log.Lmicroseconds)
+	})
+}
+
+// debugLog writes a formatted message to the debug log file if DEBUG=1 or DEBUG=true
+func debugLog(format string, args ...interface{}) {
+	initDebugLogger()
+	debugLogger.Printf(format, args...)
+}
+
+// printDebugLogPath prints the debug log file path to stderr if debug is enabled
+func printDebugLogPath() {
+	initDebugLogger()
+	if debugPath != "" {
+		fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", debugPath)
+	}
+}
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
new file mode 100644
index 00000000..84bc9c33
--- /dev/null
+++ b/pkg/llmclient/agentic_tools.go
@@ -0,0 +1,339 @@
+package llmclient
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+
+	"github.com/tmc/langchaingo/llms"
+)
+
+const maxFileSize = 500 * 1024 // 500KB
+
+// Git subcommands that are allowed
+var allowedGitSubcommands = map[string]bool{
+	"log":       true,
+	"show":      true,
+	"diff":      true,
+	"status":    true,
+	"ls-files":  true,
+	"blame":     true,
+	"rev-parse": true,
+	"cat-file":  true,
+	"checkout":  true,
+	"fetch":     true,
+	"pull":      true,
+	"branch":    true,
+	"tag":       true,
+}
+
+// Git flags that could execute arbitrary commands
+var blockedGitFlags = []string{
+	"--exec",
+	"--ext-diff",
+	"--upload-pack",
+	"--receive-pack",
+	"-c",
+	"--config",
+	"--hook",
+	"--run",
+}
+
+// buildAgenticTools returns the list of tools available to the agent
+func buildAgenticTools() []llms.Tool {
+	return []llms.Tool{
+		{
+			Type: "function",
+			Function: &llms.FunctionDefinition{
+				Name:        "read_file",
+				Description: "Read the contents of a file at the given path",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"path": map[string]interface{}{
+							"type":        "string",
+							"description": "The relative path to the file to read",
+						},
+					},
+					"required": []string{"path"},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: &llms.FunctionDefinition{
+				Name:        "list_directory",
+				Description: "List files and directories at the given path",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"path": map[string]interface{}{
+							"type":        "string",
+							"description": "The relative path to the directory to list (use '.' for root)",
+						},
+					},
+					"required": []string{"path"},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: &llms.FunctionDefinition{
+				Name:        "grep",
+				Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"pattern": map[string]interface{}{
+							"type":        "string",
+							"description": "The pattern to search for",
+						},
+						"path": map[string]interface{}{
+							"type":        "string",
+							"description": "Optional: directory or file to search in (defaults to '.')",
+						},
+					},
+					"required": []string{"pattern"},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: &llms.FunctionDefinition{
+				Name:        "git",
+				Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"args": map[string]interface{}{
+							"type":        "string",
+							"description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')",
+						},
+					},
+					"required": []string{"args"},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: &llms.FunctionDefinition{
+				Name:        "submit_answer",
+				Description: "Submit your final answer to the question. Use this when you have gathered enough information.",
+				Parameters: map[string]interface{}{
+					"type": "object",
+					"properties": map[string]interface{}{
+						"answer": map[string]interface{}{
+							"type":        "string",
+							"description": "Your detailed answer explaining your findings",
+						},
+						"short_answer": map[string]interface{}{
+							"type":        "boolean",
+							"description": "A boolean true/false answer to yes/no questions",
+						},
+						"files": map[string]interface{}{
+							"type":        "array",
+							"items":       map[string]interface{}{"type": "string"},
+							"description": "List of relevant files (optional)",
+						},
+						"code_snippet": map[string]interface{}{
+							"type":        "string",
+							"description": "A relevant code snippet (optional)",
+						},
+					},
+					"required": []string{"answer", "short_answer"},
+				},
+			},
+		},
+	}
+}
+
+// toolExecutor handles execution of tools within a repository path
+type toolExecutor struct {
+	repoPath string
+}
+
+// newToolExecutor creates a new tool executor for the given repository path
+func newToolExecutor(repoPath string) *toolExecutor {
+	return &toolExecutor{repoPath: repoPath}
+}
+
+// execute runs a tool and returns the result as a string
+func (e *toolExecutor) execute(toolName, argsJSON string) string {
+	var args map[string]interface{}
+	if err := json.Unmarshal([]byte(argsJSON), &args); err != nil {
+		return fmt.Sprintf("Error parsing arguments: %v", err)
+	}
+
+	switch toolName {
+	case "read_file":
+		return e.readFile(args)
+	case "list_directory":
+		return e.listDirectory(args)
+	case "grep":
+		return e.grep(args)
+	case "git":
+		return e.git(args)
+	default:
+		return fmt.Sprintf("Unknown tool: %s", toolName)
+	}
+}
+
+// validatePath checks that the resolved path is within the repository directory.
+// Returns the validated absolute path or an error string.
+func (e *toolExecutor) validatePath(path string) (string, string) {
+	fullPath := filepath.Join(e.repoPath, path)
+	absPath, err := filepath.Abs(fullPath)
+	if err != nil {
+		return "", fmt.Sprintf("Error resolving path: %v", err)
+	}
+
+	absRepo, err := filepath.Abs(e.repoPath)
+	if err != nil {
+		return "", fmt.Sprintf("Error resolving repo path: %v", err)
+	}
+
+	if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) {
+		return "", "Error: path is outside the repository"
+	}
+
+	return absPath, ""
+}
+
+func (e *toolExecutor) readFile(args map[string]interface{}) string {
+	path, ok := args["path"].(string)
+	if !ok {
+		return "Error: path is required"
+	}
+
+	fullPath, errMsg := e.validatePath(path)
+	if errMsg != "" {
+		return errMsg
+	}
+
+	debugLog("AgenticClient: read_file %s", fullPath)
+
+	info, err := os.Stat(fullPath)
+	if err != nil {
+		return fmt.Sprintf("Error reading file: %v", err)
+	}
+	if info.Size() > maxFileSize {
+		return fmt.Sprintf("Error: file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content.", info.Size(), maxFileSize)
+	}
+
+	content, err := os.ReadFile(fullPath)
+	if err != nil {
+		return fmt.Sprintf("Error reading file: %v", err)
+	}
+
+	return string(content)
+}
+
+func (e *toolExecutor) listDirectory(args map[string]interface{}) string {
+	path, ok := args["path"].(string)
+	if !ok {
+		path = "."
+	}
+
+	fullPath, errMsg := e.validatePath(path)
+	if errMsg != "" {
+		return errMsg
+	}
+
+	debugLog("AgenticClient: list_directory %s", fullPath)
+
+	entries, err := os.ReadDir(fullPath)
+	if err != nil {
+		return fmt.Sprintf("Error listing directory: %v", err)
+	}
+
+	var result strings.Builder
+	for _, entry := range entries {
+		if entry.IsDir() {
+			result.WriteString(entry.Name() + "/\n")
+		} else {
+			result.WriteString(entry.Name() + "\n")
+		}
+	}
+
+	return result.String()
+}
+
+func (e *toolExecutor) grep(args map[string]interface{}) string {
+	pattern, ok := args["pattern"].(string)
+	if !ok || pattern == "" {
+		return "Error: pattern is required"
+	}
+
+	path := "."
+	if p, ok := args["path"].(string); ok && p != "" {
+		path = p
+	}
+
+	fullPath, errMsg := e.validatePath(path)
+	if errMsg != "" {
+		return errMsg
+	}
+
+	debugLog("AgenticClient: grep '%s' in %s", pattern, fullPath)
+
+	// Use -- to prevent pattern from being interpreted as flags
+	cmd := exec.Command("grep", "-rn", "--", pattern, fullPath)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		// grep returns exit code 1 if no matches found
+		if len(output) == 0 {
+			return "No matches found"
+		}
+	}
+
+	return string(output)
+}
+
+func (e *toolExecutor) git(args map[string]interface{}) string {
+	argsStr, ok := args["args"].(string)
+	if !ok || argsStr == "" {
+		return "Error: git args are required"
+	}
+
+	parts := strings.Fields(argsStr)
+	if len(parts) == 0 {
+		return "Error: empty git command"
+	}
+
+	// Strip leading "git" if the LLM included it (e.g. "git diff" instead of "diff")
+	if parts[0] == "git" {
+		parts = parts[1:]
+		if len(parts) == 0 {
+			return "Error: empty git command"
+		}
+	}
+
+	subcommand := parts[0]
+
+	if !allowedGitSubcommands[subcommand] {
+		return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag", subcommand)
+	}
+
+	// Check for flags that could execute arbitrary commands
+	for _, arg := range parts[1:] {
+		for _, blocked := range blockedGitFlags {
+			if arg == blocked || strings.HasPrefix(arg, blocked+"=") {
+				return fmt.Sprintf("Error: git flag '%s' is not allowed for security reasons", arg)
+			}
+		}
+	}
+
+	debugLog("AgenticClient: git %s", argsStr)
+
+	cmd := exec.Command("git", append([]string{"--no-pager"}, parts...)...)
+	cmd.Dir = e.repoPath
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Sprintf("Error executing git: %v\nOutput: %s", err, string(output))
+	}
+
+	return string(output)
+}
diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go
new file mode 100644
index 00000000..d5ad9499
--- /dev/null
+++ b/pkg/llmclient/agentic_tools_test.go
@@ -0,0 +1,217 @@
+package llmclient
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func setupTestRepo(t *testing.T) string {
+	t.Helper()
+	dir := t.TempDir()
+
+	// Create a nested directory structure
+	require.NoError(t, os.MkdirAll(filepath.Join(dir, "src", "nested"), 0755))
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main\n"), 0644))
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "src", "lib.go"), []byte("package src\nfunc Hello() {}\n"), 0644))
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "src", "nested", "deep.go"), []byte("package nested\n"), 0644))
+
+	return dir
+}
+
+func TestValidatePath_WithinRepo(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	tests := []struct {
+		name    string
+		path    string
+		wantErr bool
+	}{
+		{name: "root dot", path: ".", wantErr: false},
+		{name: "file at root", path: "main.go", wantErr: false},
+		{name: "nested file", path: "src/lib.go", wantErr: false},
+		{name: "deeply nested", path: "src/nested/deep.go", wantErr: false},
+		{name: "directory", path: "src", wantErr: false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			absPath, errMsg := executor.validatePath(tt.path)
+			require.Empty(t, errMsg)
+			require.NotEmpty(t, absPath)
+		})
+	}
+}
+
+func TestValidatePath_TraversalBlocked(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	tests := []struct {
+		name string
+		path string
+	}{
+		{name: "parent directory", path: ".."},
+		{name: "parent with path", path: "../etc/passwd"},
+		{name: "double parent", path: "../../etc/passwd"},
+		{name: "dot dot in middle", path: "src/../../etc/passwd"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			absPath, errMsg := executor.validatePath(tt.path)
+			require.Empty(t, absPath)
+			require.Contains(t, errMsg, "outside the repository")
+		})
+	}
+}
+
+func TestReadFile_PathTraversal(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	result := executor.readFile(map[string]interface{}{
+		"path": "../../etc/passwd",
+	})
+	require.Contains(t, result, "outside the repository")
+}
+
+func TestReadFile_MaxFileSize(t *testing.T) {
+	dir := t.TempDir()
+	executor := newToolExecutor(dir)
+
+	// Create a file that exceeds the max size
+	bigContent := strings.Repeat("x", maxFileSize+1)
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(bigContent), 0644))
+
+	result := executor.readFile(map[string]interface{}{
+		"path": "big.txt",
+	})
+	require.Contains(t, result, "file is too large")
+
+	// A file under the limit should work fine
+	smallContent := "hello world"
+	require.NoError(t, os.WriteFile(filepath.Join(dir, "small.txt"), []byte(smallContent), 0644))
+
+	result = executor.readFile(map[string]interface{}{
+		"path": "small.txt",
+	})
+	require.Equal(t, smallContent, result)
+}
+
+func TestListDirectory_PathTraversal(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	result := executor.listDirectory(map[string]interface{}{
+		"path": "../",
+	})
+	require.Contains(t, result, "outside the repository")
+}
+
+func TestGrep_PathTraversal(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	result := executor.grep(map[string]interface{}{
+		"pattern": "root",
+		"path":    "../../etc",
+	})
+	require.Contains(t, result, "outside the repository")
+}
+
+func TestGrep_FlagInjection(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	// Pattern starting with - should not be interpreted as a flag
+	// thanks to the -- separator
+	result := executor.grep(map[string]interface{}{
+		"pattern": "-rn",
+		"path":    ".",
+	})
+	// Should not error with "invalid option", should just return no matches
+	require.NotContains(t, result, "invalid option")
+}
+
+func TestGit_BlockedFlags(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	tests := []struct {
+		name string
+		args string
+	}{
+		{name: "exec flag", args: "log --exec=malicious"},
+		{name: "ext-diff flag", args: "diff --ext-diff"},
+		{name: "upload-pack", args: "fetch --upload-pack=evil"},
+		{name: "config flag", args: "log -c core.pager=evil"},
+		{name: "config equals", args: "log --config=evil"},
+		{name: "git prefix exec flag", args: "git log --exec=malicious"},
+		{name: "git prefix ext-diff", args: "git diff --ext-diff"},
+		{name: "git prefix upload-pack", args: "git fetch --upload-pack=evil"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := executor.git(map[string]interface{}{
+				"args": tt.args,
+			})
+			require.Contains(t, result, "not allowed for security reasons")
+		})
+	}
+}
+
+func TestGit_StripGitPrefix(t *testing.T) {
+	dir := t.TempDir()
+	executor := newToolExecutor(dir)
+
+	// "git status" should be treated the same as "status"
+	result := executor.git(map[string]interface{}{
+		"args": "git status",
+	})
+	require.NotContains(t, result, "is not allowed")
+
+	// "git push" should still be blocked
+	result = executor.git(map[string]interface{}{
+		"args": "git push",
+	})
+	require.Contains(t, result, "is not allowed")
+
+	// bare "git" with nothing after should error
+	result = executor.git(map[string]interface{}{
+		"args": "git",
+	})
+	require.Contains(t, result, "empty git command")
+}
+
+func TestGit_BlockedSubcommand(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	result := executor.git(map[string]interface{}{
+		"args": "push origin main",
+	})
+	require.Contains(t, result, "not allowed")
+}
+
+func TestGit_AllowedSubcommands(t *testing.T) {
+	for subcmd := range allowedGitSubcommands {
+		t.Run(subcmd, func(t *testing.T) {
+			// Just verify the subcommand is not rejected by our allowlist check.
+			// The git command itself may fail (not a real repo) but the error
+			// should come from git, not from our validation.
+			dir := t.TempDir()
+			executor := newToolExecutor(dir)
+
+			result := executor.git(map[string]interface{}{
+				"args": subcmd,
+			})
+			require.NotContains(t, result, "is not allowed")
+		})
+	}
+}
diff --git a/pkg/llmclient/testdata/fs_access/config.go b/pkg/llmclient/testdata/fs_access/config.go
new file mode 100644
index 00000000..5389b82a
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/config.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+type Config struct {
+	DatabaseURL string `json:"database_url"`
+	Port        int    `json:"port"`
+	LogLevel    string `json:"log_level"`
+	PluginDir   string `json:"plugin_dir"`
+}
+
+func LoadConfig(path string) (*Config, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("reading config %s: %w", path, err)
+	}
+
+	var cfg Config
+	if err := json.Unmarshal(data, &cfg); err != nil {
+		return nil, fmt.Errorf("parsing config: %w", err)
+	}
+
+	return &cfg, nil
+}
+
+func (c *Config) PluginPath(name string) string {
+	return filepath.Join(c.PluginDir, name)
+}
+
+func DefaultConfig() *Config {
+	return &Config{
+		DatabaseURL: "postgres://localhost:5432/plugins",
+		Port:        8080,
+		LogLevel:    "info",
+		PluginDir:   "/var/lib/plugins",
+	}
+}
diff --git a/pkg/llmclient/testdata/fs_access/logger.go b/pkg/llmclient/testdata/fs_access/logger.go
new file mode 100644
index 00000000..1f14c85d
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/logger.go
@@ -0,0 +1,40 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"time"
+)
+
+type Logger struct {
+	file   *os.File
+	prefix string
+}
+
+func NewLogger(path, prefix string) (*Logger, error) {
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		return nil, fmt.Errorf("opening log file: %w", err)
+	}
+	return &Logger{file: f, prefix: prefix}, nil
+}
+
+func (l *Logger) Info(msg string) {
+	l.write("INFO", msg)
+}
+
+func (l *Logger) Error(msg string) {
+	l.write("ERROR", msg)
+}
+
+func (l *Logger) write(level, msg string) {
+	ts := time.Now().Format(time.RFC3339)
+	fmt.Fprintf(l.file, "%s [%s] %s: %s\n", ts, level, l.prefix, msg)
+}
+
+func (l *Logger) Close() error {
+	if l.file != nil {
+		return l.file.Close()
+	}
+	return nil
+}
diff --git a/pkg/llmclient/testdata/fs_access/main.go b/pkg/llmclient/testdata/fs_access/main.go
new file mode 100644
index 00000000..39c7f249
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/main.go
@@ -0,0 +1,15 @@
+package main
+
+import (
+	"fmt"
+	"os"
+)
+
+func main() {
+	data, err := os.ReadFile("config.txt")
+	if err != nil {
+		fmt.Println("Error reading file:", err)
+		return
+	}
+	fmt.Println(string(data))
+}
diff --git a/pkg/llmclient/testdata/fs_access/scanner.go b/pkg/llmclient/testdata/fs_access/scanner.go
new file mode 100644
index 00000000..2a9403cb
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/scanner.go
@@ -0,0 +1,51 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+type PluginScanner struct {
+	baseDir    string
+	extensions []string
+}
+
+func NewPluginScanner(baseDir string) *PluginScanner {
+	return &PluginScanner{
+		baseDir:    baseDir,
+		extensions: []string{".so", ".dll", ".dylib"},
+	}
+}
+
+func (s *PluginScanner) Scan() ([]string, error) {
+	var plugins []string
+
+	entries, err := os.ReadDir(s.baseDir)
+	if err != nil {
+		return nil, fmt.Errorf("scanning plugin directory %s: %w", s.baseDir, err)
+	}
+
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		for _, ext := range s.extensions {
+			if strings.HasSuffix(entry.Name(), ext) {
+				plugins = append(plugins, filepath.Join(s.baseDir, entry.Name()))
+			}
+		}
+	}
+
+	return plugins, nil
+}
+
+func (s *PluginScanner) Exists(name string) bool {
+	path := filepath.Join(s.baseDir, name)
+	info, err := os.Stat(path)
+	if err != nil {
+		return false
+	}
+	return !info.IsDir()
+}
diff --git a/pkg/llmclient/testdata/fs_access/storage/cache.go b/pkg/llmclient/testdata/fs_access/storage/cache.go
new file mode 100644
index 00000000..72c1bcf8
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/storage/cache.go
@@ -0,0 +1,49 @@
+package storage
+
+import (
+	"sync"
+	"time"
+)
+
+type CacheEntry struct {
+	Value     interface{}
+	ExpiresAt time.Time
+}
+
+type MemoryCache struct {
+	mu      sync.RWMutex
+	entries map[string]CacheEntry
+}
+
+func NewMemoryCache() *MemoryCache {
+	return &MemoryCache{
+		entries: make(map[string]CacheEntry),
+	}
+}
+
+func (c *MemoryCache) Get(key string) (interface{}, bool) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	entry, ok := c.entries[key]
+	if !ok || time.Now().After(entry.ExpiresAt) {
+		return nil, false
+	}
+	return entry.Value, true
+}
+
+func (c *MemoryCache) Set(key string, value interface{}, ttl time.Duration) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.entries[key] = CacheEntry{
+		Value:     value,
+		ExpiresAt: time.Now().Add(ttl),
+	}
+}
+
+func (c *MemoryCache) Evict(key string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	delete(c.entries, key)
+}
diff --git a/pkg/llmclient/testdata/fs_access/storage/disk.go b/pkg/llmclient/testdata/fs_access/storage/disk.go
new file mode 100644
index 00000000..5cd2748f
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/storage/disk.go
@@ -0,0 +1,66 @@
+package storage
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+type DiskStore struct {
+	baseDir string
+}
+
+func NewDiskStore(baseDir string) (*DiskStore, error) {
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return nil, fmt.Errorf("creating storage directory: %w", err)
+	}
+	return &DiskStore{baseDir: baseDir}, nil
+}
+
+func (d *DiskStore) Save(key string, value interface{}) error {
+	data, err := json.MarshalIndent(value, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshaling data: %w", err)
+	}
+
+	path := filepath.Join(d.baseDir, key+".json")
+	if err := os.WriteFile(path, data, 0644); err != nil {
+		return fmt.Errorf("writing file %s: %w", path, err)
+	}
+
+	return nil
+}
+
+func (d *DiskStore) Load(key string, dest interface{}) error {
+	path := filepath.Join(d.baseDir, key+".json")
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return fmt.Errorf("reading file %s: %w", path, err)
+	}
+
+	return json.Unmarshal(data, dest)
+}
+
+func (d *DiskStore) Delete(key string) error {
+	path := filepath.Join(d.baseDir, key+".json")
+	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("removing file %s: %w", path, err)
+	}
+	return nil
+}
+
+func (d *DiskStore) List() ([]string, error) {
+	entries, err := os.ReadDir(d.baseDir)
+	if err != nil {
+		return nil, fmt.Errorf("listing storage: %w", err)
+	}
+
+	var keys []string
+	for _, e := range entries {
+		if !e.IsDir() && filepath.Ext(e.Name()) == ".json" {
+			keys = append(keys, e.Name()[:len(e.Name())-5])
+		}
+	}
+	return keys, nil
+}
diff --git a/pkg/llmclient/testdata/fs_access/validator.go b/pkg/llmclient/testdata/fs_access/validator.go
new file mode 100644
index 00000000..6b99c23c
--- /dev/null
+++ b/pkg/llmclient/testdata/fs_access/validator.go
@@ -0,0 +1,66 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+)
+
+type ValidationResult struct {
+	Valid    bool     `json:"valid"`
+	Errors  []string `json:"errors"`
+	Warnings []string `json:"warnings"`
+}
+
+type Validator struct {
+	rules []ValidationRule
+}
+
+type ValidationRule struct {
+	Name    string
+	Check   func(string) error
+}
+
+func NewValidator() *Validator {
+	return &Validator{
+		rules: []ValidationRule{
+			{Name: "not_empty", Check: checkNotEmpty},
+			{Name: "valid_version", Check: checkVersion},
+			{Name: "no_spaces", Check: checkNoSpaces},
+		},
+	}
+}
+
+func (v *Validator) Validate(pluginName string) ValidationResult {
+	result := ValidationResult{Valid: true}
+
+	for _, rule := range v.rules {
+		if err := rule.Check(pluginName); err != nil {
+			result.Valid = false
+			result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", rule.Name, err))
+		}
+	}
+
+	return result
+}
+
+func checkNotEmpty(name string) error {
+	if strings.TrimSpace(name) == "" {
+		return fmt.Errorf("plugin name cannot be empty")
+	}
+	return nil
+}
+
+func checkVersion(name string) error {
+	parts := strings.Split(name, "-")
+	if len(parts) < 2 {
+		return fmt.Errorf("plugin name must include version suffix (e.g., plugin-1.0.0)")
+	}
+	return nil
+}
+
+func checkNoSpaces(name string) error {
+	if strings.Contains(name, " ") {
+		return fmt.Errorf("plugin name cannot contain spaces")
+	}
+	return nil
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/handlers/data.go b/pkg/llmclient/testdata/no_fs_access/handlers/data.go
new file mode 100644
index 00000000..3468bd1d
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/handlers/data.go
@@ -0,0 +1,49 @@
+package handlers
+
+import (
+	"encoding/json"
+	"net/http"
+	"strings"
+)
+
+type DataRequest struct {
+	Query  string   `json:"query"`
+	Fields []string `json:"fields"`
+	Limit  int      `json:"limit"`
+}
+
+type DataResponse struct {
+	Results []map[string]interface{} `json:"results"`
+	Total   int                      `json:"total"`
+}
+
+func DataHandler(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	var req DataRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, "invalid request body", http.StatusBadRequest)
+		return
+	}
+
+	if req.Limit <= 0 || req.Limit > 100 {
+		req.Limit = 10
+	}
+
+	query := strings.TrimSpace(req.Query)
+	if query == "" {
+		http.Error(w, "query is required", http.StatusBadRequest)
+		return
+	}
+
+	resp := DataResponse{
+		Results: []map[string]interface{}{},
+		Total:   0,
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(resp)
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/handlers/health.go b/pkg/llmclient/testdata/no_fs_access/handlers/health.go
new file mode 100644
index 00000000..7e83b940
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/handlers/health.go
@@ -0,0 +1,25 @@
+package handlers
+
+import (
+	"encoding/json"
+	"net/http"
+	"runtime"
+)
+
+type HealthResponse struct {
+	Status  string `json:"status"`
+	Version string `json:"version"`
+	GoVer   string `json:"go_version"`
+}
+
+func HealthHandler(version string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		resp := HealthResponse{
+			Status:  "ok",
+			Version: version,
+			GoVer:   runtime.Version(),
+		}
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(resp)
+	}
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/main.go b/pkg/llmclient/testdata/no_fs_access/main.go
new file mode 100644
index 00000000..f7b60bde
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/main.go
@@ -0,0 +1,7 @@
+package main
+
+import "fmt"
+
+func main() {
+	fmt.Println("Hello, world!")
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/models.go b/pkg/llmclient/testdata/no_fs_access/models.go
new file mode 100644
index 00000000..6ec92f79
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/models.go
@@ -0,0 +1,55 @@
+package main
+
+import (
+	"fmt"
+	"time"
+)
+
+type User struct {
+	ID        string    `json:"id"`
+	Name      string    `json:"name"`
+	Email     string    `json:"email"`
+	CreatedAt time.Time `json:"created_at"`
+	Tags      []string  `json:"tags"`
+}
+
+type Plugin struct {
+	ID          string            `json:"id"`
+	Name        string            `json:"name"`
+	Version     string            `json:"version"`
+	Description string            `json:"description"`
+	Author      User              `json:"author"`
+	Metadata    map[string]string `json:"metadata"`
+}
+
+func (u User) String() string {
+	return fmt.Sprintf("User{id=%s, name=%s, email=%s}", u.ID, u.Name, u.Email)
+}
+
+func (p Plugin) String() string {
+	return fmt.Sprintf("Plugin{id=%s, name=%s, version=%s}", p.ID, p.Name, p.Version)
+}
+
+func (p Plugin) HasMetadata(key string) bool {
+	_, ok := p.Metadata[key]
+	return ok
+}
+
+func NewUser(name, email string) User {
+	return User{
+		ID:        generateID(),
+		Name:      name,
+		Email:     email,
+		CreatedAt: time.Now(),
+	}
+}
+
+func NewPlugin(name, version string, author User) Plugin {
+	return Plugin{
+		ID:       generateID(),
+		Name:     name,
+		Version:  version,
+		Author:   author,
+		Metadata: make(map[string]string),
+	}
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/server.go b/pkg/llmclient/testdata/no_fs_access/server.go
new file mode 100644
index 00000000..513ac051
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/server.go
@@ -0,0 +1,69 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"sync"
+	"time"
+)
+
+type Server struct {
+	mu       sync.RWMutex
+	cache    map[string]CacheEntry
+	handlers map[string]http.HandlerFunc
+}
+
+type CacheEntry struct {
+	Value     interface{}
+	ExpiresAt time.Time
+}
+
+func NewServer() *Server {
+	return &Server{
+		cache:    make(map[string]CacheEntry),
+		handlers: make(map[string]http.HandlerFunc),
+	}
+}
+
+func (s *Server) Get(key string) (interface{}, bool) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	entry, ok := s.cache[key]
+	if !ok || time.Now().After(entry.ExpiresAt) {
+		return nil, false
+	}
+	return entry.Value, true
+}
+
+func (s *Server) Set(key string, value interface{}, ttl time.Duration) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.cache[key] = CacheEntry{
+		Value:     value,
+		ExpiresAt: time.Now().Add(ttl),
+	}
+}
+
+func (s *Server) HandleHealth(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
+}
+
+func (s *Server) HandleData(w http.ResponseWriter, r *http.Request) {
+	key := r.URL.Query().Get("key")
+	if key == "" {
+		http.Error(w, "missing key parameter", http.StatusBadRequest)
+		return
+	}
+
+	if val, ok := s.Get(key); ok {
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(val)
+		return
+	}
+
+	http.Error(w, fmt.Sprintf("key %q not found", key), http.StatusNotFound)
+}
diff --git a/pkg/llmclient/testdata/no_fs_access/utils.go b/pkg/llmclient/testdata/no_fs_access/utils.go
new file mode 100644
index 00000000..530205d5
--- /dev/null
+++ b/pkg/llmclient/testdata/no_fs_access/utils.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"math/rand"
+	"strings"
+	"time"
+)
+
+func hash(input string) string {
+	h := sha256.New()
+	h.Write([]byte(input))
+	return hex.EncodeToString(h.Sum(nil))
+}
+
+func generateID() string {
+	src := rand.NewSource(time.Now().UnixNano())
+	r := rand.New(src)
+	return fmt.Sprintf("%08x", r.Uint32())
+}
+
+func truncate(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	return s[:maxLen] + "..."
+}
+
+func contains(slice []string, item string) bool {
+	for _, s := range slice {
+		if s == item {
+			return true
+		}
+	}
+	return false
+}
+
+func joinWithComma(items []string) string {
+	return strings.Join(items, ", ")
+}
+
+func repeatString(s string, count int) string {
+	var b strings.Builder
+	for i := 0; i < count; i++ {
+		b.WriteString(s)
+	}
+	return b.String()
+}

From f8c55003dca92c7d2cf4e15c0450d3917bf5bfa7 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:15:32 +0100
Subject: [PATCH 2/7] pass options to constructor

---
 pkg/llmclient/agentic_client.go               | 39 ++++++++------
 .../agentic_client_integration_test.go        | 52 ++++++++++---------
 2 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index a9ca0963..241a25bf 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -36,38 +36,47 @@ type AgenticCallOptions struct {
 
 // AgenticClient is an interface for agentic LLM interactions
 type AgenticClient interface {
-	CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error)
+	CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error)
 }
 
 // agenticClientImpl implements AgenticClient
-type agenticClientImpl struct{}
-
-// NewAgenticClient creates a new AgenticClient
-func NewAgenticClient() AgenticClient {
-	return &agenticClientImpl{}
+type agenticClientImpl struct {
+	apiKey   string
+	model    string
+	provider string
 }
 
-// CallLLM executes an agentic loop with tools to answer questions about code.
-// The prompt may contain multiple questions, in which case the agent will call
-// submit_answer multiple times. All answers are collected and returned.
-func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) {
+// NewAgenticClient creates a new AgenticClient with the given options
+func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) {
 	if opts == nil {
 		return nil, fmt.Errorf("options are required")
 	}
-
 	if opts.APIKey == "" {
 		return nil, fmt.Errorf("API key is required")
 	}
-
 	if opts.Model == "" {
 		return nil, fmt.Errorf("model is required")
 	}
-
 	if opts.Provider == "" {
 		return nil, fmt.Errorf("provider is required")
 	}
+	return &agenticClientImpl{
+		apiKey:   opts.APIKey,
+		model:    opts.Model,
+		provider: opts.Provider,
+	}, nil
+}
 
-	// Initialize LLM based on provider
+// CallLLM executes an agentic loop with tools to answer questions about code.
+// The prompt may contain multiple questions, in which case the agent will call
+// submit_answer multiple times. All answers are collected and returned.
+func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) {
+	// Initialize LLM based on provider using the client's configured settings
+	opts := &AgenticCallOptions{
+		APIKey:   c.apiKey,
+		Model:    c.model,
+		Provider: c.provider,
+	}
 	llm, err := initLLM(ctx, opts)
 	if err != nil {
 		return nil, fmt.Errorf("failed to initialize LLM: %w", err)
@@ -114,7 +123,7 @@ When ready, use submit_answer. For multiple questions, call submit_answer once p
 	printDebugLogPath()
 	debugLog("\n\n\n")
 	debugLog("################################################################")
-	debugLog("# NEW CallLLM - provider=%s model=%s", opts.Provider, opts.Model)
+	debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model)
 	debugLog("# repo=%s", repositoryPath)
 	debugLog("# prompt=%s", truncateString(prompt, 200))
 	debugLog("################################################################")
diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go
index 3c8035ac..549aa77a 100644
--- a/pkg/llmclient/agentic_client_integration_test.go
+++ b/pkg/llmclient/agentic_client_integration_test.go
@@ -26,20 +26,21 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) {
 		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
 	}
 
-	client := NewAgenticClient()
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
-
 	opts := &AgenticCallOptions{
 		Provider: "google",
 		Model:    "gemini-2.0-flash",
 		APIKey:   os.Getenv("GEMINI_API_KEY"),
 	}
 
+	client, err := NewAgenticClient(opts)
+	require.NoError(t, err)
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+	require.NoError(t, err)
+
 	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
 	logme.DebugFln("Agent answers:")
 	prettyprint.Print(answers)
 
@@ -64,20 +65,21 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) {
 		t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test")
 	}
 
-	client := NewAgenticClient()
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
-
 	opts := &AgenticCallOptions{
 		Provider: "google",
 		Model:    "gemini-2.0-flash",
 		APIKey:   os.Getenv("GEMINI_API_KEY"),
 	}
 
+	client, err := NewAgenticClient(opts)
+	require.NoError(t, err)
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+	require.NoError(t, err)
+
 	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
 	logme.DebugFln("Agent answers:")
 	prettyprint.Print(answers)
 
@@ -101,20 +103,21 @@ func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) {
 		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
 	}
 
-	client := NewAgenticClient()
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
-	require.NoError(t, err)
-
 	opts := &AgenticCallOptions{
 		Provider: "anthropic",
 		Model:    "claude-sonnet-4-5",
 		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
 	}
 
+	client, err := NewAgenticClient(opts)
+	require.NoError(t, err)
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access"))
+	require.NoError(t, err)
+
 	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
 	logme.DebugFln("Agent answers:")
 	prettyprint.Print(answers)
 
@@ -138,20 +141,21 @@ func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) {
 		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test")
 	}
 
-	client := NewAgenticClient()
-
-	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
-	require.NoError(t, err)
-
 	opts := &AgenticCallOptions{
 		Provider: "anthropic",
 		Model:    "claude-sonnet-4-5",
 		APIKey:   os.Getenv("ANTHROPIC_API_KEY"),
 	}
 
+	client, err := NewAgenticClient(opts)
+	require.NoError(t, err)
+
+	testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access"))
+	require.NoError(t, err)
+
 	prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations."
 
-	answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts)
+	answers, err := client.CallLLM(context.Background(), prompt, testDataPath)
 	logme.DebugFln("Agent answers:")
 	prettyprint.Print(answers)
 

From e055013ac072d17d5548374769a9f96fdbdc51e3 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:19:15 +0100
Subject: [PATCH 3/7] protect symlinks

---
 pkg/llmclient/agentic_tools.go      | 16 ++++++++++++++++
 pkg/llmclient/agentic_tools_test.go | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index 84bc9c33..d412b2e7 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -195,6 +195,22 @@ func (e *toolExecutor) validatePath(path string) (string, string) {
 		return "", fmt.Sprintf("Error resolving repo path: %v", err)
 	}
 
+	// Check before resolving symlinks to catch plain traversal (e.g. "../..").
+	if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) {
+		return "", "Error: path is outside the repository"
+	}
+
+	// Resolve symlinks and check again to prevent a symlink inside the repo
+	// from pointing to a target outside the repo.
+	absPath, err = filepath.EvalSymlinks(absPath)
+	if err != nil {
+		return "", fmt.Sprintf("Error resolving path: %v", err)
+	}
+	absRepo, err = filepath.EvalSymlinks(absRepo)
+	if err != nil {
+		return "", fmt.Sprintf("Error resolving repo path: %v", err)
+	}
+
 	if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) {
 		return "", "Error: path is outside the repository"
 	}
diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go
index d5ad9499..a4fea3be 100644
--- a/pkg/llmclient/agentic_tools_test.go
+++ b/pkg/llmclient/agentic_tools_test.go
@@ -70,6 +70,24 @@ func TestValidatePath_TraversalBlocked(t *testing.T) {
 	}
 }
 
+func TestValidatePath_SymlinkEscape(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	// Create a file outside the repo that the symlink will point to.
+	externalDir := t.TempDir()
+	externalFile := filepath.Join(externalDir, "secret.txt")
+	require.NoError(t, os.WriteFile(externalFile, []byte("secret"), 0644))
+
+	// Place a symlink inside the repo pointing to the external file.
+	symlinkPath := filepath.Join(dir, "escape.txt")
+	require.NoError(t, os.Symlink(externalFile, symlinkPath))
+
+	absPath, errMsg := executor.validatePath("escape.txt")
+	require.Empty(t, absPath)
+	require.Contains(t, errMsg, "outside the repository")
+}
+
 func TestReadFile_PathTraversal(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)

From da909cddc8877b4f3596e8a99018d2271713ff7c Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:20:56 +0100
Subject: [PATCH 4/7] use a list from subcommands instead of hardcoded one

---
 pkg/llmclient/agentic_tools.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index d412b2e7..ecc15744 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -3,9 +3,11 @@ package llmclient
 import (
 	"encoding/json"
 	"fmt"
+	"maps"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"slices"
 	"strings"
 
 	"github.com/tmc/langchaingo/llms"
@@ -330,7 +332,8 @@ func (e *toolExecutor) git(args map[string]interface{}) string {
 	subcommand := parts[0]
 
 	if !allowedGitSubcommands[subcommand] {
-		return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag", subcommand)
+		allowed := strings.Join(slices.Sorted(maps.Keys(allowedGitSubcommands)), ", ")
+		return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed)
 	}
 
 	// Check for flags that could execute arbitrary commands

From 8ca35c7081151a22f77db8fd4d7b8054eb804bab Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:22:43 +0100
Subject: [PATCH 5/7] validate if it is a text file to return it

---
 pkg/llmclient/agentic_tools.go      |  5 +++++
 pkg/llmclient/agentic_tools_test.go | 27 +++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index ecc15744..5fcf7990 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -9,6 +9,7 @@ import (
 	"path/filepath"
 	"slices"
 	"strings"
+	"unicode/utf8"
 
 	"github.com/tmc/langchaingo/llms"
 )
@@ -246,6 +247,10 @@ func (e *toolExecutor) readFile(args map[string]interface{}) string {
 		return fmt.Sprintf("Error reading file: %v", err)
 	}
 
+	if !utf8.Valid(content) {
+		return "Error: file is not a text file"
+	}
+
 	return string(content)
 }
 
diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go
index a4fea3be..a435e0ca 100644
--- a/pkg/llmclient/agentic_tools_test.go
+++ b/pkg/llmclient/agentic_tools_test.go
@@ -98,6 +98,33 @@ func TestReadFile_PathTraversal(t *testing.T) {
 	require.Contains(t, result, "outside the repository")
 }
 
+func TestReadFile_BinaryFile(t *testing.T) {
+	dir := t.TempDir()
+	executor := newToolExecutor(dir)
+
+	tests := []struct {
+		name    string
+		content []byte
+	}{
+		{
+			name:    "binary file with invalid utf-8",
+			content: []byte{0xff, 0xfe, 0x00, 0x01, 0x02, 0x03},
+		},
+		{
+			name:    "png image magic bytes",
+			content: []byte{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			require.NoError(t, os.WriteFile(filepath.Join(dir, "binary.bin"), tt.content, 0644))
+			result := executor.readFile(map[string]interface{}{"path": "binary.bin"})
+			require.Contains(t, result, "not a text file")
+		})
+	}
+}
+
 func TestReadFile_MaxFileSize(t *testing.T) {
 	dir := t.TempDir()
 	executor := newToolExecutor(dir)

From 7afbcd4ec3193ab569550cf100f4f8be098d38e6 Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:32:58 +0100
Subject: [PATCH 6/7] return errors in tools

---
 pkg/llmclient/agentic_tools.go      | 109 ++++++++++++++++------------
 pkg/llmclient/agentic_tools_test.go | 105 +++++++++++++++++----------
 2 files changed, 128 insertions(+), 86 deletions(-)

diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index 5fcf7990..baf31569 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -2,6 +2,7 @@ package llmclient
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"maps"
 	"os"
@@ -170,106 +171,114 @@ func (e *toolExecutor) execute(toolName, argsJSON string) string {
 		return fmt.Sprintf("Error parsing arguments: %v", err)
 	}
 
+	var (
+		result string
+		err    error
+	)
 	switch toolName {
 	case "read_file":
-		return e.readFile(args)
+		result, err = e.readFile(args)
 	case "list_directory":
-		return e.listDirectory(args)
+		result, err = e.listDirectory(args)
 	case "grep":
-		return e.grep(args)
+		result, err = e.grep(args)
 	case "git":
-		return e.git(args)
+		result, err = e.git(args)
 	default:
 		return fmt.Sprintf("Unknown tool: %s", toolName)
 	}
+	if err != nil {
+		return fmt.Sprintf("Error: %v", err)
+	}
+	return result
 }
 
 // validatePath checks that the resolved path is within the repository directory.
-// Returns the validated absolute path or an error string.
-func (e *toolExecutor) validatePath(path string) (string, string) {
+// Returns the validated absolute path or an error.
+func (e *toolExecutor) validatePath(path string) (string, error) {
 	fullPath := filepath.Join(e.repoPath, path)
 	absPath, err := filepath.Abs(fullPath)
 	if err != nil {
-		return "", fmt.Sprintf("Error resolving path: %v", err)
+		return "", fmt.Errorf("resolving path: %w", err)
 	}
 
 	absRepo, err := filepath.Abs(e.repoPath)
 	if err != nil {
-		return "", fmt.Sprintf("Error resolving repo path: %v", err)
+		return "", fmt.Errorf("resolving repo path: %w", err)
 	}
 
 	// Check before resolving symlinks to catch plain traversal (e.g. "../..").
 	if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) {
-		return "", "Error: path is outside the repository"
+		return "", errors.New("path is outside the repository")
 	}
 
 	// Resolve symlinks and check again to prevent a symlink inside the repo
 	// from pointing to a target outside the repo.
 	absPath, err = filepath.EvalSymlinks(absPath)
 	if err != nil {
-		return "", fmt.Sprintf("Error resolving path: %v", err)
+		return "", fmt.Errorf("resolving path: %w", err)
 	}
 	absRepo, err = filepath.EvalSymlinks(absRepo)
 	if err != nil {
-		return "", fmt.Sprintf("Error resolving repo path: %v", err)
+		return "", fmt.Errorf("resolving repo path: %w", err)
 	}
 
 	if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) {
-		return "", "Error: path is outside the repository"
+		return "", errors.New("path is outside the repository")
 	}
 
-	return absPath, ""
+	return absPath, nil
 }
 
-func (e *toolExecutor) readFile(args map[string]interface{}) string {
+func (e *toolExecutor) readFile(args map[string]interface{}) (string, error) {
 	path, ok := args["path"].(string)
 	if !ok {
-		return "Error: path is required"
+		return "", errors.New("path is required")
 	}
 
-	fullPath, errMsg := e.validatePath(path)
-	if errMsg != "" {
-		return errMsg
+	fullPath, err := e.validatePath(path)
+	if err != nil {
+		return "", fmt.Errorf("validate path: %w", err)
 	}
 
 	debugLog("AgenticClient: read_file %s", fullPath)
 
 	info, err := os.Stat(fullPath)
 	if err != nil {
-		return fmt.Sprintf("Error reading file: %v", err)
+		return "", fmt.Errorf("reading file: %w", err)
 	}
 	if info.Size() > maxFileSize {
-		return fmt.Sprintf("Error: file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content.", info.Size(), maxFileSize)
+		return "", fmt.Errorf("file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content", info.Size(), maxFileSize)
 	}
 
 	content, err := os.ReadFile(fullPath)
 	if err != nil {
-		return fmt.Sprintf("Error reading file: %v", err)
+		return "", fmt.Errorf("reading file: %w", err)
 	}
 
 	if !utf8.Valid(content) {
-		return "Error: file is not a text file"
+		return "", errors.New("file is not a text file")
 	}
 
-	return string(content)
+	return string(content), nil
 }
 
-func (e *toolExecutor) listDirectory(args map[string]interface{}) string {
+func (e *toolExecutor) listDirectory(args map[string]interface{}) (string, error) {
 	path, ok := args["path"].(string)
 	if !ok {
 		path = "."
 	}
 
-	fullPath, errMsg := e.validatePath(path)
-	if errMsg != "" {
-		return errMsg
+	fullPath, err := e.validatePath(path)
+	if err != nil {
+		return "", fmt.Errorf("validate path: %w", err)
 	}
 
 	debugLog("AgenticClient: list_directory %s", fullPath)
 
 	entries, err := os.ReadDir(fullPath)
 	if err != nil {
-		return fmt.Sprintf("Error listing directory: %v", err)
+		return "", fmt.Errorf("listing directory: %w", err)
 	}
 
 	var result strings.Builder
@@ -281,13 +290,13 @@ func (e *toolExecutor) listDirectory(args map[string]interface{}) string {
 		}
 	}
 
-	return result.String()
+	return result.String(), nil
 }
 
-func (e *toolExecutor) grep(args map[string]interface{}) string {
+func (e *toolExecutor) grep(args map[string]interface{}) (string, error) {
 	pattern, ok := args["pattern"].(string)
 	if !ok || pattern == "" {
-		return "Error: pattern is required"
+		return "", errors.New("pattern is required")
 	}
 
 	path := "."
@@ -295,9 +304,9 @@ func (e *toolExecutor) grep(args map[string]interface{}) string {
 		path = p
 	}
 
-	fullPath, errMsg := e.validatePath(path)
-	if errMsg != "" {
-		return errMsg
+	fullPath, err := e.validatePath(path)
+	if err != nil {
+		return "", fmt.Errorf("validate path: %w", err)
 	}
 
 	debugLog("AgenticClient: grep '%s' in %s", pattern, fullPath)
@@ -306,31 +315,33 @@ func (e *toolExecutor) grep(args map[string]interface{}) string {
 	cmd := exec.Command("grep", "-rn", "--", pattern, fullPath)
 	output, err := cmd.CombinedOutput()
 	if err != nil {
-		// grep returns exit code 1 if no matches found
-		if len(output) == 0 {
-			return "No matches found"
+		var exitErr *exec.ExitError
+		if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
+			// Exit code 1 means no matches found, not a real error.
+			return "No matches found", nil
 		}
+		return "", fmt.Errorf("grep failed: %w\nOutput: %s", err, output)
 	}
 
-	return string(output)
+	return string(output), nil
 }
 
-func (e *toolExecutor) git(args map[string]interface{}) string {
+func (e *toolExecutor) git(args map[string]interface{}) (string, error) {
 	argsStr, ok := args["args"].(string)
 	if !ok || argsStr == "" {
-		return "Error: git args are required"
+		return "", errors.New("git args are required")
 	}
 
 	parts := strings.Fields(argsStr)
 	if len(parts) == 0 {
-		return "Error: empty git command"
+		return "", errors.New("empty git command")
 	}
 
 	// Strip leading "git" if the LLM included it (e.g. "git diff" instead of "diff")
 	if parts[0] == "git" {
 		parts = parts[1:]
 		if len(parts) == 0 {
-			return "Error: empty git command"
+			return "", errors.New("empty git command")
 		}
 	}
 
@@ -338,14 +349,18 @@ func (e *toolExecutor) git(args map[string]interface{}) string {
 
 	if !allowedGitSubcommands[subcommand] {
 		allowed := strings.Join(slices.Sorted(maps.Keys(allowedGitSubcommands)), ", ")
-		return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed)
+		return "", fmt.Errorf("git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed)
 	}
 
 	// Check for flags that could execute arbitrary commands
 	for _, arg := range parts[1:] {
 		for _, blocked := range blockedGitFlags {
-			if arg == blocked || strings.HasPrefix(arg, blocked+"=") {
-				return fmt.Sprintf("Error: git flag '%s' is not allowed for security reasons", arg)
+			exactOrLongValue := arg == blocked || strings.HasPrefix(arg, blocked+"=")
+			// Short flags (e.g. -c) also accept a concatenated value with no separator:
+			// git -ccore.pager=evil. Match any arg that starts with the flag token.
+			shortConcatenated := len(blocked) == 2 && blocked[0] == '-' && blocked[1] != '-' && strings.HasPrefix(arg, blocked)
+			if exactOrLongValue || shortConcatenated {
+				return "", fmt.Errorf("git flag '%s' is not allowed for security reasons", arg)
 			}
 		}
 	}
@@ -356,8 +371,8 @@ func (e *toolExecutor) git(args map[string]interface{}) string {
 	cmd.Dir = e.repoPath
 	output, err := cmd.CombinedOutput()
 	if err != nil {
-		return fmt.Sprintf("Error executing git: %v\nOutput: %s", err, string(output))
+		return "", fmt.Errorf("executing git: %w\nOutput: %s", err, string(output))
 	}
 
-	return string(output)
+	return string(output), nil
 }
diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go
index a435e0ca..679e057a 100644
--- a/pkg/llmclient/agentic_tools_test.go
+++ b/pkg/llmclient/agentic_tools_test.go
@@ -27,21 +27,20 @@ func TestValidatePath_WithinRepo(t *testing.T) {
 	executor := newToolExecutor(dir)
 
 	tests := []struct {
-		name    string
-		path    string
-		wantErr bool
+		name string
+		path string
 	}{
-		{name: "root dot", path: ".", wantErr: false},
-		{name: "file at root", path: "main.go", wantErr: false},
-		{name: "nested file", path: "src/lib.go", wantErr: false},
-		{name: "deeply nested", path: "src/nested/deep.go", wantErr: false},
-		{name: "directory", path: "src", wantErr: false},
+		{name: "root dot", path: "."},
+		{name: "file at root", path: "main.go"},
+		{name: "nested file", path: "src/lib.go"},
+		{name: "deeply nested", path: "src/nested/deep.go"},
+		{name: "directory", path: "src"},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			absPath, errMsg := executor.validatePath(tt.path)
-			require.Empty(t, errMsg)
+			absPath, err := executor.validatePath(tt.path)
+			require.NoError(t, err)
 			require.NotEmpty(t, absPath)
 		})
 	}
@@ -63,9 +62,9 @@ func TestValidatePath_TraversalBlocked(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			absPath, errMsg := executor.validatePath(tt.path)
+			absPath, err := executor.validatePath(tt.path)
 			require.Empty(t, absPath)
-			require.Contains(t, errMsg, "outside the repository")
+			require.ErrorContains(t, err, "outside the repository")
 		})
 	}
 }
@@ -83,19 +82,19 @@ func TestValidatePath_SymlinkEscape(t *testing.T) {
 	symlinkPath := filepath.Join(dir, "escape.txt")
 	require.NoError(t, os.Symlink(externalFile, symlinkPath))
 
-	absPath, errMsg := executor.validatePath("escape.txt")
+	absPath, err := executor.validatePath("escape.txt")
 	require.Empty(t, absPath)
-	require.Contains(t, errMsg, "outside the repository")
+	require.ErrorContains(t, err, "outside the repository")
 }
 
 func TestReadFile_PathTraversal(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)
 
-	result := executor.readFile(map[string]interface{}{
+	_, err := executor.readFile(map[string]interface{}{
 		"path": "../../etc/passwd",
 	})
-	require.Contains(t, result, "outside the repository")
+	require.ErrorContains(t, err, "outside the repository")
 }
 
 func TestReadFile_BinaryFile(t *testing.T) {
@@ -119,8 +118,8 @@ func TestReadFile_BinaryFile(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			require.NoError(t, os.WriteFile(filepath.Join(dir, "binary.bin"), tt.content, 0644))
-			result := executor.readFile(map[string]interface{}{"path": "binary.bin"})
-			require.Contains(t, result, "not a text file")
+			_, err := executor.readFile(map[string]interface{}{"path": "binary.bin"})
+			require.ErrorContains(t, err, "not a text file")
 		})
 	}
 }
@@ -133,18 +132,19 @@ func TestReadFile_MaxFileSize(t *testing.T) {
 	bigContent := strings.Repeat("x", maxFileSize+1)
 	require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(bigContent), 0644))
 
-	result := executor.readFile(map[string]interface{}{
+	_, err := executor.readFile(map[string]interface{}{
 		"path": "big.txt",
 	})
-	require.Contains(t, result, "file is too large")
+	require.ErrorContains(t, err, "file is too large")
 
 	// A file under the limit should work fine
 	smallContent := "hello world"
 	require.NoError(t, os.WriteFile(filepath.Join(dir, "small.txt"), []byte(smallContent), 0644))
 
-	result = executor.readFile(map[string]interface{}{
+	result, err := executor.readFile(map[string]interface{}{
 		"path": "small.txt",
 	})
+	require.NoError(t, err)
 	require.Equal(t, smallContent, result)
 }
 
@@ -152,21 +152,21 @@ func TestListDirectory_PathTraversal(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)
 
-	result := executor.listDirectory(map[string]interface{}{
+	_, err := executor.listDirectory(map[string]interface{}{
 		"path": "../",
 	})
-	require.Contains(t, result, "outside the repository")
+	require.ErrorContains(t, err, "outside the repository")
 }
 
 func TestGrep_PathTraversal(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)
 
-	result := executor.grep(map[string]interface{}{
+	_, err := executor.grep(map[string]interface{}{
 		"pattern": "root",
 		"path":    "../../etc",
 	})
-	require.Contains(t, result, "outside the repository")
+	require.ErrorContains(t, err, "outside the repository")
 }
 
 func TestGrep_FlagInjection(t *testing.T) {
@@ -175,14 +175,35 @@ func TestGrep_FlagInjection(t *testing.T) {
 
 	// Pattern starting with - should not be interpreted as a flag
 	// thanks to the -- separator
-	result := executor.grep(map[string]interface{}{
+	result, err := executor.grep(map[string]interface{}{
 		"pattern": "-rn",
 		"path":    ".",
 	})
 	// Should not error with "invalid option", should just return no matches
+	require.NoError(t, err)
 	require.NotContains(t, result, "invalid option")
 }
 
+func TestGrep_ExitCodes(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	// Exit code 1: pattern not found — not an error, just no results.
+	result, err := executor.grep(map[string]interface{}{
+		"pattern": "this_pattern_will_never_match_xyz123",
+		"path":    ".",
+	})
+	require.NoError(t, err)
+	require.Equal(t, "No matches found", result)
+
+	// Exit code 2: invalid regex — should be a real error, not silently swallowed.
+	_, err = executor.grep(map[string]interface{}{
+		"pattern": "[invalid",
+		"path":    ".",
+	})
+	require.Error(t, err)
+}
+
 func TestGit_BlockedFlags(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)
@@ -196,6 +217,8 @@ func TestGit_BlockedFlags(t *testing.T) {
 		{name: "upload-pack", args: "fetch --upload-pack=evil"},
 		{name: "config flag", args: "log -c core.pager=evil"},
 		{name: "config equals", args: "log --config=evil"},
+		{name: "config concatenated", args: "log -ccore.pager=evil"},
+		{name: "config concatenated ssh", args: "log -ccore.sshCommand=evil"},
 		{name: "git prefix exec flag", args: "git log --exec=malicious"},
 		{name: "git prefix ext-diff", args: "git diff --ext-diff"},
 		{name: "git prefix upload-pack", args: "git fetch --upload-pack=evil"},
@@ -203,10 +226,10 @@ func TestGit_BlockedFlags(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := executor.git(map[string]interface{}{
+			_, err := executor.git(map[string]interface{}{
 				"args": tt.args,
 			})
-			require.Contains(t, result, "not allowed for security reasons")
+			require.ErrorContains(t, err, "not allowed for security reasons")
 		})
 	}
 }
@@ -215,33 +238,35 @@ func TestGit_StripGitPrefix(t *testing.T) {
 	dir := t.TempDir()
 	executor := newToolExecutor(dir)
 
-	// "git status" should be treated the same as "status"
-	result := executor.git(map[string]interface{}{
+	// "git status" should be treated the same as "status" — not blocked by allowlist
+	_, err := executor.git(map[string]interface{}{
 		"args": "git status",
 	})
-	require.NotContains(t, result, "is not allowed")
+	if err != nil {
+		require.NotContains(t, err.Error(), "is not allowed")
+	}
 
 	// "git push" should still be blocked
-	result = executor.git(map[string]interface{}{
+	_, err = executor.git(map[string]interface{}{
 		"args": "git push",
 	})
-	require.Contains(t, result, "is not allowed")
+	require.ErrorContains(t, err, "is not allowed")
 
 	// bare "git" with nothing after should error
-	result = executor.git(map[string]interface{}{
+	_, err = executor.git(map[string]interface{}{
 		"args": "git",
 	})
-	require.Contains(t, result, "empty git command")
+	require.ErrorContains(t, err, "empty git command")
 }
 
 func TestGit_BlockedSubcommand(t *testing.T) {
 	dir := setupTestRepo(t)
 	executor := newToolExecutor(dir)
 
-	result := executor.git(map[string]interface{}{
+	_, err := executor.git(map[string]interface{}{
 		"args": "push origin main",
 	})
-	require.Contains(t, result, "not allowed")
+	require.ErrorContains(t, err, "not allowed")
 }
 
 func TestGit_AllowedSubcommands(t *testing.T) {
@@ -253,10 +278,12 @@ func TestGit_AllowedSubcommands(t *testing.T) {
 			dir := t.TempDir()
 			executor := newToolExecutor(dir)
 
-			result := executor.git(map[string]interface{}{
+			_, err := executor.git(map[string]interface{}{
 				"args": subcmd,
 			})
-			require.NotContains(t, result, "is not allowed")
+			if err != nil {
+				require.NotContains(t, err.Error(), "is not allowed")
+			}
 		})
 	}
 }

From a8cfe2605e44b6cd2e739c949c0aa0c077c07ffd Mon Sep 17 00:00:00 2001
From: Esteban Beltran <sergies@gmail.com>
Date: Thu, 19 Feb 2026 14:37:22 +0100
Subject: [PATCH 7/7] execute should reutne error and add tests

---
 pkg/llmclient/agentic_client.go     |  5 ++++-
 pkg/llmclient/agentic_tools.go      | 24 ++++++++----------------
 pkg/llmclient/agentic_tools_test.go | 18 ++++++++++++++++++
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go
index 241a25bf..cef19851 100644
--- a/pkg/llmclient/agentic_client.go
+++ b/pkg/llmclient/agentic_client.go
@@ -262,7 +262,10 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in
 	}
 
 	// Execute other tools
-	result := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	result, err := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments)
+	if err != nil {
+		result = fmt.Sprintf("Error: %v", err)
+	}
 	debugLog("AgenticClient: tool result: %s", truncateString(result, 300))
 
 	return llms.MessageContent{
diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go
index baf31569..dd0fd81c 100644
--- a/pkg/llmclient/agentic_tools.go
+++ b/pkg/llmclient/agentic_tools.go
@@ -164,33 +164,25 @@ func newToolExecutor(repoPath string) *toolExecutor {
 	return &toolExecutor{repoPath: repoPath}
 }
 
-// execute runs a tool and returns the result as a string
-func (e *toolExecutor) execute(toolName, argsJSON string) string {
+// execute runs a tool and returns the result or an error.
+func (e *toolExecutor) execute(toolName, argsJSON string) (string, error) {
 	var args map[string]interface{}
 	if err := json.Unmarshal([]byte(argsJSON), &args); err != nil {
-		return fmt.Sprintf("Error parsing arguments: %v", err)
+		return "", fmt.Errorf("parsing arguments: %w", err)
 	}
 
-	var (
-		result string
-		err    error
-	)
 	switch toolName {
 	case "read_file":
-		result, err = e.readFile(args)
+		return e.readFile(args)
 	case "list_directory":
-		result, err = e.listDirectory(args)
+		return e.listDirectory(args)
 	case "grep":
-		result, err = e.grep(args)
+		return e.grep(args)
 	case "git":
-		result, err = e.git(args)
+		return e.git(args)
 	default:
-		return fmt.Sprintf("Unknown tool: %s", toolName)
+		return "", fmt.Errorf("unknown tool: %s", toolName)
 	}
-	if err != nil {
-		return fmt.Sprintf("Error: %v", err)
-	}
-	return result
 }
 
 // validatePath checks that the resolved path is within the repository directory.
diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go
index 679e057a..0a46e237 100644
--- a/pkg/llmclient/agentic_tools_test.go
+++ b/pkg/llmclient/agentic_tools_test.go
@@ -1,6 +1,7 @@
 package llmclient
 
 import (
+	"encoding/json"
 	"os"
 	"path/filepath"
 	"strings"
@@ -287,3 +288,20 @@ func TestGit_AllowedSubcommands(t *testing.T) {
 		})
 	}
 }
+
+func TestExecute(t *testing.T) {
+	dir := setupTestRepo(t)
+	executor := newToolExecutor(dir)
+
+	t.Run("success", func(t *testing.T) {
+		args, _ := json.Marshal(map[string]string{"path": "main.go"})
+		result, err := executor.execute("read_file", string(args))
+		require.NoError(t, err)
+		require.Contains(t, result, "package main")
+	})
+
+	t.Run("unknown tool returns error", func(t *testing.T) {
+		_, err := executor.execute("nonexistent_tool", "{}")
+		require.ErrorContains(t, err, "unknown tool")
+	})
+}