From aa46935106c529792c019f47beaeeb61d22a858c Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Tue, 17 Feb 2026 17:32:40 +0100 Subject: [PATCH 1/7] llmclient: add agentic client for code reviews --- pkg/llmclient/README.md | 43 +++ pkg/llmclient/agentic_client.go | 320 +++++++++++++++++ .../agentic_client_integration_test.go | 170 +++++++++ pkg/llmclient/agentic_debug.go | 53 +++ pkg/llmclient/agentic_tools.go | 339 ++++++++++++++++++ pkg/llmclient/agentic_tools_test.go | 217 +++++++++++ pkg/llmclient/testdata/fs_access/config.go | 42 +++ pkg/llmclient/testdata/fs_access/logger.go | 40 +++ pkg/llmclient/testdata/fs_access/main.go | 15 + pkg/llmclient/testdata/fs_access/scanner.go | 51 +++ .../testdata/fs_access/storage/cache.go | 49 +++ .../testdata/fs_access/storage/disk.go | 66 ++++ pkg/llmclient/testdata/fs_access/validator.go | 66 ++++ .../testdata/no_fs_access/handlers/data.go | 49 +++ .../testdata/no_fs_access/handlers/health.go | 25 ++ pkg/llmclient/testdata/no_fs_access/main.go | 7 + pkg/llmclient/testdata/no_fs_access/models.go | 55 +++ pkg/llmclient/testdata/no_fs_access/server.go | 69 ++++ pkg/llmclient/testdata/no_fs_access/utils.go | 50 +++ 19 files changed, 1726 insertions(+) create mode 100644 pkg/llmclient/README.md create mode 100644 pkg/llmclient/agentic_client.go create mode 100644 pkg/llmclient/agentic_client_integration_test.go create mode 100644 pkg/llmclient/agentic_debug.go create mode 100644 pkg/llmclient/agentic_tools.go create mode 100644 pkg/llmclient/agentic_tools_test.go create mode 100644 pkg/llmclient/testdata/fs_access/config.go create mode 100644 pkg/llmclient/testdata/fs_access/logger.go create mode 100644 pkg/llmclient/testdata/fs_access/main.go create mode 100644 pkg/llmclient/testdata/fs_access/scanner.go create mode 100644 pkg/llmclient/testdata/fs_access/storage/cache.go create mode 100644 pkg/llmclient/testdata/fs_access/storage/disk.go create mode 100644 pkg/llmclient/testdata/fs_access/validator.go create mode 100644 pkg/llmclient/testdata/no_fs_access/handlers/data.go create mode 100644 pkg/llmclient/testdata/no_fs_access/handlers/health.go create mode 100644 pkg/llmclient/testdata/no_fs_access/main.go create mode 100644 pkg/llmclient/testdata/no_fs_access/models.go create mode 100644 pkg/llmclient/testdata/no_fs_access/server.go create mode 100644 pkg/llmclient/testdata/no_fs_access/utils.go diff --git a/pkg/llmclient/README.md b/pkg/llmclient/README.md new file mode 100644 index 00000000..41cb7a84 --- /dev/null +++ b/pkg/llmclient/README.md @@ -0,0 +1,43 @@ +# llmclient + +LLM client package for code analysis. + +- **AgenticClient**: Provider-agnostic agentic client using [langchaingo](https://github.com/tmc/langchaingo). Gives the LLM tools to explore a repository and answer questions about code. + +## AgenticClient + +The agentic client runs a tool-calling loop where the LLM can explore code using read-only tools, then submits structured answers via `submit_answer`. + +**Tools**: `list_directory`, `read_file`, `grep`, `git` (allowlisted subcommands), `submit_answer` + +**Providers**: Google (Gemini), Anthropic (Claude), OpenAI + +```mermaid +sequenceDiagram + participant Caller + participant AgenticClient + participant LLM + participant Tools + + Caller->>AgenticClient: CallLLM(prompt, repoPath, opts) + AgenticClient->>LLM: system prompt + user prompt + tool definitions + + loop until submit_answer or 100 tool calls + LLM-->>AgenticClient: tool call(s) + alt submit_answer + AgenticClient->>AgenticClient: collect answer + AgenticClient-->>LLM: "Answer recorded" + else read_file / list_directory / grep / git + AgenticClient->>Tools: execute tool (sandboxed to repoPath) + Tools-->>AgenticClient: result + AgenticClient-->>LLM: tool result + end + end + + Note over LLM,AgenticClient: LLM sends message with no tool calls → done + AgenticClient-->>Caller: []AnswerSchema +``` + +## Debug logging + +Set `DEBUG=1` to write detailed logs to `/tmp/validator-agentic-.log`. diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go new file mode 100644 index 00000000..a9ca0963 --- /dev/null +++ b/pkg/llmclient/agentic_client.go @@ -0,0 +1,320 @@ +package llmclient + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/tmc/langchaingo/llms" + "github.com/tmc/langchaingo/llms/anthropic" + "github.com/tmc/langchaingo/llms/googleai" + "github.com/tmc/langchaingo/llms/openai" +) + +const ( + maxToolCalls = 100 + maxLLMRetries = 3 + maxConsecutiveNoTools = 5 + retryDelay = 2 * time.Second +) + +// AnswerSchema represents the structured response from the agentic client +type AnswerSchema struct { + Answer string `json:"answer"` + ShortAnswer bool `json:"short_answer"` + Files []string `json:"files,omitempty"` + CodeSnippet string `json:"code_snippet,omitempty"` +} + +// AgenticCallOptions contains configuration for the agentic LLM call +type AgenticCallOptions struct { + Model string // e.g. "gemini-2.0-flash" + Provider string // "google", "anthropic", "openai" + APIKey string +} + +// AgenticClient is an interface for agentic LLM interactions +type AgenticClient interface { + CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) +} + +// agenticClientImpl implements AgenticClient +type agenticClientImpl struct{} + +// NewAgenticClient creates a new AgenticClient +func NewAgenticClient() AgenticClient { + return &agenticClientImpl{} +} + +// CallLLM executes an agentic loop with tools to answer questions about code. +// The prompt may contain multiple questions, in which case the agent will call +// submit_answer multiple times. All answers are collected and returned. +func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) { + if opts == nil { + return nil, fmt.Errorf("options are required") + } + + if opts.APIKey == "" { + return nil, fmt.Errorf("API key is required") + } + + if opts.Model == "" { + return nil, fmt.Errorf("model is required") + } + + if opts.Provider == "" { + return nil, fmt.Errorf("provider is required") + } + + // Initialize LLM based on provider + llm, err := initLLM(ctx, opts) + if err != nil { + return nil, fmt.Errorf("failed to initialize LLM: %w", err) + } + + // Build tools + tools := buildAgenticTools() + + // Create tool executor + executor := newToolExecutor(repositoryPath) + + // System prompt + systemPrompt := `You are a code analysis assistant. You have tools to explore code in a repository. + +AVAILABLE TOOLS: +- list_directory: List files at a path. Use "." for root. +- read_file: Read a file's contents. This is your primary tool for understanding code. +- grep: Search for a pattern across files. +- git: Run read-only git commands (log, show, diff, status, etc.) +- submit_answer: Submit your final answer. + +STRATEGY: +1. Use list_directory to see what files exist +2. Use read_file to read the source code files +3. Analyze the code to answer the question + +You can only use one tool at a time. +IMPORTANT: You are in non-interactive mode. Start working and using your tools immediately. +When ready, use submit_answer. For multiple questions, call submit_answer once per question.` + + // Build initial messages + messages := []llms.MessageContent{ + llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt), + llms.TextParts(llms.ChatMessageTypeHuman, prompt), + } + + // Collect answers + var answers []AnswerSchema + + // Agentic loop + toolCallsRemaining := maxToolCalls + + // Print debug log file path before starting the loop + printDebugLogPath() + debugLog("\n\n\n") + debugLog("################################################################") + debugLog("# NEW CallLLM - provider=%s model=%s", opts.Provider, opts.Model) + debugLog("# repo=%s", repositoryPath) + debugLog("# prompt=%s", truncateString(prompt, 200)) + debugLog("################################################################") + + iteration := 0 + consecutiveNoTools := 0 + for toolCallsRemaining > 0 { + iteration++ + debugLog("========== AgenticClient: iteration %d ==========", iteration) + debugLog("AgenticClient: %d tool calls remaining, %d answers collected", toolCallsRemaining, len(answers)) + + // Call LLM with retry logic + debugLog("AgenticClient: calling LLM...") + resp, err := callLLMWithRetry(ctx, llm, messages, tools) + if err != nil { + debugLog("AgenticClient: LLM call failed: %v", err) + return nil, fmt.Errorf("LLM call failed after %d retries: %w", maxLLMRetries, err) + } + + // resp.Choices contains the LLM's response options. Each choice has Content (text) + // and/or ToolCalls (function calls the model wants to make). Typically there's + // only one choice unless you request multiple completions. + if len(resp.Choices) == 0 { + debugLog("AgenticClient: no choices in response") + return nil, fmt.Errorf("no response from LLM") + } + + // Use first choice. Google puts all tool calls in choices[0].ToolCalls. + // Anthropic creates a separate choice per content block (text or tool_use), + // but langchaingo's handleAIMessage only supports Parts[0] as either + // TextContent or ToolCall, so we process one choice at a time. + choice := resp.Choices[0] + debugLog("AgenticClient: received response with %d tool calls", len(choice.ToolCalls)) + if choice.Content != "" { + debugLog("AgenticClient: AI message: %s", truncateString(choice.Content, 200)) + } + + // If no tool calls, check if we have answers + if len(choice.ToolCalls) == 0 { + debugLog("AgenticClient: no tool calls in response") + + // If we have collected answers, the agent is done + if len(answers) > 0 { + debugLog("AgenticClient: agent finished with %d answers", len(answers)) + return answers, nil + } + + consecutiveNoTools++ + debugLog("AgenticClient: consecutive no-tool responses: %d/%d", consecutiveNoTools, maxConsecutiveNoTools) + if consecutiveNoTools >= maxConsecutiveNoTools { + return nil, fmt.Errorf("agent failed to use tools after %d consecutive attempts", maxConsecutiveNoTools) + } + + // No answers yet - add the AI response and remind to use tools + if choice.Content != "" { + messages = append(messages, llms.TextParts(llms.ChatMessageTypeAI, choice.Content)) + } + debugLog("AgenticClient: no answers yet, reminding agent to use tools") + messages = append(messages, llms.TextParts(llms.ChatMessageTypeHuman, + "You are in non-interactive mode. You must start using your tools now to explore the repository. When you have enough information, use submit_answer to provide your answer.")) + toolCallsRemaining-- + continue + } + + // Reset consecutive no-tool counter when tools are used + consecutiveNoTools = 0 + + // Build AI message with tool calls + aiMessage := llms.MessageContent{ + Role: llms.ChatMessageTypeAI, + } + if choice.Content != "" { + aiMessage.Parts = append(aiMessage.Parts, llms.TextContent{Text: choice.Content}) + } + for _, toolCall := range choice.ToolCalls { + aiMessage.Parts = append(aiMessage.Parts, toolCall) + } + messages = append(messages, aiMessage) + + // Process tool calls + for i, toolCall := range choice.ToolCalls { + toolCallsRemaining-- + response, answer := processToolCall(toolCall, i, len(choice.ToolCalls), len(answers), executor) + messages = append(messages, response) + if answer != nil { + answers = append(answers, *answer) + } + } + } + + // If we collected some answers but ran out of tool calls, return what we have + if len(answers) > 0 { + debugLog("AgenticClient: ran out of tool calls, returning %d answers", len(answers)) + return answers, nil + } + + return nil, fmt.Errorf("exceeded maximum tool calls (%d), agent did not complete", maxToolCalls) +} + +// processToolCall processes a single tool call and returns the response message and optional answer +func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount int, executor *toolExecutor) (llms.MessageContent, *AnswerSchema) { + debugLog("AgenticClient: [%d/%d] executing tool: %s", index+1, total, toolCall.FunctionCall.Name) + debugLog("AgenticClient: tool args: %s", truncateString(toolCall.FunctionCall.Arguments, 500)) + + // Check for submit_answer + if toolCall.FunctionCall.Name == "submit_answer" { + var answer AnswerSchema + if err := json.Unmarshal([]byte(toolCall.FunctionCall.Arguments), &answer); err != nil { + debugLog("AgenticClient: failed to parse submit_answer: %v", err) + // Report parse error back to agent so it can retry + return llms.MessageContent{ + Role: llms.ChatMessageTypeTool, + Parts: []llms.ContentPart{ + llms.ToolCallResponse{ + ToolCallID: toolCall.ID, + Name: toolCall.FunctionCall.Name, + Content: fmt.Sprintf("Error parsing answer: %v. Please try again with valid JSON.", err), + }, + }, + }, nil + } + debugLog("AgenticClient: received answer #%d: short_answer=%v, answer=%s", + currentAnswerCount+1, answer.ShortAnswer, truncateString(answer.Answer, 100)) + + // Return success response and the answer + return llms.MessageContent{ + Role: llms.ChatMessageTypeTool, + Parts: []llms.ContentPart{ + llms.ToolCallResponse{ + ToolCallID: toolCall.ID, + Name: toolCall.FunctionCall.Name, + Content: "Answer recorded successfully. If you have answered all questions, respond with a plain text message saying 'I am finished'. Otherwise, continue with the next question.", + }, + }, + }, &answer + } + + // Execute other tools + result := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) + debugLog("AgenticClient: tool result: %s", truncateString(result, 300)) + + return llms.MessageContent{ + Role: llms.ChatMessageTypeTool, + Parts: []llms.ContentPart{ + llms.ToolCallResponse{ + ToolCallID: toolCall.ID, + Name: toolCall.FunctionCall.Name, + Content: result, + }, + }, + }, nil +} + +// callLLMWithRetry calls the LLM with retry logic for transient errors +func callLLMWithRetry(ctx context.Context, llm llms.Model, messages []llms.MessageContent, tools []llms.Tool) (*llms.ContentResponse, error) { + var lastErr error + for attempt := 1; attempt <= maxLLMRetries; attempt++ { + resp, err := llm.GenerateContent(ctx, messages, llms.WithTools(tools)) + if err == nil { + return resp, nil + } + lastErr = err + debugLog("AgenticClient: LLM call failed (attempt %d/%d): %v", attempt, maxLLMRetries, err) + + if attempt < maxLLMRetries { + debugLog("AgenticClient: retrying in %v...", retryDelay) + time.Sleep(retryDelay) + } + } + return nil, lastErr +} + +// truncateString truncates a string to maxLen characters, adding "..." if truncated +func truncateString(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + return s[:maxLen] + "..." +} + +// initLLM initializes the appropriate LLM based on provider +func initLLM(ctx context.Context, opts *AgenticCallOptions) (llms.Model, error) { + switch opts.Provider { + case "google": + return googleai.New( + ctx, + googleai.WithAPIKey(opts.APIKey), + googleai.WithDefaultModel(opts.Model), + ) + case "anthropic": + return anthropic.New( + anthropic.WithToken(opts.APIKey), + anthropic.WithModel(opts.Model), + ) + case "openai": + return openai.New( + openai.WithToken(opts.APIKey), + openai.WithModel(opts.Model), + ) + default: + return nil, fmt.Errorf("unsupported provider: %s (supported: google, anthropic, openai)", opts.Provider) + } +} diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go new file mode 100644 index 00000000..3c8035ac --- /dev/null +++ b/pkg/llmclient/agentic_client_integration_test.go @@ -0,0 +1,170 @@ +package llmclient + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/grafana/plugin-validator/pkg/logme" + "github.com/grafana/plugin-validator/pkg/prettyprint" + "github.com/stretchr/testify/require" +) + +func hasGeminiAPIKey() bool { + return os.Getenv("GEMINI_API_KEY") != "" +} + +func hasAnthropicAPIKey() bool { + return os.Getenv("ANTHROPIC_API_KEY") != "" +} + +// TestAgenticClient_NoFilesystemAccess tests that the agent correctly identifies +// when an application does NOT access the filesystem +func TestAgenticClient_NoFilesystemAccess(t *testing.T) { + if !hasGeminiAPIKey() { + t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") + } + + client := NewAgenticClient() + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) + require.NoError(t, err) + + opts := &AgenticCallOptions{ + Provider: "google", + Model: "gemini-2.0-flash", + APIKey: os.Getenv("GEMINI_API_KEY"), + } + + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + + answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + logme.DebugFln("Agent answers:") + prettyprint.Print(answers) + + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") + + answer := answers[0] + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem") + + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } +} + +// TestAgenticClient_FilesystemAccess tests that the agent correctly identifies +// when an application DOES access the filesystem +func TestAgenticClient_FilesystemAccess(t *testing.T) { + if !hasGeminiAPIKey() { + t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") + } + + client := NewAgenticClient() + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) + + opts := &AgenticCallOptions{ + Provider: "google", + Model: "gemini-2.0-flash", + APIKey: os.Getenv("GEMINI_API_KEY"), + } + + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + + answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + logme.DebugFln("Agent answers:") + prettyprint.Print(answers) + + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") + + answer := answers[0] + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") + + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } +} + +// TestAgenticClient_NoFilesystemAccess_Anthropic tests the same scenario using Anthropic Claude +func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) { + if !hasAnthropicAPIKey() { + t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") + } + + client := NewAgenticClient() + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) + require.NoError(t, err) + + opts := &AgenticCallOptions{ + Provider: "anthropic", + Model: "claude-sonnet-4-5", + APIKey: os.Getenv("ANTHROPIC_API_KEY"), + } + + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + + answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + logme.DebugFln("Agent answers:") + prettyprint.Print(answers) + + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") + + answer := answers[0] + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, false, answer.ShortAnswer, "ShortAnswer should be false - this app does not access the filesystem") + + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } +} + +// TestAgenticClient_FilesystemAccess_Anthropic tests the same scenario using Anthropic Claude +func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) { + if !hasAnthropicAPIKey() { + t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") + } + + client := NewAgenticClient() + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) + + opts := &AgenticCallOptions{ + Provider: "anthropic", + Model: "claude-sonnet-4-5", + APIKey: os.Getenv("ANTHROPIC_API_KEY"), + } + + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." + + answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + logme.DebugFln("Agent answers:") + prettyprint.Print(answers) + + require.NoError(t, err, "CallLLM should not return error") + require.Len(t, answers, 1, "Should return exactly one answer") + + answer := answers[0] + require.NotEmpty(t, answer.Answer, "Answer field should be populated") + require.Equal(t, true, answer.ShortAnswer, "ShortAnswer should be true - this app accesses the filesystem via os.ReadFile") + + t.Logf("Agent Answer: %s", answer.Answer) + t.Logf("Short Answer: %v", answer.ShortAnswer) + if len(answer.Files) > 0 { + t.Logf("Files: %v", answer.Files) + } +} diff --git a/pkg/llmclient/agentic_debug.go b/pkg/llmclient/agentic_debug.go new file mode 100644 index 00000000..bf6f5050 --- /dev/null +++ b/pkg/llmclient/agentic_debug.go @@ -0,0 +1,53 @@ +package llmclient + +import ( + "fmt" + "io" + "log" + "os" + "path/filepath" + "sync" + "time" +) + +var ( + debugLogger *log.Logger + debugOnce sync.Once + debugPath string +) + +func initDebugLogger() { + debugOnce.Do(func() { + debugVal := os.Getenv("DEBUG") + if debugVal != "1" && debugVal != "true" { + debugLogger = log.New(io.Discard, "", 0) + return + } + + timestamp := time.Now().Format("20060102-150405") + debugPath = filepath.Join(os.TempDir(), fmt.Sprintf("validator-agentic-%s.log", timestamp)) + + f, err := os.OpenFile(debugPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "AgenticClient: failed to create debug log file: %v\n", err) + debugLogger = log.New(io.Discard, "", 0) + return + } + + debugLogger = log.New(f, "", log.Ltime|log.Lmicroseconds) + }) +} + +// debugLog writes a formatted message to the debug log file if DEBUG=1 or DEBUG=true +func debugLog(format string, args ...interface{}) { + initDebugLogger() + debugLogger.Printf(format, args...) +} + +// printDebugLogPath prints the debug log file path to stderr if debug is enabled +func printDebugLogPath() { + initDebugLogger() + if debugPath != "" { + fmt.Fprintf(os.Stderr, "AgenticClient: debug log: %s\n", debugPath) + } +} diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go new file mode 100644 index 00000000..84bc9c33 --- /dev/null +++ b/pkg/llmclient/agentic_tools.go @@ -0,0 +1,339 @@ +package llmclient + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/tmc/langchaingo/llms" +) + +const maxFileSize = 500 * 1024 // 500KB + +// Git subcommands that are allowed +var allowedGitSubcommands = map[string]bool{ + "log": true, + "show": true, + "diff": true, + "status": true, + "ls-files": true, + "blame": true, + "rev-parse": true, + "cat-file": true, + "checkout": true, + "fetch": true, + "pull": true, + "branch": true, + "tag": true, +} + +// Git flags that could execute arbitrary commands +var blockedGitFlags = []string{ + "--exec", + "--ext-diff", + "--upload-pack", + "--receive-pack", + "-c", + "--config", + "--hook", + "--run", +} + +// buildAgenticTools returns the list of tools available to the agent +func buildAgenticTools() []llms.Tool { + return []llms.Tool{ + { + Type: "function", + Function: &llms.FunctionDefinition{ + Name: "read_file", + Description: "Read the contents of a file at the given path", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "path": map[string]interface{}{ + "type": "string", + "description": "The relative path to the file to read", + }, + }, + "required": []string{"path"}, + }, + }, + }, + { + Type: "function", + Function: &llms.FunctionDefinition{ + Name: "list_directory", + Description: "List files and directories at the given path", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "path": map[string]interface{}{ + "type": "string", + "description": "The relative path to the directory to list (use '.' for root)", + }, + }, + "required": []string{"path"}, + }, + }, + }, + { + Type: "function", + Function: &llms.FunctionDefinition{ + Name: "grep", + Description: "Search for a pattern in files. Returns matching lines with file names and line numbers.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "pattern": map[string]interface{}{ + "type": "string", + "description": "The pattern to search for", + }, + "path": map[string]interface{}{ + "type": "string", + "description": "Optional: directory or file to search in (defaults to '.')", + }, + }, + "required": []string{"pattern"}, + }, + }, + }, + { + Type: "function", + Function: &llms.FunctionDefinition{ + Name: "git", + Description: "Execute a git command. Only allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "args": map[string]interface{}{ + "type": "string", + "description": "The git command arguments (e.g., 'log -n 5' or 'show HEAD')", + }, + }, + "required": []string{"args"}, + }, + }, + }, + { + Type: "function", + Function: &llms.FunctionDefinition{ + Name: "submit_answer", + Description: "Submit your final answer to the question. Use this when you have gathered enough information.", + Parameters: map[string]interface{}{ + "type": "object", + "properties": map[string]interface{}{ + "answer": map[string]interface{}{ + "type": "string", + "description": "Your detailed answer explaining your findings", + }, + "short_answer": map[string]interface{}{ + "type": "boolean", + "description": "A boolean true/false answer to yes/no questions", + }, + "files": map[string]interface{}{ + "type": "array", + "items": map[string]interface{}{"type": "string"}, + "description": "List of relevant files (optional)", + }, + "code_snippet": map[string]interface{}{ + "type": "string", + "description": "A relevant code snippet (optional)", + }, + }, + "required": []string{"answer", "short_answer"}, + }, + }, + }, + } +} + +// toolExecutor handles execution of tools within a repository path +type toolExecutor struct { + repoPath string +} + +// newToolExecutor creates a new tool executor for the given repository path +func newToolExecutor(repoPath string) *toolExecutor { + return &toolExecutor{repoPath: repoPath} +} + +// execute runs a tool and returns the result as a string +func (e *toolExecutor) execute(toolName, argsJSON string) string { + var args map[string]interface{} + if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { + return fmt.Sprintf("Error parsing arguments: %v", err) + } + + switch toolName { + case "read_file": + return e.readFile(args) + case "list_directory": + return e.listDirectory(args) + case "grep": + return e.grep(args) + case "git": + return e.git(args) + default: + return fmt.Sprintf("Unknown tool: %s", toolName) + } +} + +// validatePath checks that the resolved path is within the repository directory. +// Returns the validated absolute path or an error string. +func (e *toolExecutor) validatePath(path string) (string, string) { + fullPath := filepath.Join(e.repoPath, path) + absPath, err := filepath.Abs(fullPath) + if err != nil { + return "", fmt.Sprintf("Error resolving path: %v", err) + } + + absRepo, err := filepath.Abs(e.repoPath) + if err != nil { + return "", fmt.Sprintf("Error resolving repo path: %v", err) + } + + if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) { + return "", "Error: path is outside the repository" + } + + return absPath, "" +} + +func (e *toolExecutor) readFile(args map[string]interface{}) string { + path, ok := args["path"].(string) + if !ok { + return "Error: path is required" + } + + fullPath, errMsg := e.validatePath(path) + if errMsg != "" { + return errMsg + } + + debugLog("AgenticClient: read_file %s", fullPath) + + info, err := os.Stat(fullPath) + if err != nil { + return fmt.Sprintf("Error reading file: %v", err) + } + if info.Size() > maxFileSize { + return fmt.Sprintf("Error: file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content.", info.Size(), maxFileSize) + } + + content, err := os.ReadFile(fullPath) + if err != nil { + return fmt.Sprintf("Error reading file: %v", err) + } + + return string(content) +} + +func (e *toolExecutor) listDirectory(args map[string]interface{}) string { + path, ok := args["path"].(string) + if !ok { + path = "." + } + + fullPath, errMsg := e.validatePath(path) + if errMsg != "" { + return errMsg + } + + debugLog("AgenticClient: list_directory %s", fullPath) + + entries, err := os.ReadDir(fullPath) + if err != nil { + return fmt.Sprintf("Error listing directory: %v", err) + } + + var result strings.Builder + for _, entry := range entries { + if entry.IsDir() { + result.WriteString(entry.Name() + "/\n") + } else { + result.WriteString(entry.Name() + "\n") + } + } + + return result.String() +} + +func (e *toolExecutor) grep(args map[string]interface{}) string { + pattern, ok := args["pattern"].(string) + if !ok || pattern == "" { + return "Error: pattern is required" + } + + path := "." + if p, ok := args["path"].(string); ok && p != "" { + path = p + } + + fullPath, errMsg := e.validatePath(path) + if errMsg != "" { + return errMsg + } + + debugLog("AgenticClient: grep '%s' in %s", pattern, fullPath) + + // Use -- to prevent pattern from being interpreted as flags + cmd := exec.Command("grep", "-rn", "--", pattern, fullPath) + output, err := cmd.CombinedOutput() + if err != nil { + // grep returns exit code 1 if no matches found + if len(output) == 0 { + return "No matches found" + } + } + + return string(output) +} + +func (e *toolExecutor) git(args map[string]interface{}) string { + argsStr, ok := args["args"].(string) + if !ok || argsStr == "" { + return "Error: git args are required" + } + + parts := strings.Fields(argsStr) + if len(parts) == 0 { + return "Error: empty git command" + } + + // Strip leading "git" if the LLM included it (e.g. "git diff" instead of "diff") + if parts[0] == "git" { + parts = parts[1:] + if len(parts) == 0 { + return "Error: empty git command" + } + } + + subcommand := parts[0] + + if !allowedGitSubcommands[subcommand] { + return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag", subcommand) + } + + // Check for flags that could execute arbitrary commands + for _, arg := range parts[1:] { + for _, blocked := range blockedGitFlags { + if arg == blocked || strings.HasPrefix(arg, blocked+"=") { + return fmt.Sprintf("Error: git flag '%s' is not allowed for security reasons", arg) + } + } + } + + debugLog("AgenticClient: git %s", argsStr) + + cmd := exec.Command("git", append([]string{"--no-pager"}, parts...)...) + cmd.Dir = e.repoPath + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Sprintf("Error executing git: %v\nOutput: %s", err, string(output)) + } + + return string(output) +} diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go new file mode 100644 index 00000000..d5ad9499 --- /dev/null +++ b/pkg/llmclient/agentic_tools_test.go @@ -0,0 +1,217 @@ +package llmclient + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func setupTestRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + + // Create a nested directory structure + require.NoError(t, os.MkdirAll(filepath.Join(dir, "src", "nested"), 0755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "main.go"), []byte("package main\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "src", "lib.go"), []byte("package src\nfunc Hello() {}\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "src", "nested", "deep.go"), []byte("package nested\n"), 0644)) + + return dir +} + +func TestValidatePath_WithinRepo(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + tests := []struct { + name string + path string + wantErr bool + }{ + {name: "root dot", path: ".", wantErr: false}, + {name: "file at root", path: "main.go", wantErr: false}, + {name: "nested file", path: "src/lib.go", wantErr: false}, + {name: "deeply nested", path: "src/nested/deep.go", wantErr: false}, + {name: "directory", path: "src", wantErr: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + absPath, errMsg := executor.validatePath(tt.path) + require.Empty(t, errMsg) + require.NotEmpty(t, absPath) + }) + } +} + +func TestValidatePath_TraversalBlocked(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + tests := []struct { + name string + path string + }{ + {name: "parent directory", path: ".."}, + {name: "parent with path", path: "../etc/passwd"}, + {name: "double parent", path: "../../etc/passwd"}, + {name: "dot dot in middle", path: "src/../../etc/passwd"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + absPath, errMsg := executor.validatePath(tt.path) + require.Empty(t, absPath) + require.Contains(t, errMsg, "outside the repository") + }) + } +} + +func TestReadFile_PathTraversal(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + result := executor.readFile(map[string]interface{}{ + "path": "../../etc/passwd", + }) + require.Contains(t, result, "outside the repository") +} + +func TestReadFile_MaxFileSize(t *testing.T) { + dir := t.TempDir() + executor := newToolExecutor(dir) + + // Create a file that exceeds the max size + bigContent := strings.Repeat("x", maxFileSize+1) + require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(bigContent), 0644)) + + result := executor.readFile(map[string]interface{}{ + "path": "big.txt", + }) + require.Contains(t, result, "file is too large") + + // A file under the limit should work fine + smallContent := "hello world" + require.NoError(t, os.WriteFile(filepath.Join(dir, "small.txt"), []byte(smallContent), 0644)) + + result = executor.readFile(map[string]interface{}{ + "path": "small.txt", + }) + require.Equal(t, smallContent, result) +} + +func TestListDirectory_PathTraversal(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + result := executor.listDirectory(map[string]interface{}{ + "path": "../", + }) + require.Contains(t, result, "outside the repository") +} + +func TestGrep_PathTraversal(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + result := executor.grep(map[string]interface{}{ + "pattern": "root", + "path": "../../etc", + }) + require.Contains(t, result, "outside the repository") +} + +func TestGrep_FlagInjection(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + // Pattern starting with - should not be interpreted as a flag + // thanks to the -- separator + result := executor.grep(map[string]interface{}{ + "pattern": "-rn", + "path": ".", + }) + // Should not error with "invalid option", should just return no matches + require.NotContains(t, result, "invalid option") +} + +func TestGit_BlockedFlags(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + tests := []struct { + name string + args string + }{ + {name: "exec flag", args: "log --exec=malicious"}, + {name: "ext-diff flag", args: "diff --ext-diff"}, + {name: "upload-pack", args: "fetch --upload-pack=evil"}, + {name: "config flag", args: "log -c core.pager=evil"}, + {name: "config equals", args: "log --config=evil"}, + {name: "git prefix exec flag", args: "git log --exec=malicious"}, + {name: "git prefix ext-diff", args: "git diff --ext-diff"}, + {name: "git prefix upload-pack", args: "git fetch --upload-pack=evil"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := executor.git(map[string]interface{}{ + "args": tt.args, + }) + require.Contains(t, result, "not allowed for security reasons") + }) + } +} + +func TestGit_StripGitPrefix(t *testing.T) { + dir := t.TempDir() + executor := newToolExecutor(dir) + + // "git status" should be treated the same as "status" + result := executor.git(map[string]interface{}{ + "args": "git status", + }) + require.NotContains(t, result, "is not allowed") + + // "git push" should still be blocked + result = executor.git(map[string]interface{}{ + "args": "git push", + }) + require.Contains(t, result, "is not allowed") + + // bare "git" with nothing after should error + result = executor.git(map[string]interface{}{ + "args": "git", + }) + require.Contains(t, result, "empty git command") +} + +func TestGit_BlockedSubcommand(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + result := executor.git(map[string]interface{}{ + "args": "push origin main", + }) + require.Contains(t, result, "not allowed") +} + +func TestGit_AllowedSubcommands(t *testing.T) { + for subcmd := range allowedGitSubcommands { + t.Run(subcmd, func(t *testing.T) { + // Just verify the subcommand is not rejected by our allowlist check. + // The git command itself may fail (not a real repo) but the error + // should come from git, not from our validation. + dir := t.TempDir() + executor := newToolExecutor(dir) + + result := executor.git(map[string]interface{}{ + "args": subcmd, + }) + require.NotContains(t, result, "is not allowed") + }) + } +} diff --git a/pkg/llmclient/testdata/fs_access/config.go b/pkg/llmclient/testdata/fs_access/config.go new file mode 100644 index 00000000..5389b82a --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/config.go @@ -0,0 +1,42 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +type Config struct { + DatabaseURL string `json:"database_url"` + Port int `json:"port"` + LogLevel string `json:"log_level"` + PluginDir string `json:"plugin_dir"` +} + +func LoadConfig(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading config %s: %w", path, err) + } + + var cfg Config + if err := json.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("parsing config: %w", err) + } + + return &cfg, nil +} + +func (c *Config) PluginPath(name string) string { + return filepath.Join(c.PluginDir, name) +} + +func DefaultConfig() *Config { + return &Config{ + DatabaseURL: "postgres://localhost:5432/plugins", + Port: 8080, + LogLevel: "info", + PluginDir: "/var/lib/plugins", + } +} diff --git a/pkg/llmclient/testdata/fs_access/logger.go b/pkg/llmclient/testdata/fs_access/logger.go new file mode 100644 index 00000000..1f14c85d --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/logger.go @@ -0,0 +1,40 @@ +package main + +import ( + "fmt" + "os" + "time" +) + +type Logger struct { + file *os.File + prefix string +} + +func NewLogger(path, prefix string) (*Logger, error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return nil, fmt.Errorf("opening log file: %w", err) + } + return &Logger{file: f, prefix: prefix}, nil +} + +func (l *Logger) Info(msg string) { + l.write("INFO", msg) +} + +func (l *Logger) Error(msg string) { + l.write("ERROR", msg) +} + +func (l *Logger) write(level, msg string) { + ts := time.Now().Format(time.RFC3339) + fmt.Fprintf(l.file, "%s [%s] %s: %s\n", ts, level, l.prefix, msg) +} + +func (l *Logger) Close() error { + if l.file != nil { + return l.file.Close() + } + return nil +} diff --git a/pkg/llmclient/testdata/fs_access/main.go b/pkg/llmclient/testdata/fs_access/main.go new file mode 100644 index 00000000..39c7f249 --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/main.go @@ -0,0 +1,15 @@ +package main + +import ( + "fmt" + "os" +) + +func main() { + data, err := os.ReadFile("config.txt") + if err != nil { + fmt.Println("Error reading file:", err) + return + } + fmt.Println(string(data)) +} diff --git a/pkg/llmclient/testdata/fs_access/scanner.go b/pkg/llmclient/testdata/fs_access/scanner.go new file mode 100644 index 00000000..2a9403cb --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/scanner.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +type PluginScanner struct { + baseDir string + extensions []string +} + +func NewPluginScanner(baseDir string) *PluginScanner { + return &PluginScanner{ + baseDir: baseDir, + extensions: []string{".so", ".dll", ".dylib"}, + } +} + +func (s *PluginScanner) Scan() ([]string, error) { + var plugins []string + + entries, err := os.ReadDir(s.baseDir) + if err != nil { + return nil, fmt.Errorf("scanning plugin directory %s: %w", s.baseDir, err) + } + + for _, entry := range entries { + if entry.IsDir() { + continue + } + for _, ext := range s.extensions { + if strings.HasSuffix(entry.Name(), ext) { + plugins = append(plugins, filepath.Join(s.baseDir, entry.Name())) + } + } + } + + return plugins, nil +} + +func (s *PluginScanner) Exists(name string) bool { + path := filepath.Join(s.baseDir, name) + info, err := os.Stat(path) + if err != nil { + return false + } + return !info.IsDir() +} diff --git a/pkg/llmclient/testdata/fs_access/storage/cache.go b/pkg/llmclient/testdata/fs_access/storage/cache.go new file mode 100644 index 00000000..72c1bcf8 --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/storage/cache.go @@ -0,0 +1,49 @@ +package storage + +import ( + "sync" + "time" +) + +type CacheEntry struct { + Value interface{} + ExpiresAt time.Time +} + +type MemoryCache struct { + mu sync.RWMutex + entries map[string]CacheEntry +} + +func NewMemoryCache() *MemoryCache { + return &MemoryCache{ + entries: make(map[string]CacheEntry), + } +} + +func (c *MemoryCache) Get(key string) (interface{}, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + + entry, ok := c.entries[key] + if !ok || time.Now().After(entry.ExpiresAt) { + return nil, false + } + return entry.Value, true +} + +func (c *MemoryCache) Set(key string, value interface{}, ttl time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + + c.entries[key] = CacheEntry{ + Value: value, + ExpiresAt: time.Now().Add(ttl), + } +} + +func (c *MemoryCache) Evict(key string) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, key) +} diff --git a/pkg/llmclient/testdata/fs_access/storage/disk.go b/pkg/llmclient/testdata/fs_access/storage/disk.go new file mode 100644 index 00000000..5cd2748f --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/storage/disk.go @@ -0,0 +1,66 @@ +package storage + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +type DiskStore struct { + baseDir string +} + +func NewDiskStore(baseDir string) (*DiskStore, error) { + if err := os.MkdirAll(baseDir, 0755); err != nil { + return nil, fmt.Errorf("creating storage directory: %w", err) + } + return &DiskStore{baseDir: baseDir}, nil +} + +func (d *DiskStore) Save(key string, value interface{}) error { + data, err := json.MarshalIndent(value, "", " ") + if err != nil { + return fmt.Errorf("marshaling data: %w", err) + } + + path := filepath.Join(d.baseDir, key+".json") + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("writing file %s: %w", path, err) + } + + return nil +} + +func (d *DiskStore) Load(key string, dest interface{}) error { + path := filepath.Join(d.baseDir, key+".json") + data, err := os.ReadFile(path) + if err != nil { + return fmt.Errorf("reading file %s: %w", path, err) + } + + return json.Unmarshal(data, dest) +} + +func (d *DiskStore) Delete(key string) error { + path := filepath.Join(d.baseDir, key+".json") + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("removing file %s: %w", path, err) + } + return nil +} + +func (d *DiskStore) List() ([]string, error) { + entries, err := os.ReadDir(d.baseDir) + if err != nil { + return nil, fmt.Errorf("listing storage: %w", err) + } + + var keys []string + for _, e := range entries { + if !e.IsDir() && filepath.Ext(e.Name()) == ".json" { + keys = append(keys, e.Name()[:len(e.Name())-5]) + } + } + return keys, nil +} diff --git a/pkg/llmclient/testdata/fs_access/validator.go b/pkg/llmclient/testdata/fs_access/validator.go new file mode 100644 index 00000000..6b99c23c --- /dev/null +++ b/pkg/llmclient/testdata/fs_access/validator.go @@ -0,0 +1,66 @@ +package main + +import ( + "fmt" + "strings" +) + +type ValidationResult struct { + Valid bool `json:"valid"` + Errors []string `json:"errors"` + Warnings []string `json:"warnings"` +} + +type Validator struct { + rules []ValidationRule +} + +type ValidationRule struct { + Name string + Check func(string) error +} + +func NewValidator() *Validator { + return &Validator{ + rules: []ValidationRule{ + {Name: "not_empty", Check: checkNotEmpty}, + {Name: "valid_version", Check: checkVersion}, + {Name: "no_spaces", Check: checkNoSpaces}, + }, + } +} + +func (v *Validator) Validate(pluginName string) ValidationResult { + result := ValidationResult{Valid: true} + + for _, rule := range v.rules { + if err := rule.Check(pluginName); err != nil { + result.Valid = false + result.Errors = append(result.Errors, fmt.Sprintf("%s: %v", rule.Name, err)) + } + } + + return result +} + +func checkNotEmpty(name string) error { + if strings.TrimSpace(name) == "" { + return fmt.Errorf("plugin name cannot be empty") + } + return nil +} + +func checkVersion(name string) error { + parts := strings.Split(name, "-") + if len(parts) < 2 { + return fmt.Errorf("plugin name must include version suffix (e.g., plugin-1.0.0)") + } + return nil +} + +func checkNoSpaces(name string) error { + if strings.Contains(name, " ") { + return fmt.Errorf("plugin name cannot contain spaces") + } + return nil +} diff --git a/pkg/llmclient/testdata/no_fs_access/handlers/data.go b/pkg/llmclient/testdata/no_fs_access/handlers/data.go new file mode 100644 index 00000000..3468bd1d --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/handlers/data.go @@ -0,0 +1,49 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "strings" +) + +type DataRequest struct { + Query string `json:"query"` + Fields []string `json:"fields"` + Limit int `json:"limit"` +} + +type DataResponse struct { + Results []map[string]interface{} `json:"results"` + Total int `json:"total"` +} + +func DataHandler(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req DataRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + if req.Limit <= 0 || req.Limit > 100 { + req.Limit = 10 + } + + query := strings.TrimSpace(req.Query) + if query == "" { + http.Error(w, "query is required", http.StatusBadRequest) + return + } + + resp := DataResponse{ + Results: []map[string]interface{}{}, + Total: 0, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} diff --git a/pkg/llmclient/testdata/no_fs_access/handlers/health.go b/pkg/llmclient/testdata/no_fs_access/handlers/health.go new file mode 100644 index 00000000..7e83b940 --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/handlers/health.go @@ -0,0 +1,25 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "runtime" +) + +type HealthResponse struct { + Status string `json:"status"` + Version string `json:"version"` + GoVer string `json:"go_version"` +} + +func HealthHandler(version string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + resp := HealthResponse{ + Status: "ok", + Version: version, + GoVer: runtime.Version(), + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) + } +} diff --git a/pkg/llmclient/testdata/no_fs_access/main.go b/pkg/llmclient/testdata/no_fs_access/main.go new file mode 100644 index 00000000..f7b60bde --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/main.go @@ -0,0 +1,7 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello, world!") +} diff --git a/pkg/llmclient/testdata/no_fs_access/models.go b/pkg/llmclient/testdata/no_fs_access/models.go new file mode 100644 index 00000000..6ec92f79 --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/models.go @@ -0,0 +1,55 @@ +package main + +import ( + "fmt" + "time" +) + +type User struct { + ID string `json:"id"` + Name string `json:"name"` + Email string `json:"email"` + CreatedAt time.Time `json:"created_at"` + Tags []string `json:"tags"` +} + +type Plugin struct { + ID string `json:"id"` + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Author User `json:"author"` + Metadata map[string]string `json:"metadata"` +} + +func (u User) String() string { + return fmt.Sprintf("User{id=%s, name=%s, email=%s}", u.ID, u.Name, u.Email) +} + +func (p Plugin) String() string { + return fmt.Sprintf("Plugin{id=%s, name=%s, version=%s}", p.ID, p.Name, p.Version) +} + +func (p Plugin) HasMetadata(key string) bool { + _, ok := p.Metadata[key] + return ok +} + +func NewUser(name, email string) User { + return User{ + ID: generateID(), + Name: name, + Email: email, + CreatedAt: time.Now(), + } +} + +func NewPlugin(name, version string, author User) Plugin { + return Plugin{ + ID: generateID(), + Name: name, + Version: version, + Author: author, + Metadata: make(map[string]string), + } +} diff --git a/pkg/llmclient/testdata/no_fs_access/server.go b/pkg/llmclient/testdata/no_fs_access/server.go new file mode 100644 index 00000000..513ac051 --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/server.go @@ -0,0 +1,69 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "sync" + "time" +) + +type Server struct { + mu sync.RWMutex + cache map[string]CacheEntry + handlers map[string]http.HandlerFunc +} + +type CacheEntry struct { + Value interface{} + ExpiresAt time.Time +} + +func NewServer() *Server { + return &Server{ + cache: make(map[string]CacheEntry), + handlers: make(map[string]http.HandlerFunc), + } +} + +func (s *Server) Get(key string) (interface{}, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + entry, ok := s.cache[key] + if !ok || time.Now().After(entry.ExpiresAt) { + return nil, false + } + return entry.Value, true +} + +func (s *Server) Set(key string, value interface{}, ttl time.Duration) { + s.mu.Lock() + defer s.mu.Unlock() + + s.cache[key] = CacheEntry{ + Value: value, + ExpiresAt: time.Now().Add(ttl), + } +} + +func (s *Server) HandleHealth(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) +} + +func (s *Server) HandleData(w http.ResponseWriter, r *http.Request) { + key := r.URL.Query().Get("key") + if key == "" { + http.Error(w, "missing key parameter", http.StatusBadRequest) + return + } + + if val, ok := s.Get(key); ok { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(val) + return + } + + http.Error(w, fmt.Sprintf("key %q not found", key), http.StatusNotFound) +} diff --git a/pkg/llmclient/testdata/no_fs_access/utils.go b/pkg/llmclient/testdata/no_fs_access/utils.go new file mode 100644 index 00000000..530205d5 --- /dev/null +++ b/pkg/llmclient/testdata/no_fs_access/utils.go @@ -0,0 +1,50 @@ +package main + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "math/rand" + "strings" + "time" +) + +func hash(input string) string { + h := sha256.New() + h.Write([]byte(input)) + return hex.EncodeToString(h.Sum(nil)) +} + +func generateID() string { + src := rand.NewSource(time.Now().UnixNano()) + r := rand.New(src) + return fmt.Sprintf("%08x", r.Uint32()) +} + +func truncate(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + return s[:maxLen] + "..." +} + +func contains(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + +func joinWithComma(items []string) string { + return strings.Join(items, ", ") +} + +func repeatString(s string, count int) string { + var b strings.Builder + for i := 0; i < count; i++ { + b.WriteString(s) + } + return b.String() +} From f8c55003dca92c7d2cf4e15c0450d3917bf5bfa7 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:15:32 +0100 Subject: [PATCH 2/7] pass options to constructor --- pkg/llmclient/agentic_client.go | 39 ++++++++------ .../agentic_client_integration_test.go | 52 ++++++++++--------- 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index a9ca0963..241a25bf 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -36,38 +36,47 @@ type AgenticCallOptions struct { // AgenticClient is an interface for agentic LLM interactions type AgenticClient interface { - CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) + CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) } // agenticClientImpl implements AgenticClient -type agenticClientImpl struct{} - -// NewAgenticClient creates a new AgenticClient -func NewAgenticClient() AgenticClient { - return &agenticClientImpl{} +type agenticClientImpl struct { + apiKey string + model string + provider string } -// CallLLM executes an agentic loop with tools to answer questions about code. -// The prompt may contain multiple questions, in which case the agent will call -// submit_answer multiple times. All answers are collected and returned. -func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string, opts *AgenticCallOptions) ([]AnswerSchema, error) { +// NewAgenticClient creates a new AgenticClient with the given options +func NewAgenticClient(opts *AgenticCallOptions) (AgenticClient, error) { if opts == nil { return nil, fmt.Errorf("options are required") } - if opts.APIKey == "" { return nil, fmt.Errorf("API key is required") } - if opts.Model == "" { return nil, fmt.Errorf("model is required") } - if opts.Provider == "" { return nil, fmt.Errorf("provider is required") } + return &agenticClientImpl{ + apiKey: opts.APIKey, + model: opts.Model, + provider: opts.Provider, + }, nil +} - // Initialize LLM based on provider +// CallLLM executes an agentic loop with tools to answer questions about code. +// The prompt may contain multiple questions, in which case the agent will call +// submit_answer multiple times. All answers are collected and returned. +func (c *agenticClientImpl) CallLLM(ctx context.Context, prompt, repositoryPath string) ([]AnswerSchema, error) { + // Initialize LLM based on provider using the client's configured settings + opts := &AgenticCallOptions{ + APIKey: c.apiKey, + Model: c.model, + Provider: c.provider, + } llm, err := initLLM(ctx, opts) if err != nil { return nil, fmt.Errorf("failed to initialize LLM: %w", err) @@ -114,7 +123,7 @@ When ready, use submit_answer. For multiple questions, call submit_answer once p printDebugLogPath() debugLog("\n\n\n") debugLog("################################################################") - debugLog("# NEW CallLLM - provider=%s model=%s", opts.Provider, opts.Model) + debugLog("# NEW CallLLM - provider=%s model=%s", c.provider, c.model) debugLog("# repo=%s", repositoryPath) debugLog("# prompt=%s", truncateString(prompt, 200)) debugLog("################################################################") diff --git a/pkg/llmclient/agentic_client_integration_test.go b/pkg/llmclient/agentic_client_integration_test.go index 3c8035ac..549aa77a 100644 --- a/pkg/llmclient/agentic_client_integration_test.go +++ b/pkg/llmclient/agentic_client_integration_test.go @@ -26,20 +26,21 @@ func TestAgenticClient_NoFilesystemAccess(t *testing.T) { t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") } - client := NewAgenticClient() - - testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) - require.NoError(t, err) - opts := &AgenticCallOptions{ Provider: "google", Model: "gemini-2.0-flash", APIKey: os.Getenv("GEMINI_API_KEY"), } + client, err := NewAgenticClient(opts) + require.NoError(t, err) + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) + require.NoError(t, err) + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." - answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + answers, err := client.CallLLM(context.Background(), prompt, testDataPath) logme.DebugFln("Agent answers:") prettyprint.Print(answers) @@ -64,20 +65,21 @@ func TestAgenticClient_FilesystemAccess(t *testing.T) { t.Skip("GEMINI_API_KEY not set, skipping agentic client integration test") } - client := NewAgenticClient() - - testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) - require.NoError(t, err) - opts := &AgenticCallOptions{ Provider: "google", Model: "gemini-2.0-flash", APIKey: os.Getenv("GEMINI_API_KEY"), } + client, err := NewAgenticClient(opts) + require.NoError(t, err) + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." - answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + answers, err := client.CallLLM(context.Background(), prompt, testDataPath) logme.DebugFln("Agent answers:") prettyprint.Print(answers) @@ -101,20 +103,21 @@ func TestAgenticClient_NoFilesystemAccess_Anthropic(t *testing.T) { t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") } - client := NewAgenticClient() - - testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) - require.NoError(t, err) - opts := &AgenticCallOptions{ Provider: "anthropic", Model: "claude-sonnet-4-5", APIKey: os.Getenv("ANTHROPIC_API_KEY"), } + client, err := NewAgenticClient(opts) + require.NoError(t, err) + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "no_fs_access")) + require.NoError(t, err) + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." - answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + answers, err := client.CallLLM(context.Background(), prompt, testDataPath) logme.DebugFln("Agent answers:") prettyprint.Print(answers) @@ -138,20 +141,21 @@ func TestAgenticClient_FilesystemAccess_Anthropic(t *testing.T) { t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic agentic client integration test") } - client := NewAgenticClient() - - testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) - require.NoError(t, err) - opts := &AgenticCallOptions{ Provider: "anthropic", Model: "claude-sonnet-4-5", APIKey: os.Getenv("ANTHROPIC_API_KEY"), } + client, err := NewAgenticClient(opts) + require.NoError(t, err) + + testDataPath, err := filepath.Abs(filepath.Join("testdata", "fs_access")) + require.NoError(t, err) + prompt := "Does this application access the filesystem (read or write files)? Examine the code to determine if it performs any file I/O operations." - answers, err := client.CallLLM(context.Background(), prompt, testDataPath, opts) + answers, err := client.CallLLM(context.Background(), prompt, testDataPath) logme.DebugFln("Agent answers:") prettyprint.Print(answers) From e055013ac072d17d5548374769a9f96fdbdc51e3 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:19:15 +0100 Subject: [PATCH 3/7] protect symlinks --- pkg/llmclient/agentic_tools.go | 16 ++++++++++++++++ pkg/llmclient/agentic_tools_test.go | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index 84bc9c33..d412b2e7 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -195,6 +195,22 @@ func (e *toolExecutor) validatePath(path string) (string, string) { return "", fmt.Sprintf("Error resolving repo path: %v", err) } + // Check before resolving symlinks to catch plain traversal (e.g. "../.."). + if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) { + return "", "Error: path is outside the repository" + } + + // Resolve symlinks and check again to prevent a symlink inside the repo + // from pointing to a target outside the repo. + absPath, err = filepath.EvalSymlinks(absPath) + if err != nil { + return "", fmt.Sprintf("Error resolving path: %v", err) + } + absRepo, err = filepath.EvalSymlinks(absRepo) + if err != nil { + return "", fmt.Sprintf("Error resolving repo path: %v", err) + } + if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) { return "", "Error: path is outside the repository" } diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go index d5ad9499..a4fea3be 100644 --- a/pkg/llmclient/agentic_tools_test.go +++ b/pkg/llmclient/agentic_tools_test.go @@ -70,6 +70,24 @@ func TestValidatePath_TraversalBlocked(t *testing.T) { } } +func TestValidatePath_SymlinkEscape(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + // Create a file outside the repo that the symlink will point to. + externalDir := t.TempDir() + externalFile := filepath.Join(externalDir, "secret.txt") + require.NoError(t, os.WriteFile(externalFile, []byte("secret"), 0644)) + + // Place a symlink inside the repo pointing to the external file. + symlinkPath := filepath.Join(dir, "escape.txt") + require.NoError(t, os.Symlink(externalFile, symlinkPath)) + + absPath, errMsg := executor.validatePath("escape.txt") + require.Empty(t, absPath) + require.Contains(t, errMsg, "outside the repository") +} + func TestReadFile_PathTraversal(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) From da909cddc8877b4f3596e8a99018d2271713ff7c Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:20:56 +0100 Subject: [PATCH 4/7] use a list from subcommands instead of hardcoded one --- pkg/llmclient/agentic_tools.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index d412b2e7..ecc15744 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -3,9 +3,11 @@ package llmclient import ( "encoding/json" "fmt" + "maps" "os" "os/exec" "path/filepath" + "slices" "strings" "github.com/tmc/langchaingo/llms" @@ -330,7 +332,8 @@ func (e *toolExecutor) git(args map[string]interface{}) string { subcommand := parts[0] if !allowedGitSubcommands[subcommand] { - return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: log, show, diff, status, ls-files, blame, rev-parse, cat-file, checkout, fetch, pull, branch, tag", subcommand) + allowed := strings.Join(slices.Sorted(maps.Keys(allowedGitSubcommands)), ", ") + return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed) } // Check for flags that could execute arbitrary commands From 8ca35c7081151a22f77db8fd4d7b8054eb804bab Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:22:43 +0100 Subject: [PATCH 5/7] validate if it is a text file to return it --- pkg/llmclient/agentic_tools.go | 5 +++++ pkg/llmclient/agentic_tools_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index ecc15744..5fcf7990 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -9,6 +9,7 @@ import ( "path/filepath" "slices" "strings" + "unicode/utf8" "github.com/tmc/langchaingo/llms" ) @@ -246,6 +247,10 @@ func (e *toolExecutor) readFile(args map[string]interface{}) string { return fmt.Sprintf("Error reading file: %v", err) } + if !utf8.Valid(content) { + return "Error: file is not a text file" + } + return string(content) } diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go index a4fea3be..a435e0ca 100644 --- a/pkg/llmclient/agentic_tools_test.go +++ b/pkg/llmclient/agentic_tools_test.go @@ -98,6 +98,33 @@ func TestReadFile_PathTraversal(t *testing.T) { require.Contains(t, result, "outside the repository") } +func TestReadFile_BinaryFile(t *testing.T) { + dir := t.TempDir() + executor := newToolExecutor(dir) + + tests := []struct { + name string + content []byte + }{ + { + name: "binary file with invalid utf-8", + content: []byte{0xff, 0xfe, 0x00, 0x01, 0x02, 0x03}, + }, + { + name: "png image magic bytes", + content: []byte{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.NoError(t, os.WriteFile(filepath.Join(dir, "binary.bin"), tt.content, 0644)) + result := executor.readFile(map[string]interface{}{"path": "binary.bin"}) + require.Contains(t, result, "not a text file") + }) + } +} + func TestReadFile_MaxFileSize(t *testing.T) { dir := t.TempDir() executor := newToolExecutor(dir) From 7afbcd4ec3193ab569550cf100f4f8be098d38e6 Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:32:58 +0100 Subject: [PATCH 6/7] return errors in tools --- pkg/llmclient/agentic_tools.go | 109 ++++++++++++++++------------ pkg/llmclient/agentic_tools_test.go | 105 +++++++++++++++++---------- 2 files changed, 128 insertions(+), 86 deletions(-) diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index 5fcf7990..baf31569 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -2,6 +2,7 @@ package llmclient import ( "encoding/json" + "errors" "fmt" "maps" "os" @@ -170,106 +171,114 @@ func (e *toolExecutor) execute(toolName, argsJSON string) string { return fmt.Sprintf("Error parsing arguments: %v", err) } + var ( + result string + err error + ) switch toolName { case "read_file": - return e.readFile(args) + result, err = e.readFile(args) case "list_directory": - return e.listDirectory(args) + result, err = e.listDirectory(args) case "grep": - return e.grep(args) + result, err = e.grep(args) case "git": - return e.git(args) + result, err = e.git(args) default: return fmt.Sprintf("Unknown tool: %s", toolName) } + if err != nil { + return fmt.Sprintf("Error: %v", err) + } + return result } // validatePath checks that the resolved path is within the repository directory. -// Returns the validated absolute path or an error string. -func (e *toolExecutor) validatePath(path string) (string, string) { +// Returns the validated absolute path or an error. +func (e *toolExecutor) validatePath(path string) (string, error) { fullPath := filepath.Join(e.repoPath, path) absPath, err := filepath.Abs(fullPath) if err != nil { - return "", fmt.Sprintf("Error resolving path: %v", err) + return "", fmt.Errorf("resolving path: %w", err) } absRepo, err := filepath.Abs(e.repoPath) if err != nil { - return "", fmt.Sprintf("Error resolving repo path: %v", err) + return "", fmt.Errorf("resolving repo path: %w", err) } // Check before resolving symlinks to catch plain traversal (e.g. "../.."). if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) { - return "", "Error: path is outside the repository" + return "", errors.New("path is outside the repository") } // Resolve symlinks and check again to prevent a symlink inside the repo // from pointing to a target outside the repo. absPath, err = filepath.EvalSymlinks(absPath) if err != nil { - return "", fmt.Sprintf("Error resolving path: %v", err) + return "", fmt.Errorf("resolving path: %w", err) } absRepo, err = filepath.EvalSymlinks(absRepo) if err != nil { - return "", fmt.Sprintf("Error resolving repo path: %v", err) + return "", fmt.Errorf("resolving repo path: %w", err) } if absPath != absRepo && !strings.HasPrefix(absPath, absRepo+string(os.PathSeparator)) { - return "", "Error: path is outside the repository" + return "", errors.New("path is outside the repository") } - return absPath, "" + return absPath, nil } -func (e *toolExecutor) readFile(args map[string]interface{}) string { +func (e *toolExecutor) readFile(args map[string]interface{}) (string, error) { path, ok := args["path"].(string) if !ok { - return "Error: path is required" + return "", errors.New("path is required") } - fullPath, errMsg := e.validatePath(path) - if errMsg != "" { - return errMsg + fullPath, err := e.validatePath(path) + if err != nil { + return "", fmt.Errorf("validate path: %w", err) } debugLog("AgenticClient: read_file %s", fullPath) info, err := os.Stat(fullPath) if err != nil { - return fmt.Sprintf("Error reading file: %v", err) + return "", fmt.Errorf("reading file: %w", err) } if info.Size() > maxFileSize { - return fmt.Sprintf("Error: file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content.", info.Size(), maxFileSize) + return "", fmt.Errorf("file is too large (%d bytes, limit is %d bytes). Try reading a smaller file or use grep to find specific content", info.Size(), maxFileSize) } content, err := os.ReadFile(fullPath) if err != nil { - return fmt.Sprintf("Error reading file: %v", err) + return "", fmt.Errorf("reading file: %w", err) } if !utf8.Valid(content) { - return "Error: file is not a text file" + return "", errors.New("file is not a text file") } - return string(content) + return string(content), nil } -func (e *toolExecutor) listDirectory(args map[string]interface{}) string { +func (e *toolExecutor) listDirectory(args map[string]interface{}) (string, error) { path, ok := args["path"].(string) if !ok { path = "." } - fullPath, errMsg := e.validatePath(path) - if errMsg != "" { - return errMsg + fullPath, err := e.validatePath(path) + if err != nil { + return "", fmt.Errorf("validate path: %w", err) } debugLog("AgenticClient: list_directory %s", fullPath) entries, err := os.ReadDir(fullPath) if err != nil { - return fmt.Sprintf("Error listing directory: %v", err) + return "", fmt.Errorf("listing directory: %w", err) } var result strings.Builder @@ -281,13 +290,13 @@ func (e *toolExecutor) listDirectory(args map[string]interface{}) string { } } - return result.String() + return result.String(), nil } -func (e *toolExecutor) grep(args map[string]interface{}) string { +func (e *toolExecutor) grep(args map[string]interface{}) (string, error) { pattern, ok := args["pattern"].(string) if !ok || pattern == "" { - return "Error: pattern is required" + return "", errors.New("pattern is required") } path := "." @@ -295,9 +304,9 @@ func (e *toolExecutor) grep(args map[string]interface{}) string { path = p } - fullPath, errMsg := e.validatePath(path) - if errMsg != "" { - return errMsg + fullPath, err := e.validatePath(path) + if err != nil { + return "", fmt.Errorf("validate path: %w", err) } debugLog("AgenticClient: grep '%s' in %s", pattern, fullPath) @@ -306,31 +315,33 @@ func (e *toolExecutor) grep(args map[string]interface{}) string { cmd := exec.Command("grep", "-rn", "--", pattern, fullPath) output, err := cmd.CombinedOutput() if err != nil { - // grep returns exit code 1 if no matches found - if len(output) == 0 { - return "No matches found" + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + // Exit code 1 means no matches found, not a real error. + return "No matches found", nil } + return "", fmt.Errorf("grep failed: %w\nOutput: %s", err, output) } - return string(output) + return string(output), nil } -func (e *toolExecutor) git(args map[string]interface{}) string { +func (e *toolExecutor) git(args map[string]interface{}) (string, error) { argsStr, ok := args["args"].(string) if !ok || argsStr == "" { - return "Error: git args are required" + return "", errors.New("git args are required") } parts := strings.Fields(argsStr) if len(parts) == 0 { - return "Error: empty git command" + return "", errors.New("empty git command") } // Strip leading "git" if the LLM included it (e.g. "git diff" instead of "diff") if parts[0] == "git" { parts = parts[1:] if len(parts) == 0 { - return "Error: empty git command" + return "", errors.New("empty git command") } } @@ -338,14 +349,18 @@ func (e *toolExecutor) git(args map[string]interface{}) string { if !allowedGitSubcommands[subcommand] { allowed := strings.Join(slices.Sorted(maps.Keys(allowedGitSubcommands)), ", ") - return fmt.Sprintf("Error: git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed) + return "", fmt.Errorf("git subcommand '%s' is not allowed. Allowed commands: %s", subcommand, allowed) } // Check for flags that could execute arbitrary commands for _, arg := range parts[1:] { for _, blocked := range blockedGitFlags { - if arg == blocked || strings.HasPrefix(arg, blocked+"=") { - return fmt.Sprintf("Error: git flag '%s' is not allowed for security reasons", arg) + exactOrLongValue := arg == blocked || strings.HasPrefix(arg, blocked+"=") + // Short flags (e.g. -c) also accept a concatenated value with no separator: + // git -ccore.pager=evil. Match any arg that starts with the flag token. + shortConcatenated := len(blocked) == 2 && blocked[0] == '-' && blocked[1] != '-' && strings.HasPrefix(arg, blocked) + if exactOrLongValue || shortConcatenated { + return "", fmt.Errorf("git flag '%s' is not allowed for security reasons", arg) } } } @@ -356,8 +371,8 @@ func (e *toolExecutor) git(args map[string]interface{}) string { cmd.Dir = e.repoPath output, err := cmd.CombinedOutput() if err != nil { - return fmt.Sprintf("Error executing git: %v\nOutput: %s", err, string(output)) + return "", fmt.Errorf("executing git: %w\nOutput: %s", err, string(output)) } - return string(output) + return string(output), nil } diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go index a435e0ca..679e057a 100644 --- a/pkg/llmclient/agentic_tools_test.go +++ b/pkg/llmclient/agentic_tools_test.go @@ -27,21 +27,20 @@ func TestValidatePath_WithinRepo(t *testing.T) { executor := newToolExecutor(dir) tests := []struct { - name string - path string - wantErr bool + name string + path string }{ - {name: "root dot", path: ".", wantErr: false}, - {name: "file at root", path: "main.go", wantErr: false}, - {name: "nested file", path: "src/lib.go", wantErr: false}, - {name: "deeply nested", path: "src/nested/deep.go", wantErr: false}, - {name: "directory", path: "src", wantErr: false}, + {name: "root dot", path: "."}, + {name: "file at root", path: "main.go"}, + {name: "nested file", path: "src/lib.go"}, + {name: "deeply nested", path: "src/nested/deep.go"}, + {name: "directory", path: "src"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - absPath, errMsg := executor.validatePath(tt.path) - require.Empty(t, errMsg) + absPath, err := executor.validatePath(tt.path) + require.NoError(t, err) require.NotEmpty(t, absPath) }) } @@ -63,9 +62,9 @@ func TestValidatePath_TraversalBlocked(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - absPath, errMsg := executor.validatePath(tt.path) + absPath, err := executor.validatePath(tt.path) require.Empty(t, absPath) - require.Contains(t, errMsg, "outside the repository") + require.ErrorContains(t, err, "outside the repository") }) } } @@ -83,19 +82,19 @@ func TestValidatePath_SymlinkEscape(t *testing.T) { symlinkPath := filepath.Join(dir, "escape.txt") require.NoError(t, os.Symlink(externalFile, symlinkPath)) - absPath, errMsg := executor.validatePath("escape.txt") + absPath, err := executor.validatePath("escape.txt") require.Empty(t, absPath) - require.Contains(t, errMsg, "outside the repository") + require.ErrorContains(t, err, "outside the repository") } func TestReadFile_PathTraversal(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) - result := executor.readFile(map[string]interface{}{ + _, err := executor.readFile(map[string]interface{}{ "path": "../../etc/passwd", }) - require.Contains(t, result, "outside the repository") + require.ErrorContains(t, err, "outside the repository") } func TestReadFile_BinaryFile(t *testing.T) { @@ -119,8 +118,8 @@ func TestReadFile_BinaryFile(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, "binary.bin"), tt.content, 0644)) - result := executor.readFile(map[string]interface{}{"path": "binary.bin"}) - require.Contains(t, result, "not a text file") + _, err := executor.readFile(map[string]interface{}{"path": "binary.bin"}) + require.ErrorContains(t, err, "not a text file") }) } } @@ -133,18 +132,19 @@ func TestReadFile_MaxFileSize(t *testing.T) { bigContent := strings.Repeat("x", maxFileSize+1) require.NoError(t, os.WriteFile(filepath.Join(dir, "big.txt"), []byte(bigContent), 0644)) - result := executor.readFile(map[string]interface{}{ + _, err := executor.readFile(map[string]interface{}{ "path": "big.txt", }) - require.Contains(t, result, "file is too large") + require.ErrorContains(t, err, "file is too large") // A file under the limit should work fine smallContent := "hello world" require.NoError(t, os.WriteFile(filepath.Join(dir, "small.txt"), []byte(smallContent), 0644)) - result = executor.readFile(map[string]interface{}{ + result, err := executor.readFile(map[string]interface{}{ "path": "small.txt", }) + require.NoError(t, err) require.Equal(t, smallContent, result) } @@ -152,21 +152,21 @@ func TestListDirectory_PathTraversal(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) - result := executor.listDirectory(map[string]interface{}{ + _, err := executor.listDirectory(map[string]interface{}{ "path": "../", }) - require.Contains(t, result, "outside the repository") + require.ErrorContains(t, err, "outside the repository") } func TestGrep_PathTraversal(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) - result := executor.grep(map[string]interface{}{ + _, err := executor.grep(map[string]interface{}{ "pattern": "root", "path": "../../etc", }) - require.Contains(t, result, "outside the repository") + require.ErrorContains(t, err, "outside the repository") } func TestGrep_FlagInjection(t *testing.T) { @@ -175,14 +175,35 @@ func TestGrep_FlagInjection(t *testing.T) { // Pattern starting with - should not be interpreted as a flag // thanks to the -- separator - result := executor.grep(map[string]interface{}{ + result, err := executor.grep(map[string]interface{}{ "pattern": "-rn", "path": ".", }) // Should not error with "invalid option", should just return no matches + require.NoError(t, err) require.NotContains(t, result, "invalid option") } +func TestGrep_ExitCodes(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + // Exit code 1: pattern not found — not an error, just no results. + result, err := executor.grep(map[string]interface{}{ + "pattern": "this_pattern_will_never_match_xyz123", + "path": ".", + }) + require.NoError(t, err) + require.Equal(t, "No matches found", result) + + // Exit code 2: invalid regex — should be a real error, not silently swallowed. + _, err = executor.grep(map[string]interface{}{ + "pattern": "[invalid", + "path": ".", + }) + require.Error(t, err) +} + func TestGit_BlockedFlags(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) @@ -196,6 +217,8 @@ func TestGit_BlockedFlags(t *testing.T) { {name: "upload-pack", args: "fetch --upload-pack=evil"}, {name: "config flag", args: "log -c core.pager=evil"}, {name: "config equals", args: "log --config=evil"}, + {name: "config concatenated", args: "log -ccore.pager=evil"}, + {name: "config concatenated ssh", args: "log -ccore.sshCommand=evil"}, {name: "git prefix exec flag", args: "git log --exec=malicious"}, {name: "git prefix ext-diff", args: "git diff --ext-diff"}, {name: "git prefix upload-pack", args: "git fetch --upload-pack=evil"}, @@ -203,10 +226,10 @@ func TestGit_BlockedFlags(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := executor.git(map[string]interface{}{ + _, err := executor.git(map[string]interface{}{ "args": tt.args, }) - require.Contains(t, result, "not allowed for security reasons") + require.ErrorContains(t, err, "not allowed for security reasons") }) } } @@ -215,33 +238,35 @@ func TestGit_StripGitPrefix(t *testing.T) { dir := t.TempDir() executor := newToolExecutor(dir) - // "git status" should be treated the same as "status" - result := executor.git(map[string]interface{}{ + // "git status" should be treated the same as "status" — not blocked by allowlist + _, err := executor.git(map[string]interface{}{ "args": "git status", }) - require.NotContains(t, result, "is not allowed") + if err != nil { + require.NotContains(t, err.Error(), "is not allowed") + } // "git push" should still be blocked - result = executor.git(map[string]interface{}{ + _, err = executor.git(map[string]interface{}{ "args": "git push", }) - require.Contains(t, result, "is not allowed") + require.ErrorContains(t, err, "is not allowed") // bare "git" with nothing after should error - result = executor.git(map[string]interface{}{ + _, err = executor.git(map[string]interface{}{ "args": "git", }) - require.Contains(t, result, "empty git command") + require.ErrorContains(t, err, "empty git command") } func TestGit_BlockedSubcommand(t *testing.T) { dir := setupTestRepo(t) executor := newToolExecutor(dir) - result := executor.git(map[string]interface{}{ + _, err := executor.git(map[string]interface{}{ "args": "push origin main", }) - require.Contains(t, result, "not allowed") + require.ErrorContains(t, err, "not allowed") } func TestGit_AllowedSubcommands(t *testing.T) { @@ -253,10 +278,12 @@ func TestGit_AllowedSubcommands(t *testing.T) { dir := t.TempDir() executor := newToolExecutor(dir) - result := executor.git(map[string]interface{}{ + _, err := executor.git(map[string]interface{}{ "args": subcmd, }) - require.NotContains(t, result, "is not allowed") + if err != nil { + require.NotContains(t, err.Error(), "is not allowed") + } }) } } From a8cfe2605e44b6cd2e739c949c0aa0c077c07ffd Mon Sep 17 00:00:00 2001 From: Esteban Beltran Date: Thu, 19 Feb 2026 14:37:22 +0100 Subject: [PATCH 7/7] execute should reutne error and add tests --- pkg/llmclient/agentic_client.go | 5 ++++- pkg/llmclient/agentic_tools.go | 24 ++++++++---------------- pkg/llmclient/agentic_tools_test.go | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/pkg/llmclient/agentic_client.go b/pkg/llmclient/agentic_client.go index 241a25bf..cef19851 100644 --- a/pkg/llmclient/agentic_client.go +++ b/pkg/llmclient/agentic_client.go @@ -262,7 +262,10 @@ func processToolCall(toolCall llms.ToolCall, index, total, currentAnswerCount in } // Execute other tools - result := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) + result, err := executor.execute(toolCall.FunctionCall.Name, toolCall.FunctionCall.Arguments) + if err != nil { + result = fmt.Sprintf("Error: %v", err) + } debugLog("AgenticClient: tool result: %s", truncateString(result, 300)) return llms.MessageContent{ diff --git a/pkg/llmclient/agentic_tools.go b/pkg/llmclient/agentic_tools.go index baf31569..dd0fd81c 100644 --- a/pkg/llmclient/agentic_tools.go +++ b/pkg/llmclient/agentic_tools.go @@ -164,33 +164,25 @@ func newToolExecutor(repoPath string) *toolExecutor { return &toolExecutor{repoPath: repoPath} } -// execute runs a tool and returns the result as a string -func (e *toolExecutor) execute(toolName, argsJSON string) string { +// execute runs a tool and returns the result or an error. +func (e *toolExecutor) execute(toolName, argsJSON string) (string, error) { var args map[string]interface{} if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { - return fmt.Sprintf("Error parsing arguments: %v", err) + return "", fmt.Errorf("parsing arguments: %w", err) } - var ( - result string - err error - ) switch toolName { case "read_file": - result, err = e.readFile(args) + return e.readFile(args) case "list_directory": - result, err = e.listDirectory(args) + return e.listDirectory(args) case "grep": - result, err = e.grep(args) + return e.grep(args) case "git": - result, err = e.git(args) + return e.git(args) default: - return fmt.Sprintf("Unknown tool: %s", toolName) + return "", fmt.Errorf("unknown tool: %s", toolName) } - if err != nil { - return fmt.Sprintf("Error: %v", err) - } - return result } // validatePath checks that the resolved path is within the repository directory. diff --git a/pkg/llmclient/agentic_tools_test.go b/pkg/llmclient/agentic_tools_test.go index 679e057a..0a46e237 100644 --- a/pkg/llmclient/agentic_tools_test.go +++ b/pkg/llmclient/agentic_tools_test.go @@ -1,6 +1,7 @@ package llmclient import ( + "encoding/json" "os" "path/filepath" "strings" @@ -287,3 +288,20 @@ func TestGit_AllowedSubcommands(t *testing.T) { }) } } + +func TestExecute(t *testing.T) { + dir := setupTestRepo(t) + executor := newToolExecutor(dir) + + t.Run("success", func(t *testing.T) { + args, _ := json.Marshal(map[string]string{"path": "main.go"}) + result, err := executor.execute("read_file", string(args)) + require.NoError(t, err) + require.Contains(t, result, "package main") + }) + + t.Run("unknown tool returns error", func(t *testing.T) { + _, err := executor.execute("nonexistent_tool", "{}") + require.ErrorContains(t, err, "unknown tool") + }) +}