From 7b32e774a4f6dc79cc4f35001e43db18f2ff8227 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 18 May 2026 10:07:45 -0400 Subject: [PATCH 1/4] chore: prepare v0.10.3 release - Bump version to 0.10.3 - Refresh docs/current-state.md for v0.10.3 - Update summary golden files for version bump - Add docs/roadmap-status.md with final v9 status - Add CHANGELOG entry for v0.10.3 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 9 + .../testdata/summary_corrupt_state.golden | 2 +- cmd/axis/testdata/summary_empty_state.golden | 2 +- .../testdata/summary_live_snapshot.golden | 2 +- docs/current-state.md | 12 +- docs/roadmap-status.md | 213 ++++++++++++++++++ internal/buildinfo/version.go | 2 +- 7 files changed, 232 insertions(+), 10 deletions(-) create mode 100644 docs/roadmap-status.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e3bd8d5..02ef3f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## v0.10.3 (2026-05-18) + +### šŸ”§ Maintenance +* Extract `state.Maintain()` from `state.Load()` — eliminates repair-on-read side effect, making `Load()` idempotent and preventing silent `state.json` rewrites on every CLI invocation (PR #126). +* Update summary golden files to reflect version bump. + +### šŸ“š Documentation +* Add `docs/roadmap-status.md` — final status of all 53 v9 roadmap items (48 done, 5 Phase G items blocked by evidence discipline). + ## v0.10.1 (2026-05-06) ### šŸš€ Features diff --git a/cmd/axis/testdata/summary_corrupt_state.golden b/cmd/axis/testdata/summary_corrupt_state.golden index 19706e5..bb21fb8 100644 --- a/cmd/axis/testdata/summary_corrupt_state.golden +++ b/cmd/axis/testdata/summary_corrupt_state.golden @@ -3,7 +3,7 @@ ā•‘ AXIS CLUSTER SUMMARY ā•‘ ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā• - Version: 0.10.2 + Version: 0.10.3 NODES ───────────────────────────────── diff --git a/cmd/axis/testdata/summary_empty_state.golden b/cmd/axis/testdata/summary_empty_state.golden index bda83f2..61a9cb4 100644 --- a/cmd/axis/testdata/summary_empty_state.golden +++ b/cmd/axis/testdata/summary_empty_state.golden @@ -3,7 +3,7 @@ ā•‘ AXIS CLUSTER SUMMARY ā•‘ ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā• - Version: 0.10.2 + Version: 0.10.3 NODES ───────────────────────────────── diff --git a/cmd/axis/testdata/summary_live_snapshot.golden b/cmd/axis/testdata/summary_live_snapshot.golden index 252594d..1cd1a2f 100644 --- a/cmd/axis/testdata/summary_live_snapshot.golden +++ b/cmd/axis/testdata/summary_live_snapshot.golden @@ -7,7 +7,7 @@ STDOUT: ā•‘ AXIS CLUSTER SUMMARY ā•‘ ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā• - Version: 0.10.2 + Version: 0.10.3 NODES ───────────────────────────────── diff --git a/docs/current-state.md b/docs/current-state.md index ca9935c..34c2389 100644 --- a/docs/current-state.md +++ b/docs/current-state.md @@ -11,10 +11,10 @@ Truth rule: no generated output may present itself as cluster truth unless it is Refresh this section with `./hack/refresh-current-state.sh`. -- Refreshed: 2026-05-17 EDT -- Repo version: `0.10.2` +- Refreshed: 2026-05-18 EDT +- Repo version: `0.10.3` - Latest published GitHub release: `v0.10.2` (2026-05-17T19:45:28Z) -- Release truth: repo version matches the latest published release +- Release truth: repo version is ahead of the latest published release ## Executive Summary @@ -146,10 +146,10 @@ Refresh this section with `./hack/refresh-current-state.sh`. - `./hack/coverage-check.sh` -> passes - Coverage gates: - `coverage gate passed: internal/knowledge 90.9% >= 90.0%` - - `coverage gate passed: internal/api 79.9% >= 50.0%` - - `coverage gate passed: internal/mcp 43.4% >= 35.0%` + - `coverage gate passed: internal/api 80.2% >= 50.0%` + - `coverage gate passed: internal/mcp 88.7% >= 35.0%` - `coverage gate passed: internal/ui 94.0% >= 80.0%` - - `coverage gate passed: total 70.4% >= 45.0%` + - `coverage gate passed: total 73.8% >= 45.0%` ## Degraded-State Matrix diff --git a/docs/roadmap-status.md b/docs/roadmap-status.md new file mode 100644 index 0000000..e0c4014 --- /dev/null +++ b/docs/roadmap-status.md @@ -0,0 +1,213 @@ +# AXIS v9 Roadmap — Final Status + +**Date:** 2026-05-18 +**Version:** v0.10.2 → v0.10.3 +**Status:** 48 of 53 items complete (90.6%). 5 Phase G items blocked by "evidence before optimization" discipline. + +--- + +## Summary + +| Phase | Items | Done | Blocked | Completion | +|-------|-------|------|---------|------------| +| A — Authority Audit | 11 | 11 | 0 | 100% | +| B — Production Hardening | 5 | 5 | 0 | 100% | +| C — Surface Clarity + Lifecycle | 7 | 7 | 0 | 100% | +| D — Reservation Operator Visibility | 5 | 5 | 0 | 100% | +| E — Coverage + Benchmarks | 6 | 6 | 0 | 100% | +| F — pprof + Profiling | 3 | 3 | 0 | 100% | +| G — Performance Optimization | 8 | 3 | 5 | 37.5% | +| H — Structural Cleanup | 9 | 9 | 0 | 100% | +| **Total** | **53** | **48** | **5** | **90.6%** | + +--- + +## Phase A — Canonical Authority Audit (100%) + +All 11 authority audits completed with decision documents in `docs/authority-*.md`. + +| ID | Title | Status | +|----|-------|--------| +| AUTH-1 | Audit reservation authority | āœ… Done | +| AUTH-2 | Audit node identity authority | āœ… Done | +| AUTH-3 | Audit freshness authority | āœ… Done | +| AUTH-4 | Audit empirical observation authority | āœ… Done | +| AUTH-5 | Audit execution state authority | āœ… Done | +| AUTH-6 | Audit configuration authority | āœ… Done | +| AUTH-7 | Audit secret/credential authority | āœ… Done | +| AUTH-8 | Audit cache authority | āœ… Done | +| AUTH-9 | Audit observability authority | āœ… Done | +| A-10 | Authority transition protocol | āœ… Done | +| AUTH-11 | Add authority inversion detection | āœ… Done | + +**Deliverables:** 10 `docs/authority-*.md` documents + `docs/authority-transition.md` + `docs/authority-violations.md`. + +--- + +## Phase B — Production Hardening (100%) + +| ID | Title | Status | +|----|-------|--------| +| B-1 | Remove DEBUG printf leaks from execution path | āœ… Done | +| B-2 | Audit all fmt.Printf outside CLI output paths | āœ… Done | +| B-3 | Test cmd/axis/exit.go and audit Fatal() callers | āœ… Done | +| B-4 | Add minimal repair event types | āœ… Done | +| B-5 | Add schema governance foundation | āœ… Done | + +--- + +## Phase C — Surface Clarity + Lifecycle (100%) + +| ID | Title | Status | +|----|-------|--------| +| C-1 | Define lifecycle taxonomy with policy constraints | āœ… Done | +| C-2 | Label every internal package | āœ… Done | +| C-3 | Label every CLI command | āœ… Done | +| C-4 | Label every API route | āœ… Done | +| C-5 | Fix README authority ambiguity | āœ… Done | +| C-6 | Fix stale docs (6 documents) | āœ… Done | +| C-7 | Add lifecycle linting to CI | āœ… Done | + +**Deliverables:** `docs/lifecycle.md`, `hack/lifecycle-check.go`, 34 packages labeled, 33 commands/routes labeled. + +--- + +## Phase D — Reservation Operator Visibility (100%) + +| ID | Title | Status | +|----|-------|--------| +| D-1 | Add `axis reservation list` CLI with --json/--ndjson | āœ… Done | +| D-2 | Add `axis reservation inspect ` CLI with --json/--ndjson | āœ… Done | +| D-3 | Add `axis reservation release ` CLI with --force | āœ… Done | +| D-4 | Document reservation semantics | āœ… Done | +| D-5 | Future: `axis reservation doctor` | āœ… Done (design doc) | + +--- + +## Phase E — Coverage + Benchmarks (100%) + +| ID | Title | Status | +|----|-------|--------| +| E-1 | Add versioncmp tests (0% → 100%) | āœ… Done | +| E-2 | Expand MCP package tests (43.4% → 60%+) | āœ… Done (88.7%) | +| E-3 | Expand facts/local.go edge case coverage | āœ… Done | +| E-4 | Expand execution/guarded.go error branches | āœ… Done | +| E-5 | Add dashboard command contract tests | āœ… Done (90%) | +| E-6 | Add core benchmarks | āœ… Done | + +**Coverage delta:** +| Package | Before | After | +|---------|--------|-------| +| Total | 70.4% | 73.8% | +| MCP | 43.4% | 88.7% | +| Dashboard | 0% | 90% | +| Execution | 60.2% | 79.8% | +| versioncmp | 0% | 100% | + +**Benchmarks:** 10 benchmarks (placement ranking, snapshot assembly, SSH connection reuse). + +--- + +## Phase F — pprof + Profiling (100%) + +| ID | Title | Status | +|----|-------|--------| +| F-1 | Add pprof endpoints to `axis serve` | āœ… Done | +| F-2 | Capture baseline profiles | āœ… Done | +| F-3 | Document profiling workflow | āœ… Done | + +**Deliverable:** `docs/profiling.md`. + +--- + +## Phase G — Performance Optimization (37.5% — 5 blocked) + +| ID | Title | Status | Rationale | +|----|-------|--------|-----------| +| G-1 | Precompute rank keys before placement sort | āœ… Done | ~54% faster for 50-node clusters; justified by benchmark | +| G-2 | Reuse SSH connection across remote fact commands | ā›” Blocked | Within-`Collect` reuse already exists (~15Ɨ per-command speedup). Cross-cycle pooling needs profile evidence showing SSH handshake >5% of total CPU. | +| G-3 | Version-gate state migration logic | āœ… Done | Prevents repeated migration checks; no profile needed | +| G-4 | Investigate execution context size before caching | ā›” Blocked | Anti-pattern warning: caching large contexts is premature without evidence of allocation pressure. | +| G-5 | Double-buffer daemon snapshot | ā›” Blocked | Highest risk. Could introduce correctness bugs in overlay application. Needs pprof evidence of GC pressure from snapshot churn. | +| G-6 | sync.Pool for JSON encoding buffers | ā›” Blocked | Premature without profiles showing JSON encoding as hotspot. | +| G-7 | Pre-lowercase script registry strings | āœ… Done | Micro-optimization; trivial and safe | +| G-8 | Optimize struct field ordering | ā›” Blocked | Micro-optimization. Needs evidence of memory pressure from struct padding. | + +**Blocking principle:** *Evidence before optimization.* Every blocked item requires production CPU/memory profile evidence justifying the change. Without profiles, these optimizations are speculative and risk introducing correctness bugs for marginal gain. + +**G-2 specific note:** SSH connection reuse benchmark (`internal/transport/ssh_bench_test.go`) proved existing within-collect reuse is already optimal. Cross-cycle pooling would add significant complexity (connection lifecycle, health checks, idle timeout, concurrency safety) for negligible benefit in typical deployments (2–5 nodes, 30–60s refresh interval). + +--- + +## Phase H — Structural Cleanup (100%) + +| ID | Title | Status | +|----|-------|--------| +| H-1 | Create `examples/` directory | āœ… Done | +| H-2 | Decide dashboard command fate | āœ… Done | +| H-3 | Decide `/v2/reservations` endpoint fate | āœ… Done | +| H-4 | URGENT: Add mesh disable flag or config gating | āœ… Done | +| H-5 | Compile-gate safety/structured | āœ… Done | +| H-6 | Delete Fatal() after confirming zero callers | āœ… Done | +| H-7 | Deduplicate dashboard UI constants | āœ… Done | +| H-8 | *(no item — numbering gap)* | — | +| H-9 | Future: Strategic consistency-model document | āœ… Done | + +--- + +## Key Metrics at Completion + +| Metric | Before | After | Delta | +|--------|--------|-------|-------| +| Test coverage (total) | 70.4% | 73.8% | +3.4pp | +| MCP coverage | 43.4% | 88.7% | +45.3pp | +| Dashboard coverage | 0% | 90% | +90pp | +| Execution coverage | 60.2% | 79.8% | +19.6pp | +| Benchmarks | 0 | 10 | +10 | +| Authority audit docs | 0 | 13 | +13 | +| Lifecycle labels | 0 | 34 packages + 33 commands/routes | — | +| DEBUG printf leaks | 6 | 0 | -6 | +| Schema versioning | none | `currentStateVersion` in state.go | — | + +--- + +## North Star Preserved + +> *"Every proposed abstraction must be filtered through: Does this reduce or increase operator explainability?"* + +The 5 blocked Phase G items were intentionally stopped because they would have **decreased** operator explainability (hidden connection pools, double-buffered state, sync.Pool lifecycle) without evidence that the current system is insufficient. + +> *"Dead infrastructure is more dangerous than missing infrastructure."* + +All speculative optimizations remain unimplemented. The system is simpler, more explicit, and more deterministic as a result. + +--- + +## What v9 Did NOT Do (Intentionally Out of Scope) + +- Distributed consensus (Raft, Paxos) +- Multi-operator coordination / ACLs +- HA quorum / failover control plane +- WAN mesh reliability at scale +- Autonomous self-modifying agents +- Cloud control plane abstractions (K8s CRDs, serverless) + +AXIS remains: **local-first**, **single-operator**, **pragmatic consistency**, **operator-over-theory**. + +--- + +## Next Phase Recommendation + +No further engineering work is recommended under the v9 roadmap. The 5 blocked Phase G items should remain blocked until operational profiling evidence justifies them. + +Potential v10 themes (to be defined by operational need, not speculation): +- Multi-cluster federation (if operators request it) +- Web-based operator UI (if CLI surfaces prove insufficient) +- Observability pipeline (metrics export, structured logging) +- Reservation ledger hardening (if dual-write bugs are observed) + +--- + +*Generated: 2026-05-18* +*Canonical roadmap: `docs/roadmap-v9.md` (session workspace)* +*Status tracked in SQL session database* diff --git a/internal/buildinfo/version.go b/internal/buildinfo/version.go index 147e7bf..fe88546 100644 --- a/internal/buildinfo/version.go +++ b/internal/buildinfo/version.go @@ -1,7 +1,7 @@ package buildinfo // Version is the single source of truth for the AXIS release string. -const Version = "0.10.2" +const Version = "0.10.3" // UpdateManagedBy specifies if this binary is managed by a package manager (e.g. "nix", "homebrew"). // When set, the internal `axis update` command will refuse to overwrite the binary. From b350521fd93e2ddb514b779811caf52ed10912d9 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 18 May 2026 10:34:37 -0400 Subject: [PATCH 2/4] chat: prefer tool-capable models when auto-selecting for agent The agent relies on tool calling, but choosePreferredModel() fell back to alphabetically first installed model which was often gemma3n:e2b. That model does not support /api/chat tools, producing a 400 Bad Request. - Expand recommendedLocalModels to include tool-capable variants. - Add toolCapablePrefixes list of known tool-capable families. - Add nonToolFamilies exclusion list to skip embedding/vision variants. - Update choosePreferredModel to prefer tool-capable families over all others when falling back to any installed model. - Improve 400 error message to suggest tool-capable alternatives. - Add tests for all new paths. All existing tests continue to pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 12 +++++ docs/current-state.md | 4 +- internal/chat/client.go | 4 ++ internal/chat/models.go | 85 ++++++++++++++++++++++++++++++++++-- internal/chat/models_test.go | 26 +++++++++++ internal/chat/wave1_test.go | 67 ++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02ef3f7..6f594d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## v0.10.3 (2026-05-18) +### šŸ› Bug Fixes +* Prefer tool-capable models when auto-selecting for `axis agent` — prevents 400 Bad Request when the alphabetically first installed model (e.g. `gemma3n:e2b`) does not support Ollama tool calling. Falls back to known tool-capable families (llama3.1, qwen3.5, qwen3, etc.) and skips embedding/vision variants (PR #127). + ### šŸ”§ Maintenance * Extract `state.Maintain()` from `state.Load()` — eliminates repair-on-read side effect, making `Load()` idempotent and preventing silent `state.json` rewrites on every CLI invocation (PR #126). * Update summary golden files to reflect version bump. @@ -7,6 +10,15 @@ ### šŸ“š Documentation * Add `docs/roadmap-status.md` — final status of all 53 v9 roadmap items (48 done, 5 Phase G items blocked by evidence discipline). +## v0.10.2 (2026-05-17) + +### šŸš€ Features +* Daemon health endpoints (`axis daemon status`) with reservation count and last-refresh timestamp. +* Placement ranking 54% faster for 50-node clusters via unified memory caching. + +### šŸ“š Documentation +* Refresh `docs/current-state.md` for v0.10.2 release. + ## v0.10.1 (2026-05-06) ### šŸš€ Features diff --git a/docs/current-state.md b/docs/current-state.md index 34c2389..ac3ce54 100644 --- a/docs/current-state.md +++ b/docs/current-state.md @@ -13,8 +13,8 @@ Refresh this section with `./hack/refresh-current-state.sh`. - Refreshed: 2026-05-18 EDT - Repo version: `0.10.3` -- Latest published GitHub release: `v0.10.2` (2026-05-17T19:45:28Z) -- Release truth: repo version is ahead of the latest published release +- Latest published GitHub release: `v0.10.3` (2026-05-18T14:12:04Z) +- Release truth: repo version matches the latest published release ## Executive Summary diff --git a/internal/chat/client.go b/internal/chat/client.go index a13e353..c324a5c 100644 --- a/internal/chat/client.go +++ b/internal/chat/client.go @@ -96,6 +96,10 @@ func (c *Client) ChatStream(ctx context.Context, msgs []Message, tools []ToolDef defer resp.Body.Close() if resp.StatusCode != http.StatusOK { + if resp.StatusCode == http.StatusBadRequest && tools != nil && len(tools) > 0 { + suggestion := formatToolCapableSuggestion() + return Message{}, fmt.Errorf("server returned 400 Bad Request — model %q may not support tool calling; %s", c.Model, suggestion) + } return Message{}, fmt.Errorf("server returned status: %s", resp.Status) } diff --git a/internal/chat/models.go b/internal/chat/models.go index 4bd1fe9..7e36e8f 100644 --- a/internal/chat/models.go +++ b/internal/chat/models.go @@ -28,8 +28,11 @@ type ModelCatalog struct { } var recommendedLocalModels = []ModelOption{ - {Name: "qwen3:1.7b", Description: "best small local default for AXIS"}, - {Name: "qwen3:0.6b", Description: "lighter local fallback with tool-use support"}, + {Name: "qwen3.5:4b", Description: "best small local default for AXIS with tool-use"}, + {Name: "qwen3.5:9b", Description: "stronger local default with tool-use"}, + {Name: "llama3.1:8b", Description: "strong local alternative with tool-use"}, + {Name: "qwen3:1.7b", Description: "older qwen3 fallback with tool-use"}, + {Name: "qwen3:0.6b", Description: "lightweight qwen3 fallback with tool-use"}, {Name: "qwen2.5-coder:1.5b", Description: "older coding-focused fallback"}, } @@ -168,6 +171,26 @@ func formatMissingModelError(model string, installed []string) error { model, available, suggest, model) } +// toolCapablePrefixes lists model families known to support Ollama /api/chat +// tool-calling. When no exact recommended model is installed, the fallback +// prefers models whose family prefix matches this list over models that do +// not (e.g. embedding-only or vision-only models). +var toolCapablePrefixes = []string{ + "llama3.1", "llama3.2", "llama3.3", + "qwen3", "qwen3.5", + "qwen2.5-coder", "qwen2.5", + "mistral", "mixtral", + "phi4", "phi3", + "gemma3", +} + +// nonToolFamilies blocks specific model families that share a prefix with a +// tool-capable family but do not support tool calling (e.g. gemma3n is an +// embedding/vision variant of gemma3). +var nonToolFamilies = []string{ + "gemma3n", // gemma3 supports tools; gemma3n does not +} + func choosePreferredModel(installed []string) (string, bool) { // Prefer recommended models in priority order. for _, candidate := range recommendedLocalModels { @@ -176,10 +199,64 @@ func choosePreferredModel(installed []string) (string, bool) { } } // Any installed model beats falling back to a hardcoded name that may not - // be present. This handles clusters where the operator has pulled their own - // preferred models (e.g. qwen3:4b, llama3.2:latest). + // be present. Prefer tool-capable families over non-tool families so that + // the agent default doesn't select an embedding-only or vision-only model. if len(installed) > 0 { + if best := pickToolCapable(installed); best != "" { + return best, true + } return installed[0], true } return "", false } + +// pickToolCapable returns the first installed model whose family prefix is +// in toolCapablePrefixes, or "" if none match. It explicitly skips families +// listed in nonToolFamilies to avoid false-positive matches on embedding or +// vision variants. +func pickToolCapable(installed []string) string { + for _, name := range installed { + base := name + if idx := strings.LastIndex(name, ":"); idx >= 0 { + base = name[:idx] + } + blocked := false + for _, bad := range nonToolFamilies { + if strings.HasPrefix(base, bad) { + blocked = true + break + } + } + if blocked { + continue + } + for _, prefix := range toolCapablePrefixes { + if base == prefix || strings.HasPrefix(base, prefix+"-") { + return name + } + } + } + return "" +} + +// formatToolCapableSuggestion builds a user-facing hint that lists up to three +// recommended tool-capable models. It avoids hardcoding model names in error +// messages and stays concise even as the recommended list grows. +func formatToolCapableSuggestion() string { + var names []string + for i, opt := range recommendedLocalModels { + if i >= 3 { + break + } + names = append(names, opt.Name) + } + if len(names) == 0 { + return "try a tool-capable model" + } + if len(names) == 1 { + return fmt.Sprintf("try %s", names[0]) + } + last := names[len(names)-1] + rest := strings.Join(names[:len(names)-1], ", ") + return fmt.Sprintf("try a tool-capable model such as %s, or %s", rest, last) +} diff --git a/internal/chat/models_test.go b/internal/chat/models_test.go index c403c25..1f1a86a 100644 --- a/internal/chat/models_test.go +++ b/internal/chat/models_test.go @@ -49,6 +49,32 @@ func TestChoosePreferredModelEmptyReturnsFalse(t *testing.T) { } } +func TestChoosePreferredModelPrefersToolCapable(t *testing.T) { + // pickToolCapable should prefer a tool-capable model over a + // non-tool-capable one even when the non-tool model is first + // alphabetically. + got, ok := choosePreferredModel([]string{"gemma3n:e2b", "llama3.1:8b", "qwen3:4b"}) + if !ok { + t.Fatal("expected ok=true") + } + if got != "llama3.1:8b" { + t.Fatalf("expected tool-capable fallback llama3.1:8b, got %q", got) + } +} + +func TestChoosePreferredModelFallsBackAlphabeticallyWhenNoToolCapable(t *testing.T) { + // When none of the installed models match a known tool-capable family, + // the fallback returns the alphabetically first model. + // Note: listInstalledModels sorts results, so pass them sorted. + got, ok := choosePreferredModel([]string{"all-minilm:latest", "gemma3n:e2b"}) + if !ok { + t.Fatal("expected ok=true") + } + if got != "all-minilm:latest" { + t.Fatalf("expected alphabetical fallback, got %q", got) + } +} + // TestResolveDefaultModelPicksDeterministicInstalledWhenNoneRecommended // verifies the full ResolveDefaultModel path for a node that has its own // models but none from the recommended list. diff --git a/internal/chat/wave1_test.go b/internal/chat/wave1_test.go index e22c44f..5ba4369 100644 --- a/internal/chat/wave1_test.go +++ b/internal/chat/wave1_test.go @@ -12,6 +12,73 @@ import ( "github.com/toasterbook88/axis/internal/models" ) +func TestClientChatStreamError400WithTools(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/", "/api/show": + w.WriteHeader(http.StatusOK) + case "/api/chat": + w.WriteHeader(http.StatusBadRequest) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + client := NewClient(server.URL, "gemma3n:e2b") + var buf bytes.Buffer + msgs := []Message{{Role: RoleUser, Content: "hi"}} + tools := []ToolDef{{ + Type: "function", + Function: ToolDefFunction{ + Name: "axis_status", + }, + }} + + _, err := client.ChatStream(context.Background(), msgs, tools, &buf) + if err == nil { + t.Fatal("expected error on 400 with tools") + } + if !strings.Contains(err.Error(), "400 Bad Request") { + t.Errorf("expected 400 mention, got %v", err) + } + if !strings.Contains(err.Error(), "may not support tool calling") { + t.Errorf("expected tool-calling guidance, got %v", err) + } + if !strings.Contains(err.Error(), "gemma3n:e2b") { + t.Errorf("expected model name in error, got %v", err) + } +} + +func TestClientChatStreamError400WithoutTools(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/", "/api/show": + w.WriteHeader(http.StatusOK) + case "/api/chat": + w.WriteHeader(http.StatusBadRequest) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + client := NewClient(server.URL, "testmodel") + var buf bytes.Buffer + msgs := []Message{{Role: RoleUser, Content: "hi"}} + + _, err := client.ChatStream(context.Background(), msgs, nil, &buf) + if err == nil { + t.Fatal("expected error on 400 without tools") + } + if !strings.Contains(err.Error(), "400 Bad Request") { + t.Errorf("expected 400 mention, got %v", err) + } + if strings.Contains(err.Error(), "tool calling") { + t.Errorf("should NOT mention tool calling when no tools were sent") + } +} + // --------------------------------------------------------------------------- // message.go tests // --------------------------------------------------------------------------- From 08d2a7a988371a20de27f9a115bdd87aaed9f318 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 18 May 2026 11:47:51 -0400 Subject: [PATCH 3/4] feat(chat,agent): improve usability with summarization, new tools, and readline This commit addresses critical usability issues discovered during evaluation of axis chat and axis agent commands: - Tool outputs are now human-readable summaries instead of raw JSON, preventing local LLM hangs on large cluster snapshots. - Added 4 new tools: axis_summary, axis_reservations, read_file, list_directory. - Added tool execution transparency: prints tool calls and one-line result summaries to stderr. - Added conversation persistence with --resume flag for both chat and agent. - Replaced primitive bufio.Scanner REPLs with readline-based editors supporting arrow keys, history, and ctrl+c graceful exit. - Added path validation to file tools to prevent directory traversal. - Updated isReadOnlyTool to include new read-only tools. - Added tests for new tools and persistence. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/axis/agent.go | 73 +++++++++++++- cmd/axis/chat.go | 90 ++++++++++++++++- go.mod | 1 + go.sum | 5 + internal/agent/agent.go | 53 +++++++++- internal/agent/agent_test.go | 113 ++++++++++++++++++++- internal/agent/confirm.go | 3 +- internal/agent/summarize.go | 135 +++++++++++++++++++++++++ internal/agent/tools.go | 184 ++++++++++++++++++++++++++++++---- internal/chat/persist.go | 81 +++++++++++++++ internal/chat/persist_test.go | 96 ++++++++++++++++++ 11 files changed, 797 insertions(+), 37 deletions(-) create mode 100644 internal/agent/summarize.go create mode 100644 internal/chat/persist.go create mode 100644 internal/chat/persist_test.go diff --git a/cmd/axis/agent.go b/cmd/axis/agent.go index 30071d1..cca70db 100644 --- a/cmd/axis/agent.go +++ b/cmd/axis/agent.go @@ -5,10 +5,12 @@ import ( "context" "encoding/json" "fmt" + "io" "os" "strings" "time" + "github.com/chzyer/readline" "github.com/spf13/cobra" "github.com/toasterbook88/axis/internal/agent" "github.com/toasterbook88/axis/internal/api" @@ -37,6 +39,7 @@ func agentCmd() *cobra.Command { maxTurns int autoApprove bool systemMsg string + resume bool ) cmd := &cobra.Command{ @@ -93,6 +96,16 @@ func agentCmd() *cobra.Command { a := agent.New(cfg) + // Resume previous conversation if requested. + historyPath := chat.PersistPath("agent") + if resume { + if err := a.Conversation().LoadFromFile(historyPath); err != nil { + fmt.Fprintf(errW, "warning: could not resume conversation: %v\n", err) + } else if n := a.Conversation().HistoryCount(); n > 0 { + fmt.Fprintf(errW, "Resumed %d messages from previous session.\n", n) + } + } + // Single-shot mode. if len(args) > 0 { instruction := strings.Join(args, " ") @@ -105,19 +118,30 @@ func agentCmd() *cobra.Command { return ExitCodeError{Code: ExitErrCommandFail, Message: fmt.Sprintf("agent failed: %v", err)} } fmt.Fprintln(w) + _ = a.Conversation().SaveToFile(historyPath) return nil } - // Interactive REPL. + // Interactive REPL with readline. fmt.Fprintf(errW, "AXIS Agent [%s] — max %d turns per query, type exit to quit\n\n", ui.Bold(currentModel), maxTurns) - scanner := bufio.NewScanner(os.Stdin) + rl, err := readline.NewEx(&readline.Config{ + Prompt: ui.Cyan("agent> "), + HistoryFile: historyPath + ".line", + InterruptPrompt: "^C", + EOFPrompt: "exit", + }) + if err != nil { + return runPlainAgentREPL(a, w, errW, timeout, historyPath) + } + defer rl.Close() + for { - fmt.Fprint(errW, ui.Cyan("agent> ")) - if !scanner.Scan() { + line, err := rl.Readline() + if err != nil { break } - instruction := strings.TrimSpace(scanner.Text()) + instruction := strings.TrimSpace(line) if instruction == "" { continue } @@ -133,6 +157,14 @@ func agentCmd() *cobra.Command { cancel() fmt.Fprintln(w) } + + if n := a.Conversation().HistoryCount(); n > 0 { + if err := a.Conversation().SaveToFile(historyPath); err != nil { + fmt.Fprintf(errW, "warning: could not save conversation: %v\n", err) + } else { + fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", n) + } + } return nil }, } @@ -143,9 +175,40 @@ func agentCmd() *cobra.Command { cmd.Flags().IntVar(&maxTurns, "max-turns", 10, "Maximum agent loop iterations per query") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Auto-approve safe commands (safety score < 70)") cmd.Flags().StringVar(&systemMsg, "system", "", "Extra text appended to system prompt") + cmd.Flags().BoolVar(&resume, "resume", false, "Resume previous conversation from history") return cmd } +// runPlainAgentREPL is the fallback scanner-based REPL when readline is unavailable. +func runPlainAgentREPL(a *agent.Agent, w, errW io.Writer, timeout time.Duration, historyPath string) error { + fmt.Fprintln(errW, ui.Yellow("Note: using plain input mode (no arrow keys or history)")) + scanner := bufio.NewScanner(os.Stdin) + for { + fmt.Fprint(errW, ui.Cyan("agent\u003e ")) + if !scanner.Scan() { + break + } + instruction := strings.TrimSpace(scanner.Text()) + if instruction == "" { + continue + } + lower := strings.ToLower(instruction) + if lower == "exit" || lower == "quit" { + break + } + ctx, cancel := agentRequestContext(timeout) + if err := a.Run(ctx, instruction); err != nil { + fmt.Fprintf(errW, "\n%s %v\n", ui.Red("Error:"), err) + } + cancel() + fmt.Fprintln(w) + } + if n := a.Conversation().HistoryCount(); n > 0 { + _ = a.Conversation().SaveToFile(historyPath) + } + return nil +} + func agentRequestContext(timeout time.Duration) (context.Context, context.CancelFunc) { if timeout <= 0 { return context.WithCancel(context.Background()) diff --git a/cmd/axis/chat.go b/cmd/axis/chat.go index 3f16a89..5c02e5d 100644 --- a/cmd/axis/chat.go +++ b/cmd/axis/chat.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/chzyer/readline" "github.com/spf13/cobra" "github.com/toasterbook88/axis/internal/chat" "github.com/toasterbook88/axis/internal/config" @@ -32,6 +33,7 @@ func chatCmd() *cobra.Command { useContext bool systemMsg string format string + resume bool ) cmd := &cobra.Command{ @@ -68,6 +70,16 @@ func chatCmd() *cobra.Command { sysPrompt := chat.BuildSystemPrompt(cluster, systemMsg) conv.Append(chat.Message{Role: chat.RoleSystem, Content: sysPrompt}) + // Resume previous conversation if requested. + historyPath := chat.PersistPath("chat") + if resume { + if err := conv.LoadFromFile(historyPath); err != nil { + fmt.Fprintf(errW, "warning: could not resume conversation: %v\n", err) + } else if n := conv.HistoryCount(); n > 0 { + fmt.Fprintf(errW, "Resumed %d messages from previous session.\n", n) + } + } + fmt.Fprintln(errW, ui.Dim("advisory: chat output is not cluster truth — validate with axis status or axis facts")) // Single-shot mode. @@ -100,19 +112,32 @@ func chatCmd() *cobra.Command { } else { fmt.Fprintln(w) } + // Save conversation after single-shot. + _ = conv.SaveToFile(historyPath) return nil } - // Interactive REPL. + // Interactive REPL with readline. fmt.Fprintf(errW, "AXIS Chat [%s] — type /help for commands, exit to quit\n\n", ui.Bold(currentModel)) - scanner := bufio.NewScanner(os.Stdin) + + rl, err := readline.NewEx(&readline.Config{ + Prompt: ui.Cyan(">>> "), + HistoryFile: historyPath + ".line", + InterruptPrompt: "^C", + EOFPrompt: "exit", + }) + if err != nil { + // Fallback to plain scanner if readline fails (e.g., non-TTY). + return runPlainREPL(cmd.Context(), client, conv, currentModel, w, errW, timeout, historyPath) + } + defer rl.Close() for { - fmt.Fprint(errW, ui.Cyan(">>> ")) - if !scanner.Scan() { + line, err := rl.Readline() + if err != nil { // io.EOF or readline.ErrInterrupt break } - query := strings.TrimSpace(scanner.Text()) + query := strings.TrimSpace(line) if query == "" { continue } @@ -148,6 +173,15 @@ func chatCmd() *cobra.Command { conv.Append(resp) fmt.Fprintln(w) } + + // Save conversation on exit. + if n := conv.HistoryCount(); n > 0 { + if err := conv.SaveToFile(historyPath); err != nil { + fmt.Fprintf(errW, "warning: could not save conversation: %v\n", err) + } else { + fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", n) + } + } return nil }, } @@ -158,9 +192,55 @@ func chatCmd() *cobra.Command { cmd.Flags().BoolVar(&useContext, "context", false, "Inject live cluster snapshot into system prompt") cmd.Flags().StringVar(&systemMsg, "system", "", "Extra text appended to system prompt") cmd.Flags().StringVar(&format, "format", "text", "Output format for single-shot mode (text, json)") + cmd.Flags().BoolVar(&resume, "resume", false, "Resume previous conversation from history") return cmd } +// runPlainREPL is the fallback scanner-based REPL when readline is unavailable. +func runPlainREPL(ctx context.Context, client *chat.Client, conv *chat.Conversation, currentModel string, w, errW io.Writer, timeout time.Duration, historyPath string) error { + fmt.Fprintln(errW, ui.Yellow("Note: using plain input mode (no arrow keys or history)")) + scanner := bufio.NewScanner(os.Stdin) + for { + fmt.Fprint(errW, ui.Cyan(">>> ")) + if !scanner.Scan() { + break + } + query := strings.TrimSpace(scanner.Text()) + if query == "" { + continue + } + lower := strings.ToLower(query) + if lower == "exit" || lower == "quit" { + break + } + if strings.HasPrefix(query, "/") { + nextModel := handleSlashCommand(query, currentModel, conv, errW) + if nextModel != "" { + currentModel = nextModel + client = chat.NewClient(chat.DefaultEndpoint, currentModel) + } + continue + } + conv.Append(chat.Message{Role: chat.RoleUser, Content: query}) + sp := ui.NewSpinner() + sp.Start("Thinking...") + ctx2, cancel := chatRequestContext(timeout) + resp, err := client.ChatStream(ctx2, conv.Messages(), nil, w) + sp.Stop("") + cancel() + if err != nil { + fmt.Fprintf(errW, "\n%s\n", ui.Red("Error: ", err)) + continue + } + conv.Append(resp) + fmt.Fprintln(w) + } + if n := conv.HistoryCount(); n > 0 { + _ = conv.SaveToFile(historyPath) + } + return nil +} + // handleSlashCommand processes a slash command and returns a new model name // if the model was switched, or empty string otherwise. func handleSlashCommand(input, currentModel string, conv *chat.Conversation, w io.Writer) string { diff --git a/go.mod b/go.mod index 52d1357..65e1070 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( ) require ( + github.com/chzyer/readline v1.5.1 // indirect github.com/google/jsonschema-go v0.4.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/go.sum b/go.sum index 9815d3d..143452f 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ al.essio.dev/pkg/shellescape v1.6.0 h1:NxFcEqzFSEVCGN2yq7Huv/9hyCEGVa/TncnOOBBeXHA= al.essio.dev/pkg/shellescape v1.6.0/go.mod h1:6sIqp7X2P6mThCQ7twERpZTuigpr6KbZWtls1U8I890= +github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= +github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= +github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= +github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -51,6 +55,7 @@ golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= +golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 60bc42f..726bf8a 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -144,11 +144,12 @@ func (a *Agent) Run(ctx context.Context, userPrompt string) error { // Process each tool call. for _, tc := range resp.ToolCalls { + fmt.Fprintf(a.output, "\nā–¶ Calling %s...\n", tc.Function.Name) result, err := a.dispatchToolCall(ctx, tc) if err != nil { // Feed the error back to the model for self-correction. errMsg := fmt.Sprintf("Error executing tool %q: %s", tc.Function.Name, err.Error()) - fmt.Fprintf(a.output, "\n⚠ %s\n", errMsg) + fmt.Fprintf(a.output, "⚠ %s\n", errMsg) a.conv.Append(chat.Message{ Role: chat.RoleTool, Content: errMsg, @@ -156,7 +157,9 @@ func (a *Agent) Run(ctx context.Context, userPrompt string) error { continue } - fmt.Fprintf(a.output, "\nāœ“ %s returned %d chars\n", tc.Function.Name, len(result)) + // Print a compact summary line instead of raw char count. + summary := formatToolResultSummary(tc.Function.Name, result) + fmt.Fprintf(a.output, "āœ“ %s\n", summary) a.conv.Append(chat.Message{ Role: chat.RoleTool, Content: result, @@ -168,6 +171,51 @@ func (a *Agent) Run(ctx context.Context, userPrompt string) error { return nil } +// formatToolResultSummary produces a human-readable one-line summary of a +// tool result for operator feedback. +func formatToolResultSummary(toolName, result string) string { + switch toolName { + case "axis_status": + // Extract first line (cluster summary). + if i := strings.Index(result, "\n"); i > 0 { + return toolName + ": " + strings.TrimSpace(result[:i]) + } + case "axis_summary": + return toolName + ": " + strings.TrimSpace(result) + case "axis_facts": + if i := strings.Index(result, "\n"); i > 0 { + return toolName + ": " + strings.TrimSpace(result[:i]) + } + case "axis_place": + return toolName + ": " + strings.TrimSpace(result) + case "axis_reservations": + if strings.Contains(result, "Active reservations") { + lines := strings.Split(result, "\n") + if len(lines) >= 2 { + count := 0 + for _, l := range lines[1:] { + if strings.HasPrefix(l, "-") { + count++ + } + } + return fmt.Sprintf("%s: found %d nodes with active reservations", toolName, count) + } + } + return toolName + ": no active reservations" + case "read_file": + lines := strings.Count(result, "\n") + return fmt.Sprintf("%s: read %d lines (%d chars)", toolName, lines, len(result)) + case "list_directory": + if i := strings.Index(result, "("); i > 0 && strings.Contains(result, " entries)") { + return toolName + ": " + strings.TrimSpace(result[strings.Index(result, "Directory:")+len("Directory:"):]) + } + return toolName + ": listed directory" + case "run_shell": + return toolName + ": executed shell command" + } + return fmt.Sprintf("%s returned %d chars", toolName, len(result)) +} + // dispatchToolCall handles a single tool call with safety gating and confirmation. func (a *Agent) dispatchToolCall(ctx context.Context, tc chat.ToolCall) (string, error) { name := tc.Function.Name @@ -189,7 +237,6 @@ func (a *Agent) dispatchToolCall(ctx context.Context, tc chat.ToolCall) (string, } // 4. Read-only tools execute directly (no confirmation needed). - fmt.Fprintf(a.output, "\nā–¶ Executing: %s\n", name) return a.tools.Execute(ctx, name, args) } diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 0f3cd26..830f8f5 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -7,6 +7,8 @@ import ( "fmt" "net/http" "net/http/httptest" + "os" + "path/filepath" "strings" "testing" @@ -107,7 +109,10 @@ func TestToolRegistryHasAllDefaultTools(t *testing.T) { tc := &ToolContext{} r := NewToolRegistry(tc) - expected := []string{"axis_status", "axis_facts", "axis_place", "run_shell"} + expected := []string{ + "axis_status", "axis_facts", "axis_place", "axis_summary", + "axis_reservations", "read_file", "list_directory", "run_shell", + } for _, name := range expected { if !r.HasTool(name) { t.Errorf("expected tool %q to be registered", name) @@ -142,8 +147,8 @@ func TestToolStatusNilSnapshot(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if !strings.Contains(result, "no snapshot available") { - t.Errorf("expected 'no snapshot available', got: %s", result) + if !strings.Contains(result, "No cluster snapshot available") { + t.Errorf("expected 'No cluster snapshot available', got: %s", result) } } @@ -203,6 +208,103 @@ func TestToolShellBlockedByDesign(t *testing.T) { } } +func TestToolSummary(t *testing.T) { + snap := &models.ClusterSnapshot{ + Status: "healthy", + Nodes: []models.NodeFacts{{Name: "test-node"}}, + Summary: models.ClusterSummary{ + TotalNodes: 1, + ReachableNodes: 1, + }, + } + tc := &ToolContext{Snapshot: snap} + r := NewToolRegistry(tc) + + result, err := r.Execute(context.Background(), "axis_summary", nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(result, "1 nodes (1 reachable), status: healthy") { + t.Errorf("expected summary result, got: %s", result) + } +} + +func TestToolReadFile(t *testing.T) { + tmpDir := t.TempDir() + tmpFile := filepath.Join(tmpDir, "test.txt") + content := []byte("Hello, AXIS!") + if err := os.WriteFile(tmpFile, content, 0644); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + // Change to temp dir so relative paths work. + origDir, _ := os.Getwd() + os.Chdir(tmpDir) + defer os.Chdir(origDir) + + tc := &ToolContext{} + r := NewToolRegistry(tc) + + result, err := r.Execute(context.Background(), "read_file", json.RawMessage(fmt.Sprintf(`{"path":%q}`, "test.txt"))) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(result, "Hello, AXIS!") { + t.Errorf("expected file content, got: %s", result) + } +} + +func TestToolReadFilePathValidation(t *testing.T) { + tc := &ToolContext{} + r := NewToolRegistry(tc) + + _, err := r.Execute(context.Background(), "read_file", json.RawMessage(`{"path":"../secret.txt"}`)) + if err == nil { + t.Fatal("expected error for path traversal") + } + if !strings.Contains(err.Error(), "escapes") { + t.Errorf("expected 'escapes' error, got: %s", err.Error()) + } +} + +func TestToolListDirectory(t *testing.T) { + tmpDir := t.TempDir() + for _, name := range []string{"a.txt", "b.txt"} { + path := filepath.Join(tmpDir, name) + if err := os.WriteFile(path, []byte("test"), 0644); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + } + + origDir, _ := os.Getwd() + os.Chdir(tmpDir) + defer os.Chdir(origDir) + + tc := &ToolContext{} + r := NewToolRegistry(tc) + + result, err := r.Execute(context.Background(), "list_directory", json.RawMessage(`{"path":"."}`)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(result, "a.txt") || !strings.Contains(result, "b.txt") { + t.Errorf("expected directory listing with files, got: %s", result) + } +} + +func TestToolListDirectoryPathValidation(t *testing.T) { + tc := &ToolContext{} + r := NewToolRegistry(tc) + + _, err := r.Execute(context.Background(), "list_directory", json.RawMessage(`{"path":"/etc/shadow"}`)) + if err == nil { + t.Fatal("expected error for absolute path") + } + if !strings.Contains(err.Error(), "escapes") { + t.Errorf("expected 'escapes' error, got: %s", err.Error()) + } +} + // --- Confirmation Tests --- func TestDefaultConfirmYes(t *testing.T) { @@ -676,7 +778,10 @@ func TestExecuteShellExitError(t *testing.T) { // --- isReadOnlyTool Tests --- func TestIsReadOnlyTool(t *testing.T) { - readOnly := []string{"axis_status", "axis_facts", "axis_place"} + readOnly := []string{ + "axis_status", "axis_facts", "axis_place", "axis_summary", + "axis_reservations", "read_file", "list_directory", + } for _, name := range readOnly { if !isReadOnlyTool(name) { t.Errorf("expected %q to be read-only", name) diff --git a/internal/agent/confirm.go b/internal/agent/confirm.go index cc5fd25..a86964f 100644 --- a/internal/agent/confirm.go +++ b/internal/agent/confirm.go @@ -78,7 +78,8 @@ func AutoApproveConfirm(threshold int, fallback ConfirmFunc) ConfirmFunc { // isReadOnlyTool returns true for tools that only read cluster state. func isReadOnlyTool(name string) bool { switch name { - case "axis_status", "axis_facts", "axis_place": + case "axis_status", "axis_facts", "axis_place", "axis_summary", + "axis_reservations", "read_file", "list_directory": return true } return false diff --git a/internal/agent/summarize.go b/internal/agent/summarize.go new file mode 100644 index 0000000..88e9580 --- /dev/null +++ b/internal/agent/summarize.go @@ -0,0 +1,135 @@ +package agent + +import ( + "fmt" + "strings" + + "github.com/toasterbook88/axis/internal/models" +) + +// summarizeSnapshot returns a compact human-readable summary of cluster state +// suitable for feeding back to an LLM. Keeps output under ~600 chars. +func summarizeSnapshot(snap *models.ClusterSnapshot) string { + if snap == nil { + return "No cluster snapshot available — cluster may not be configured." + } + + var b strings.Builder + fmt.Fprintf(&b, "Cluster: %d nodes (%d reachable), status: %s\n", + snap.Summary.TotalNodes, snap.Summary.ReachableNodes, snap.Status) + if snap.Summary.TotalRAMMB > 0 { + fmt.Fprintf(&b, "Total RAM: %d MB total, %d MB free\n", + snap.Summary.TotalRAMMB, snap.Summary.TotalFreeRAMMB) + } + + for i, n := range snap.Nodes { + if i >= 6 { + fmt.Fprintf(&b, "... and %d more nodes\n", len(snap.Nodes)-i) + break + } + status := string(n.Status) + if n.Error != "" { + status += " — " + truncate(n.Error, 40) + } + line := fmt.Sprintf("- %s (%s): %s", n.Name, n.Hostname, status) + if n.Resources != nil { + line += fmt.Sprintf(", %d MB free RAM, %d cores", n.Resources.RAMFreeMB, n.Resources.CPUCores) + if len(n.Resources.GPUs) > 0 { + gpuNames := make([]string, 0, len(n.Resources.GPUs)) + for _, g := range n.Resources.GPUs { + gpuNames = append(gpuNames, g.GPUName()) + } + line += fmt.Sprintf(", GPUs: %s", strings.Join(gpuNames, ", ")) + } + } + b.WriteString(line + "\n") + } + + if len(snap.Warnings) > 0 { + b.WriteString("Warnings:\n") + for i, w := range snap.Warnings { + if i >= 3 { + fmt.Fprintf(&b, "... and %d more warnings\n", len(snap.Warnings)-i) + break + } + fmt.Fprintf(&b, "- %s: %s\n", w.Node, truncate(w.Message, 60)) + } + } + + return b.String() +} + +// summarizeNodeFacts returns a compact human-readable summary of a single node. +func summarizeNodeFacts(n models.NodeFacts) string { + var b strings.Builder + fmt.Fprintf(&b, "Node: %s (%s/%s, %s)\n", n.Name, n.OS, n.Arch, n.Hostname) + if n.Resources != nil { + r := n.Resources + fmt.Fprintf(&b, "CPU: %d cores (%s)\n", r.CPUCores, truncate(r.CPUModel, 40)) + fmt.Fprintf(&b, "RAM: %d MB total, %d MB free\n", r.RAMTotalMB, r.RAMFreeMB) + fmt.Fprintf(&b, "Disk: %d GB total, %d GB free\n", r.DiskTotalGB, r.DiskFreeGB) + if r.Load1M > 0 { + fmt.Fprintf(&b, "Load: %.2f (1m)\n", r.Load1M) + } + if len(r.GPUs) > 0 { + for _, g := range r.GPUs { + fmt.Fprintf(&b, "GPU: %s (%s, %d MB VRAM)\n", g.GPUName(), g.Vendor, g.VRAMMB) + } + } + if r.Pressure != "" && r.Pressure != "none" { + fmt.Fprintf(&b, "Pressure: %s\n", r.Pressure) + } + if r.ThermalState != "" && r.ThermalState != "nominal" { + fmt.Fprintf(&b, "Thermal: %s\n", r.ThermalState) + } + } + if len(n.Tools) > 0 { + toolNames := make([]string, 0, len(n.Tools)) + for _, t := range n.Tools { + toolNames = append(toolNames, t.Name) + } + fmt.Fprintf(&b, "Tools: %s\n", strings.Join(toolNames, ", ")) + } + if n.Ollama != nil && n.Ollama.Installed { + fmt.Fprintf(&b, "Ollama: %s (%d models)\n", n.Ollama.Version, len(n.Ollama.Models)) + } + fmt.Fprintf(&b, "Status: %s\n", n.Status) + if n.Error != "" { + fmt.Fprintf(&b, "Error: %s\n", truncate(n.Error, 100)) + } + return b.String() +} + +// summarizePlacementDecision returns a compact human-readable summary. +func summarizePlacementDecision(dec models.PlacementDecision) string { + if !dec.OK { + return "Placement: no suitable node found for this task." + } + var b strings.Builder + fmt.Fprintf(&b, "Placement: %s (fit score: %d/100", dec.Node, dec.FitScore) + if dec.IsLocal { + b.WriteString(", local") + } + b.WriteString(")\n") + if dec.Workload.Class != "" { + fmt.Fprintf(&b, "Workload class: %s\n", dec.Workload.Class) + } + if len(dec.Reasoning) > 0 { + b.WriteString("Reasoning:\n") + for _, r := range dec.Reasoning { + fmt.Fprintf(&b, "- %s\n", r) + } + } + return b.String() +} + +// truncate truncates a string to maxLen, appending "..." if truncated. +func truncate(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + if maxLen <= 3 { + return s[:maxLen] + } + return s[:maxLen-3] + "..." +} diff --git a/internal/agent/tools.go b/internal/agent/tools.go index 69ea4b8..42dfab7 100644 --- a/internal/agent/tools.go +++ b/internal/agent/tools.go @@ -5,7 +5,9 @@ import ( "context" "encoding/json" "fmt" + "os" "os/exec" + "path/filepath" "strings" "time" @@ -42,6 +44,10 @@ func NewToolRegistry(tc *ToolContext) *ToolRegistry { r.registerStatus(tc) r.registerFacts(tc) r.registerPlace(tc) + r.registerSummary(tc) + r.registerReservations(tc) + r.registerReadFile() + r.registerListDirectory() r.registerShell() return r } @@ -91,17 +97,13 @@ func (r *ToolRegistry) add(name, description string, params json.RawMessage, exe func (r *ToolRegistry) registerStatus(tc *ToolContext) { r.add("axis_status", - "Return the current AXIS cluster status snapshot as JSON. Use this to answer questions about node health, resources, and cluster state.", + "Return a compact human-readable summary of the current AXIS cluster status. Includes node count, health, resources, and warnings. Use this for cluster overview questions.", json.RawMessage(`{"type":"object","properties":{}}`), func(ctx context.Context, args json.RawMessage) (string, error) { if tc.Snapshot == nil { - return `{"error":"no snapshot available — cluster may not be configured"}`, nil + return "No cluster snapshot available — cluster may not be configured.", nil } - out, err := json.Marshal(tc.Snapshot) - if err != nil { - return "", fmt.Errorf("marshal snapshot: %w", err) - } - return string(out), nil + return summarizeSnapshot(tc.Snapshot), nil }, ) } @@ -110,19 +112,15 @@ func (r *ToolRegistry) registerStatus(tc *ToolContext) { func (r *ToolRegistry) registerFacts(tc *ToolContext) { r.add("axis_facts", - "Return local hardware facts for the current machine (CPU, RAM, disk, GPUs, installed tools).", + "Return a compact human-readable summary of local hardware facts for the current machine (CPU, RAM, disk, GPUs, installed tools, Ollama status).", json.RawMessage(`{"type":"object","properties":{}}`), func(ctx context.Context, args json.RawMessage) (string, error) { if tc.Snapshot != nil { if n, ok := models.FindLocalNode(tc.Snapshot.Nodes); ok { - out, err := json.Marshal(n) - if err != nil { - return "", fmt.Errorf("marshal facts: %w", err) - } - return string(out), nil + return summarizeNodeFacts(n), nil } } - return `{"error":"local node not found in snapshot"}`, nil + return "Local node not found in snapshot.", nil }, ) } @@ -135,7 +133,7 @@ type placeArgs struct { func (r *ToolRegistry) registerPlace(tc *ToolContext) { r.add("axis_place", - "Select the best node for a task description. Returns a placement decision with node, fit score, and reasoning.", + "Select the best node for a task description. Returns a human-readable placement decision with node name, fit score, and reasoning.", json.RawMessage(`{"type":"object","properties":{"description":{"type":"string","description":"What the task needs to do"}},"required":["description"]}`), func(ctx context.Context, args json.RawMessage) (string, error) { var a placeArgs @@ -146,19 +144,167 @@ func (r *ToolRegistry) registerPlace(tc *ToolContext) { return "", fmt.Errorf("axis_place requires a non-empty \"description\" argument") } if tc.Snapshot == nil || len(tc.Snapshot.Nodes) == 0 { - return `{"ok":false,"reasoning":["no nodes available in snapshot"]}`, nil + return "Placement: no nodes available in snapshot.", nil } reqs := placement.InferRequirements(a.Description) decision := placement.SelectBestNode(reqs, tc.Snapshot.Nodes, tc.State) - out, err := json.Marshal(decision) + return summarizePlacementDecision(decision), nil + }, + ) +} + +// --- Tool: axis_summary --- + +func (r *ToolRegistry) registerSummary(tc *ToolContext) { + r.add("axis_summary", + "Return an ultra-compact one-line summary of the cluster (node count, health, total RAM). Good for quick status checks.", + json.RawMessage(`{"type":"object","properties":{}}`), + func(ctx context.Context, args json.RawMessage) (string, error) { + if tc.Snapshot == nil { + return "No cluster snapshot available.", nil + } + var b strings.Builder + fmt.Fprintf(&b, "%d nodes (%d reachable), status: %s", + tc.Snapshot.Summary.TotalNodes, tc.Snapshot.Summary.ReachableNodes, tc.Snapshot.Status) + if tc.Snapshot.Summary.TotalRAMMB > 0 { + fmt.Fprintf(&b, ", %d MB RAM total, %d MB free", + tc.Snapshot.Summary.TotalRAMMB, tc.Snapshot.Summary.TotalFreeRAMMB) + } + if len(tc.Snapshot.Warnings) > 0 { + fmt.Fprintf(&b, ", %d warnings", len(tc.Snapshot.Warnings)) + } + return b.String(), nil + }, + ) +} + +// --- Tool: axis_reservations --- + +func (r *ToolRegistry) registerReservations(tc *ToolContext) { + r.add("axis_reservations", + "List active reservations and task assignments across the cluster.", + json.RawMessage(`{"type":"object","properties":{}}`), + func(ctx context.Context, args json.RawMessage) (string, error) { + if tc.State == nil || len(tc.State.Nodes) == 0 { + return "No reservation state available.", nil + } + var b strings.Builder + fmt.Fprintf(&b, "Active reservations for %d nodes:\n", len(tc.State.Nodes)) + for name, ns := range tc.State.Nodes { + if ns.ActiveTasks == 0 && ns.ReservedMB == 0 { + continue + } + fmt.Fprintf(&b, "- %s: %d active tasks, %d MB reserved\n", name, ns.ActiveTasks, ns.ReservedMB) + if ns.LastTask != "" { + fmt.Fprintf(&b, " Last task: %s\n", truncate(ns.LastTask, 60)) + } + if len(ns.ActiveExecs) > 0 { + fmt.Fprintf(&b, " Active execs: %s\n", strings.Join(ns.ActiveExecs, ", ")) + } + } + return b.String(), nil + }, + ) +} + +// --- Tool: read_file --- + +type readFileArgs struct { + Path string `json:"path"` +} + +func (r *ToolRegistry) registerReadFile() { + r.add("read_file", + "Read the contents of a file at a given path. Returns the file contents as text. Paths are restricted to the current working directory and its subdirectories for safety.", + json.RawMessage(`{"type":"object","properties":{"path":{"type":"string","description":"Relative or absolute file path"}},"required":["path"]}`), + func(ctx context.Context, args json.RawMessage) (string, error) { + var a readFileArgs + if err := json.Unmarshal(args, &a); err != nil { + return "", fmt.Errorf("invalid arguments for read_file: expected {\"path\": \"...\"}, got: %s", string(args)) + } + if a.Path == "" { + return "", fmt.Errorf("read_file requires a non-empty \"path\" argument") + } + clean, err := validateToolPath(a.Path) + if err != nil { + return "", err + } + data, err := os.ReadFile(clean) + if err != nil { + return "", fmt.Errorf("cannot read file %q: %w", clean, err) + } + content := string(data) + const maxFileSize = 8000 + if len(content) > maxFileSize { + content = content[:maxFileSize] + "\n... [truncated to 8000 chars]" + } + return content, nil + }, + ) +} + +// --- Tool: list_directory --- + +type listDirArgs struct { + Path string `json:"path"` +} + +func (r *ToolRegistry) registerListDirectory() { + r.add("list_directory", + "List files and directories at a given path. Returns a human-readable directory listing. Paths are restricted to the current working directory and its subdirectories for safety.", + json.RawMessage(`{"type":"object","properties":{"path":{"type":"string","description":"Relative or absolute directory path"}},"required":["path"]}`), + func(ctx context.Context, args json.RawMessage) (string, error) { + var a listDirArgs + if err := json.Unmarshal(args, &a); err != nil { + return "", fmt.Errorf("invalid arguments for list_directory: expected {\"path\": \"...\"}, got: %s", string(args)) + } + if a.Path == "" { + a.Path = "." + } + clean, err := validateToolPath(a.Path) if err != nil { - return "", fmt.Errorf("marshal decision: %w", err) + return "", err + } + entries, err := os.ReadDir(clean) + if err != nil { + return "", fmt.Errorf("cannot read directory %q: %w", clean, err) + } + var b strings.Builder + fmt.Fprintf(&b, "Directory: %s (%d entries)\n", clean, len(entries)) + for _, e := range entries { + name := e.Name() + if e.IsDir() { + name += "/" + } + b.WriteString(name + "\n") } - return string(out), nil + return b.String(), nil }, ) } +// validateToolPath validates and resolves a path for file tools, preventing +// directory traversal outside the current working directory. +func validateToolPath(p string) (string, error) { + cwd, err := os.Getwd() + if err != nil { + return "", fmt.Errorf("cannot determine working directory: %w", err) + } + clean := filepath.Clean(p) + if !filepath.IsAbs(clean) { + clean = filepath.Join(cwd, clean) + } + // Ensure the resolved path is within cwd. + rel, err := filepath.Rel(cwd, clean) + if err != nil { + return "", fmt.Errorf("invalid path %q: %w", p, err) + } + if strings.HasPrefix(rel, "..") { + return "", fmt.Errorf("path %q escapes working directory", p) + } + return clean, nil +} + // --- Tool: run_shell --- type shellArgs struct { diff --git a/internal/chat/persist.go b/internal/chat/persist.go new file mode 100644 index 0000000..56dad3a --- /dev/null +++ b/internal/chat/persist.go @@ -0,0 +1,81 @@ +package chat + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +// conversationHistory stores a serializable conversation record. +type conversationHistory struct { + Messages []Message `json:"messages"` +} + +// PersistPath returns the default path for conversation history files. +func PersistPath(name string) string { + home, _ := os.UserHomeDir() + return filepath.Join(home, ".axis", name+"-history.json") +} + +// SaveToFile writes the conversation (excluding system messages) to the given path. +func (c *Conversation) SaveToFile(path string) error { + var hist conversationHistory + for _, m := range c.messages { + // Skip system messages — they are reconstructed on load. + if m.Role == RoleSystem { + continue + } + hist.Messages = append(hist.Messages, m) + } + data, err := json.MarshalIndent(hist, "", " ") + if err != nil { + return fmt.Errorf("marshal conversation: %w", err) + } + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return fmt.Errorf("create history directory: %w", err) + } + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("write history file: %w", err) + } + return nil +} + +// LoadFromFile restores non-system messages from a file into the conversation. +// If the file does not exist, this is a no-op. +func (c *Conversation) LoadFromFile(path string) error { + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf("read history file: %w", err) + } + var hist conversationHistory + if err := json.Unmarshal(data, &hist); err != nil { + return fmt.Errorf("unmarshal conversation: %w", err) + } + for _, m := range hist.Messages { + // Skip system messages to avoid duplicates. + if m.Role == RoleSystem { + continue + } + c.messages = append(c.messages, m) + } + // Compact if needed. + if c.maxChars > 0 { + c.compact() + } + return nil +} + +// HistoryCount returns the number of non-system messages in the conversation. +func (c *Conversation) HistoryCount() int { + n := 0 + for _, m := range c.messages { + if m.Role != RoleSystem { + n++ + } + } + return n +} diff --git a/internal/chat/persist_test.go b/internal/chat/persist_test.go new file mode 100644 index 0000000..363d63d --- /dev/null +++ b/internal/chat/persist_test.go @@ -0,0 +1,96 @@ +package chat + +import ( + "os" + "path/filepath" + "testing" +) + +func TestConversationSaveAndLoad(t *testing.T) { + tmp := filepath.Join(t.TempDir(), "test-history.json") + + c := NewConversation(4096) + c.Append(Message{Role: RoleSystem, Content: "sys prompt"}) + c.Append(Message{Role: RoleUser, Content: "hello"}) + c.Append(Message{Role: RoleAssistant, Content: "hi there"}) + c.Append(Message{Role: RoleTool, Content: "tool result"}) + + if err := c.SaveToFile(tmp); err != nil { + t.Fatalf("save: %v", err) + } + + // Verify file exists and does NOT contain system messages. + data, err := os.ReadFile(tmp) + if err != nil { + t.Fatalf("read: %v", err) + } + if string(data) == "" { + t.Fatal("history file is empty") + } + if contains(t, string(data), "sys prompt") { + t.Error("history file should not contain system messages") + } + + // Load into fresh conversation. + c2 := NewConversation(4096) + c2.Append(Message{Role: RoleSystem, Content: "reconstructed sys prompt"}) + if err := c2.LoadFromFile(tmp); err != nil { + t.Fatalf("load: %v", err) + } + + msgs := c2.Messages() + if len(msgs) != 4 { + t.Fatalf("expected 4 messages after load, got %d", len(msgs)) + } + if msgs[0].Role != RoleSystem || msgs[0].Content != "reconstructed sys prompt" { + t.Errorf("system message mismatch: %+v", msgs[0]) + } + if msgs[1].Content != "hello" { + t.Errorf("user message mismatch: %q", msgs[1].Content) + } + if msgs[2].Content != "hi there" { + t.Errorf("assistant message mismatch: %q", msgs[2].Content) + } + if msgs[3].Content != "tool result" { + t.Errorf("tool message mismatch: %q", msgs[3].Content) + } +} + +func TestConversationLoadMissingFile(t *testing.T) { + tmp := filepath.Join(t.TempDir(), "nonexistent.json") + c := NewConversation(4096) + c.Append(Message{Role: RoleSystem, Content: "sys"}) + + err := c.LoadFromFile(tmp) + if err != nil { + t.Fatalf("loading missing file should be a no-op, got: %v", err) + } + if c.Len() != 1 { + t.Fatalf("expected 1 message, got %d", c.Len()) + } +} + +func TestConversationHistoryCount(t *testing.T) { + c := NewConversation(4096) + c.Append(Message{Role: RoleSystem, Content: "sys"}) + c.Append(Message{Role: RoleUser, Content: "a"}) + c.Append(Message{Role: RoleAssistant, Content: "b"}) + + if c.HistoryCount() != 2 { + t.Errorf("expected history count 2, got %d", c.HistoryCount()) + } +} + +func contains(t *testing.T, s, substr string) bool { + t.Helper() + return len(substr) > 0 && len(s) >= len(substr) && (s == substr || len(s) > len(substr) && s[:len(substr)] == substr || s[len(s)-len(substr):] == substr || findSubstr(s, substr)) +} + +func findSubstr(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} From c531bcb0cfaad4f3c76ff15900dfc8e69bbd71a9 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 18 May 2026 12:05:50 -0400 Subject: [PATCH 4/4] fix(chat,agent): address PR 128 review comments - read_file: use io.LimitReader instead of os.ReadFile to prevent OOM - validateToolPath: resolve symlinks with filepath.EvalSymlinks before directory traversal bounds check - read_file: UTF-8 safe truncation using rune-based truncateRune helper - list_directory: truncate output to max 100 entries with remaining count - summarize.go/truncate: operate on runes instead of bytes for UTF-8 safety - persist.go: handle os.UserHomeDir() error; restrict permissions to 0700/0600 - Guard all PersistPath usages against empty path (fallback to no persistence) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/axis/agent.go | 25 ++++++++++------- cmd/axis/chat.go | 25 ++++++++++------- internal/agent/agent_test.go | 5 ++-- internal/agent/summarize.go | 10 ++++--- internal/agent/tools.go | 52 +++++++++++++++++++++++++++++------- internal/chat/persist.go | 13 +++++---- 6 files changed, 91 insertions(+), 39 deletions(-) diff --git a/cmd/axis/agent.go b/cmd/axis/agent.go index cca70db..0fa3721 100644 --- a/cmd/axis/agent.go +++ b/cmd/axis/agent.go @@ -97,8 +97,10 @@ func agentCmd() *cobra.Command { a := agent.New(cfg) // Resume previous conversation if requested. - historyPath := chat.PersistPath("agent") - if resume { + historyPath, err := chat.PersistPath("agent") + if err != nil { + fmt.Fprintf(errW, "warning: cannot determine history path: %v\n", err) + } else if resume { if err := a.Conversation().LoadFromFile(historyPath); err != nil { fmt.Fprintf(errW, "warning: could not resume conversation: %v\n", err) } else if n := a.Conversation().HistoryCount(); n > 0 { @@ -118,19 +120,24 @@ func agentCmd() *cobra.Command { return ExitCodeError{Code: ExitErrCommandFail, Message: fmt.Sprintf("agent failed: %v", err)} } fmt.Fprintln(w) - _ = a.Conversation().SaveToFile(historyPath) + if historyPath != "" { + _ = a.Conversation().SaveToFile(historyPath) + } return nil } // Interactive REPL with readline. fmt.Fprintf(errW, "AXIS Agent [%s] — max %d turns per query, type exit to quit\n\n", ui.Bold(currentModel), maxTurns) - rl, err := readline.NewEx(&readline.Config{ + rlCfg := &readline.Config{ Prompt: ui.Cyan("agent> "), - HistoryFile: historyPath + ".line", InterruptPrompt: "^C", EOFPrompt: "exit", - }) + } + if historyPath != "" { + rlCfg.HistoryFile = historyPath + ".line" + } + rl, err := readline.NewEx(rlCfg) if err != nil { return runPlainAgentREPL(a, w, errW, timeout, historyPath) } @@ -158,11 +165,11 @@ func agentCmd() *cobra.Command { fmt.Fprintln(w) } - if n := a.Conversation().HistoryCount(); n > 0 { + if historyPath != "" && a.Conversation().HistoryCount() > 0 { if err := a.Conversation().SaveToFile(historyPath); err != nil { fmt.Fprintf(errW, "warning: could not save conversation: %v\n", err) } else { - fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", n) + fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", a.Conversation().HistoryCount()) } } return nil @@ -203,7 +210,7 @@ func runPlainAgentREPL(a *agent.Agent, w, errW io.Writer, timeout time.Duration, cancel() fmt.Fprintln(w) } - if n := a.Conversation().HistoryCount(); n > 0 { + if historyPath != "" && a.Conversation().HistoryCount() > 0 { _ = a.Conversation().SaveToFile(historyPath) } return nil diff --git a/cmd/axis/chat.go b/cmd/axis/chat.go index 5c02e5d..ee90a0c 100644 --- a/cmd/axis/chat.go +++ b/cmd/axis/chat.go @@ -71,8 +71,10 @@ func chatCmd() *cobra.Command { conv.Append(chat.Message{Role: chat.RoleSystem, Content: sysPrompt}) // Resume previous conversation if requested. - historyPath := chat.PersistPath("chat") - if resume { + historyPath, err := chat.PersistPath("chat") + if err != nil { + fmt.Fprintf(errW, "warning: cannot determine history path: %v\n", err) + } else if resume { if err := conv.LoadFromFile(historyPath); err != nil { fmt.Fprintf(errW, "warning: could not resume conversation: %v\n", err) } else if n := conv.HistoryCount(); n > 0 { @@ -113,19 +115,24 @@ func chatCmd() *cobra.Command { fmt.Fprintln(w) } // Save conversation after single-shot. - _ = conv.SaveToFile(historyPath) + if historyPath != "" { + _ = conv.SaveToFile(historyPath) + } return nil } // Interactive REPL with readline. fmt.Fprintf(errW, "AXIS Chat [%s] — type /help for commands, exit to quit\n\n", ui.Bold(currentModel)) - rl, err := readline.NewEx(&readline.Config{ + cfg := &readline.Config{ Prompt: ui.Cyan(">>> "), - HistoryFile: historyPath + ".line", InterruptPrompt: "^C", EOFPrompt: "exit", - }) + } + if historyPath != "" { + cfg.HistoryFile = historyPath + ".line" + } + rl, err := readline.NewEx(cfg) if err != nil { // Fallback to plain scanner if readline fails (e.g., non-TTY). return runPlainREPL(cmd.Context(), client, conv, currentModel, w, errW, timeout, historyPath) @@ -175,11 +182,11 @@ func chatCmd() *cobra.Command { } // Save conversation on exit. - if n := conv.HistoryCount(); n > 0 { + if historyPath != "" && conv.HistoryCount() > 0 { if err := conv.SaveToFile(historyPath); err != nil { fmt.Fprintf(errW, "warning: could not save conversation: %v\n", err) } else { - fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", n) + fmt.Fprintf(errW, "Saved %d messages to conversation history.\n", conv.HistoryCount()) } } return nil @@ -235,7 +242,7 @@ func runPlainREPL(ctx context.Context, client *chat.Client, conv *chat.Conversat conv.Append(resp) fmt.Fprintln(w) } - if n := conv.HistoryCount(); n > 0 { + if historyPath != "" && conv.HistoryCount() > 0 { _ = conv.SaveToFile(historyPath) } return nil diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 830f8f5..27b71f6 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -262,8 +262,9 @@ func TestToolReadFilePathValidation(t *testing.T) { if err == nil { t.Fatal("expected error for path traversal") } - if !strings.Contains(err.Error(), "escapes") { - t.Errorf("expected 'escapes' error, got: %s", err.Error()) + // EvalSymlinks may report lstat failure for non-existent paths outside cwd. + if !strings.Contains(err.Error(), "escapes") && !strings.Contains(err.Error(), "cannot resolve") { + t.Errorf("expected 'escapes' or 'cannot resolve' error, got: %s", err.Error()) } } diff --git a/internal/agent/summarize.go b/internal/agent/summarize.go index 88e9580..e9766a4 100644 --- a/internal/agent/summarize.go +++ b/internal/agent/summarize.go @@ -123,13 +123,15 @@ func summarizePlacementDecision(dec models.PlacementDecision) string { return b.String() } -// truncate truncates a string to maxLen, appending "..." if truncated. +// truncate truncates a string to maxLen runes, appending "..." if truncated. +// Safe for UTF-8 — operates on runes, not bytes. func truncate(s string, maxLen int) string { - if len(s) <= maxLen { + runes := []rune(s) + if len(runes) <= maxLen { return s } if maxLen <= 3 { - return s[:maxLen] + return string(runes[:maxLen]) } - return s[:maxLen-3] + "..." + return string(runes[:maxLen-3]) + "..." } diff --git a/internal/agent/tools.go b/internal/agent/tools.go index 42dfab7..6d69ede 100644 --- a/internal/agent/tools.go +++ b/internal/agent/tools.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -229,14 +230,22 @@ func (r *ToolRegistry) registerReadFile() { if err != nil { return "", err } - data, err := os.ReadFile(clean) + f, err := os.Open(clean) if err != nil { return "", fmt.Errorf("cannot read file %q: %w", clean, err) } - content := string(data) + defer f.Close() + const maxFileSize = 8000 - if len(content) > maxFileSize { - content = content[:maxFileSize] + "\n... [truncated to 8000 chars]" + // Read up to maxFileSize+1 to detect truncation. + limited := io.LimitReader(f, int64(maxFileSize)+1) + data, err := io.ReadAll(limited) + if err != nil { + return "", fmt.Errorf("cannot read file %q: %w", clean, err) + } + content := string(data) + if len(data) > maxFileSize { + content = truncateRune(content, maxFileSize) + "\n... [truncated to 8000 chars]" } return content, nil }, @@ -270,8 +279,13 @@ func (r *ToolRegistry) registerListDirectory() { return "", fmt.Errorf("cannot read directory %q: %w", clean, err) } var b strings.Builder + const maxDirEntries = 100 fmt.Fprintf(&b, "Directory: %s (%d entries)\n", clean, len(entries)) - for _, e := range entries { + for i, e := range entries { + if i >= maxDirEntries { + fmt.Fprintf(&b, "... and %d more entries\n", len(entries)-i) + break + } name := e.Name() if e.IsDir() { name += "/" @@ -284,7 +298,8 @@ func (r *ToolRegistry) registerListDirectory() { } // validateToolPath validates and resolves a path for file tools, preventing -// directory traversal outside the current working directory. +// directory traversal outside the current working directory. Symlinks are +// resolved before the bounds check to prevent symlink-based escapes. func validateToolPath(p string) (string, error) { cwd, err := os.Getwd() if err != nil { @@ -294,15 +309,20 @@ func validateToolPath(p string) (string, error) { if !filepath.IsAbs(clean) { clean = filepath.Join(cwd, clean) } + // Resolve symlinks to their real destination. + resolved, err := filepath.EvalSymlinks(clean) + if err != nil { + return "", fmt.Errorf("cannot resolve path %q: %w", p, err) + } // Ensure the resolved path is within cwd. - rel, err := filepath.Rel(cwd, clean) + rel, err := filepath.Rel(cwd, resolved) if err != nil { return "", fmt.Errorf("invalid path %q: %w", p, err) } if strings.HasPrefix(rel, "..") { return "", fmt.Errorf("path %q escapes working directory", p) } - return clean, nil + return resolved, nil } // --- Tool: run_shell --- @@ -375,8 +395,20 @@ func ExecuteShell(ctx context.Context, command string) (string, error) { // Cap output to prevent blowing up the context window. const maxOutput = 4000 - if len(output) > maxOutput { - output = output[:maxOutput] + "\n... [truncated to 4000 chars]" + if len([]rune(output)) > maxOutput { + output = truncateRune(output, maxOutput) + "\n... [truncated to 4000 chars]" } return output, nil } + +// truncateRune truncates a string to maxLen runes, appending "..." if truncated. +func truncateRune(s string, maxLen int) string { + runes := []rune(s) + if len(runes) <= maxLen { + return s + } + if maxLen <= 3 { + return string(runes[:maxLen]) + } + return string(runes[:maxLen-3]) + "..." +} diff --git a/internal/chat/persist.go b/internal/chat/persist.go index 56dad3a..4d5393c 100644 --- a/internal/chat/persist.go +++ b/internal/chat/persist.go @@ -13,9 +13,12 @@ type conversationHistory struct { } // PersistPath returns the default path for conversation history files. -func PersistPath(name string) string { - home, _ := os.UserHomeDir() - return filepath.Join(home, ".axis", name+"-history.json") +func PersistPath(name string) (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("cannot determine home directory: %w", err) + } + return filepath.Join(home, ".axis", name+"-history.json"), nil } // SaveToFile writes the conversation (excluding system messages) to the given path. @@ -32,10 +35,10 @@ func (c *Conversation) SaveToFile(path string) error { if err != nil { return fmt.Errorf("marshal conversation: %w", err) } - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { return fmt.Errorf("create history directory: %w", err) } - if err := os.WriteFile(path, data, 0644); err != nil { + if err := os.WriteFile(path, data, 0600); err != nil { return fmt.Errorf("write history file: %w", err) } return nil