From f2b8a8765af70896eeab5bf4404bdf697a02a427 Mon Sep 17 00:00:00 2001 From: Halleluyah Oludele Date: Tue, 26 May 2026 22:53:14 +0100 Subject: [PATCH] pkg/retrieval: add agentic-navigation strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new Strategy/CostStrategy implementation that lets the LLM explore the tree iteratively via JSON action turns (outline, expand, read, done) instead of being handed the whole view at once. Useful for trees too large to fit any single context window or where read-on- demand keeps token cost low for narrow queries. Result gains a HopsTaken field so callers can distinguish single-shot strategies from iterative ones; single-pass and chunked-tree set it to 1. Wiring: - config.RetrievalConfig.Agentic{ MaxHops, Model }; defaults to 6 hops, falls back to budget.ModelName when Model is empty - VLE_RETRIEVAL_AGENTIC_MAX_HOPS / VLE_RETRIEVAL_AGENTIC_MODEL env overrides - buildStrategy in cmd/engine + cmd/server now takes the storage so the agentic strategy can satisfy 'read' via a small ContentFetcher adapter Protocol: JSON-action text protocol rather than llmgate's Tools field; llmgate v0.2.0 declares ToolDef/ToolCall as scaffolding but provider adapters don't yet populate ToolCalls. The strategy stays forward-compatible — when llmgate wires native tool calling the surface here doesn't change. Tests use a scriptedLLM that returns canned per-turn replies and cover: happy-path expand/read/done, expand/expand/done (matches the plan's manual trace), MaxHops cap, graceful degradation on persistent non-JSON output, fabricated-ID filtering, empty tree guard, summary fallback for 'read' on internal nodes, and the tolerant ParseAction decoder. --- cmd/engine/main.go | 27 +- cmd/server/main.go | 27 +- pkg/config/config.go | 36 ++- pkg/retrieval/agentic.go | 482 ++++++++++++++++++++++++++++++++++ pkg/retrieval/agentic_test.go | 386 +++++++++++++++++++++++++++ pkg/retrieval/chunked_tree.go | 1 + pkg/retrieval/single_pass.go | 1 + pkg/retrieval/strategy.go | 14 +- 8 files changed, 965 insertions(+), 9 deletions(-) create mode 100644 pkg/retrieval/agentic.go create mode 100644 pkg/retrieval/agentic_test.go diff --git a/cmd/engine/main.go b/cmd/engine/main.go index 2135af8..71004bd 100644 --- a/cmd/engine/main.go +++ b/cmd/engine/main.go @@ -10,6 +10,7 @@ import ( "errors" "flag" "fmt" + "io" "log/slog" "net/http" "os" @@ -87,7 +88,7 @@ func run() error { if err != nil { return fmt.Errorf("init llm: %w", err) } - strategy := buildStrategy(cfg.Retrieval, llmClient) + strategy := buildStrategy(cfg.Retrieval, llmClient, store) // Wrap with caching if enabled. if cfg.Retrieval.Cache.Enabled { @@ -248,17 +249,39 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) { } } -func buildStrategy(c config.RetrievalConfig, client llmgate.Client) retrieval.Strategy { +func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy { switch c.Strategy { case "single-pass": return retrieval.NewSinglePass(client) case "chunked-tree": return retrieval.NewChunkedTree(client) + case "agentic": + a := retrieval.NewAgentic(client, storageFetcher{s: store}) + if c.Agentic.MaxHops > 0 { + a.MaxHops = c.Agentic.MaxHops + } + a.ModelOverride = c.Agentic.Model + return a default: return retrieval.NewChunkedTree(client) } } +// storageFetcher adapts a storage.Storage to retrieval.ContentFetcher. +// The agentic strategy reads section bodies one at a time, so we +// materialize the full reader contents into a []byte here rather than +// streaming — section bodies are typically a few KB. +type storageFetcher struct{ s storage.Storage } + +func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + rc, _, err := sf.s.Get(ctx, ref) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + // buildTLSConfig returns a *tls.Config when direct TLS is enabled, or nil // when the engine should serve plaintext (behind a proxy). Returning nil // leaves http.Server's TLSConfig unset, which is exactly what ListenAndServe diff --git a/cmd/server/main.go b/cmd/server/main.go index e77fc98..0aa1cc9 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -18,6 +18,7 @@ import ( "errors" "flag" "fmt" + "io" "log/slog" "net/http" "os" @@ -131,7 +132,7 @@ func run() error { if err != nil { return fmt.Errorf("init llm: %w", err) } - strategy := buildStrategy(cfg.Engine.Retrieval, llmClient) + strategy := buildStrategy(cfg.Engine.Retrieval, llmClient, store) // Wrap with caching if enabled in engine config. if cfg.Engine.Retrieval.Cache.Enabled { @@ -324,17 +325,39 @@ func buildLLM(c enginecfg.LLMConfig) (llmgate.Client, error) { } } -func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client) retrieval.Strategy { +func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy { switch c.Strategy { case "single-pass": return retrieval.NewSinglePass(client) case "chunked-tree": return retrieval.NewChunkedTree(client) + case "agentic": + a := retrieval.NewAgentic(client, storageFetcher{s: store}) + if c.Agentic.MaxHops > 0 { + a.MaxHops = c.Agentic.MaxHops + } + a.ModelOverride = c.Agentic.Model + return a default: return retrieval.NewChunkedTree(client) } } +// storageFetcher adapts a storage.Storage to retrieval.ContentFetcher. +// The agentic strategy reads section bodies one at a time, so we +// materialize the full reader contents into a []byte here rather than +// streaming — section bodies are typically a few KB. +type storageFetcher struct{ s storage.Storage } + +func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + rc, _, err := sf.s.Get(ctx, ref) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + func buildTLSConfig(c config.TLSConfig) *tls.Config { if !c.Enabled() { return nil diff --git a/pkg/config/config.go b/pkg/config/config.go index ec240a4..e2fe2cc 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" "os" + "strconv" "time" "gopkg.in/yaml.v3" @@ -157,6 +158,7 @@ type GeminiBlock struct { type RetrievalConfig struct { Strategy string `yaml:"strategy"` ChunkedTree ChunkedTreeBlock `yaml:"chunked_tree"` + Agentic AgenticBlock `yaml:"agentic"` Cache CacheBlock `yaml:"cache"` } @@ -181,6 +183,23 @@ type ChunkedTreeBlock struct { IncludeSiblingBreadcrumb bool `yaml:"include_sibling_breadcrumbs"` } +// AgenticBlock configures the agentic-navigation strategy. +// +// The agentic loop trades sequential latency for the ability to handle +// arbitrarily large trees: the model issues outline/expand/read actions +// until it picks a final set of section IDs or hits MaxHops. +type AgenticBlock struct { + // MaxHops caps the number of LLM turns one query consumes, counting + // the terminal "done" turn. Default: 6. + MaxHops int `yaml:"max_hops"` + + // Model optionally overrides the budget's model for navigation + // turns. Empty means use the budget's model. Useful when the + // retrieval engine wants the navigation loop on a fast/cheap + // model while answering is on a stronger one. + Model string `yaml:"model"` +} + // LogConfig configures logging. type LogConfig struct { Level string `yaml:"level"` @@ -213,6 +232,9 @@ func Default() Config { MaxParallelCalls: 8, IncludeSiblingBreadcrumb: true, }, + Agentic: AgenticBlock{ + MaxHops: 6, + }, Cache: CacheBlock{ Enabled: true, MaxEntries: 1024, @@ -314,6 +336,14 @@ func applyEnvOverrides(c *Config) { if v := os.Getenv("VLE_TLS_KEY_FILE"); v != "" { c.Server.TLS.KeyFile = v } + if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MAX_HOPS"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + c.Retrieval.Agentic.MaxHops = n + } + } + if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MODEL"); v != "" { + c.Retrieval.Agentic.Model = v + } } // Validate checks that required fields for the selected drivers are set. @@ -367,11 +397,15 @@ func (c Config) Validate() error { } switch c.Retrieval.Strategy { - case "single-pass", "chunked-tree": + case "single-pass", "chunked-tree", "agentic": default: return fmt.Errorf("unknown retrieval.strategy: %q", c.Retrieval.Strategy) } + if c.Retrieval.Agentic.MaxHops < 0 { + return fmt.Errorf("retrieval.agentic.max_hops must be >= 0, got %d", c.Retrieval.Agentic.MaxHops) + } + if c.Server.TLS.CertFile != "" && c.Server.TLS.KeyFile == "" { return errors.New("server.tls.key_file is required when cert_file is set") } diff --git a/pkg/retrieval/agentic.go b/pkg/retrieval/agentic.go new file mode 100644 index 0000000..5166e32 --- /dev/null +++ b/pkg/retrieval/agentic.go @@ -0,0 +1,482 @@ +package retrieval + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "strings" + + "github.com/hallelx2/llmgate" + + "github.com/hallelx2/vectorless-engine/pkg/tree" +) + +// ContentFetcher reads the raw bytes for a content reference. The agentic +// strategy uses it to satisfy the `read` action, which materializes a +// section's full body into the conversation. Implementations should be +// safe for concurrent use; one tree may be traversed by many queries. +type ContentFetcher interface { + Get(ctx context.Context, ref string) ([]byte, error) +} + +// AgenticStrategy is a tool-using retrieval loop. +// +// Rather than feeding the whole tree to the model in one shot +// (single-pass) or splitting and reasoning over slices in parallel +// (chunked-tree), it lets the model explore the tree iteratively: +// outline → expand interesting branches → read promising sections → +// done. Each turn the model receives the observation from the previous +// action and emits the next action as a JSON object. The strategy +// dispatches the action, fetches the observation, and feeds it back as +// the next user message. +// +// This trades latency (N sequential LLM calls) for the ability to handle +// trees that don't fit in any single context window, with reading +// behaviour that adapts to each query. +// +// Protocol choice +// +// The strategy uses a JSON-action text protocol rather than llmgate's +// Tools field. The provider adapters in llmgate v0.2.0 declare +// ToolDef/ToolCall as scaffolding but do not yet populate ToolCalls on +// responses, so the only portable way to drive a multi-turn tool loop +// today is to ask the model to emit a JSON action each turn and parse +// it with ParseAction. The strategy is forward-compatible: when llmgate +// wires native tool calling, the actionable surface here is the same. +type AgenticStrategy struct { + // LLM is the shared client used for every turn. + LLM llmgate.Client + + // Fetcher reads section bodies for the `read` action. + Fetcher ContentFetcher + + // MaxHops caps the number of LLM turns one Select consumes, + // including the terminal "done" turn. Zero means use defaultMaxHops. + MaxHops int + + // ModelOverride, if non-empty, replaces the budget's ModelName for + // every turn. Useful for routing the navigation loop to a cheaper or + // faster model than the rest of the engine. + ModelOverride string +} + +// defaultMaxHops bounds the agentic loop. Six turns is enough for +// outline → 2 expands → 2 reads → done while keeping latency and cost +// predictable. Bump this only when measurements show selection quality +// climbing with deeper traversal. +const defaultMaxHops = 6 + +// defaultOutlineLevel is the depth of the initial outline observation. +// One level usually surfaces enough structure for the model to choose +// where to expand next without burning a turn. +const defaultOutlineLevel = 1 + +// Compile-time interface checks. +var ( + _ Strategy = (*AgenticStrategy)(nil) + _ CostStrategy = (*AgenticStrategy)(nil) +) + +// NewAgentic constructs an AgenticStrategy with sensible defaults. +func NewAgentic(client llmgate.Client, fetcher ContentFetcher) *AgenticStrategy { + return &AgenticStrategy{ + LLM: client, + Fetcher: fetcher, + MaxHops: defaultMaxHops, + } +} + +// Name implements Strategy. +func (a *AgenticStrategy) Name() string { return "agentic" } + +// Select implements Strategy. +func (a *AgenticStrategy) Select(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) ([]tree.SectionID, error) { + r, err := a.SelectWithCost(ctx, t, query, budget) + if err != nil { + return nil, err + } + return r.SelectedIDs, nil +} + +// SelectWithCost implements CostStrategy. +func (a *AgenticStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) (*Result, error) { + if t == nil || t.Root == nil { + return &Result{}, nil + } + view := t.BuildView() + bySectionID := indexSections(view.Sections) + + model := a.ModelOverride + if model == "" { + model = budget.ModelName + } + + maxHops := a.MaxHops + if maxHops <= 0 { + maxHops = defaultMaxHops + } + + // Conversation: system + initial user message (query + outline). + msgs := []llmgate.Message{ + {Role: llmgate.RoleSystem, Content: agenticSystemPrompt}, + {Role: llmgate.RoleUser, Content: a.initialUserPrompt(view, query)}, + } + + var ( + totalUsage Usage + hopsTaken int + finalIDs []tree.SectionID + reasoning string + ) + + for hop := 0; hop < maxHops; hop++ { + req := llmgate.Request{ + Model: model, + Messages: msgs, + MaxTokens: 1024, + Temperature: 0, + } + resp, err := a.LLM.Complete(ctx, req) + if err != nil { + return nil, fmt.Errorf("agentic hop %d: %w", hop+1, err) + } + hopsTaken++ + totalUsage.Add(Usage{ + InputTokens: resp.Usage.InputTokens, + OutputTokens: resp.Usage.OutputTokens, + TotalTokens: resp.Usage.TotalTokens, + CostUSD: resp.Usage.CostUSD, + LLMCalls: 1, + }) + + // Record the assistant turn before parsing so the next prompt has + // the model's own context. + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleAssistant, + Content: resp.Content, + }) + + action, parseErr := ParseAction(resp.Content) + if parseErr != nil { + // Graceful degradation: a malformed action doesn't 500 the + // query — we ask the model to retry once with a stronger + // instruction. If the next turn still misparses, we abort + // and return whatever the model has picked so far (often + // nothing). Matches the runSelectionWithRetry pattern from + // single_pass.go. + log.Printf("retrieval: agentic hop %d action parse failed: %v", hop+1, parseErr) + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: "Your last reply was not a valid JSON action. Reply with EXACTLY one JSON object: {\"action\":\"outline|expand|read|done\", ...}. No prose, no markdown fences.", + }) + continue + } + + switch action.Action { + case actionDone: + finalIDs = filterToTreeIDs(action.PickedIDs, bySectionID) + reasoning = action.Reasoning + return &Result{ + SelectedIDs: finalIDs, + Reasoning: reasoning, + ModelUsed: model, + Usage: totalUsage, + HopsTaken: hopsTaken, + }, nil + + case actionOutline: + level := action.Level + if level <= 0 { + level = defaultOutlineLevel + } + obs := renderOutline(view, level) + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("outline", obs), + }) + + case actionExpand: + obs, ok := renderExpand(bySectionID, action.SectionID) + if !ok { + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("expand", fmt.Sprintf("unknown section_id %q. Use outline or pick an ID from a previous observation.", action.SectionID)), + }) + continue + } + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("expand", obs), + }) + + case actionRead: + obs, ok := a.renderRead(ctx, t, tree.SectionID(action.SectionID)) + if !ok { + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("read", fmt.Sprintf("unknown section_id %q or no content available.", action.SectionID)), + }) + continue + } + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("read", obs), + }) + + default: + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation(action.Action, fmt.Sprintf("unsupported action %q. Use one of: outline, expand, read, done.", action.Action)), + }) + } + } + + // Ran out of hops without a `done` action. Return whatever IDs the + // model proposed in the last action (if any) plus the hop count so + // the caller can see the cap was hit. + log.Printf("retrieval: agentic strategy hit max_hops=%d without done; returning %d ids", maxHops, len(finalIDs)) + return &Result{ + SelectedIDs: finalIDs, + Reasoning: reasoning, + ModelUsed: model, + Usage: totalUsage, + HopsTaken: hopsTaken, + }, nil +} + +// initialUserPrompt is the very first user turn: it explains the task, +// renders a shallow outline (default level=1) so the model has +// something to react to, and reminds the model of the action protocol. +func (a *AgenticStrategy) initialUserPrompt(view tree.View, query string) string { + var b strings.Builder + if view.Title != "" { + b.WriteString("Document: ") + b.WriteString(view.Title) + b.WriteString("\n\n") + } + b.WriteString("Initial outline (depth=") + fmt.Fprintf(&b, "%d", defaultOutlineLevel) + b.WriteString("):\n") + b.WriteString(renderOutline(view, defaultOutlineLevel)) + b.WriteString("\nUser query:\n") + b.WriteString(query) + b.WriteString("\n\nReply with a JSON action. The actions you may use are:\n") + b.WriteString(actionProtocolHelp) + return b.String() +} + +// renderRead pulls a section's full content via the Fetcher. Returns +// (text, true) on success, or ("", false) when the section is unknown +// or has no ContentRef / no fetcher. Failures from the storage backend +// (e.g. transient network error) are returned to the model as the +// observation so it can recover with a different action. +func (a *AgenticStrategy) renderRead(ctx context.Context, t *tree.Tree, id tree.SectionID) (string, bool) { + sec := t.FindByID(id) + if sec == nil { + return "", false + } + if a.Fetcher == nil || sec.ContentRef == "" { + // Internal sections summarize their children — fall back to + // the summary so the model still gets useful signal. + if sec.Summary != "" { + return fmt.Sprintf("section %s (%s) has no body content; summary:\n%s", sec.ID, sec.Title, sec.Summary), true + } + return "", false + } + data, err := a.Fetcher.Get(ctx, sec.ContentRef) + if err != nil { + return fmt.Sprintf("error reading section %s: %v", sec.ID, err), true + } + body := string(data) + body = strings.TrimSpace(body) + if body == "" && sec.Summary != "" { + return fmt.Sprintf("section %s body was empty; summary:\n%s", sec.ID, sec.Summary), true + } + return fmt.Sprintf("section %s (%s):\n%s", sec.ID, sec.Title, body), true +} + +// agenticSystemPrompt instructs the model on the navigation loop. It +// mirrors the language of the existing selection prompt so behaviour +// across strategies stays consistent: prefer leaves, pick few, never +// invent IDs. +const agenticSystemPrompt = `You are a navigation agent for a document tree. You explore a hierarchical outline of titles + short summaries + stable section IDs, then pick the leaf section IDs whose full content most directly answers the user's query. + +Process: +- On each turn, reply with EXACTLY one JSON object describing the next action. +- Use 'outline' to refresh your view of the whole tree at a given depth. +- Use 'expand' to see a section's immediate children. +- Use 'read' to read a section's full body (use sparingly — bodies are large). +- Use 'done' to terminate with your final picks. + +Rules: +- Prefer leaf sections. Include a parent only if its own body is directly relevant. +- Include as few sections as possible. Quality over quantity. +- Only return IDs you have seen in a prior observation. Do not invent IDs. +- If nothing in the document is relevant, return done with an empty picked_ids array.` + +// actionProtocolHelp is the one-shot reminder appended to the initial +// user prompt so the model gets concrete examples of valid actions +// without us needing to maintain a separate few-shot block. +const actionProtocolHelp = `- {"action":"outline","level":2} — re-render the outline N levels deep +- {"action":"expand","section_id":"sec_x"} — list immediate children of sec_x +- {"action":"read","section_id":"sec_x"} — fetch the full body of sec_x +- {"action":"done","picked_ids":["sec_x","sec_y"],"reasoning":"why"} — finalize + +Reply with ONLY the JSON object. No prose, no markdown fences.` + +// Action describes the LLM-chosen next step in the agentic loop. +// +// The struct is exported so tests can construct expected actions +// without depending on internal JSON shapes. SectionID is a string +// rather than tree.SectionID because the model's value may not match +// any real section in the tree; we keep the raw input here and let the +// loop attribute "unknown section" errors back to the model. +type Action struct { + // Action is the dispatch tag. One of: outline, expand, read, done. + Action string `json:"action"` + + // Level is the depth requested by an outline action. Optional; + // defaults to defaultOutlineLevel when zero/negative. + Level int `json:"level,omitempty"` + + // SectionID is the target of expand and read actions. + SectionID string `json:"section_id,omitempty"` + + // PickedIDs is the final selection for a done action. + PickedIDs []string `json:"picked_ids,omitempty"` + + // Reasoning is an optional explanation accompanying done. + Reasoning string `json:"reasoning,omitempty"` +} + +// Action tag constants. Defined as untyped strings rather than a +// custom Go enum because the value lives in JSON and must round-trip +// without surprise. +const ( + actionOutline = "outline" + actionExpand = "expand" + actionRead = "read" + actionDone = "done" +) + +// ParseAction is the tolerant JSON decoder for the agentic protocol. +// It mirrors ParseSelection: it strips code fences, peels prose +// wrappers, and isolates the first balanced JSON object. Returns an +// error only when no JSON object can be recovered. +func ParseAction(raw string) (Action, error) { + raw = strings.TrimSpace(raw) + if raw == "" { + return Action{}, fmt.Errorf("empty response") + } + // Strip ```json ... ``` fences if present. + if strings.HasPrefix(raw, "```") { + if i := strings.Index(raw, "\n"); i >= 0 { + raw = raw[i+1:] + } + raw = strings.TrimSuffix(raw, "```") + raw = strings.TrimSpace(raw) + } + if i := strings.Index(raw, "{"); i > 0 { + raw = raw[i:] + } + if j := strings.LastIndex(raw, "}"); j >= 0 && j < len(raw)-1 { + raw = raw[:j+1] + } + + var a Action + dec := json.NewDecoder(strings.NewReader(raw)) + if err := dec.Decode(&a); err != nil && err != io.EOF { + return Action{}, fmt.Errorf("decode action: %w", err) + } + a.Action = strings.ToLower(strings.TrimSpace(a.Action)) + if a.Action == "" { + return Action{}, fmt.Errorf("missing 'action' field") + } + a.SectionID = strings.TrimSpace(a.SectionID) + return a, nil +} + +// renderOutline renders the section view down to the given depth. +// depth=1 shows the root and its immediate children; depth=2 also +// shows grandchildren; etc. The format is the same as the +// chunked-tree prompt so the model sees consistent structure across +// strategies. +func renderOutline(view tree.View, depth int) string { + if depth <= 0 { + depth = defaultOutlineLevel + } + var b strings.Builder + for _, sv := range view.Sections { + if sv.Depth > depth { + continue + } + writeSectionLine(&b, sv) + } + return b.String() +} + +// renderExpand returns a string with the immediate children of +// sectionID rendered as outline lines. Returns ("", false) when the +// ID is not in the tree. The fallback when a section has no children +// is to render the section itself with a note — that lets the model +// distinguish "unknown id" from "leaf, nothing to expand". +func renderExpand(bySectionID map[tree.SectionID]tree.SectionView, sectionID string) (string, bool) { + sv, ok := bySectionID[tree.SectionID(sectionID)] + if !ok { + return "", false + } + if len(sv.Children) == 0 { + return fmt.Sprintf("[%s] %s is a leaf (no children). Use read to fetch its body.", sv.ID, sv.Title), true + } + var b strings.Builder + for _, cid := range sv.Children { + child, ok := bySectionID[cid] + if !ok { + continue + } + writeSectionLine(&b, child) + } + return b.String(), true +} + +// wrapObservation formats an action's result so the model can clearly +// see which action produced which observation. +func wrapObservation(action, body string) string { + return fmt.Sprintf("Observation (%s):\n%s\n\nNext JSON action?", action, body) +} + +// indexSections returns a flat map from SectionID to SectionView for +// O(1) lookup during the loop. The map is read-only after construction +// and so is safe for concurrent use — but each Select call builds its +// own anyway because tree views are cheap. +func indexSections(sections []tree.SectionView) map[tree.SectionID]tree.SectionView { + out := make(map[tree.SectionID]tree.SectionView, len(sections)) + for _, sv := range sections { + out[sv.ID] = sv + } + return out +} + +// filterToTreeIDs drops IDs the model invented (those not in the +// tree's section index) and deduplicates. Preserves first-seen order. +func filterToTreeIDs(rawIDs []string, bySectionID map[tree.SectionID]tree.SectionView) []tree.SectionID { + seen := map[tree.SectionID]struct{}{} + out := make([]tree.SectionID, 0, len(rawIDs)) + for _, id := range rawIDs { + sid := tree.SectionID(strings.TrimSpace(id)) + if sid == "" { + continue + } + if _, ok := bySectionID[sid]; !ok { + continue + } + if _, dup := seen[sid]; dup { + continue + } + seen[sid] = struct{}{} + out = append(out, sid) + } + return out +} diff --git a/pkg/retrieval/agentic_test.go b/pkg/retrieval/agentic_test.go new file mode 100644 index 0000000..b5226a1 --- /dev/null +++ b/pkg/retrieval/agentic_test.go @@ -0,0 +1,386 @@ +package retrieval_test + +import ( + "context" + "errors" + "strings" + "sync" + "sync/atomic" + "testing" + + "github.com/hallelx2/llmgate" + + "github.com/hallelx2/vectorless-engine/pkg/retrieval" + "github.com/hallelx2/vectorless-engine/pkg/tree" +) + +// scriptedLLM returns a sequence of canned responses, one per Complete +// call, exactly as the agentic loop needs for deterministic tests. If +// the script is exhausted, the LLM returns the loopReply value (used +// by the hop-cap test to simulate a model that never decides to +// terminate). +type scriptedLLM struct { + replies []string + loopReply string + + calls int32 + + mu sync.Mutex + lastPrompts []string +} + +func (s *scriptedLLM) Complete(ctx context.Context, req llmgate.Request) (*llmgate.Response, error) { + i := int(atomic.AddInt32(&s.calls, 1)) - 1 + + // Capture the most recent user message for later assertions. + var userMsg string + for _, msg := range req.Messages { + if msg.Role == llmgate.RoleUser { + userMsg = msg.Content + } + } + s.mu.Lock() + s.lastPrompts = append(s.lastPrompts, userMsg) + s.mu.Unlock() + + if i < len(s.replies) { + return &llmgate.Response{Content: s.replies[i]}, nil + } + if s.loopReply != "" { + return &llmgate.Response{Content: s.loopReply}, nil + } + // Exhausted with no loopReply — surface as an error so the test + // notices it under-scripted instead of silently hanging. + return nil, errors.New("scriptedLLM: replies exhausted") +} + +func (s *scriptedLLM) CountTokens(ctx context.Context, t string) (int, error) { + return len(t) / 4, nil +} + +// mapFetcher is an in-memory ContentFetcher backed by a map. The +// agentic strategy only needs Get; we don't bother modelling errors +// here because the tests use real refs. +type mapFetcher struct{ data map[string]string } + +func (m mapFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + v, ok := m.data[ref] + if !ok { + return nil, errors.New("not found") + } + return []byte(v), nil +} + +// buildAgenticTree constructs a 3-level test tree: +// +// sec_root → [sec_a, sec_b] +// sec_a → [sec_a1 (leaf, ref=a1_ref), sec_a2 (leaf, ref=a2_ref)] +// sec_b → [sec_b1 (leaf, ref=b1_ref)] +// +// Enough depth to exercise expand and read in sequence. +func buildAgenticTree() *tree.Tree { + a1 := &tree.Section{ID: "sec_a1", ParentID: "sec_a", Title: "Install", Summary: "install steps", ContentRef: "a1_ref"} + a2 := &tree.Section{ID: "sec_a2", ParentID: "sec_a", Title: "Config", Summary: "config keys", ContentRef: "a2_ref"} + b1 := &tree.Section{ID: "sec_b1", ParentID: "sec_b", Title: "Querying", Summary: "how to query", ContentRef: "b1_ref"} + a := &tree.Section{ID: "sec_a", ParentID: "sec_root", Title: "Setup", Summary: "setup section", Children: []*tree.Section{a1, a2}} + b := &tree.Section{ID: "sec_b", ParentID: "sec_root", Title: "Usage", Summary: "usage section", Children: []*tree.Section{b1}} + root := &tree.Section{ID: "sec_root", Title: "Atlas", Children: []*tree.Section{a, b}} + return &tree.Tree{DocumentID: "doc_x", Title: "Atlas", Root: root} +} + +func TestAgenticHappyPath(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"expand","section_id":"sec_a"}`, + `{"action":"read","section_id":"sec_a1"}`, + `{"action":"done","picked_ids":["sec_a1"],"reasoning":"install matches the query"}`, + }, + } + fetcher := mapFetcher{data: map[string]string{ + "a1_ref": "Install steps: run vle ingest...", + "a2_ref": "Config keys: VLE_*", + "b1_ref": "How to query the API.", + }} + + s := retrieval.NewAgentic(llm, fetcher) + + res, err := s.SelectWithCost(context.Background(), tr, "how do I install?", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1], got %v", res.SelectedIDs) + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3, got %d", res.HopsTaken) + } + if res.Reasoning == "" { + t.Errorf("want non-empty reasoning, got %q", res.Reasoning) + } + if res.Usage.LLMCalls != 3 { + t.Errorf("want Usage.LLMCalls=3, got %d", res.Usage.LLMCalls) + } + + // Sanity-check that the read action genuinely materialized the body + // content into the next prompt — the read observation must contain + // the body string the fetcher returned. + llm.mu.Lock() + defer llm.mu.Unlock() + if len(llm.lastPrompts) < 3 { + t.Fatalf("expected 3 prompts captured, got %d", len(llm.lastPrompts)) + } + thirdPrompt := llm.lastPrompts[2] + if !strings.Contains(thirdPrompt, "Install steps: run vle ingest") { + t.Errorf("expected read observation to contain body content, got:\n%s", thirdPrompt) + } +} + +// TestAgenticExpandExpandDone covers the plan's manual trace: when the +// model picks expand → expand → done, the strategy must return the +// IDs from the final done action. This is the "two-level navigation +// without a read" scenario, which is the cheapest happy path. +func TestAgenticExpandExpandDone(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"expand","section_id":"sec_a"}`, + `{"action":"expand","section_id":"sec_b"}`, + `{"action":"done","picked_ids":["sec_a1","sec_b1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "anything", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if len(res.SelectedIDs) != 2 { + t.Fatalf("want 2 ids, got %v", res.SelectedIDs) + } + want := map[tree.SectionID]bool{"sec_a1": true, "sec_b1": true} + for _, id := range res.SelectedIDs { + if !want[id] { + t.Errorf("unexpected id %q", id) + } + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3, got %d", res.HopsTaken) + } +} + +func TestAgenticHopCap(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + // Never emits done. Strategy should bail at MaxHops. + llm := &scriptedLLM{ + loopReply: `{"action":"expand","section_id":"sec_a"}`, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + s.MaxHops = 4 + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if res.HopsTaken != 4 { + t.Errorf("want HopsTaken=4 (capped), got %d", res.HopsTaken) + } + // No done means no final picks. + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty SelectedIDs on cap, got %v", res.SelectedIDs) + } + if got := atomic.LoadInt32(&llm.calls); got != 4 { + t.Errorf("want 4 LLM calls (MaxHops), got %d", got) + } + if res.Usage.LLMCalls != 4 { + t.Errorf("want Usage.LLMCalls=4, got %d", res.Usage.LLMCalls) + } +} + +// TestAgenticBadJSONGraceful covers the JSON-mode-blip path: a +// response that isn't valid JSON must not 500 the query. The first +// turn parses as garbage; we expect the strategy to nudge the model +// with a retry prompt and then bail when subsequent turns also fail. +// Final outcome: nil error, empty selection. +func TestAgenticBadJSONGraceful(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + // Every reply is prose, never JSON. The loop must consume MaxHops + // turns and return cleanly. + loopReply: "I think it's sec_a1, that's where install lives.", + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + s.MaxHops = 3 + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("want nil error on persistent parse failure, got %v", err) + } + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty selection on parse failure, got %v", res.SelectedIDs) + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3 (capped), got %d", res.HopsTaken) + } +} + +// TestAgenticFiltersUnknownPicks mirrors single-pass: if the model +// invents IDs not present in the tree, they must be dropped. +func TestAgenticFiltersUnknownPicks(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"done","picked_ids":["sec_a1","sec_fake","sec_a1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1] after filter+dedup, got %v", res.SelectedIDs) + } + if res.HopsTaken != 1 { + t.Errorf("want HopsTaken=1, got %d", res.HopsTaken) + } +} + +// TestAgenticEmptyTree exercises the early-return guard so callers +// don't pay an LLM hop for a degenerate input. +func TestAgenticEmptyTree(t *testing.T) { + t.Parallel() + + llm := &scriptedLLM{} + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), &tree.Tree{}, "q", retrieval.ContextBudget{}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty selection on empty tree, got %v", res.SelectedIDs) + } + if atomic.LoadInt32(&llm.calls) != 0 { + t.Errorf("want 0 LLM calls on empty tree, got %d", llm.calls) + } +} + +// TestAgenticReadFallbackWhenNoContent verifies a 'read' on an +// internal section (no ContentRef) falls back to the summary rather +// than erroring out — the model should still get useful signal. +func TestAgenticReadFallbackWhenNoContent(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + // Read on an internal section (sec_a has no ContentRef). + `{"action":"read","section_id":"sec_a"}`, + `{"action":"done","picked_ids":["sec_a1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1], got %v", res.SelectedIDs) + } + + // The observation for the read turn should contain "summary" — the + // fallback path — not a fetcher error. + llm.mu.Lock() + defer llm.mu.Unlock() + if len(llm.lastPrompts) < 2 { + t.Fatalf("expected 2 prompts captured, got %d", len(llm.lastPrompts)) + } + if !strings.Contains(llm.lastPrompts[1], "summary") { + t.Errorf("expected summary fallback in read observation, got:\n%s", llm.lastPrompts[1]) + } +} + +func TestParseAction(t *testing.T) { + t.Parallel() + cases := []struct { + name string + in string + want retrieval.Action + hasErr bool + }{ + { + name: "expand", + in: `{"action":"expand","section_id":"sec_a"}`, + want: retrieval.Action{Action: "expand", SectionID: "sec_a"}, + }, + { + name: "outline_with_level", + in: `{"action":"outline","level":3}`, + want: retrieval.Action{Action: "outline", Level: 3}, + }, + { + name: "done_with_picks", + in: `{"action":"done","picked_ids":["a","b"],"reasoning":"why"}`, + want: retrieval.Action{Action: "done", PickedIDs: []string{"a", "b"}, Reasoning: "why"}, + }, + { + name: "code_fence", + in: "```json\n{\"action\":\"done\",\"picked_ids\":[\"x\"]}\n```", + want: retrieval.Action{Action: "done", PickedIDs: []string{"x"}}, + }, + { + name: "prose_before", + in: `Sure: {"action":"expand","section_id":"sec_a"}`, + want: retrieval.Action{Action: "expand", SectionID: "sec_a"}, + }, + { + name: "garbage", + in: "I think it's sec_a", + hasErr: true, + }, + { + name: "empty", + in: "", + hasErr: true, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got, err := retrieval.ParseAction(c.in) + if c.hasErr { + if err == nil { + t.Fatalf("want error, got %+v", got) + } + return + } + if err != nil { + t.Fatalf("parse: %v", err) + } + if got.Action != c.want.Action { + t.Errorf("Action: got %q want %q", got.Action, c.want.Action) + } + if got.SectionID != c.want.SectionID { + t.Errorf("SectionID: got %q want %q", got.SectionID, c.want.SectionID) + } + if got.Level != c.want.Level { + t.Errorf("Level: got %d want %d", got.Level, c.want.Level) + } + if len(got.PickedIDs) != len(c.want.PickedIDs) { + t.Errorf("PickedIDs: got %v want %v", got.PickedIDs, c.want.PickedIDs) + } + }) + } +} diff --git a/pkg/retrieval/chunked_tree.go b/pkg/retrieval/chunked_tree.go index 866c2c0..716e47a 100644 --- a/pkg/retrieval/chunked_tree.go +++ b/pkg/retrieval/chunked_tree.go @@ -112,6 +112,7 @@ func (c *ChunkedTree) SelectWithCost(ctx context.Context, t *tree.Tree, query st return &Result{ SelectedIDs: c.Merge.Merge(allIDs), Usage: totalUsage, + HopsTaken: 1, }, nil } diff --git a/pkg/retrieval/single_pass.go b/pkg/retrieval/single_pass.go index 1e20440..0f815d4 100644 --- a/pkg/retrieval/single_pass.go +++ b/pkg/retrieval/single_pass.go @@ -69,6 +69,7 @@ func (s *SinglePass) SelectWithCost(ctx context.Context, t *tree.Tree, query str SelectedIDs: FilterKnownIDs(ids, view.Sections), ModelUsed: model, Usage: usage, + HopsTaken: 1, }, nil } diff --git a/pkg/retrieval/strategy.go b/pkg/retrieval/strategy.go index 5900f3b..29dd0dc 100644 --- a/pkg/retrieval/strategy.go +++ b/pkg/retrieval/strategy.go @@ -60,10 +60,16 @@ func (b ContextBudget) Available() int { // Result is returned to the API layer. It includes not just IDs but the // reasoning trace and cost accounting when the strategy supports it. type Result struct { - SelectedIDs []tree.SectionID - Reasoning string - ModelUsed string - Usage Usage + SelectedIDs []tree.SectionID `json:"selected_ids"` + Reasoning string `json:"reasoning,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + Usage Usage `json:"usage"` + + // HopsTaken is the number of LLM turns the strategy issued to reach the + // final selection. Single-shot strategies set this to 1; iterative + // strategies (e.g. agentic) set it to the number of tool-using turns + // actually consumed, including the terminal "done" turn. + HopsTaken int `json:"hops_taken,omitempty"` } // Usage is the aggregated token + cost accounting across all LLM calls