diff --git a/cmd/engine/main.go b/cmd/engine/main.go index 7f5c31d..23df22d 100644 --- a/cmd/engine/main.go +++ b/cmd/engine/main.go @@ -10,6 +10,7 @@ import ( "errors" "flag" "fmt" + "io" "log/slog" "net/http" "os" @@ -87,7 +88,7 @@ func run() error { if err != nil { return fmt.Errorf("init llm: %w", err) } - strategy := buildStrategy(cfg.Retrieval, llmClient) + strategy := buildStrategy(cfg.Retrieval, llmClient, store) // Wrap with caching if enabled. if cfg.Retrieval.Cache.Enabled { @@ -252,17 +253,39 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) { } } -func buildStrategy(c config.RetrievalConfig, client llmgate.Client) retrieval.Strategy { +func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy { switch c.Strategy { case "single-pass": return retrieval.NewSinglePass(client) case "chunked-tree": return retrieval.NewChunkedTree(client) + case "agentic": + a := retrieval.NewAgentic(client, storageFetcher{s: store}) + if c.Agentic.MaxHops > 0 { + a.MaxHops = c.Agentic.MaxHops + } + a.ModelOverride = c.Agentic.Model + return a default: return retrieval.NewChunkedTree(client) } } +// storageFetcher adapts a storage.Storage to retrieval.ContentFetcher. +// The agentic strategy reads section bodies one at a time, so we +// materialize the full reader contents into a []byte here rather than +// streaming — section bodies are typically a few KB. +type storageFetcher struct{ s storage.Storage } + +func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + rc, _, err := sf.s.Get(ctx, ref) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + // buildTLSConfig returns a *tls.Config when direct TLS is enabled, or nil // when the engine should serve plaintext (behind a proxy). Returning nil // leaves http.Server's TLSConfig unset, which is exactly what ListenAndServe diff --git a/cmd/server/main.go b/cmd/server/main.go index 62759d9..7586081 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -18,6 +18,7 @@ import ( "errors" "flag" "fmt" + "io" "log/slog" "net/http" "os" @@ -131,7 +132,7 @@ func run() error { if err != nil { return fmt.Errorf("init llm: %w", err) } - strategy := buildStrategy(cfg.Engine.Retrieval, llmClient) + strategy := buildStrategy(cfg.Engine.Retrieval, llmClient, store) // Wrap with caching if enabled in engine config. if cfg.Engine.Retrieval.Cache.Enabled { @@ -328,17 +329,39 @@ func buildLLM(c enginecfg.LLMConfig) (llmgate.Client, error) { } } -func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client) retrieval.Strategy { +func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy { switch c.Strategy { case "single-pass": return retrieval.NewSinglePass(client) case "chunked-tree": return retrieval.NewChunkedTree(client) + case "agentic": + a := retrieval.NewAgentic(client, storageFetcher{s: store}) + if c.Agentic.MaxHops > 0 { + a.MaxHops = c.Agentic.MaxHops + } + a.ModelOverride = c.Agentic.Model + return a default: return retrieval.NewChunkedTree(client) } } +// storageFetcher adapts a storage.Storage to retrieval.ContentFetcher. +// The agentic strategy reads section bodies one at a time, so we +// materialize the full reader contents into a []byte here rather than +// streaming — section bodies are typically a few KB. +type storageFetcher struct{ s storage.Storage } + +func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + rc, _, err := sf.s.Get(ctx, ref) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + func buildTLSConfig(c config.TLSConfig) *tls.Config { if !c.Enabled() { return nil diff --git a/pkg/config/config.go b/pkg/config/config.go index 6534a9e..c29841a 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -188,6 +188,7 @@ type GeminiBlock struct { type RetrievalConfig struct { Strategy string `yaml:"strategy"` ChunkedTree ChunkedTreeBlock `yaml:"chunked_tree"` + Agentic AgenticBlock `yaml:"agentic"` Cache CacheBlock `yaml:"cache"` } @@ -212,6 +213,23 @@ type ChunkedTreeBlock struct { IncludeSiblingBreadcrumb bool `yaml:"include_sibling_breadcrumbs"` } +// AgenticBlock configures the agentic-navigation strategy. +// +// The agentic loop trades sequential latency for the ability to handle +// arbitrarily large trees: the model issues outline/expand/read actions +// until it picks a final set of section IDs or hits MaxHops. +type AgenticBlock struct { + // MaxHops caps the number of LLM turns one query consumes, counting + // the terminal "done" turn. Default: 6. + MaxHops int `yaml:"max_hops"` + + // Model optionally overrides the budget's model for navigation + // turns. Empty means use the budget's model. Useful when the + // retrieval engine wants the navigation loop on a fast/cheap + // model while answering is on a stronger one. + Model string `yaml:"model"` +} + // LogConfig configures logging. type LogConfig struct { Level string `yaml:"level"` @@ -244,6 +262,9 @@ func Default() Config { MaxParallelCalls: 8, IncludeSiblingBreadcrumb: true, }, + Agentic: AgenticBlock{ + MaxHops: 6, + }, Cache: CacheBlock{ Enabled: true, MaxEntries: 1024, @@ -352,6 +373,14 @@ func applyEnvOverrides(c *Config) { if v := os.Getenv("VLE_TLS_KEY_FILE"); v != "" { c.Server.TLS.KeyFile = v } + if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MAX_HOPS"); v != "" { + if n, err := strconv.Atoi(v); err == nil && n >= 0 { + c.Retrieval.Agentic.MaxHops = n + } + } + if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MODEL"); v != "" { + c.Retrieval.Agentic.Model = v + } // Ingest / HyDE knobs. Booleans accept the usual truthy strings — // kept narrow so a typo doesn't silently flip the flag. if v := os.Getenv("VLE_INGEST_HYDE_ENABLED"); v != "" { @@ -428,11 +457,15 @@ func (c Config) Validate() error { } switch c.Retrieval.Strategy { - case "single-pass", "chunked-tree": + case "single-pass", "chunked-tree", "agentic": default: return fmt.Errorf("unknown retrieval.strategy: %q", c.Retrieval.Strategy) } + if c.Retrieval.Agentic.MaxHops < 0 { + return fmt.Errorf("retrieval.agentic.max_hops must be >= 0, got %d", c.Retrieval.Agentic.MaxHops) + } + if c.Server.TLS.CertFile != "" && c.Server.TLS.KeyFile == "" { return errors.New("server.tls.key_file is required when cert_file is set") } diff --git a/pkg/retrieval/agentic.go b/pkg/retrieval/agentic.go new file mode 100644 index 0000000..5166e32 --- /dev/null +++ b/pkg/retrieval/agentic.go @@ -0,0 +1,482 @@ +package retrieval + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "strings" + + "github.com/hallelx2/llmgate" + + "github.com/hallelx2/vectorless-engine/pkg/tree" +) + +// ContentFetcher reads the raw bytes for a content reference. The agentic +// strategy uses it to satisfy the `read` action, which materializes a +// section's full body into the conversation. Implementations should be +// safe for concurrent use; one tree may be traversed by many queries. +type ContentFetcher interface { + Get(ctx context.Context, ref string) ([]byte, error) +} + +// AgenticStrategy is a tool-using retrieval loop. +// +// Rather than feeding the whole tree to the model in one shot +// (single-pass) or splitting and reasoning over slices in parallel +// (chunked-tree), it lets the model explore the tree iteratively: +// outline → expand interesting branches → read promising sections → +// done. Each turn the model receives the observation from the previous +// action and emits the next action as a JSON object. The strategy +// dispatches the action, fetches the observation, and feeds it back as +// the next user message. +// +// This trades latency (N sequential LLM calls) for the ability to handle +// trees that don't fit in any single context window, with reading +// behaviour that adapts to each query. +// +// Protocol choice +// +// The strategy uses a JSON-action text protocol rather than llmgate's +// Tools field. The provider adapters in llmgate v0.2.0 declare +// ToolDef/ToolCall as scaffolding but do not yet populate ToolCalls on +// responses, so the only portable way to drive a multi-turn tool loop +// today is to ask the model to emit a JSON action each turn and parse +// it with ParseAction. The strategy is forward-compatible: when llmgate +// wires native tool calling, the actionable surface here is the same. +type AgenticStrategy struct { + // LLM is the shared client used for every turn. + LLM llmgate.Client + + // Fetcher reads section bodies for the `read` action. + Fetcher ContentFetcher + + // MaxHops caps the number of LLM turns one Select consumes, + // including the terminal "done" turn. Zero means use defaultMaxHops. + MaxHops int + + // ModelOverride, if non-empty, replaces the budget's ModelName for + // every turn. Useful for routing the navigation loop to a cheaper or + // faster model than the rest of the engine. + ModelOverride string +} + +// defaultMaxHops bounds the agentic loop. Six turns is enough for +// outline → 2 expands → 2 reads → done while keeping latency and cost +// predictable. Bump this only when measurements show selection quality +// climbing with deeper traversal. +const defaultMaxHops = 6 + +// defaultOutlineLevel is the depth of the initial outline observation. +// One level usually surfaces enough structure for the model to choose +// where to expand next without burning a turn. +const defaultOutlineLevel = 1 + +// Compile-time interface checks. +var ( + _ Strategy = (*AgenticStrategy)(nil) + _ CostStrategy = (*AgenticStrategy)(nil) +) + +// NewAgentic constructs an AgenticStrategy with sensible defaults. +func NewAgentic(client llmgate.Client, fetcher ContentFetcher) *AgenticStrategy { + return &AgenticStrategy{ + LLM: client, + Fetcher: fetcher, + MaxHops: defaultMaxHops, + } +} + +// Name implements Strategy. +func (a *AgenticStrategy) Name() string { return "agentic" } + +// Select implements Strategy. +func (a *AgenticStrategy) Select(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) ([]tree.SectionID, error) { + r, err := a.SelectWithCost(ctx, t, query, budget) + if err != nil { + return nil, err + } + return r.SelectedIDs, nil +} + +// SelectWithCost implements CostStrategy. +func (a *AgenticStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, query string, budget ContextBudget) (*Result, error) { + if t == nil || t.Root == nil { + return &Result{}, nil + } + view := t.BuildView() + bySectionID := indexSections(view.Sections) + + model := a.ModelOverride + if model == "" { + model = budget.ModelName + } + + maxHops := a.MaxHops + if maxHops <= 0 { + maxHops = defaultMaxHops + } + + // Conversation: system + initial user message (query + outline). + msgs := []llmgate.Message{ + {Role: llmgate.RoleSystem, Content: agenticSystemPrompt}, + {Role: llmgate.RoleUser, Content: a.initialUserPrompt(view, query)}, + } + + var ( + totalUsage Usage + hopsTaken int + finalIDs []tree.SectionID + reasoning string + ) + + for hop := 0; hop < maxHops; hop++ { + req := llmgate.Request{ + Model: model, + Messages: msgs, + MaxTokens: 1024, + Temperature: 0, + } + resp, err := a.LLM.Complete(ctx, req) + if err != nil { + return nil, fmt.Errorf("agentic hop %d: %w", hop+1, err) + } + hopsTaken++ + totalUsage.Add(Usage{ + InputTokens: resp.Usage.InputTokens, + OutputTokens: resp.Usage.OutputTokens, + TotalTokens: resp.Usage.TotalTokens, + CostUSD: resp.Usage.CostUSD, + LLMCalls: 1, + }) + + // Record the assistant turn before parsing so the next prompt has + // the model's own context. + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleAssistant, + Content: resp.Content, + }) + + action, parseErr := ParseAction(resp.Content) + if parseErr != nil { + // Graceful degradation: a malformed action doesn't 500 the + // query — we ask the model to retry once with a stronger + // instruction. If the next turn still misparses, we abort + // and return whatever the model has picked so far (often + // nothing). Matches the runSelectionWithRetry pattern from + // single_pass.go. + log.Printf("retrieval: agentic hop %d action parse failed: %v", hop+1, parseErr) + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: "Your last reply was not a valid JSON action. Reply with EXACTLY one JSON object: {\"action\":\"outline|expand|read|done\", ...}. No prose, no markdown fences.", + }) + continue + } + + switch action.Action { + case actionDone: + finalIDs = filterToTreeIDs(action.PickedIDs, bySectionID) + reasoning = action.Reasoning + return &Result{ + SelectedIDs: finalIDs, + Reasoning: reasoning, + ModelUsed: model, + Usage: totalUsage, + HopsTaken: hopsTaken, + }, nil + + case actionOutline: + level := action.Level + if level <= 0 { + level = defaultOutlineLevel + } + obs := renderOutline(view, level) + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("outline", obs), + }) + + case actionExpand: + obs, ok := renderExpand(bySectionID, action.SectionID) + if !ok { + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("expand", fmt.Sprintf("unknown section_id %q. Use outline or pick an ID from a previous observation.", action.SectionID)), + }) + continue + } + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("expand", obs), + }) + + case actionRead: + obs, ok := a.renderRead(ctx, t, tree.SectionID(action.SectionID)) + if !ok { + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("read", fmt.Sprintf("unknown section_id %q or no content available.", action.SectionID)), + }) + continue + } + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation("read", obs), + }) + + default: + msgs = append(msgs, llmgate.Message{ + Role: llmgate.RoleUser, + Content: wrapObservation(action.Action, fmt.Sprintf("unsupported action %q. Use one of: outline, expand, read, done.", action.Action)), + }) + } + } + + // Ran out of hops without a `done` action. Return whatever IDs the + // model proposed in the last action (if any) plus the hop count so + // the caller can see the cap was hit. + log.Printf("retrieval: agentic strategy hit max_hops=%d without done; returning %d ids", maxHops, len(finalIDs)) + return &Result{ + SelectedIDs: finalIDs, + Reasoning: reasoning, + ModelUsed: model, + Usage: totalUsage, + HopsTaken: hopsTaken, + }, nil +} + +// initialUserPrompt is the very first user turn: it explains the task, +// renders a shallow outline (default level=1) so the model has +// something to react to, and reminds the model of the action protocol. +func (a *AgenticStrategy) initialUserPrompt(view tree.View, query string) string { + var b strings.Builder + if view.Title != "" { + b.WriteString("Document: ") + b.WriteString(view.Title) + b.WriteString("\n\n") + } + b.WriteString("Initial outline (depth=") + fmt.Fprintf(&b, "%d", defaultOutlineLevel) + b.WriteString("):\n") + b.WriteString(renderOutline(view, defaultOutlineLevel)) + b.WriteString("\nUser query:\n") + b.WriteString(query) + b.WriteString("\n\nReply with a JSON action. The actions you may use are:\n") + b.WriteString(actionProtocolHelp) + return b.String() +} + +// renderRead pulls a section's full content via the Fetcher. Returns +// (text, true) on success, or ("", false) when the section is unknown +// or has no ContentRef / no fetcher. Failures from the storage backend +// (e.g. transient network error) are returned to the model as the +// observation so it can recover with a different action. +func (a *AgenticStrategy) renderRead(ctx context.Context, t *tree.Tree, id tree.SectionID) (string, bool) { + sec := t.FindByID(id) + if sec == nil { + return "", false + } + if a.Fetcher == nil || sec.ContentRef == "" { + // Internal sections summarize their children — fall back to + // the summary so the model still gets useful signal. + if sec.Summary != "" { + return fmt.Sprintf("section %s (%s) has no body content; summary:\n%s", sec.ID, sec.Title, sec.Summary), true + } + return "", false + } + data, err := a.Fetcher.Get(ctx, sec.ContentRef) + if err != nil { + return fmt.Sprintf("error reading section %s: %v", sec.ID, err), true + } + body := string(data) + body = strings.TrimSpace(body) + if body == "" && sec.Summary != "" { + return fmt.Sprintf("section %s body was empty; summary:\n%s", sec.ID, sec.Summary), true + } + return fmt.Sprintf("section %s (%s):\n%s", sec.ID, sec.Title, body), true +} + +// agenticSystemPrompt instructs the model on the navigation loop. It +// mirrors the language of the existing selection prompt so behaviour +// across strategies stays consistent: prefer leaves, pick few, never +// invent IDs. +const agenticSystemPrompt = `You are a navigation agent for a document tree. You explore a hierarchical outline of titles + short summaries + stable section IDs, then pick the leaf section IDs whose full content most directly answers the user's query. + +Process: +- On each turn, reply with EXACTLY one JSON object describing the next action. +- Use 'outline' to refresh your view of the whole tree at a given depth. +- Use 'expand' to see a section's immediate children. +- Use 'read' to read a section's full body (use sparingly — bodies are large). +- Use 'done' to terminate with your final picks. + +Rules: +- Prefer leaf sections. Include a parent only if its own body is directly relevant. +- Include as few sections as possible. Quality over quantity. +- Only return IDs you have seen in a prior observation. Do not invent IDs. +- If nothing in the document is relevant, return done with an empty picked_ids array.` + +// actionProtocolHelp is the one-shot reminder appended to the initial +// user prompt so the model gets concrete examples of valid actions +// without us needing to maintain a separate few-shot block. +const actionProtocolHelp = `- {"action":"outline","level":2} — re-render the outline N levels deep +- {"action":"expand","section_id":"sec_x"} — list immediate children of sec_x +- {"action":"read","section_id":"sec_x"} — fetch the full body of sec_x +- {"action":"done","picked_ids":["sec_x","sec_y"],"reasoning":"why"} — finalize + +Reply with ONLY the JSON object. No prose, no markdown fences.` + +// Action describes the LLM-chosen next step in the agentic loop. +// +// The struct is exported so tests can construct expected actions +// without depending on internal JSON shapes. SectionID is a string +// rather than tree.SectionID because the model's value may not match +// any real section in the tree; we keep the raw input here and let the +// loop attribute "unknown section" errors back to the model. +type Action struct { + // Action is the dispatch tag. One of: outline, expand, read, done. + Action string `json:"action"` + + // Level is the depth requested by an outline action. Optional; + // defaults to defaultOutlineLevel when zero/negative. + Level int `json:"level,omitempty"` + + // SectionID is the target of expand and read actions. + SectionID string `json:"section_id,omitempty"` + + // PickedIDs is the final selection for a done action. + PickedIDs []string `json:"picked_ids,omitempty"` + + // Reasoning is an optional explanation accompanying done. + Reasoning string `json:"reasoning,omitempty"` +} + +// Action tag constants. Defined as untyped strings rather than a +// custom Go enum because the value lives in JSON and must round-trip +// without surprise. +const ( + actionOutline = "outline" + actionExpand = "expand" + actionRead = "read" + actionDone = "done" +) + +// ParseAction is the tolerant JSON decoder for the agentic protocol. +// It mirrors ParseSelection: it strips code fences, peels prose +// wrappers, and isolates the first balanced JSON object. Returns an +// error only when no JSON object can be recovered. +func ParseAction(raw string) (Action, error) { + raw = strings.TrimSpace(raw) + if raw == "" { + return Action{}, fmt.Errorf("empty response") + } + // Strip ```json ... ``` fences if present. + if strings.HasPrefix(raw, "```") { + if i := strings.Index(raw, "\n"); i >= 0 { + raw = raw[i+1:] + } + raw = strings.TrimSuffix(raw, "```") + raw = strings.TrimSpace(raw) + } + if i := strings.Index(raw, "{"); i > 0 { + raw = raw[i:] + } + if j := strings.LastIndex(raw, "}"); j >= 0 && j < len(raw)-1 { + raw = raw[:j+1] + } + + var a Action + dec := json.NewDecoder(strings.NewReader(raw)) + if err := dec.Decode(&a); err != nil && err != io.EOF { + return Action{}, fmt.Errorf("decode action: %w", err) + } + a.Action = strings.ToLower(strings.TrimSpace(a.Action)) + if a.Action == "" { + return Action{}, fmt.Errorf("missing 'action' field") + } + a.SectionID = strings.TrimSpace(a.SectionID) + return a, nil +} + +// renderOutline renders the section view down to the given depth. +// depth=1 shows the root and its immediate children; depth=2 also +// shows grandchildren; etc. The format is the same as the +// chunked-tree prompt so the model sees consistent structure across +// strategies. +func renderOutline(view tree.View, depth int) string { + if depth <= 0 { + depth = defaultOutlineLevel + } + var b strings.Builder + for _, sv := range view.Sections { + if sv.Depth > depth { + continue + } + writeSectionLine(&b, sv) + } + return b.String() +} + +// renderExpand returns a string with the immediate children of +// sectionID rendered as outline lines. Returns ("", false) when the +// ID is not in the tree. The fallback when a section has no children +// is to render the section itself with a note — that lets the model +// distinguish "unknown id" from "leaf, nothing to expand". +func renderExpand(bySectionID map[tree.SectionID]tree.SectionView, sectionID string) (string, bool) { + sv, ok := bySectionID[tree.SectionID(sectionID)] + if !ok { + return "", false + } + if len(sv.Children) == 0 { + return fmt.Sprintf("[%s] %s is a leaf (no children). Use read to fetch its body.", sv.ID, sv.Title), true + } + var b strings.Builder + for _, cid := range sv.Children { + child, ok := bySectionID[cid] + if !ok { + continue + } + writeSectionLine(&b, child) + } + return b.String(), true +} + +// wrapObservation formats an action's result so the model can clearly +// see which action produced which observation. +func wrapObservation(action, body string) string { + return fmt.Sprintf("Observation (%s):\n%s\n\nNext JSON action?", action, body) +} + +// indexSections returns a flat map from SectionID to SectionView for +// O(1) lookup during the loop. The map is read-only after construction +// and so is safe for concurrent use — but each Select call builds its +// own anyway because tree views are cheap. +func indexSections(sections []tree.SectionView) map[tree.SectionID]tree.SectionView { + out := make(map[tree.SectionID]tree.SectionView, len(sections)) + for _, sv := range sections { + out[sv.ID] = sv + } + return out +} + +// filterToTreeIDs drops IDs the model invented (those not in the +// tree's section index) and deduplicates. Preserves first-seen order. +func filterToTreeIDs(rawIDs []string, bySectionID map[tree.SectionID]tree.SectionView) []tree.SectionID { + seen := map[tree.SectionID]struct{}{} + out := make([]tree.SectionID, 0, len(rawIDs)) + for _, id := range rawIDs { + sid := tree.SectionID(strings.TrimSpace(id)) + if sid == "" { + continue + } + if _, ok := bySectionID[sid]; !ok { + continue + } + if _, dup := seen[sid]; dup { + continue + } + seen[sid] = struct{}{} + out = append(out, sid) + } + return out +} diff --git a/pkg/retrieval/agentic_test.go b/pkg/retrieval/agentic_test.go new file mode 100644 index 0000000..b5226a1 --- /dev/null +++ b/pkg/retrieval/agentic_test.go @@ -0,0 +1,386 @@ +package retrieval_test + +import ( + "context" + "errors" + "strings" + "sync" + "sync/atomic" + "testing" + + "github.com/hallelx2/llmgate" + + "github.com/hallelx2/vectorless-engine/pkg/retrieval" + "github.com/hallelx2/vectorless-engine/pkg/tree" +) + +// scriptedLLM returns a sequence of canned responses, one per Complete +// call, exactly as the agentic loop needs for deterministic tests. If +// the script is exhausted, the LLM returns the loopReply value (used +// by the hop-cap test to simulate a model that never decides to +// terminate). +type scriptedLLM struct { + replies []string + loopReply string + + calls int32 + + mu sync.Mutex + lastPrompts []string +} + +func (s *scriptedLLM) Complete(ctx context.Context, req llmgate.Request) (*llmgate.Response, error) { + i := int(atomic.AddInt32(&s.calls, 1)) - 1 + + // Capture the most recent user message for later assertions. + var userMsg string + for _, msg := range req.Messages { + if msg.Role == llmgate.RoleUser { + userMsg = msg.Content + } + } + s.mu.Lock() + s.lastPrompts = append(s.lastPrompts, userMsg) + s.mu.Unlock() + + if i < len(s.replies) { + return &llmgate.Response{Content: s.replies[i]}, nil + } + if s.loopReply != "" { + return &llmgate.Response{Content: s.loopReply}, nil + } + // Exhausted with no loopReply — surface as an error so the test + // notices it under-scripted instead of silently hanging. + return nil, errors.New("scriptedLLM: replies exhausted") +} + +func (s *scriptedLLM) CountTokens(ctx context.Context, t string) (int, error) { + return len(t) / 4, nil +} + +// mapFetcher is an in-memory ContentFetcher backed by a map. The +// agentic strategy only needs Get; we don't bother modelling errors +// here because the tests use real refs. +type mapFetcher struct{ data map[string]string } + +func (m mapFetcher) Get(ctx context.Context, ref string) ([]byte, error) { + v, ok := m.data[ref] + if !ok { + return nil, errors.New("not found") + } + return []byte(v), nil +} + +// buildAgenticTree constructs a 3-level test tree: +// +// sec_root → [sec_a, sec_b] +// sec_a → [sec_a1 (leaf, ref=a1_ref), sec_a2 (leaf, ref=a2_ref)] +// sec_b → [sec_b1 (leaf, ref=b1_ref)] +// +// Enough depth to exercise expand and read in sequence. +func buildAgenticTree() *tree.Tree { + a1 := &tree.Section{ID: "sec_a1", ParentID: "sec_a", Title: "Install", Summary: "install steps", ContentRef: "a1_ref"} + a2 := &tree.Section{ID: "sec_a2", ParentID: "sec_a", Title: "Config", Summary: "config keys", ContentRef: "a2_ref"} + b1 := &tree.Section{ID: "sec_b1", ParentID: "sec_b", Title: "Querying", Summary: "how to query", ContentRef: "b1_ref"} + a := &tree.Section{ID: "sec_a", ParentID: "sec_root", Title: "Setup", Summary: "setup section", Children: []*tree.Section{a1, a2}} + b := &tree.Section{ID: "sec_b", ParentID: "sec_root", Title: "Usage", Summary: "usage section", Children: []*tree.Section{b1}} + root := &tree.Section{ID: "sec_root", Title: "Atlas", Children: []*tree.Section{a, b}} + return &tree.Tree{DocumentID: "doc_x", Title: "Atlas", Root: root} +} + +func TestAgenticHappyPath(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"expand","section_id":"sec_a"}`, + `{"action":"read","section_id":"sec_a1"}`, + `{"action":"done","picked_ids":["sec_a1"],"reasoning":"install matches the query"}`, + }, + } + fetcher := mapFetcher{data: map[string]string{ + "a1_ref": "Install steps: run vle ingest...", + "a2_ref": "Config keys: VLE_*", + "b1_ref": "How to query the API.", + }} + + s := retrieval.NewAgentic(llm, fetcher) + + res, err := s.SelectWithCost(context.Background(), tr, "how do I install?", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1], got %v", res.SelectedIDs) + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3, got %d", res.HopsTaken) + } + if res.Reasoning == "" { + t.Errorf("want non-empty reasoning, got %q", res.Reasoning) + } + if res.Usage.LLMCalls != 3 { + t.Errorf("want Usage.LLMCalls=3, got %d", res.Usage.LLMCalls) + } + + // Sanity-check that the read action genuinely materialized the body + // content into the next prompt — the read observation must contain + // the body string the fetcher returned. + llm.mu.Lock() + defer llm.mu.Unlock() + if len(llm.lastPrompts) < 3 { + t.Fatalf("expected 3 prompts captured, got %d", len(llm.lastPrompts)) + } + thirdPrompt := llm.lastPrompts[2] + if !strings.Contains(thirdPrompt, "Install steps: run vle ingest") { + t.Errorf("expected read observation to contain body content, got:\n%s", thirdPrompt) + } +} + +// TestAgenticExpandExpandDone covers the plan's manual trace: when the +// model picks expand → expand → done, the strategy must return the +// IDs from the final done action. This is the "two-level navigation +// without a read" scenario, which is the cheapest happy path. +func TestAgenticExpandExpandDone(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"expand","section_id":"sec_a"}`, + `{"action":"expand","section_id":"sec_b"}`, + `{"action":"done","picked_ids":["sec_a1","sec_b1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "anything", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if len(res.SelectedIDs) != 2 { + t.Fatalf("want 2 ids, got %v", res.SelectedIDs) + } + want := map[tree.SectionID]bool{"sec_a1": true, "sec_b1": true} + for _, id := range res.SelectedIDs { + if !want[id] { + t.Errorf("unexpected id %q", id) + } + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3, got %d", res.HopsTaken) + } +} + +func TestAgenticHopCap(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + // Never emits done. Strategy should bail at MaxHops. + llm := &scriptedLLM{ + loopReply: `{"action":"expand","section_id":"sec_a"}`, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + s.MaxHops = 4 + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("SelectWithCost: %v", err) + } + if res.HopsTaken != 4 { + t.Errorf("want HopsTaken=4 (capped), got %d", res.HopsTaken) + } + // No done means no final picks. + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty SelectedIDs on cap, got %v", res.SelectedIDs) + } + if got := atomic.LoadInt32(&llm.calls); got != 4 { + t.Errorf("want 4 LLM calls (MaxHops), got %d", got) + } + if res.Usage.LLMCalls != 4 { + t.Errorf("want Usage.LLMCalls=4, got %d", res.Usage.LLMCalls) + } +} + +// TestAgenticBadJSONGraceful covers the JSON-mode-blip path: a +// response that isn't valid JSON must not 500 the query. The first +// turn parses as garbage; we expect the strategy to nudge the model +// with a retry prompt and then bail when subsequent turns also fail. +// Final outcome: nil error, empty selection. +func TestAgenticBadJSONGraceful(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + // Every reply is prose, never JSON. The loop must consume MaxHops + // turns and return cleanly. + loopReply: "I think it's sec_a1, that's where install lives.", + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + s.MaxHops = 3 + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatalf("want nil error on persistent parse failure, got %v", err) + } + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty selection on parse failure, got %v", res.SelectedIDs) + } + if res.HopsTaken != 3 { + t.Errorf("want HopsTaken=3 (capped), got %d", res.HopsTaken) + } +} + +// TestAgenticFiltersUnknownPicks mirrors single-pass: if the model +// invents IDs not present in the tree, they must be dropped. +func TestAgenticFiltersUnknownPicks(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + `{"action":"done","picked_ids":["sec_a1","sec_fake","sec_a1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1] after filter+dedup, got %v", res.SelectedIDs) + } + if res.HopsTaken != 1 { + t.Errorf("want HopsTaken=1, got %d", res.HopsTaken) + } +} + +// TestAgenticEmptyTree exercises the early-return guard so callers +// don't pay an LLM hop for a degenerate input. +func TestAgenticEmptyTree(t *testing.T) { + t.Parallel() + + llm := &scriptedLLM{} + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), &tree.Tree{}, "q", retrieval.ContextBudget{}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 0 { + t.Errorf("want empty selection on empty tree, got %v", res.SelectedIDs) + } + if atomic.LoadInt32(&llm.calls) != 0 { + t.Errorf("want 0 LLM calls on empty tree, got %d", llm.calls) + } +} + +// TestAgenticReadFallbackWhenNoContent verifies a 'read' on an +// internal section (no ContentRef) falls back to the summary rather +// than erroring out — the model should still get useful signal. +func TestAgenticReadFallbackWhenNoContent(t *testing.T) { + t.Parallel() + + tr := buildAgenticTree() + llm := &scriptedLLM{ + replies: []string{ + // Read on an internal section (sec_a has no ContentRef). + `{"action":"read","section_id":"sec_a"}`, + `{"action":"done","picked_ids":["sec_a1"]}`, + }, + } + s := retrieval.NewAgentic(llm, mapFetcher{data: map[string]string{}}) + + res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000}) + if err != nil { + t.Fatal(err) + } + if len(res.SelectedIDs) != 1 || res.SelectedIDs[0] != "sec_a1" { + t.Errorf("want [sec_a1], got %v", res.SelectedIDs) + } + + // The observation for the read turn should contain "summary" — the + // fallback path — not a fetcher error. + llm.mu.Lock() + defer llm.mu.Unlock() + if len(llm.lastPrompts) < 2 { + t.Fatalf("expected 2 prompts captured, got %d", len(llm.lastPrompts)) + } + if !strings.Contains(llm.lastPrompts[1], "summary") { + t.Errorf("expected summary fallback in read observation, got:\n%s", llm.lastPrompts[1]) + } +} + +func TestParseAction(t *testing.T) { + t.Parallel() + cases := []struct { + name string + in string + want retrieval.Action + hasErr bool + }{ + { + name: "expand", + in: `{"action":"expand","section_id":"sec_a"}`, + want: retrieval.Action{Action: "expand", SectionID: "sec_a"}, + }, + { + name: "outline_with_level", + in: `{"action":"outline","level":3}`, + want: retrieval.Action{Action: "outline", Level: 3}, + }, + { + name: "done_with_picks", + in: `{"action":"done","picked_ids":["a","b"],"reasoning":"why"}`, + want: retrieval.Action{Action: "done", PickedIDs: []string{"a", "b"}, Reasoning: "why"}, + }, + { + name: "code_fence", + in: "```json\n{\"action\":\"done\",\"picked_ids\":[\"x\"]}\n```", + want: retrieval.Action{Action: "done", PickedIDs: []string{"x"}}, + }, + { + name: "prose_before", + in: `Sure: {"action":"expand","section_id":"sec_a"}`, + want: retrieval.Action{Action: "expand", SectionID: "sec_a"}, + }, + { + name: "garbage", + in: "I think it's sec_a", + hasErr: true, + }, + { + name: "empty", + in: "", + hasErr: true, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got, err := retrieval.ParseAction(c.in) + if c.hasErr { + if err == nil { + t.Fatalf("want error, got %+v", got) + } + return + } + if err != nil { + t.Fatalf("parse: %v", err) + } + if got.Action != c.want.Action { + t.Errorf("Action: got %q want %q", got.Action, c.want.Action) + } + if got.SectionID != c.want.SectionID { + t.Errorf("SectionID: got %q want %q", got.SectionID, c.want.SectionID) + } + if got.Level != c.want.Level { + t.Errorf("Level: got %d want %d", got.Level, c.want.Level) + } + if len(got.PickedIDs) != len(c.want.PickedIDs) { + t.Errorf("PickedIDs: got %v want %v", got.PickedIDs, c.want.PickedIDs) + } + }) + } +} diff --git a/pkg/retrieval/chunked_tree.go b/pkg/retrieval/chunked_tree.go index 866c2c0..716e47a 100644 --- a/pkg/retrieval/chunked_tree.go +++ b/pkg/retrieval/chunked_tree.go @@ -112,6 +112,7 @@ func (c *ChunkedTree) SelectWithCost(ctx context.Context, t *tree.Tree, query st return &Result{ SelectedIDs: c.Merge.Merge(allIDs), Usage: totalUsage, + HopsTaken: 1, }, nil } diff --git a/pkg/retrieval/single_pass.go b/pkg/retrieval/single_pass.go index 7367ce1..cb9a43e 100644 --- a/pkg/retrieval/single_pass.go +++ b/pkg/retrieval/single_pass.go @@ -69,6 +69,7 @@ func (s *SinglePass) SelectWithCost(ctx context.Context, t *tree.Tree, query str SelectedIDs: FilterKnownIDs(ids, view.Sections), ModelUsed: model, Usage: usage, + HopsTaken: 1, }, nil } diff --git a/pkg/retrieval/strategy.go b/pkg/retrieval/strategy.go index 5900f3b..29dd0dc 100644 --- a/pkg/retrieval/strategy.go +++ b/pkg/retrieval/strategy.go @@ -60,10 +60,16 @@ func (b ContextBudget) Available() int { // Result is returned to the API layer. It includes not just IDs but the // reasoning trace and cost accounting when the strategy supports it. type Result struct { - SelectedIDs []tree.SectionID - Reasoning string - ModelUsed string - Usage Usage + SelectedIDs []tree.SectionID `json:"selected_ids"` + Reasoning string `json:"reasoning,omitempty"` + ModelUsed string `json:"model_used,omitempty"` + Usage Usage `json:"usage"` + + // HopsTaken is the number of LLM turns the strategy issued to reach the + // final selection. Single-shot strategies set this to 1; iterative + // strategies (e.g. agentic) set it to the number of tool-using turns + // actually consumed, including the terminal "done" turn. + HopsTaken int `json:"hops_taken,omitempty"` } // Usage is the aggregated token + cost accounting across all LLM calls