Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions cmd/engine/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"errors"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
Expand Down Expand Up @@ -87,7 +88,7 @@ func run() error {
if err != nil {
return fmt.Errorf("init llm: %w", err)
}
strategy := buildStrategy(cfg.Retrieval, llmClient)
strategy := buildStrategy(cfg.Retrieval, llmClient, store)

// Wrap with caching if enabled.
if cfg.Retrieval.Cache.Enabled {
Expand Down Expand Up @@ -252,17 +253,39 @@ func buildLLM(c config.LLMConfig) (llmgate.Client, error) {
}
}

func buildStrategy(c config.RetrievalConfig, client llmgate.Client) retrieval.Strategy {
// buildStrategy picks the retrieval strategy named by c.Strategy.
//
// "single-pass" and "agentic" map to their dedicated implementations;
// every other value, "chunked-tree" included, yields the chunked-tree
// strategy. store is only used by the agentic strategy, which reads
// content through a storageFetcher wrapped around it.
func buildStrategy(c config.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
	switch c.Strategy {
	case "single-pass":
		return retrieval.NewSinglePass(client)
	case "agentic":
		agentic := retrieval.NewAgentic(client, storageFetcher{s: store})
		// Only a positive hop count is applied; otherwise the value set
		// by NewAgentic is left as it is.
		if hops := c.Agentic.MaxHops; hops > 0 {
			agentic.MaxHops = hops
		}
		agentic.ModelOverride = c.Agentic.Model
		return agentic
	}
	// "chunked-tree" and any unrecognised name share this fallback.
	return retrieval.NewChunkedTree(client)
}

// storageFetcher adapts a storage.Storage to retrieval.ContentFetcher.
// The agentic strategy reads section bodies one at a time, so we
// materialize the full reader contents into a []byte here rather than
// streaming — section bodies are typically a few KB.
type storageFetcher struct{ s storage.Storage }

func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) {
Comment on lines +274 to +280
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Factor storageFetcher into a shared helper to avoid duplication between engine and server binaries

The storageFetcher type and its Get method are duplicated here and in cmd/server/main.go. To avoid divergence if retrieval semantics change (compression, limits, metrics, etc.), extract this adapter into a shared package (or the retrieval package) and use it from both binaries.

Suggested implementation:

	fetcher := retrieval.NewStorageFetcher(store)
	strategy := buildStrategy(cfg.Retrieval, llmClient, fetcher)

To fully implement the refactor and remove duplication between the engine and server binaries, you should also:

  1. Extract the storageFetcher adapter into a shared helper, for example in the retrieval package:

    • Add something like:
      // In package retrieval
      type storageFetcher struct{ s storage.Storage }
      
      func NewStorageFetcher(s storage.Storage) ContentFetcher {
          return storageFetcher{s: s}
      }
      
      func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) {
          rc, _, err := sf.s.Get(ctx, ref)
          if err != nil {
              return nil, err
          }
          defer rc.Close()
      
          data, err := io.ReadAll(rc)
          if err != nil {
              return nil, err
          }
          return data, nil
      }
    • Adjust types/imports to match the existing retrieval and storage packages.
  2. In cmd/engine/main.go, remove the now-duplicated local storageFetcher type and its Get method entirely.

  3. In cmd/server/main.go, replace its local storageFetcher implementation and usage with the shared helper:

    • Delete the duplicate type/method.
    • Change any call sites to use retrieval.NewStorageFetcher(store) (or whatever package name you choose).
  4. Ensure both binaries import the package where NewStorageFetcher lives and that buildStrategy (and any other consumers) accept a retrieval.ContentFetcher rather than storage.Storage directly, if they don't already.

rc, _, err := sf.s.Get(ctx, ref)
if err != nil {
return nil, err
}
defer rc.Close()
return io.ReadAll(rc)
}
Comment on lines +274 to +287

// buildTLSConfig returns a *tls.Config when direct TLS is enabled, or nil
// when the engine should serve plaintext (behind a proxy). Returning nil
// leaves http.Server's TLSConfig unset, which is exactly what ListenAndServe
Expand Down
27 changes: 25 additions & 2 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"errors"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
Expand Down Expand Up @@ -131,7 +132,7 @@ func run() error {
if err != nil {
return fmt.Errorf("init llm: %w", err)
}
strategy := buildStrategy(cfg.Engine.Retrieval, llmClient)
strategy := buildStrategy(cfg.Engine.Retrieval, llmClient, store)

// Wrap with caching if enabled in engine config.
if cfg.Engine.Retrieval.Cache.Enabled {
Expand Down Expand Up @@ -328,17 +329,39 @@ func buildLLM(c enginecfg.LLMConfig) (llmgate.Client, error) {
}
}

func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client) retrieval.Strategy {
// buildStrategy picks the retrieval strategy named by c.Strategy.
//
// "single-pass" and "agentic" map to their dedicated implementations;
// every other value, "chunked-tree" included, yields the chunked-tree
// strategy. store is only used by the agentic strategy, which reads
// content through a storageFetcher wrapped around it.
func buildStrategy(c enginecfg.RetrievalConfig, client llmgate.Client, store storage.Storage) retrieval.Strategy {
	switch c.Strategy {
	case "single-pass":
		return retrieval.NewSinglePass(client)
	case "agentic":
		agentic := retrieval.NewAgentic(client, storageFetcher{s: store})
		// Only a positive hop count is applied; otherwise the value set
		// by NewAgentic is left as it is.
		if hops := c.Agentic.MaxHops; hops > 0 {
			agentic.MaxHops = hops
		}
		agentic.ModelOverride = c.Agentic.Model
		return agentic
	}
	// "chunked-tree" and any unrecognised name share this fallback.
	return retrieval.NewChunkedTree(client)
}

// storageFetcher bridges a storage.Storage to retrieval.ContentFetcher.
//
// Each call buffers the whole object into a []byte instead of handing
// back a stream: the agentic strategy reads section bodies one at a
// time, and those bodies are typically a few KB.
type storageFetcher struct {
	s storage.Storage
}

// Get loads the object stored under ref and returns its full contents.
// The second value returned by the underlying storage Get is discarded,
// and errors from opening or reading are passed through unchanged.
func (sf storageFetcher) Get(ctx context.Context, ref string) ([]byte, error) {
	body, _, openErr := sf.s.Get(ctx, ref)
	if openErr != nil {
		return nil, openErr
	}
	defer body.Close()

	contents, readErr := io.ReadAll(body)
	return contents, readErr
}
Comment on lines +350 to +363

func buildTLSConfig(c config.TLSConfig) *tls.Config {
if !c.Enabled() {
return nil
Expand Down
35 changes: 34 additions & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ type GeminiBlock struct {
type RetrievalConfig struct {
	// Strategy names the retrieval strategy to build. Validate accepts
	// "single-pass", "chunked-tree", and "agentic" and rejects anything
	// else.
	Strategy string `yaml:"strategy"`
	// ChunkedTree carries the chunked-tree settings (presumably only
	// relevant when Strategy is "chunked-tree" — confirm against the
	// strategy constructor).
	ChunkedTree ChunkedTreeBlock `yaml:"chunked_tree"`
	// Agentic carries the settings for the agentic-navigation strategy.
	Agentic AgenticBlock `yaml:"agentic"`
	// Cache configures the caching layer; when Cache.Enabled is set the
	// binaries wrap the selected strategy with caching.
	Cache CacheBlock `yaml:"cache"`
}

Expand All @@ -212,6 +213,23 @@ type ChunkedTreeBlock struct {
IncludeSiblingBreadcrumb bool `yaml:"include_sibling_breadcrumbs"`
}

// AgenticBlock configures the agentic-navigation strategy.
//
// The agentic loop trades sequential latency for the ability to handle
// arbitrarily large trees: the model issues outline/expand/read actions
// until it picks a final set of section IDs or hits MaxHops.
type AgenticBlock struct {
	// MaxHops caps the number of LLM turns one query consumes, counting
	// the terminal "done" turn. Default: 6.
	//
	// Validate rejects negative values. Zero passes validation, but the
	// strategy builders only apply values greater than zero, so zero
	// leaves the strategy's own hop limit in place. The environment
	// variable VLE_RETRIEVAL_AGENTIC_MAX_HOPS overrides this field when
	// it parses as a non-negative integer.
	MaxHops int `yaml:"max_hops"`

	// Model optionally overrides the budget's model for navigation
	// turns. Empty means use the budget's model. Useful when the
	// retrieval engine wants the navigation loop on a fast/cheap
	// model while answering is on a stronger one.
	//
	// The environment variable VLE_RETRIEVAL_AGENTIC_MODEL overrides
	// this field when it is non-empty.
	Model string `yaml:"model"`
}

// LogConfig configures logging.
type LogConfig struct {
Level string `yaml:"level"`
Expand Down Expand Up @@ -244,6 +262,9 @@ func Default() Config {
MaxParallelCalls: 8,
IncludeSiblingBreadcrumb: true,
},
Agentic: AgenticBlock{
MaxHops: 6,
},
Cache: CacheBlock{
Enabled: true,
MaxEntries: 1024,
Expand Down Expand Up @@ -352,6 +373,14 @@ func applyEnvOverrides(c *Config) {
if v := os.Getenv("VLE_TLS_KEY_FILE"); v != "" {
c.Server.TLS.KeyFile = v
}
if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MAX_HOPS"); v != "" {
if n, err := strconv.Atoi(v); err == nil && n >= 0 {
c.Retrieval.Agentic.MaxHops = n
}
}
if v := os.Getenv("VLE_RETRIEVAL_AGENTIC_MODEL"); v != "" {
c.Retrieval.Agentic.Model = v
}
// Ingest / HyDE knobs. Booleans accept the usual truthy strings —
// kept narrow so a typo doesn't silently flip the flag.
if v := os.Getenv("VLE_INGEST_HYDE_ENABLED"); v != "" {
Expand Down Expand Up @@ -428,11 +457,15 @@ func (c Config) Validate() error {
}

switch c.Retrieval.Strategy {
case "single-pass", "chunked-tree":
case "single-pass", "chunked-tree", "agentic":
default:
return fmt.Errorf("unknown retrieval.strategy: %q", c.Retrieval.Strategy)
}

if c.Retrieval.Agentic.MaxHops < 0 {
return fmt.Errorf("retrieval.agentic.max_hops must be >= 0, got %d", c.Retrieval.Agentic.MaxHops)
}

if c.Server.TLS.CertFile != "" && c.Server.TLS.KeyFile == "" {
return errors.New("server.tls.key_file is required when cert_file is set")
}
Expand Down
Loading
Loading