From 56406d6553c654dfd53ba1195b9d2b176e128c28 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 13:28:47 +0100
Subject: [PATCH 01/34] refactor(embeddings): introduce pluggable provider
 abstraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add provider.Provider interface and a Factory/registry under
internal/embeddings/provider. Move the existing llama-server sidecar
into provider/ollama as the first implementation. Add provider/openai
(generic /v1/embeddings) and provider/voyage (api.voyageai.com with
input_type, output_dimension, and int8 dequantization). Rewrite
embeddings.Service to delegate to the active Provider while keeping
the queue/backpressure machinery centralized.

EmbeddingModel() now returns Provider.ID() — a fingerprint of the form
"{kind}:{model}[:{dim}][:{dtype}]" — so the existing per-project
drift check in repojobs transparently treats a provider switch as a
model change and forces a full reindex on the next clone job.

Behaviour unchanged for default ollama deployments; the openai and
voyage providers are registered but not yet selectable through any
admin surface. Tests are green.
---
 .../internal/embeddings/provider/factory.go   | 133 ++++
 .../{ => provider/ollama}/bootstrap_test.go   |   2 +-
 .../{ => provider/ollama}/client.go           |   2 +-
 .../embeddings/provider/ollama/factory.go     | 104 +++
 .../embeddings/provider/ollama/gguf.go        | 164 +++++
 .../{ => provider/ollama}/hfdownload.go       |   2 +-
 .../{ => provider/ollama}/logwriter.go        |   2 +-
 .../{ => provider/ollama}/prefix.go           |   2 +-
 .../{ => provider/ollama}/prefix_test.go      |   2 +-
 .../embeddings/provider/ollama/provider.go    | 384 ++++++++++
 .../{ => provider/ollama}/supervisor.go       |   8 +-
 .../embeddings/provider/openai/factory.go     |  57 ++
 .../embeddings/provider/openai/openai.go      | 228 ++++++
 .../internal/embeddings/provider/provider.go  | 182 +++++
 .../embeddings/provider/voyage/factory.go     |  70 ++
 .../embeddings/provider/voyage/voyage.go      | 295 ++++++++
 server/internal/embeddings/service.go         | 656 ++++++------------
 server/internal/httpapi/admin_server.go       |   6 +-
 18 files changed, 1849 insertions(+), 450 deletions(-)
 create mode 100644 server/internal/embeddings/provider/factory.go
 rename server/internal/embeddings/{ => provider/ollama}/bootstrap_test.go (99%)
 rename server/internal/embeddings/{ => provider/ollama}/client.go (99%)
 create mode 100644 server/internal/embeddings/provider/ollama/factory.go
 create mode 100644 server/internal/embeddings/provider/ollama/gguf.go
 rename server/internal/embeddings/{ => provider/ollama}/hfdownload.go (99%)
 rename server/internal/embeddings/{ => provider/ollama}/logwriter.go (98%)
 rename server/internal/embeddings/{ => provider/ollama}/prefix.go (98%)
 rename server/internal/embeddings/{ => provider/ollama}/prefix_test.go (99%)
 create mode 100644 server/internal/embeddings/provider/ollama/provider.go
 rename server/internal/embeddings/{ => provider/ollama}/supervisor.go (99%)
 create mode 100644 server/internal/embeddings/provider/openai/factory.go
 create mode 100644 server/internal/embeddings/provider/openai/openai.go
 create mode 100644 server/internal/embeddings/provider/provider.go
 create mode 100644 server/internal/embeddings/provider/voyage/factory.go
 create mode 100644 server/internal/embeddings/provider/voyage/voyage.go

diff --git a/server/internal/embeddings/provider/factory.go b/server/internal/embeddings/provider/factory.go
new file mode 100644
index 0000000..4f2e179
--- /dev/null
+++ b/server/internal/embeddings/provider/factory.go
@@ -0,0 +1,133 @@
+package provider
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"sort"
+	"sync"
+)
+
+// Factory constructs Providers of a given kind from a typed Config.
+// One Factory per kind; registered at init time by each provider
+// sub-package via Register. The Service builds the active provider
+// by calling Build on the factory matching the persisted kind.
+type Factory interface {
+	// Kind matches one of the Kind* constants in provider.go.
+	Kind() string
+
+	// SchemaJSON returns a JSON-encoded ConfigSchema describing the
+	// fields this provider accepts. The dashboard reads this to know
+	// which input controls to render in the provider-specific form.
+	// Returned bytes must be deterministic across calls so the API
+	// response is cacheable.
+	SchemaJSON() []byte
+
+	// SecretEnvVars lists the env-var names this provider consults
+	// for credentials. Order is irrelevant. Used by the admin
+	// GET /embedding-providers endpoint to tell the dashboard which
+	// keys are set in the environment so it can render a "set
+	// CIX_VOYAGE_API_KEY before saving" banner when the operator
+	// configures a provider whose key is missing.
+	SecretEnvVars() []string
+
+	// Build constructs a concrete Provider from a JSON-encoded config
+	// blob (shape matches the provider's ConfigSchema). secrets is
+	// used by HTTP-only providers to resolve API-key env-var names;
+	// ollama's factory ignores it. logger may be nil; implementations
+	// fall back to slog.Default().
+	//
+	// Build does NOT start the provider — call Provider.Start
+	// separately so the caller can decide whether a failed Start
+	// rolls back to the previous active provider.
+	Build(cfg []byte, secrets SecretLookup, logger *slog.Logger) (Provider, error)
+}
+
+// ConfigSchema is the dashboard-facing description of a provider's
+// config form. Hardcoded React components in the dashboard ignore
+// this in favour of typed forms, but the JSON is still exposed via
+// /admin/embedding-providers so external tooling (curl, ad-hoc
+// admin scripts) has a contract to read.
+type ConfigSchema struct {
+	Fields []ConfigField `json:"fields"`
+}
+
+// ConfigField describes one input control in a provider config form.
+// Kind drives the widget; the dashboard's hardcoded forms map field
+// Name → input.
+type ConfigField struct {
+	Name        string   `json:"name"`
+	Label       string   `json:"label"`
+	Kind        string   `json:"kind"` // "string" | "int" | "bool" | "enum" | "secret-env"
+	Required    bool     `json:"required,omitempty"`
+	Default     any      `json:"default,omitempty"`
+	Enum        []string `json:"enum,omitempty"`
+	Description string   `json:"description,omitempty"`
+}
+
+var (
+	registryMu sync.RWMutex
+	registry   = map[string]Factory{}
+)
+
+// Register adds a Factory to the global registry. Called from each
+// provider sub-package's init(). Panics if the kind is already
+// registered — that indicates a programmer error (duplicate
+// init) rather than something an operator can recover from.
+func Register(f Factory) {
+	if f == nil {
+		panic("provider.Register: nil factory")
+	}
+	kind := f.Kind()
+	if kind == "" {
+		panic("provider.Register: factory returned empty Kind")
+	}
+	registryMu.Lock()
+	defer registryMu.Unlock()
+	if _, exists := registry[kind]; exists {
+		panic(fmt.Sprintf("provider.Register: kind %q already registered", kind))
+	}
+	registry[kind] = f
+}
+
+// Lookup fetches the Factory for kind; false means it is not registered.
+func Lookup(kind string) (Factory, bool) {
+	registryMu.RLock()
+	factory, found := registry[kind]
+	registryMu.RUnlock()
+	return factory, found
+}
+
+// Kinds lists every registered kind in sorted order, so the admin
+// /embedding-providers list endpoint produces a stable response.
+func Kinds() []string {
+	registryMu.RLock()
+	names := make([]string, 0, len(registry))
+	for name := range registry {
+		names = append(names, name)
+	}
+	registryMu.RUnlock()
+	sort.Strings(names)
+	return names
+}
+
+// Build constructs a provider by kind. Convenience wrapper around
+// Lookup + Factory.Build that returns a clear error when the kind
+// is not registered (so callers don't have to fmt.Errorf at each
+// site).
+func Build(ctx context.Context, kind string, cfg []byte, secrets SecretLookup, logger *slog.Logger) (Provider, error) {
+	_ = ctx // reserved for future per-build cancellation; Build itself is fast
+	f, ok := Lookup(kind)
+	if !ok {
+		return nil, fmt.Errorf("provider %q is not registered", kind)
+	}
+	return f.Build(cfg, secrets, logger)
+}
+
+// EnvSecrets adapts lookup to a SecretLookup. It does not read the
+// environment itself: production wiring is expected to pass
+// os.LookupEnv, while tests pass a function returning fixed values
+// so they do not have to set env vars in the process.
+func EnvSecrets(lookup func(key string) (string, bool)) SecretLookup {
+	return SecretLookup(lookup)
+}
diff --git a/server/internal/embeddings/bootstrap_test.go b/server/internal/embeddings/provider/ollama/bootstrap_test.go
similarity index 99%
rename from server/internal/embeddings/bootstrap_test.go
rename to server/internal/embeddings/provider/ollama/bootstrap_test.go
index 455df72..46db148 100644
--- a/server/internal/embeddings/bootstrap_test.go
+++ b/server/internal/embeddings/provider/ollama/bootstrap_test.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import (
 	"io/fs"
diff --git a/server/internal/embeddings/client.go b/server/internal/embeddings/provider/ollama/client.go
similarity index 99%
rename from server/internal/embeddings/client.go
rename to server/internal/embeddings/provider/ollama/client.go
index bbcf9b8..4097ddb 100644
--- a/server/internal/embeddings/client.go
+++ b/server/internal/embeddings/provider/ollama/client.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import (
 	"bytes"
diff --git a/server/internal/embeddings/provider/ollama/factory.go b/server/internal/embeddings/provider/ollama/factory.go
new file mode 100644
index 0000000..9e15a6d
--- /dev/null
+++ b/server/internal/embeddings/provider/ollama/factory.go
@@ -0,0 +1,104 @@
+package ollama
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// factory is the provider.Factory implementation for ollama. The
+// init() block below registers it in the global registry so the
+// Service can build an ollama provider purely by kind string.
+type factory struct{}
+
+func (factory) Kind() string { return provider.KindOllama }
+
+// SchemaJSON describes the editable ollama fields for the dashboard.
+// Sidecar-internal knobs (BinDir, SocketPath, Transport, CacheDir,
+// BootstrapPath) are populated from env at bootstrap and are not
+// surfaced for runtime edit — they are deployment-level concerns.
+func (factory) SchemaJSON() []byte {
+	schema := provider.ConfigSchema{
+		Fields: []provider.ConfigField{
+			{
+				Name:        "model",
+				Label:       "Embedding model",
+				Kind:        "string",
+				Required:    true,
+				Description: "HuggingFace repo id (owner/repo) or an absolute path to a .gguf file.",
+			},
+			{
+				Name:        "gguf_path",
+				Label:       "GGUF path override",
+				Kind:        "string",
+				Description: "Optional absolute path that takes precedence over Model (CIX_GGUF_PATH).",
+			},
+			{
+				Name:    "ctx_size",
+				Label:   "Context size",
+				Kind:    "int",
+				Default: 2048,
+			},
+			{
+				Name:        "n_gpu_layers",
+				Label:       "GPU layers",
+				Kind:        "int",
+				Description: "-1 = all on Metal, 0 = CPU only.",
+			},
+			{
+				Name:        "n_threads",
+				Label:       "Threads",
+				Kind:        "int",
+				Description: "0 = let llama-server auto-detect.",
+			},
+			{
+				Name:        "batch_size",
+				Label:       "Batch size",
+				Kind:        "int",
+				Description: "0 = match context size.",
+			},
+		},
+	}
+	// Schema is small and stable; build once per call so the registry
+	// doesn't have to hold long-lived buffers.
+	b, _ := json.Marshal(schema)
+	return b
+}
+
+// SecretEnvVars: ollama has no remote API key.
+func (factory) SecretEnvVars() []string { return nil }
+
+// Build unmarshals cfg into Config and constructs a Provider. Does
+// not call Start; the caller (Service.SwitchProvider) sequences the
+// start so it can roll back to the previous provider on failure.
+func (factory) Build(cfg []byte, _ provider.SecretLookup, logger *slog.Logger) (provider.Provider, error) {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if len(cfg) == 0 {
+		return nil, fmt.Errorf("ollama: empty config")
+	}
+	var c Config
+	if err := json.Unmarshal(cfg, &c); err != nil {
+		return nil, fmt.Errorf("ollama: unmarshal config: %w", err)
+	}
+	if c.Model == "" {
+		return nil, fmt.Errorf("ollama: model is required")
+	}
+	if c.Transport == "" {
+		c.Transport = "unix"
+	}
+	if c.CtxSize == 0 {
+		c.CtxSize = 2048
+	}
+	if c.StartupSec == 0 {
+		c.StartupSec = 60
+	}
+	return New(c, logger), nil
+}
+
+func init() {
+	provider.Register(factory{})
+}
diff --git a/server/internal/embeddings/provider/ollama/gguf.go b/server/internal/embeddings/provider/ollama/gguf.go
new file mode 100644
index 0000000..14e0cd8
--- /dev/null
+++ b/server/internal/embeddings/provider/ollama/gguf.go
@@ -0,0 +1,164 @@
+package ollama
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// GGUFInputs bundles the values needed to locate (or download) the
+// GGUF weights for the llama-server child. Provider.Start fills it
+// from the provider Config, keeping ResolveGGUFPath independent of
+// where those values were originally sourced.
+type GGUFInputs struct {
+	GGUFPath      string // CIX_GGUF_PATH absolute override
+	Model         string // HF repo id ("owner/repo") or absolute path
+	CacheDir      string // base dir under which downloaded GGUFs live
+	BootstrapPath string // CIX_BOOTSTRAP_GGUF_PATH one-shot import source
+}
+
+// ResolveGGUFPath walks the precedence chain:
+//  1. in.GGUFPath (absolute path env override, validated by Stat).
+//  2. in.Model as absolute path — when the dashboard's "Local path"
+//     mode wrote a filesystem path through the runtime_settings row.
+//  3. Cached file under in.CacheDir/<safe-repo>/*.gguf when in.Model
+//     is an HF repo ID.
+//  4. in.BootstrapPath one-shot import — copies the file into the
+//     cache layout, then behaves like step 3 forever after.
+//  5. HuggingFace download into the same cache. Steps 4 and 5 are
+//     the only ones that write to disk. logger must be non-nil.
+func ResolveGGUFPath(ctx context.Context, in GGUFInputs, logger *slog.Logger) (string, error) {
+	if in.GGUFPath != "" {
+		if _, err := os.Stat(in.GGUFPath); err != nil {
+			return "", fmt.Errorf("CIX_GGUF_PATH=%s: %w", in.GGUFPath, err)
+		}
+		return in.GGUFPath, nil
+	}
+	if filepath.IsAbs(in.Model) {
+		if _, err := os.Stat(in.Model); err != nil {
+			return "", fmt.Errorf("embedding model path %s: %w", in.Model, err)
+		}
+		return in.Model, nil
+	}
+	if !strings.Contains(in.Model, "/") {
+		return "", fmt.Errorf("embedding model %q is neither an absolute path nor an HF repo id (owner/repo)", in.Model)
+	}
+
+	if cached := findCachedGGUF(in.CacheDir, in.Model); cached != "" {
+		logger.Info("using cached gguf", "path", cached)
+		return cached, nil
+	}
+
+	// CIX_BOOTSTRAP_GGUF_PATH — one-time import. Idempotent across
+	// boots: subsequent boots find the imported file via findCachedGGUF
+	// above and skip this branch entirely.
+	if in.BootstrapPath != "" {
+		imported, err := importBootstrapGGUF(in.CacheDir, in.Model, in.BootstrapPath, logger)
+		if err != nil {
+			logger.Warn("bootstrap gguf import failed; falling through to HF download",
+				"src", in.BootstrapPath, "err", err)
+		} else if imported != "" {
+			return imported, nil
+		}
+	}
+
+	return DownloadGGUF(ctx, in.Model, in.CacheDir, logger)
+}
+
+// importBootstrapGGUF copies srcPath into <cacheDir>/<safe_repo>/<basename>
+// atomically (write to .partial, fsync, rename). Returns the final path
+// on success (an existing target is reused as-is), "" when cacheDir or
+// repo is empty or the source is missing (caller falls through to HF
+// download), or an error for IO problems to surface to the operator.
+//
+// safe_repo is the HF repo id with "/" replaced by "__", the directory
+// findCachedGGUF scans, so later boots find the imported file there.
+func importBootstrapGGUF(cacheDir, repo, srcPath string, logger *slog.Logger) (string, error) {
+	if cacheDir == "" || repo == "" {
+		return "", nil
+	}
+	srcInfo, err := os.Stat(srcPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return "", nil
+		}
+		return "", fmt.Errorf("stat bootstrap gguf %s: %w", srcPath, err)
+	}
+	if srcInfo.IsDir() {
+		return "", fmt.Errorf("bootstrap gguf %s is a directory, expected file", srcPath)
+	}
+
+	safeRepo := strings.ReplaceAll(repo, "/", "__")
+	targetDir := filepath.Join(cacheDir, safeRepo)
+	if err := os.MkdirAll(targetDir, 0o755); err != nil {
+		return "", fmt.Errorf("mkdir cache dir: %w", err)
+	}
+	finalPath := filepath.Join(targetDir, filepath.Base(srcPath))
+
+	if _, err := os.Stat(finalPath); err == nil {
+		return finalPath, nil
+	}
+
+	logger.Info("importing bootstrap gguf into cache",
+		"src", srcPath, "dst", finalPath, "size", srcInfo.Size())
+
+	src, err := os.Open(srcPath)
+	if err != nil {
+		return "", fmt.Errorf("open bootstrap gguf: %w", err)
+	}
+	defer src.Close()
+
+	partial := finalPath + ".partial"
+	dst, err := os.OpenFile(partial, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
+	if err != nil {
+		return "", fmt.Errorf("create cache target: %w", err)
+	}
+
+	if _, err := io.Copy(dst, src); err != nil {
+		_ = dst.Close()
+		_ = os.Remove(partial)
+		return "", fmt.Errorf("copy bootstrap gguf: %w", err)
+	}
+	if err := dst.Sync(); err != nil {
+		_ = dst.Close()
+		_ = os.Remove(partial)
+		return "", fmt.Errorf("fsync bootstrap gguf: %w", err)
+	}
+	if err := dst.Close(); err != nil {
+		_ = os.Remove(partial)
+		return "", fmt.Errorf("close bootstrap gguf: %w", err)
+	}
+	if err := os.Rename(partial, finalPath); err != nil {
+		_ = os.Remove(partial)
+		return "", fmt.Errorf("atomic rename bootstrap gguf: %w", err)
+	}
+	logger.Info("bootstrap gguf imported", "path", finalPath)
+	return finalPath, nil
+}
+
+// findCachedGGUF looks for a previously-downloaded .gguf under the
+// standard cache layout produced by DownloadGGUF. Returns "" on any
+// miss (empty cacheDir/repo, IO errors) so the caller proceeds to the
+// bootstrap-import and download paths.
+func findCachedGGUF(cacheDir, repo string) string {
+	if cacheDir == "" || repo == "" {
+		return "" // no cache configured; never scan relative to "/" or the cwd
+	}
+	dir := filepath.Join(cacheDir, strings.ReplaceAll(repo, "/", "__"))
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return ""
+	}
+	for _, e := range entries {
+		name := e.Name()
+		if e.IsDir() || len(name) <= 5 || !strings.EqualFold(filepath.Ext(name), ".gguf") {
+			continue
+		}
+		return filepath.Join(dir, name)
+	}
+	return ""
+}
diff --git a/server/internal/embeddings/hfdownload.go b/server/internal/embeddings/provider/ollama/hfdownload.go
similarity index 99%
rename from server/internal/embeddings/hfdownload.go
rename to server/internal/embeddings/provider/ollama/hfdownload.go
index fc812f7..9510443 100644
--- a/server/internal/embeddings/hfdownload.go
+++ b/server/internal/embeddings/provider/ollama/hfdownload.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import (
 	"context"
diff --git a/server/internal/embeddings/logwriter.go b/server/internal/embeddings/provider/ollama/logwriter.go
similarity index 98%
rename from server/internal/embeddings/logwriter.go
rename to server/internal/embeddings/provider/ollama/logwriter.go
index 0b469d0..7c9ba61 100644
--- a/server/internal/embeddings/logwriter.go
+++ b/server/internal/embeddings/provider/ollama/logwriter.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import (
 	"bytes"
diff --git a/server/internal/embeddings/prefix.go b/server/internal/embeddings/provider/ollama/prefix.go
similarity index 98%
rename from server/internal/embeddings/prefix.go
rename to server/internal/embeddings/provider/ollama/prefix.go
index 76248d1..50a210d 100644
--- a/server/internal/embeddings/prefix.go
+++ b/server/internal/embeddings/provider/ollama/prefix.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import "strings"
 
diff --git a/server/internal/embeddings/prefix_test.go b/server/internal/embeddings/provider/ollama/prefix_test.go
similarity index 99%
rename from server/internal/embeddings/prefix_test.go
rename to server/internal/embeddings/provider/ollama/prefix_test.go
index 9f3e707..7b960e8 100644
--- a/server/internal/embeddings/prefix_test.go
+++ b/server/internal/embeddings/provider/ollama/prefix_test.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import "testing"
 
diff --git a/server/internal/embeddings/provider/ollama/provider.go b/server/internal/embeddings/provider/ollama/provider.go
new file mode 100644
index 0000000..f9bd21e
--- /dev/null
+++ b/server/internal/embeddings/provider/ollama/provider.go
@@ -0,0 +1,384 @@
+// Package ollama implements provider.Provider on top of a supervised
+// llama-server child process. It owns the supervisor (fork+exec,
+// crash-restart budget, readiness probe), the unix/TCP llamaClient,
+// the asymmetric retrieval prefix, the GGUF resolver, and the
+// token-aware multi-pass embedding pipeline.
+//
+// The Service layer (embeddings.Service) brackets all calls with a
+// Queue for backpressure; this package does not impose its own
+// concurrency limit.
+package ollama
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// Config is the typed shape of the ollama provider's persisted config
+// (the JSON-blob in runtime_settings.embedding_provider_config when
+// embedding_provider="ollama"). The Factory's Build method
+// unmarshals JSON into this struct and validates.
+type Config struct {
+	// Model is the HF repo id ("owner/repo") or an absolute path to
+	// a .gguf file. Resolved via ResolveGGUFPath at Start.
+	Model string `json:"model"`
+
+	// GGUFPath is an optional absolute-path override (CIX_GGUF_PATH
+	// equivalent). When set it takes precedence over Model and the
+	// cache lookup.
+	GGUFPath string `json:"gguf_path,omitempty"`
+
+	// CacheDir is where downloaded GGUFs live and where the
+	// bootstrap-import drops files (CIX_GGUF_CACHE_DIR).
+	CacheDir string `json:"cache_dir,omitempty"`
+
+	// BootstrapPath is the one-shot import source
+	// (CIX_BOOTSTRAP_GGUF_PATH). Idempotent across boots.
+	BootstrapPath string `json:"bootstrap_path,omitempty"`
+
+	// BinDir is the directory containing llama-server + its
+	// dylibs (CIX_LLAMA_BIN_DIR).
+	BinDir string `json:"bin_dir,omitempty"`
+
+	// SocketPath is the unix socket for IPC with llama-server.
+	// Auto-falls-back to TCP when this exceeds the platform limit
+	// (104 bytes on darwin).
+	SocketPath string `json:"socket_path,omitempty"`
+
+	// Transport is "unix" or "tcp".
+	Transport string `json:"transport,omitempty"`
+
+	// CtxSize is the context window in tokens.
+	CtxSize int `json:"ctx_size,omitempty"`
+
+	// NGpuLayers: -1 = all on Metal, 0 = CPU only.
+	NGpuLayers int `json:"n_gpu_layers"`
+
+	// NThreads: 0 = let llama-server pick.
+	NThreads int `json:"n_threads,omitempty"`
+
+	// BatchSize: 0 = match CtxSize.
+	BatchSize int `json:"batch_size,omitempty"`
+
+	// StartupSec bounds the readiness probe.
+	StartupSec int `json:"startup_sec,omitempty"`
+}
+
+// Provider is the ollama-backed provider.Provider implementation.
+// One per active config; rebuilt (Stop+new+Start) when the admin
+// changes any config field.
+type Provider struct {
+	cfg    Config
+	logger *slog.Logger
+
+	mu       sync.Mutex
+	sup      *supervisor
+	started  atomic.Bool
+	ggufPath string // resolved at Start; empty before
+}
+
+// New constructs an ollama Provider. Does not start it — call Start
+// to spawn llama-server. Provider methods that need the running
+// child return provider.ErrNotReady until Start succeeds.
+func New(cfg Config, logger *slog.Logger) *Provider {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	return &Provider{cfg: cfg, logger: logger}
+}
+
+// Kind reports the registered factory kind for this provider.
+func (p *Provider) Kind() string { return provider.KindOllama }
+
+// ID is the fingerprint stored in projects.indexed_with_model. Tied
+// to the model name only — different GGUF tunings (ctx, threads,
+// batch) do not change embedding output, so they must NOT change
+// the ID (would force unnecessary reindexes).
+func (p *Provider) ID() string {
+	return "ollama:" + p.cfg.Model
+}
+
+// Dimension returns 0 — the vector store infers dimension from the
+// first upsert (chromem-go behaviour) and CodeRankEmbed-Q8 reports
+// it on first call.
+func (p *Provider) Dimension() int { return 0 }
+
+// SupportsTokenize is true: llama-server exposes /tokenize.
+func (p *Provider) SupportsTokenize() bool { return true }
+
+// Start resolves the GGUF path then spawns the supervisor. Blocks
+// until the readiness probe succeeds or ctx expires.
+func (p *Provider) Start(ctx context.Context) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.sup != nil {
+		// Idempotent — re-Start on an already-running provider is a
+		// no-op (Service uses Stop+new+Start for restarts, never
+		// Start twice on the same instance, but be defensive).
+		return nil
+	}
+
+	gguf, err := ResolveGGUFPath(ctx, GGUFInputs{
+		GGUFPath:      p.cfg.GGUFPath,
+		Model:         p.cfg.Model,
+		CacheDir:      p.cfg.CacheDir,
+		BootstrapPath: p.cfg.BootstrapPath,
+	}, p.logger)
+	if err != nil {
+		return fmt.Errorf("resolve gguf: %w", err)
+	}
+	p.ggufPath = gguf
+
+	supCfg := supervisorConfig{
+		BinDir:     p.cfg.BinDir,
+		GGUFPath:   gguf,
+		SocketPath: p.cfg.SocketPath,
+		Transport:  p.cfg.Transport,
+		CtxSize:    p.cfg.CtxSize,
+		NGpuLayers: p.cfg.NGpuLayers,
+		NThreads:   p.cfg.NThreads,
+		BatchSize:  p.cfg.BatchSize,
+		StartupSec: p.cfg.StartupSec,
+		Model:      p.cfg.Model,
+	}
+	sup, err := newSupervisor(ctx, supCfg, p.logger)
+	if err != nil {
+		return err
+	}
+	p.sup = sup
+	p.started.Store(true)
+	return nil
+}
+
+// Stop detaches the supervisor and shuts it down within ctx. Idempotent.
+func (p *Provider) Stop(ctx context.Context) error {
+	p.mu.Lock()
+	running := p.sup
+	p.sup = nil
+	p.started.Store(false)
+	p.mu.Unlock()
+	if running != nil {
+		return running.Stop(ctx)
+	}
+	return nil
+}
+
+// Ready reports nil when llama-server is alive and responding to
+// /health, provider.ErrUnrecoverable when the restart budget is
+// exhausted, or provider.ErrNotReady while warming up.
+func (p *Provider) Ready(ctx context.Context) error {
+	p.mu.Lock()
+	sup := p.sup
+	p.mu.Unlock()
+	if sup == nil {
+		return provider.ErrNotReady
+	}
+	if sup.dead.Load() {
+		return provider.ErrUnrecoverable
+	}
+	return sup.Ready(ctx)
+}
+
+// Status returns the dashboard snapshot.
+func (p *Provider) Status() provider.Status {
+	p.mu.Lock()
+	sup := p.sup
+	p.mu.Unlock()
+	st := provider.Status{
+		ManagesProcess: true,
+		Model:          p.cfg.Model,
+	}
+	if sup == nil {
+		st.State = provider.StateDisabled
+		return st
+	}
+	src := sup.Status()
+	st.PID = src.PID
+	st.UptimeSeconds = int64(src.Uptime.Seconds())
+	st.LastError = src.LastError
+	switch src.State {
+	case "running":
+		st.State = provider.StateRunning
+	case "failed":
+		st.State = provider.StateFailed
+	case "starting":
+		st.State = provider.StateStarting
+	default:
+		st.State = src.State
+	}
+	return st
+}
+
+// EmbedQuery prepends the asymmetric-retrieval prefix and embeds a
+// single query.
+func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
+	p.mu.Lock()
+	sup := p.sup
+	p.mu.Unlock()
+	if sup == nil {
+		return nil, provider.ErrNotReady
+	}
+	if sup.dead.Load() {
+		return nil, provider.ErrUnrecoverable
+	}
+	if err := p.waitReady(ctx, sup); err != nil {
+		return nil, err
+	}
+	text := ResolveQueryPrefix(p.cfg.Model) + query
+	vecs, err := sup.client.Embeddings(ctx, []string{text})
+	if err != nil {
+		return nil, err
+	}
+	return vecs[0], nil
+}
+
+// EmbedDocuments embeds passages unchanged.
+func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	p.mu.Lock()
+	sup := p.sup
+	p.mu.Unlock()
+	if sup == nil {
+		return nil, provider.ErrNotReady
+	}
+	if sup.dead.Load() {
+		return nil, provider.ErrUnrecoverable
+	}
+	if err := p.waitReady(ctx, sup); err != nil {
+		return nil, err
+	}
+	return sup.client.Embeddings(ctx, texts)
+}
+
+// TokenizeAndEmbed is the token-aware embedding pipeline. For each
+// text it:
+//  1. Calls /tokenize to get token IDs (CLS + content + SEP).
+//  2. Splits sequences longer than CtxSize at token boundaries.
+//  3. Embeds all sequences in one /v1/embeddings call.
+//  4. Averages sub-window vectors back to one vector per text.
+//
+// Splitting needs CtxSize > 2 because every window re-adds CLS and
+// SEP; with a smaller CtxSize, over-long inputs fail with an error.
+func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	p.mu.Lock()
+	sup := p.sup
+	maxTokens := p.cfg.CtxSize
+	p.mu.Unlock()
+	if sup == nil {
+		return nil, provider.ErrNotReady
+	}
+	if sup.dead.Load() {
+		return nil, provider.ErrUnrecoverable
+	}
+	if err := p.waitReady(ctx, sup); err != nil {
+		return nil, err
+	}
+
+	type span struct{ start, length int }
+	spans := make([]span, len(texts))
+	var sequences [][]int
+	for i, text := range texts {
+		ids, err := sup.client.Tokenize(ctx, text)
+		if err != nil {
+			return nil, fmt.Errorf("tokenize text[%d]: %w", i, err)
+		}
+		if len(ids) == 0 {
+			spans[i] = span{start: len(sequences), length: 1}
+			sequences = append(sequences, []int{})
+			continue
+		}
+		if len(ids) <= maxTokens {
+			spans[i] = span{start: len(sequences), length: 1}
+			sequences = append(sequences, ids)
+			continue
+		}
+		// A non-positive window would loop forever or slice out of range.
+		windowSize := maxTokens - 2
+		if windowSize <= 0 {
+			return nil, fmt.Errorf("text[%d]: ctx_size %d is too small to split a %d-token input", i, maxTokens, len(ids))
+		}
+		cls, sep := ids[0], ids[len(ids)-1]
+		content := ids[1 : len(ids)-1]
+		spanStart := len(sequences)
+		for start := 0; start < len(content); start += windowSize {
+			end := start + windowSize
+			if end > len(content) {
+				end = len(content)
+			}
+			window := make([]int, 0, end-start+2)
+			window = append(window, cls)
+			window = append(window, content[start:end]...)
+			window = append(window, sep)
+			sequences = append(sequences, window)
+		}
+		spans[i] = span{start: spanStart, length: len(sequences) - spanStart}
+	}
+
+	allVecs, err := sup.client.EmbedBatchTokenIDs(ctx, sequences)
+	if err != nil {
+		return nil, err
+	}
+
+	result := make([][]float32, len(texts))
+	for i, sp := range spans {
+		if sp.length == 1 {
+			result[i] = allVecs[sp.start]
+			continue
+		}
+		avg := make([]float32, len(allVecs[sp.start]))
+		for k := 0; k < sp.length; k++ {
+			v := allVecs[sp.start+k]
+			for d := range avg {
+				avg[d] += v[d]
+			}
+		}
+		n := float32(sp.length)
+		for d := range avg {
+			avg[d] /= n
+		}
+		result[i] = avg
+	}
+	return result, nil
+}
+
+// EmbedRaw is a parity-test helper: skip prefix, embed texts verbatim.
+// It is exported only because the parity test in the embeddings
+// package needs cross-package access; do not use it in production
+// paths. Unlike EmbedDocuments it does not wait for readiness.
+func (p *Provider) EmbedRaw(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	p.mu.Lock()
+	sup := p.sup
+	p.mu.Unlock()
+	if sup == nil {
+		return nil, provider.ErrNotReady
+	}
+	return sup.client.Embeddings(ctx, texts)
+}
+
+// CacheDir returns the configured GGUF cache directory. Used by the
+// admin /models endpoint to enumerate cached files.
+func (p *Provider) CacheDir() string { return p.cfg.CacheDir }
+
+// waitReady waits up to 5 seconds for the supervisor's child to be
+// ready. The 5s window is short — a healthy steady-state Service is
+// always ready in <1ms; during restarts the queue has already been
+// drained / blocked, so callers waiting here are by design rare.
+func (p *Provider) waitReady(ctx context.Context, sup *supervisor) error {
+	readyCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+	return sup.Ready(readyCtx)
+}
diff --git a/server/internal/embeddings/supervisor.go b/server/internal/embeddings/provider/ollama/supervisor.go
similarity index 99%
rename from server/internal/embeddings/supervisor.go
rename to server/internal/embeddings/provider/ollama/supervisor.go
index 70b27e0..ced98a1 100644
--- a/server/internal/embeddings/supervisor.go
+++ b/server/internal/embeddings/provider/ollama/supervisor.go
@@ -1,4 +1,4 @@
-package embeddings
+package ollama
 
 import (
 	"context"
@@ -15,6 +15,8 @@ import (
 	"sync/atomic"
 	"syscall"
 	"time"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
 
 // darwinSunPathMax is the platform limit for unix socket paths on macOS.
@@ -270,7 +272,7 @@ func (s *supervisor) spawn(ctx context.Context) error {
 		s.lastSpawnErr.Store(err.Error())
 		s.killGroup()
 		<-s.waiterDone
-		return fmt.Errorf("%w: %v", ErrNotReady, err)
+		return fmt.Errorf("%w: %v", provider.ErrNotReady, err)
 	}
 	close(s.readySignal)
 	s.lastSpawnErr.Store("") // clear any stale error from a prior failed start
@@ -573,7 +575,7 @@ func (s *supervisor) Status() SupervisorStatus {
 // Ready blocks until the current child is ready or ctx expires.
 func (s *supervisor) Ready(ctx context.Context) error {
 	if s.dead.Load() {
-		return ErrSupervisor
+		return provider.ErrUnrecoverable
 	}
 	s.mu.RLock()
 	ch := s.readySignal
diff --git a/server/internal/embeddings/provider/openai/factory.go b/server/internal/embeddings/provider/openai/factory.go
new file mode 100644
index 0000000..5d69c91
--- /dev/null
+++ b/server/internal/embeddings/provider/openai/factory.go
@@ -0,0 +1,57 @@
+package openai
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+const defaultAPIKeyEnv = "CIX_OPENAI_API_KEY"
+
+type factory struct{}
+
+func (factory) Kind() string { return provider.KindOpenAI }
+
+func (factory) SchemaJSON() []byte {
+	s := provider.ConfigSchema{
+		Fields: []provider.ConfigField{
+			{Name: "base_url", Label: "Base URL", Kind: "string", Required: true, Default: "https://api.openai.com", Description: "Server origin without /v1 suffix."},
+			{Name: "model", Label: "Model", Kind: "string", Required: true, Default: "text-embedding-3-small"},
+			{Name: "api_key_env", Label: "API key env var", Kind: "secret-env", Required: true, Default: defaultAPIKeyEnv, Description: "Server-side env var name that holds the API key."},
+			{Name: "dimensions", Label: "Dimensions", Kind: "int", Description: "Optional Matryoshka shrink (text-embedding-3*)."},
+		},
+	}
+	b, _ := json.Marshal(s)
+	return b
+}
+
+func (factory) SecretEnvVars() []string { return []string{defaultAPIKeyEnv} }
+
+func (factory) Build(cfg []byte, secrets provider.SecretLookup, logger *slog.Logger) (provider.Provider, error) {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if len(cfg) == 0 {
+		return nil, fmt.Errorf("openai: empty config")
+	}
+	var c Config
+	if err := json.Unmarshal(cfg, &c); err != nil {
+		return nil, fmt.Errorf("openai: unmarshal config: %w", err)
+	}
+	if c.Model == "" {
+		return nil, fmt.Errorf("openai: model is required")
+	}
+	if c.BaseURL == "" {
+		c.BaseURL = "https://api.openai.com"
+	}
+	if c.APIKeyEnv == "" {
+		c.APIKeyEnv = defaultAPIKeyEnv
+	}
+	return New(c, secrets, logger), nil
+}
+
+func init() {
+	provider.Register(factory{})
+}
diff --git a/server/internal/embeddings/provider/openai/openai.go b/server/internal/embeddings/provider/openai/openai.go
new file mode 100644
index 0000000..a213c98
--- /dev/null
+++ b/server/internal/embeddings/provider/openai/openai.go
@@ -0,0 +1,228 @@
+// Package openai implements provider.Provider against any
+// OpenAI-compatible /v1/embeddings endpoint (OpenAI proper, vLLM,
+// TEI, LocalAI, Ollama's own openai-compat endpoint, …). All
+// providers share the same request/response shape; the differences
+// are only the base URL and which API key env var to read.
+package openai
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// Config is the persisted shape of the openai provider's config blob.
+type Config struct {
+	BaseURL    string `json:"base_url"`
+	Model      string `json:"model"`
+	APIKeyEnv  string `json:"api_key_env"`
+	Dimensions int    `json:"dimensions,omitempty"`
+}
+
+// Provider is the openai-compatible HTTP client wrapped behind the
+// provider.Provider interface.
+type Provider struct {
+	cfg     Config
+	logger  *slog.Logger
+	secrets provider.SecretLookup
+	http    *http.Client
+}
+
+// New constructs the Provider. Does not contact the endpoint — call
+// Start to perform a one-shot connect test.
+func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provider {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	return &Provider{
+		cfg:     cfg,
+		logger:  logger,
+		secrets: secrets,
+		http:    &http.Client{Timeout: 60 * time.Second},
+	}
+}
+
+func (p *Provider) Kind() string { return provider.KindOpenAI }
+
+// ID returns the index fingerprint "openai:{model}[:{dim}]". The
+// dimension suffix appears only when a Matryoshka shrink is explicitly
+// configured via `dimensions`; without it the model name alone tells
+// configurations apart.
+func (p *Provider) ID() string {
+	id := "openai:" + p.cfg.Model
+	if p.cfg.Dimensions > 0 {
+		id += ":" + strconv.Itoa(p.cfg.Dimensions)
+	}
+	return id
+}
+
+func (p *Provider) Dimension() int         { return p.cfg.Dimensions }
+func (p *Provider) SupportsTokenize() bool { return false }
+
+// Start runs a one-shot connect test: embed a single short string.
+// Surfaces auth / network errors before the provider is wired into
+// the request path.
+func (p *Provider) Start(ctx context.Context) error {
+	if p.cfg.BaseURL == "" {
+		return errors.New("openai: base_url is required")
+	}
+	if p.cfg.Model == "" {
+		return errors.New("openai: model is required")
+	}
+	if _, ok := p.apiKey(); !ok {
+		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+	testCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+	_, err := p.embed(testCtx, []string{"ping"})
+	if err != nil {
+		return fmt.Errorf("openai: connect test failed: %w", err)
+	}
+	return nil
+}
+
+// Stop is a no-op — the provider holds no managed process or
+// long-lived connection.
+func (p *Provider) Stop(_ context.Context) error { return nil }
+
+// Ready returns nil if the API key is set. We do NOT ping the remote
+// on every Ready call (which the /status footer polls every 30s) —
+// remote outages surface as real embed failures with diagnostics; an
+// always-green footer dot for HTTP-only providers matches the
+// dashboard's documented behaviour.
+func (p *Provider) Ready(_ context.Context) error {
+	if _, ok := p.apiKey(); !ok {
+		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+	return nil
+}
+
+func (p *Provider) Status() provider.Status {
+	st := provider.Status{
+		State:          provider.StateRemote,
+		ManagesProcess: false,
+		Model:          p.cfg.Model,
+	}
+	if _, ok := p.apiKey(); !ok {
+		st.State = provider.StateFailed
+		st.LastError = "API key env var " + p.cfg.APIKeyEnv + " is not set"
+	}
+	return st
+}
+
+// EmbedQuery embeds the query exactly as a one-element document batch —
+// generic OpenAI-compatible servers have no query/document differentiation.
+func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
+	vecs, err := p.embed(ctx, []string{query})
+	if err != nil {
+		return nil, err
+	}
+	return vecs[0], nil // embed returns exactly one vector per input on success
+}
+
+func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	return p.embed(ctx, texts)
+}
+
+// TokenizeAndEmbed falls back to EmbedDocuments — generic openai-style
+// servers expose neither /tokenize nor reliable input token counts.
+// The indexer's chunking step pre-truncates inputs to a conservative
+// limit when SupportsTokenize() is false.
+func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
+	return p.EmbedDocuments(ctx, texts)
+}
+
+type embedRequest struct {
+	Input      []string `json:"input"`
+	Model      string   `json:"model"`
+	Dimensions int      `json:"dimensions,omitempty"`
+}
+
+type embedResponseItem struct {
+	Embedding []float32 `json:"embedding"`
+	Index     int       `json:"index"`
+}
+
+type embedResponse struct {
+	Data []embedResponseItem `json:"data"`
+}
+
+// embed POSTs /v1/embeddings and returns vectors in input order.
+func (p *Provider) embed(ctx context.Context, texts []string) ([][]float32, error) {
+	key, ok := p.apiKey()
+	if !ok {
+		return nil, fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+	body, err := json.Marshal(embedRequest{
+		Input:      texts,
+		Model:      p.cfg.Model,
+		Dimensions: p.cfg.Dimensions,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("openai: marshal: %w", err)
+	}
+	url := p.cfg.BaseURL + "/v1/embeddings"
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("openai: build request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+key)
+
+	resp, err := p.http.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("openai: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
+		return nil, fmt.Errorf("openai: status %d: %s", resp.StatusCode, string(snippet))
+	}
+
+	var er embedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&er); err != nil {
+		return nil, fmt.Errorf("openai: decode: %w", err)
+	}
+	if len(er.Data) != len(texts) {
+		return nil, fmt.Errorf("openai: got %d vectors for %d inputs", len(er.Data), len(texts))
+	}
+	out := make([][]float32, len(er.Data))
+	for _, item := range er.Data {
+		if item.Index < 0 || item.Index >= len(out) {
+			return nil, fmt.Errorf("openai: out-of-range index %d", item.Index)
+		}
+		out[item.Index] = item.Embedding
+	}
+	for i, v := range out {
+		if v == nil {
+			return nil, fmt.Errorf("openai: missing vector at index %d", i)
+		}
+	}
+	return out, nil
+}
+
+// apiKey resolves the configured env var through the SecretLookup on
+// every call. It reports false when no lookup was supplied, no env var
+// name is configured, or the variable is unset or empty — an empty key
+// is treated the same as a missing one.
+func (p *Provider) apiKey() (string, bool) {
+	if p.secrets == nil || p.cfg.APIKeyEnv == "" {
+		return "", false
+	}
+	if key, found := p.secrets(p.cfg.APIKeyEnv); found && key != "" {
+		return key, true
+	}
+	return "", false
+}
diff --git a/server/internal/embeddings/provider/provider.go b/server/internal/embeddings/provider/provider.go
new file mode 100644
index 0000000..a41e46a
--- /dev/null
+++ b/server/internal/embeddings/provider/provider.go
@@ -0,0 +1,182 @@
+// Package provider defines the pluggable embedding-backend abstraction
+// used by embeddings.Service. Implementations live in sub-packages
+// (provider/ollama, provider/openai, provider/voyage). The Service holds
+// exactly one active Provider; switching provider at runtime is what the
+// admin /api/v1/admin/embedding-providers/active endpoint does.
+//
+// Identity & reindex. Every Provider exposes ID() — a stable fingerprint
+// string written to projects.indexed_with_model at index time. When the
+// active provider's ID() changes (different kind, model, or dimension),
+// every project's stored fingerprint is stale; the existing per-project
+// drift check in internal/repojobs detects it on the next clone job and
+// forces a full reindex. Provider switching therefore reuses the model-
+// change pipeline unchanged.
+//
+// Lifecycle. Start is called once after construction (ollama spawns the
+// child process; HTTP-only providers do a connect-test). Stop is called
+// before switching to a different provider and on server shutdown.
+package provider
+
+import (
+	"context"
+	"errors"
+)
+
+// Kind enumerates the built-in provider kinds. New kinds are added by
+// registering a Factory with that kind string.
+const (
+	KindOllama = "ollama"
+	KindOpenAI = "openai"
+	KindVoyage = "voyage"
+)
+
+// Provider is the embedding backend abstraction. Implementations cover
+// one upstream service each (the supervised llama-server sidecar, the
+// OpenAI-compatible /v1/embeddings REST API, the Voyage AI REST API, …).
+//
+// Concurrency. All methods on a Provider must be safe for concurrent
+// use; Service brackets them with a Queue (rate-limit / backpressure)
+// but does not serialise them.
+//
+// Errors. Implementations should wrap upstream HTTP / process failures
+// with enough context for an operator to diagnose. Use ErrNotReady when
+// the backend is alive but not yet able to serve (e.g. ollama warming
+// up); the Service layer treats it as a retriable busy signal.
+type Provider interface {
+	// Kind returns the registered factory kind for this provider, e.g.
+	// "ollama", "openai", "voyage".
+	Kind() string
+
+	// ID returns the fingerprint that uniquely identifies this provider
+	// configuration for the purposes of index invalidation. Format:
+	// "{kind}:{model}[:{dim}][:{dtype}]". The string is opaque to
+	// callers — they only compare it for equality.
+	ID() string
+
+	// Dimension reports the embedding vector dimension this provider
+	// will produce. Used by the vector store to dimension the Chroma
+	// collection when it is first created. May be 0 if the dimension
+	// is only known after the first embed call; the vectorstore then
+	// infers it from the first upsert as before.
+	Dimension() int
+
+	// SupportsTokenize reports whether the provider implements
+	// TokenizeAndEmbed natively. The Service uses this to decide
+	// whether to call TokenizeAndEmbed or fall back to EmbedDocuments
+	// in the indexer's chunking path.
+	SupportsTokenize() bool
+
+	// Start prepares the provider for serving requests. For ollama
+	// this spawns the llama-server child process and blocks until the
+	// readiness probe succeeds. For HTTP-only providers it performs
+	// a one-shot connect-test against the configured endpoint with
+	// any provided API key. ctx bounds the startup; on failure the
+	// caller may try a different config without calling Stop first.
+	Start(ctx context.Context) error
+
+	// Stop tears the provider down within the ctx deadline. Idempotent
+	// and safe to call on a provider that never Start()-ed.
+	Stop(ctx context.Context) error
+
+	// Ready reports whether the provider is currently able to serve an
+	// embedding request. nil = ready. Returning ErrNotReady is the
+	// recommended busy signal during startup / restart windows.
+	Ready(ctx context.Context) error
+
+	// Status returns a snapshot for the dashboard. Fields that are
+	// not meaningful for a given provider (e.g. PID for HTTP-only
+	// providers) should be zero-valued.
+	Status() Status
+
+	// EmbedQuery embeds a single query string. Providers that support
+	// asymmetric retrieval apply their model-specific transform here
+	// (ollama prepends the model's query prefix; voyage sends
+	// input_type=query; openai applies nothing).
+	EmbedQuery(ctx context.Context, query string) ([]float32, error)
+
+	// EmbedDocuments embeds a batch of passages / chunks. Returned
+	// vectors follow input order. Empty input is a no-op returning
+	// (nil, nil).
+	EmbedDocuments(ctx context.Context, texts []string) ([][]float32, error)
+
+	// TokenizeAndEmbed is the token-aware embedding pipeline used by
+	// the indexer for chunks that may exceed the model's context
+	// window. Providers without native tokenization (SupportsTokenize
+	// returns false) may implement this as a pass-through to
+	// EmbedDocuments — callers must use SupportsTokenize() to decide
+	// whether to chunk inputs themselves.
+	TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error)
+}
+
+// State enumerates the dashboard-facing provider states surfaced via
+// Status. Implementations should pick the closest match.
+const (
+	StateStarting = "starting"
+	StateRunning  = "running"
+	StateFailed   = "failed"
+	StateDisabled = "disabled"
+	// StateRemote marks an HTTP-only provider that has no managed
+	// process: it cannot fail to "start" beyond a config-time connect
+	// test, and uptime / pid / restart concepts do not apply. Footer
+	// indicator stays permanently green for this state.
+	StateRemote = "remote"
+)
+
+// Status is the dashboard-facing snapshot of a provider's runtime
+// state. Sidecar-specific fields (PID, Uptime, restart counts) are
+// zero / empty for HTTP-only providers.
+type Status struct {
+	// State is one of the State* constants above.
+	State string `json:"state"`
+
+	// ManagesProcess reports whether this provider manages an
+	// OS child process (true for ollama, false for HTTP providers).
+	// The footer uses this to decide whether to render an
+	// alive/red-dot indicator or a permanent green dot.
+	ManagesProcess bool `json:"manages_process"`
+
+	// Model is the human-readable model identifier (HF repo id,
+	// OpenAI model name, Voyage model name).
+	Model string `json:"model"`
+
+	// PID is the child process id when ManagesProcess. Zero otherwise.
+	PID int `json:"pid,omitempty"`
+
+	// UptimeSeconds is the time the current child has been alive.
+	// Zero for remote providers.
+	UptimeSeconds int64 `json:"uptime_seconds,omitempty"`
+
+	// LastError surfaces the most recent spawn / health-probe / HTTP
+	// error so the dashboard can render it without grepping logs.
+	// Empty when healthy.
+	LastError string `json:"last_error,omitempty"`
+
+	// InFlight reports queue depth at the Service layer. The Service
+	// fills this in after the provider returns its Status; providers
+	// should leave it at 0.
+	InFlight int `json:"in_flight"`
+}
+
+// ErrNotReady signals that the provider is alive but not yet able to
+// serve a request (e.g. ollama warming up after Start). The Service
+// layer translates this to a busy-style 503 with a Retry-After hint.
+var ErrNotReady = errors.New("provider: not ready")
+
+// ErrMissingAPIKey signals that the provider was constructed against
+// a config naming an env-var that is not set at the moment of the
+// call. The admin /test endpoint reports it verbatim so the dashboard
+// can guide the operator to set the env var before saving.
+var ErrMissingAPIKey = errors.New("provider: required API key env var is not set")
+
+// ErrUnrecoverable signals a terminal provider failure — e.g. the
+// ollama sidecar exceeded its crash-restart budget. Subsequent calls
+// return this until the provider is replaced (admin perspective) or
+// the process is restarted. Caller maps to HTTP 503 without retry.
+var ErrUnrecoverable = errors.New("provider: unrecoverable failure")
+
+// SecretLookup resolves an env-var name to its current value at the
+// moment of the call. Implementations must return (value, true) when
+// the env var is set (even if empty), and ("", false) when it is
+// unset. This is the only surface providers see for secrets; the
+// raw value never lives in the Provider's config struct.
+type SecretLookup func(envVarName string) (value string, ok bool)
diff --git a/server/internal/embeddings/provider/voyage/factory.go b/server/internal/embeddings/provider/voyage/factory.go
new file mode 100644
index 0000000..64d12c5
--- /dev/null
+++ b/server/internal/embeddings/provider/voyage/factory.go
@@ -0,0 +1,70 @@
+package voyage
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+const defaultAPIKeyEnv = "CIX_VOYAGE_API_KEY"
+
+type factory struct{}
+
+func (factory) Kind() string { return provider.KindVoyage }
+
+func (factory) SchemaJSON() []byte {
+	s := provider.ConfigSchema{
+		Fields: []provider.ConfigField{
+			{
+				Name: "model", Label: "Model", Kind: "enum", Required: true,
+				Enum:    []string{"voyage-code-3", "voyage-3-large", "voyage-3", "voyage-3-lite", "voyage-code-2"},
+				Default: "voyage-code-3",
+			},
+			{
+				Name: "output_dimension", Label: "Output dimension", Kind: "enum",
+				Enum: []string{"256", "512", "1024", "2048"}, Default: "1024",
+				Description: "Matryoshka shrink. Changing this triggers a full reindex.",
+			},
+			{
+				Name: "output_dtype", Label: "Output dtype", Kind: "enum",
+				Enum: []string{DtypeFloat, DtypeInt8}, Default: DtypeFloat,
+				Description: "int8 is dequantized to float32 on the server side.",
+			},
+			{Name: "truncation", Label: "Truncate over-length input", Kind: "bool", Default: true},
+			{Name: "api_key_env", Label: "API key env var", Kind: "secret-env", Required: true, Default: defaultAPIKeyEnv},
+		},
+	}
+	b, _ := json.Marshal(s)
+	return b
+}
+
+func (factory) SecretEnvVars() []string { return []string{defaultAPIKeyEnv} }
+
+func (factory) Build(cfg []byte, secrets provider.SecretLookup, logger *slog.Logger) (provider.Provider, error) {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if len(cfg) == 0 {
+		return nil, fmt.Errorf("voyage: empty config")
+	}
+	var c Config
+	if err := json.Unmarshal(cfg, &c); err != nil {
+		return nil, fmt.Errorf("voyage: unmarshal config: %w", err)
+	}
+	if c.Model == "" {
+		return nil, fmt.Errorf("voyage: model is required")
+	}
+	if c.APIKeyEnv == "" {
+		c.APIKeyEnv = defaultAPIKeyEnv
+	}
+	if c.OutputDtype == "" {
+		c.OutputDtype = DtypeFloat
+	}
+	return New(c, secrets, logger), nil
+}
+
+func init() {
+	provider.Register(factory{})
+}
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
new file mode 100644
index 0000000..1d0e276
--- /dev/null
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -0,0 +1,295 @@
+// Package voyage implements provider.Provider against the Voyage AI
+// embeddings API (https://api.voyageai.com/v1/embeddings).
+//
+// Voyage diverges from the OpenAI shape in three ways we care about:
+//   - input_type: "query" vs "document" — required for retrieval
+//     quality. EmbedQuery sends "query"; EmbedDocuments sends
+//     "document".
+//   - output_dimension: Matryoshka shrink, configured by the admin
+//     (256/512/1024/2048). Part of Provider.ID() because changing it
+//     invalidates the existing index.
+//   - output_dtype: float|int8 (binary/ubinary are out of scope —
+//     chromem-go has no hamming search). For int8 the server returns
+//     a list of integers per dimension; we dequantize to float32 in
+//     this package before returning vectors to the vector store.
+//
+// usage. Voyage omits prompt_tokens from the usage object — only
+// total_tokens is present. The response struct therefore has its own
+// shape distinct from OpenAI's.
+package voyage
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// DefaultBaseURL is the public Voyage AI embeddings endpoint origin.
+const DefaultBaseURL = "https://api.voyageai.com"
+
+// Supported dtypes. binary/ubinary intentionally absent for v1.
+const (
+	DtypeFloat = "float"
+	DtypeInt8  = "int8"
+)
+
+// Config is the persisted shape of the voyage provider's config blob.
+type Config struct {
+	BaseURL         string `json:"base_url,omitempty"`
+	APIKeyEnv       string `json:"api_key_env"`
+	Model           string `json:"model"`
+	OutputDimension int    `json:"output_dimension,omitempty"`
+	OutputDtype     string `json:"output_dtype,omitempty"`
+	Truncation      bool   `json:"truncation,omitempty"`
+}
+
+// Provider is the Voyage HTTP client.
+type Provider struct {
+	cfg     Config
+	logger  *slog.Logger
+	secrets provider.SecretLookup
+	http    *http.Client
+}
+
+// New constructs the Provider. Does not contact the endpoint.
+func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provider {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if cfg.BaseURL == "" {
+		cfg.BaseURL = DefaultBaseURL
+	}
+	if cfg.OutputDtype == "" {
+		cfg.OutputDtype = DtypeFloat
+	}
+	return &Provider{
+		cfg:     cfg,
+		logger:  logger,
+		secrets: secrets,
+		http:    &http.Client{Timeout: 60 * time.Second},
+	}
+}
+
+func (p *Provider) Kind() string { return provider.KindVoyage }
+
+// ID returns the index fingerprint "voyage:{model}:{dim}:{dtype}".
+// Model, dimension and dtype all contribute to embedding identity, so
+// changing any of them invalidates the index. A non-positive
+// output_dimension is rendered as the literal "auto".
+func (p *Provider) ID() string {
+	dimStr := "auto"
+	if p.cfg.OutputDimension > 0 {
+		dimStr = strconv.Itoa(p.cfg.OutputDimension)
+	}
+	return "voyage:" + p.cfg.Model + ":" + dimStr + ":" + p.cfg.OutputDtype
+}
+
+func (p *Provider) Dimension() int         { return p.cfg.OutputDimension }
+func (p *Provider) SupportsTokenize() bool { return false }
+
+func (p *Provider) Start(ctx context.Context) error {
+	if p.cfg.Model == "" {
+		return errors.New("voyage: model is required")
+	}
+	switch p.cfg.OutputDtype {
+	case DtypeFloat, DtypeInt8:
+	default:
+		return fmt.Errorf("voyage: unsupported output_dtype %q (use float or int8)", p.cfg.OutputDtype)
+	}
+	if _, ok := p.apiKey(); !ok {
+		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+	testCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+	_, err := p.embed(testCtx, []string{"ping"}, "document")
+	if err != nil {
+		return fmt.Errorf("voyage: connect test failed: %w", err)
+	}
+	return nil
+}
+
+func (p *Provider) Stop(_ context.Context) error { return nil }
+
+func (p *Provider) Ready(_ context.Context) error {
+	if _, ok := p.apiKey(); !ok {
+		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+	return nil
+}
+
+func (p *Provider) Status() provider.Status {
+	st := provider.Status{
+		State:          provider.StateRemote,
+		ManagesProcess: false,
+		Model:          p.cfg.Model,
+	}
+	if _, ok := p.apiKey(); !ok {
+		st.State = provider.StateFailed
+		st.LastError = "API key env var " + p.cfg.APIKeyEnv + " is not set"
+	}
+	return st
+}
+
+func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
+	vecs, err := p.embed(ctx, []string{query}, "query")
+	if err != nil {
+		return nil, err
+	}
+	return vecs[0], nil
+}
+
+func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+	return p.embed(ctx, texts, "document")
+}
+
+func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
+	return p.EmbedDocuments(ctx, texts)
+}
+
+type embedRequest struct {
+	Input           []string `json:"input"`
+	Model           string   `json:"model"`
+	InputType       string   `json:"input_type,omitempty"`
+	OutputDimension int      `json:"output_dimension,omitempty"`
+	OutputDtype     string   `json:"output_dtype,omitempty"`
+	Truncation      bool     `json:"truncation"` // no omitempty: the API defaults to true, so false must be sent explicitly
+}
+
+// embedResponseItem.Embedding is decoded as json.RawMessage because
+// the shape depends on output_dtype: []float for float, []int for
+// int8. dequantize() handles both branches.
+type embedResponseItem struct {
+	Embedding json.RawMessage `json:"embedding"`
+	Index     int             `json:"index"`
+}
+
+type embedResponseUsage struct {
+	TotalTokens int `json:"total_tokens"`
+}
+
+type embedResponse struct {
+	Data  []embedResponseItem `json:"data"`
+	Model string              `json:"model"`
+	Usage embedResponseUsage  `json:"usage"`
+}
+
+func (p *Provider) embed(ctx context.Context, texts []string, inputType string) ([][]float32, error) {
+	key, ok := p.apiKey()
+	if !ok {
+		return nil, fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
+	}
+
+	body, err := json.Marshal(embedRequest{
+		Input:           texts,
+		Model:           p.cfg.Model,
+		InputType:       inputType,
+		OutputDimension: p.cfg.OutputDimension,
+		OutputDtype:     p.cfg.OutputDtype,
+		Truncation:      p.cfg.Truncation,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("voyage: marshal: %w", err)
+	}
+	url := p.cfg.BaseURL + "/v1/embeddings"
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("voyage: build request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+key)
+
+	resp, err := p.http.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("voyage: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
+		return nil, fmt.Errorf("voyage: status %d: %s", resp.StatusCode, string(snippet))
+	}
+
+	var er embedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&er); err != nil {
+		return nil, fmt.Errorf("voyage: decode: %w", err)
+	}
+	if len(er.Data) != len(texts) {
+		return nil, fmt.Errorf("voyage: got %d vectors for %d inputs", len(er.Data), len(texts))
+	}
+	out := make([][]float32, len(er.Data))
+	for _, item := range er.Data {
+		if item.Index < 0 || item.Index >= len(out) {
+			return nil, fmt.Errorf("voyage: out-of-range index %d", item.Index)
+		}
+		vec, err := dequantize(item.Embedding, p.cfg.OutputDtype)
+		if err != nil {
+			return nil, fmt.Errorf("voyage: decode embedding[%d]: %w", item.Index, err)
+		}
+		out[item.Index] = vec
+	}
+	for i, v := range out {
+		if v == nil {
+			return nil, fmt.Errorf("voyage: missing vector at index %d", i)
+		}
+	}
+	return out, nil
+}
+
+// dequantize converts the raw JSON embedding to []float32 per dtype.
+//
+// For dtype=float: passthrough — Voyage returns IEEE 754 floats.
+// For dtype=int8: each component is a signed 8-bit integer in
+// [-128, 127]; Voyage's docs prescribe dividing by 127.0 to recover
+// the approximate unit-norm float representation.
+//
+// This is the only place in the codebase that handles int8 quantized
+// embeddings; chromem-go and the search path both work exclusively
+// in float32.
+func dequantize(raw json.RawMessage, dtype string) ([]float32, error) {
+	switch dtype {
+	case DtypeInt8:
+		var ints []int8
+		if err := json.Unmarshal(raw, &ints); err != nil {
+			return nil, fmt.Errorf("int8 decode: %w", err)
+		}
+		out := make([]float32, len(ints))
+		for i, v := range ints {
+			out[i] = float32(v) / 127.0
+		}
+		return out, nil
+	default:
+		// "float" (and empty as a defensive default — Voyage's docs
+		// say float is the implicit choice when output_dtype is
+		// omitted from the request).
+		var floats []float32
+		if err := json.Unmarshal(raw, &floats); err != nil {
+			return nil, fmt.Errorf("float decode: %w", err)
+		}
+		return floats, nil
+	}
+}
+
+// apiKey resolves the configured env var through the SecretLookup on
+// every call. It reports false when no lookup was supplied, no env var
+// name is configured, or the variable is unset or empty — an empty key
+// is treated the same as a missing one.
+func (p *Provider) apiKey() (string, bool) {
+	if p.secrets == nil || p.cfg.APIKeyEnv == "" {
+		return "", false
+	}
+	if key, found := p.secrets(p.cfg.APIKeyEnv); found && key != "" {
+		return key, true
+	}
+	return "", false
+}
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index ee3a7d2..64288a2 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -2,45 +2,58 @@ package embeddings
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
-	"io"
 	"log/slog"
 	"os"
-	"path/filepath"
-	"strings"
+	"sync"
 	"time"
 
 	"github.com/dvcdsys/code-index/server/internal/config"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider/ollama"
+
+
+	// Blank imports trigger each provider package's init() which
+	// registers a Factory in the registry. Service builds the active
+	// provider purely by kind string — these imports are the wiring.
+	_ "github.com/dvcdsys/code-index/server/internal/embeddings/provider/openai"
+	_ "github.com/dvcdsys/code-index/server/internal/embeddings/provider/voyage"
 )
 
-// Service is the public embeddings API used by handlers. It composes the
-// llama-server supervisor, the unix-socket client, the concurrency queue, and
-// the per-model query-prefix policy. Handlers should call EmbedQuery for
-// search inputs (applies prefix for asymmetric retrieval) and EmbedTexts for
-// passages/chunks.
+// Service is the public embeddings API used by handlers and the indexer.
+// It composes:
+//   - the active embedding provider (ollama sidecar / OpenAI / Voyage / …)
+//   - a concurrency queue for backpressure
 //
-// A Service with Disabled == true is a legal no-op used in tests; every
-// method returns ErrDisabled. main.go constructs it via New when
+// Concurrency. Embed* methods are safe under concurrent callers — they
+// each acquire a slot from the queue and release it on return. Provider
+// swaps (SwitchProvider) drain the queue first to avoid stranding
+// in-flight requests on a torn-down child process.
+//
+// A Service with disabled == true is a legal no-op used in tests; Embed*,
+// Ready and Restart return ErrDisabled. main.go constructs it that way when
 // cfg.EmbeddingsEnabled is false.
 type Service struct {
 	cfg    *config.Config
 	logger *slog.Logger
 
-	sup      *supervisor
 	queue    *Queue
-	prefix   string
 	disabled bool
+
+	// mu guards current — swaps happen behind it (under BlockNew/Resume
+	// at the queue layer, but mu makes the swap itself atomic).
+	mu      sync.RWMutex
+	current provider.Provider
 }
 
-// New constructs a Service. If cfg.EmbeddingsEnabled is false it returns a
-// disabled Service that reports ErrDisabled on every Embed* call but can
-// still be Stop()-ed cleanly. Otherwise it resolves the GGUF path (env →
-// cache → HF download), then starts the llama-server supervisor and blocks
-// until the readiness probe succeeds.
+// New constructs a Service. If cfg.EmbeddingsEnabled is false it returns
+// a disabled Service. Otherwise it builds the configured provider
+// (ollama only for now — the persisted-provider machinery lands in a
+// later phase) and blocks until Start succeeds.
 //
-// ctx governs startup only. It is NOT stored on the Service — Stop has its
-// own context so shutdown can be bounded independently of startup.
+// ctx governs startup only; Stop has its own context.
 func New(ctx context.Context, cfg *config.Config, logger *slog.Logger) (*Service, error) {
 	if logger == nil {
 		logger = slog.Default()
@@ -50,42 +63,55 @@ func New(ctx context.Context, cfg *config.Config, logger *slog.Logger) (*Service
 		return &Service{cfg: cfg, logger: logger, disabled: true}, nil
 	}
 
-	ggufPath, err := resolveGGUFPath(ctx, cfg, logger)
+	prov, err := buildOllamaFromConfig(cfg, logger)
 	if err != nil {
-		return nil, fmt.Errorf("resolve gguf: %w", err)
+		return nil, fmt.Errorf("build ollama provider: %w", err)
 	}
-
-	supCfg := supervisorConfig{
-		BinDir:     cfg.LlamaBinDir,
-		GGUFPath:   ggufPath,
-		SocketPath: cfg.LlamaSocketPath,
-		Transport:  cfg.LlamaTransport,
-		CtxSize:    cfg.LlamaCtxSize,
-		NGpuLayers: cfg.LlamaNGpuLayers,
-		NThreads:   cfg.LlamaNThreads,
-		BatchSize:  cfg.LlamaBatchSize,
-		StartupSec: cfg.LlamaStartupSec,
-		Model:      cfg.EmbeddingModel,
-	}
-
-	sup, err := newSupervisor(ctx, supCfg, logger)
-	if err != nil {
+	if err := prov.Start(ctx); err != nil {
 		return nil, err
 	}
 
 	return &Service{
-		cfg:    cfg,
-		logger: logger,
-		sup:    sup,
-		queue:  NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second),
-		prefix: ResolveQueryPrefix(cfg.EmbeddingModel),
+		cfg:     cfg,
+		logger:  logger,
+		queue:   NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second),
+		current: prov,
 	}, nil
 }
 
-// Config returns the *config.Config the service was constructed with. The
-// pointer is shared; callers that mutate it in place must understand they
-// are racing the supervisor — only the dashboard restart path is supposed
-// to do this, and it does so behind queue.BlockNew + sup.Restart.
+// buildOllamaFromConfig assembles an ollama.Provider out of the env-
+// derived *config.Config. Bridges the legacy bootstrap path until the
+// provider config persists into runtime_settings (Phase 6).
+func buildOllamaFromConfig(cfg *config.Config, logger *slog.Logger) (provider.Provider, error) {
+	c := ollama.Config{
+		Model:         cfg.EmbeddingModel,
+		GGUFPath:      cfg.GGUFPath,
+		CacheDir:      cfg.GGUFCacheDir,
+		BootstrapPath: cfg.BootstrapGGUFPath,
+		BinDir:        cfg.LlamaBinDir,
+		SocketPath:    cfg.LlamaSocketPath,
+		Transport:     cfg.LlamaTransport,
+		CtxSize:       cfg.LlamaCtxSize,
+		NGpuLayers:    cfg.LlamaNGpuLayers,
+		NThreads:      cfg.LlamaNThreads,
+		BatchSize:     cfg.LlamaBatchSize,
+		StartupSec:    cfg.LlamaStartupSec,
+	}
+	b, err := json.Marshal(c)
+	if err != nil {
+		return nil, fmt.Errorf("marshal ollama config: %w", err)
+	}
+	return provider.Build(context.Background(), provider.KindOllama, b, envSecrets, logger)
+}
+
+// envSecrets resolves env-var names via os.LookupEnv. Production
+// SecretLookup; tests pass their own to avoid touching the process
+// environment.
+func envSecrets(name string) (string, bool) {
+	return os.LookupEnv(name)
+}
+
+// Config returns the *config.Config the service was constructed with.
 func (s *Service) Config() *config.Config {
 	if s == nil {
 		return nil
@@ -94,66 +120,100 @@ func (s *Service) Config() *config.Config {
 }
 
 // CacheDirFromService returns the GGUF cache directory the dashboard's
-// /admin/models handler should walk. Returns "" when the EmbeddingsQuerier
-// isn't a *Service (test fakes) or when the service is disabled.
+// /admin/models handler should walk. Returns "" when the
+// EmbeddingsQuerier isn't a *Service whose active provider is ollama
+// (e.g. test fakes, openai/voyage active).
 func CacheDirFromService(q any) string {
 	s, ok := q.(*Service)
-	if !ok || s == nil || s.cfg == nil {
+	if !ok || s == nil {
+		return ""
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
 		return ""
 	}
-	return s.cfg.GGUFCacheDir
+	ol, ok := cur.(*ollama.Provider)
+	if !ok {
+		return ""
+	}
+	return ol.CacheDir()
 }
 
-// Stop tears the supervisor down within the ctx deadline. Safe to call on a
-// disabled or partially-initialised Service.
+// Stop tears the current provider down within ctx. Safe on a disabled
+// or never-started Service.
 func (s *Service) Stop(ctx context.Context) error {
-	if s == nil || s.disabled || s.sup == nil {
+	if s == nil || s.disabled {
+		return nil
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
 		return nil
 	}
-	return s.sup.Stop(ctx)
+	return cur.Stop(ctx)
 }
 
-// Status returns a snapshot of the sidecar process state for the dashboard.
-// Returns SupervisorStatus{State: "disabled"} when the service was started
-// with embeddings turned off — the dashboard renders a banner in that case
-// and disables the runtime-config save buttons.
-func (s *Service) Status() SupervisorStatus {
+// Status returns a snapshot for the dashboard. State="disabled" when
+// embeddings were turned off at boot.
+func (s *Service) Status() provider.Status {
 	if s == nil || s.disabled {
-		return SupervisorStatus{State: "disabled"}
+		return provider.Status{State: provider.StateDisabled}
 	}
-	if s.sup == nil {
-		return SupervisorStatus{State: "failed", LastError: "supervisor not initialised"}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		return provider.Status{State: provider.StateFailed, LastError: "provider not initialised"}
 	}
-	st := s.sup.Status()
+	st := cur.Status()
 	if s.queue != nil {
-		// Annotate with in-flight count so the UI can show "draining (N)"
-		// during a restart cycle.
 		st.InFlight = s.queue.InFlight()
 	}
 	return st
 }
 
-// Restart drains the embedding queue, stops the current sidecar child, and
-// spawns a new one with the new config. cfg is the freshly-resolved
-// runtimecfg-on-top-of-env Config snapshot — Restart does not consult any
-// stored boot config.
-//
-// On success, the new sidecar is ready to serve embeddings before this
-// returns. On failure, the supervisor enters the "failed" state and the
-// queue is reopened (so callers get the existing ErrSupervisor / ErrBusy
-// rather than a permanent block).
+// CurrentKind reports the kind of the active provider, or "" when
+// disabled / not yet built. Used by /status and admin endpoints.
+func (s *Service) CurrentKind() string {
+	if s == nil || s.disabled {
+		return ""
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		return ""
+	}
+	return cur.Kind()
+}
+
+// EmbeddingModel returns the active provider's fingerprint ID(). Used
+// by repojobs to detect drift against projects.indexed_with_model.
+func (s *Service) EmbeddingModel() string {
+	if s == nil || s.disabled {
+		return ""
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		return ""
+	}
+	return cur.ID()
+}
+
+// Restart preserves the legacy admin /sidecar/restart contract: drain
+// the queue, swap in a freshly-built provider with the supplied cfg,
+// Start it. Currently only supports the ollama provider; openai/voyage
+// callers use SwitchProvider directly.
 func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 	if s == nil || s.disabled {
 		return ErrDisabled
 	}
-	if s.sup == nil {
-		return ErrSupervisor
-	}
 
-	// Drain: refuse new acquires, then wait for in-flight to settle. 30s
-	// matches the documented restart UX in the dashboard plan; longer values
-	// would let a stuck embedding call block the operator's intentional
-	// restart indefinitely.
 	s.queue.BlockNew()
 	defer s.queue.Resume()
 	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
@@ -166,411 +226,131 @@ func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 		drainCancel()
 	}
 
-	// Resolve the (possibly new) GGUF path before tearing down the current
-	// child — if resolution fails, we stay on the running sidecar instead of
-	// crashing it for a config we can't honour.
-	ggufPath, err := resolveGGUFPath(ctx, cfg, s.logger)
-	if err != nil {
-		return fmt.Errorf("resolve gguf for restart: %w", err)
-	}
-
-	// Update queue concurrency / prefix to match the new model. The buffered
-	// slot channel can't be resized in place; we swap the queue, but only
-	// AFTER drain so no caller is mid-Acquire/Release on the old channel.
 	if cfg.MaxEmbeddingConcurrency != cap(s.queue.slots) {
 		s.queue = NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second)
-		// New queue starts unblocked; that's fine because we hold the
-		// *previous* queue's blocked state via deferred Resume. The previous
-		// queue is now garbage and won't see any callers.
 	}
-	s.prefix = ResolveQueryPrefix(cfg.EmbeddingModel)
 
-	supCfg := supervisorConfig{
-		BinDir:     cfg.LlamaBinDir,
-		GGUFPath:   ggufPath,
-		SocketPath: cfg.LlamaSocketPath,
-		Transport:  cfg.LlamaTransport,
-		CtxSize:    cfg.LlamaCtxSize,
-		NGpuLayers: cfg.LlamaNGpuLayers,
-		NThreads:   cfg.LlamaNThreads,
-		BatchSize:  cfg.LlamaBatchSize,
-		StartupSec: cfg.LlamaStartupSec,
-		Model:      cfg.EmbeddingModel,
-	}
-	return s.sup.Restart(ctx, supCfg)
+	newProv, err := buildOllamaFromConfig(cfg, s.logger)
+	if err != nil {
+		return fmt.Errorf("rebuild ollama provider: %w", err)
+	}
+
+	// Stop the old, start the new. On Start failure leave current==nil
+	// so subsequent calls fail fast with ErrSupervisor — the operator
+	// can then call Restart again with a corrected config.
+	s.mu.Lock()
+	old := s.current
+	s.current = nil
+	s.mu.Unlock()
+	if old != nil {
+		stopCtx, stopCancel := context.WithTimeout(ctx, 30*time.Second)
+		_ = old.Stop(stopCtx)
+		stopCancel()
+	}
+	if err := newProv.Start(ctx); err != nil {
+		s.logger.Error("embeddings: restart Start failed; provider remains down", "err", err)
+		return err
+	}
+	s.mu.Lock()
+	s.current = newProv
+	s.cfg = cfg
+	s.mu.Unlock()
+	return nil
 }
 
-// Ready reports whether the embeddings pipeline is currently able to serve a
-// request. Returns nil when the model is loaded and the supervisor is healthy,
-// ErrDisabled when embeddings are turned off, or ErrSupervisor/ErrNotReady
-// when the sidecar has died or is still warming up. m5 — /api/v1/status uses
-// this to populate model_loaded rather than hard-coding `true`.
+// Ready reports whether the embeddings pipeline can serve a request.
 func (s *Service) Ready(ctx context.Context) error {
 	if s == nil || s.disabled {
 		return ErrDisabled
 	}
-	if s.sup == nil {
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
 		return ErrSupervisor
 	}
-	if s.sup.dead.Load() {
+	err := cur.Ready(ctx)
+	if errors.Is(err, provider.ErrUnrecoverable) {
 		return ErrSupervisor
 	}
-	return s.sup.Ready(ctx)
+	if errors.Is(err, provider.ErrNotReady) {
+		return ErrNotReady
+	}
+	return err
 }
 
-// EmbedQuery prepends the model's asymmetric-retrieval prefix and returns a
-// single vector. Mirrors Python `embed_query`.
+// EmbedQuery delegates to the active provider after acquiring a queue
+// slot. The provider applies its own query-side transform (ollama
+// prefix, voyage input_type=query, openai pass-through).
 func (s *Service) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
-	if s.disabled {
+	if s == nil || s.disabled {
 		return nil, ErrDisabled
 	}
-	text := s.prefix + query
-	vecs, err := s.embedBatch(ctx, []string{text})
+	cur, err := s.acquireProvider(ctx)
 	if err != nil {
 		return nil, err
 	}
-	return vecs[0], nil
+	slotStart := time.Now()
+	defer s.queue.Release(slotStart)
+	return cur.EmbedQuery(ctx, query)
 }
 
-// EmbedTexts embeds passages unchanged (no prefix). Mirrors Python
-// `embed_texts`. Returned vectors follow input order.
+// EmbedTexts embeds passages unchanged.
 func (s *Service) EmbedTexts(ctx context.Context, texts []string) ([][]float32, error) {
-	if s.disabled {
+	if s == nil || s.disabled {
 		return nil, ErrDisabled
 	}
-	return s.embedBatch(ctx, texts)
-}
-
-// embedBatch is the shared path used by both EmbedQuery and EmbedTexts. It
-// acquires a queue slot, waits for the supervisor to be ready, and issues the
-// HTTP call. Prefix logic stays in the callers so the queue accounting is
-// identical regardless of whether the caller was a query or a passage batch.
-func (s *Service) embedBatch(ctx context.Context, texts []string) ([][]float32, error) {
-	if s.sup.dead.Load() {
-		return nil, ErrSupervisor
-	}
 	if len(texts) == 0 {
 		return nil, nil
 	}
-
-	// Block on queue slot first — this is the backpressure surface that maps
-	// to HTTP 503 + Retry-After.
-	slotStart := time.Now()
-	if err := s.queue.Acquire(ctx); err != nil {
+	cur, err := s.acquireProvider(ctx)
+	if err != nil {
 		return nil, err
 	}
+	slotStart := time.Now()
 	defer s.queue.Release(slotStart)
-
-	// Make sure the child process finished its (re)start before issuing the
-	// call. For a healthy steady-state Service this is a no-op.
-	readyCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
-	err := s.sup.Ready(readyCtx)
-	cancel()
-	if err != nil {
-		if errors.Is(err, ErrSupervisor) {
-			return nil, ErrSupervisor
-		}
-		return nil, fmt.Errorf("wait ready: %w", err)
-	}
-
-	return s.sup.client.Embeddings(ctx, texts)
+	return cur.EmbedDocuments(ctx, texts)
 }
 
-// TokenizeAndEmbed is the token-aware embedding pipeline. For each text it:
-//  1. Calls /tokenize to get token IDs (CLS + content + SEP).
-//  2. Splits sequences longer than cfg.LlamaCtxSize at token boundaries,
-//     preserving CLS/SEP on each window.
-//  3. Embeds all sequences in a single /v1/embeddings call using pre-tokenized
-//     IDs — no re-tokenization happens inside the model server.
-//  4. Averages sub-window vectors back to one vector per original text.
-//
-// The entire operation holds one queue slot so back-pressure accounting matches
-// EmbedTexts. Returns ErrDisabled / ErrSupervisor / ErrBusy on the same
-// conditions as EmbedTexts.
+// TokenizeAndEmbed runs the token-aware embedding pipeline. For
+// providers that don't support native tokenization
+// (SupportsTokenize() == false) this is identical to EmbedTexts —
+// callers must chunk inputs themselves before reaching here.
 func (s *Service) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
-	if s.disabled {
+	if s == nil || s.disabled {
 		return nil, ErrDisabled
 	}
-	if s.sup.dead.Load() {
-		return nil, ErrSupervisor
-	}
 	if len(texts) == 0 {
 		return nil, nil
 	}
-
-	slotStart := time.Now()
-	if err := s.queue.Acquire(ctx); err != nil {
+	cur, err := s.acquireProvider(ctx)
+	if err != nil {
 		return nil, err
 	}
+	slotStart := time.Now()
 	defer s.queue.Release(slotStart)
-
-	readyCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
-	err := s.sup.Ready(readyCtx)
-	cancel()
-	if err != nil {
-		if errors.Is(err, ErrSupervisor) {
-			return nil, ErrSupervisor
-		}
-		return nil, fmt.Errorf("wait ready: %w", err)
+	if cur.SupportsTokenize() {
+		return cur.TokenizeAndEmbed(ctx, texts)
 	}
-
-	maxTokens := s.cfg.LlamaCtxSize
-
-	// Phase 1: tokenize each text. Accumulate flat sequences slice and a
-	// span table that records which flat sequences belong to each text.
-	type span struct{ start, length int }
-	spans := make([]span, len(texts))
-	var sequences [][]int
-
-	for i, text := range texts {
-		ids, err := s.sup.client.Tokenize(ctx, text)
-		if err != nil {
-			return nil, fmt.Errorf("tokenize text[%d]: %w", i, err)
-		}
-
-		if len(ids) == 0 {
-			// Empty result: placeholder — embed will return a zero vector.
-			spans[i] = span{start: len(sequences), length: 1}
-			sequences = append(sequences, []int{})
-			continue
-		}
-
-		if len(ids) <= maxTokens {
-			spans[i] = span{start: len(sequences), length: 1}
-			sequences = append(sequences, ids)
-			continue
-		}
-
-		// Sequence exceeds context window — split at token boundaries.
-		// ids[0] is CLS, ids[len-1] is SEP (add_special=true).
-		cls := ids[0]
-		sep := ids[len(ids)-1]
-		content := ids[1 : len(ids)-1]
-		windowSize := maxTokens - 2 // reserve 2 slots for CLS + SEP
-
-		spanStart := len(sequences)
-		for start := 0; start < len(content); start += windowSize {
-			end := start + windowSize
-			if end > len(content) {
-				end = len(content)
-			}
-			window := make([]int, 0, end-start+2)
-			window = append(window, cls)
-			window = append(window, content[start:end]...)
-			window = append(window, sep)
-			sequences = append(sequences, window)
-		}
-		spans[i] = span{start: spanStart, length: len(sequences) - spanStart}
-	}
-
-	// Phase 2: single batch embed call with all pre-tokenized sequences.
-	allVecs, err := s.sup.client.EmbedBatchTokenIDs(ctx, sequences)
-	if err != nil {
-		return nil, err
-	}
-
-	// Phase 3: re-assemble — average sub-window vectors for split texts.
-	result := make([][]float32, len(texts))
-	for i, sp := range spans {
-		if sp.length == 1 {
-			result[i] = allVecs[sp.start]
-			continue
-		}
-		// Average sp.length vectors element-wise.
-		dim := len(allVecs[sp.start])
-		avg := make([]float32, dim)
-		for k := 0; k < sp.length; k++ {
-			v := allVecs[sp.start+k]
-			for d := range avg {
-				avg[d] += v[d]
-			}
-		}
-		n := float32(sp.length)
-		for d := range avg {
-			avg[d] /= n
-		}
-		result[i] = avg
-	}
-	return result, nil
+	return cur.EmbedDocuments(ctx, texts)
 }
 
-// embedRaw skips the queue *and* the prefix logic. It exists as a test helper
-// for the parity gate: the reference file stores the exact text that was fed
-// to the model, so the gate must not re-apply the prefix. This method is
-// deliberately lowercase (package-private) — production handlers must go
-// through EmbedQuery / EmbedTexts.
-func (s *Service) embedRaw(ctx context.Context, texts []string) ([][]float32, error) {
-	if s.disabled {
-		return nil, ErrDisabled
+// acquireProvider acquires a queue slot and returns the active
+// provider snapshot. Caller is responsible for queue.Release once the
+// call returns (deferred at call site so the slot is released even on
+// provider error).
+func (s *Service) acquireProvider(ctx context.Context) (provider.Provider, error) {
+	if err := s.queue.Acquire(ctx); err != nil {
+		return nil, err
 	}
-	if s.sup.dead.Load() {
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		// We hold the slot but have nothing to call — release it before
+		// returning the error so subsequent callers aren't starved.
+		s.queue.Release(time.Now())
 		return nil, ErrSupervisor
 	}
-	if len(texts) == 0 {
-		return nil, nil
-	}
-	return s.sup.client.Embeddings(ctx, texts)
-}
-
-// resolveGGUFPath walks the precedence chain:
-//  1. CIX_GGUF_PATH (absolute path env override, validated by Stat).
-//  2. cfg.EmbeddingModel as absolute path — when the dashboard's "Local
-//     path" mode wrote it through to the runtime_settings row.
-//  3. Cached file under cfg.GGUFCacheDir/<safe-repo>/*.gguf when
-//     cfg.EmbeddingModel is an HF repo ID.
-//  4. CIX_BOOTSTRAP_GGUF_PATH one-shot import — copies the file into
-//     the cache layout, then behaves like step 3 forever after.
-//  5. HuggingFace download into the same cix cache (this is the path
-//     that actually writes to disk).
-//
-// PR-E removed the implicit `bench/results/reference_gguf_path.txt` dev
-// fallback that used to short-circuit step 2 — operators must now make
-// the choice explicitly via env or the dashboard. Only step 5 is
-// expensive; all others are stat-only or one-time copies.
-func resolveGGUFPath(ctx context.Context, cfg *config.Config, logger *slog.Logger) (string, error) {
-	if cfg.GGUFPath != "" {
-		if _, err := os.Stat(cfg.GGUFPath); err != nil {
-			return "", fmt.Errorf("CIX_GGUF_PATH=%s: %w", cfg.GGUFPath, err)
-		}
-		return cfg.GGUFPath, nil
-	}
-	// PR-E — the dashboard's "Local path" mode writes an absolute path into
-	// embedding_model. Treat it as such instead of trying to interpret it
-	// as an HF repo id (which would fail the slash check or, worse, send
-	// the path to api.huggingface.co).
-	if filepath.IsAbs(cfg.EmbeddingModel) {
-		if _, err := os.Stat(cfg.EmbeddingModel); err != nil {
-			return "", fmt.Errorf("embedding model path %s: %w", cfg.EmbeddingModel, err)
-		}
-		return cfg.EmbeddingModel, nil
-	}
-	// HF repo ids look like "<owner>/<repo>" — exactly one slash, no leading "/".
-	if !strings.Contains(cfg.EmbeddingModel, "/") {
-		return "", fmt.Errorf("embedding model %q is neither an absolute path nor an HF repo id (owner/repo)", cfg.EmbeddingModel)
-	}
-
-	// Cache-hit short-circuit: if we already downloaded a .gguf from this repo
-	// into the cache, use it — HF downloader would do the same stat first,
-	// but doing it here keeps the service silent in the happy path.
-	if cached := findCachedGGUF(cfg.GGUFCacheDir, cfg.EmbeddingModel); cached != "" {
-		logger.Info("using cached gguf", "path", cached)
-		return cached, nil
-	}
-
-	// CIX_BOOTSTRAP_GGUF_PATH — one-time import path. Used so a fresh
-	// container with a freshly-mounted cache volume doesn't have to
-	// re-download a 280 MB GGUF the operator already has on disk. Once
-	// the file lands in the cache layout, the next boot satisfies the
-	// findCachedGGUF branch above and the bootstrap path is never read
-	// again (idempotent — repeated boots with the same env are no-ops).
-	if cfg.BootstrapGGUFPath != "" {
-		imported, err := importBootstrapGGUF(cfg.GGUFCacheDir, cfg.EmbeddingModel, cfg.BootstrapGGUFPath, logger)
-		if err != nil {
-			logger.Warn("bootstrap gguf import failed; falling through to HF download",
-				"src", cfg.BootstrapGGUFPath, "err", err)
-		} else if imported != "" {
-			return imported, nil
-		}
-	}
-
-	return DownloadGGUF(ctx, cfg.EmbeddingModel, cfg.GGUFCacheDir, logger)
-}
-
-// importBootstrapGGUF copies srcPath into <cacheDir>/<safe_repo>/<basename>
-// atomically (write to .partial, fsync, rename). Returns the final path
-// on success, "" if the source is missing (caller falls through to HF
-// download), or an error for IO problems we should surface to the operator.
-//
-// safe_repo derived from the HF repo id (`owner/repo` → `owner__repo`)
-// to match DownloadGGUF's layout exactly — so subsequent boots' cache
-// scan finds the imported file under the same name HF would have used.
-func importBootstrapGGUF(cacheDir, repo, srcPath string, logger *slog.Logger) (string, error) {
-	if cacheDir == "" || repo == "" {
-		return "", nil
-	}
-	srcInfo, err := os.Stat(srcPath)
-	if err != nil {
-		// Missing file is not a hard error — the operator may have set
-		// the env optimistically with a path that lives on a host they
-		// haven't mounted yet. Let the caller fall through to download.
-		if os.IsNotExist(err) {
-			return "", nil
-		}
-		return "", fmt.Errorf("stat bootstrap gguf %s: %w", srcPath, err)
-	}
-	if srcInfo.IsDir() {
-		return "", fmt.Errorf("bootstrap gguf %s is a directory, expected file", srcPath)
-	}
-
-	safeRepo := strings.ReplaceAll(repo, "/", "__")
-	targetDir := filepath.Join(cacheDir, safeRepo)
-	if err := os.MkdirAll(targetDir, 0o755); err != nil {
-		return "", fmt.Errorf("mkdir cache dir: %w", err)
-	}
-	finalPath := filepath.Join(targetDir, filepath.Base(srcPath))
-
-	// Idempotency: if a previous boot already imported the same file,
-	// trust it — re-importing would be wasted IO and could race with a
-	// concurrent boot of a sibling container against a shared volume.
-	if _, err := os.Stat(finalPath); err == nil {
-		return finalPath, nil
-	}
-
-	logger.Info("importing bootstrap gguf into cache",
-		"src", srcPath, "dst", finalPath, "size", srcInfo.Size())
-
-	src, err := os.Open(srcPath)
-	if err != nil {
-		return "", fmt.Errorf("open bootstrap gguf: %w", err)
-	}
-	defer src.Close()
-
-	partial := finalPath + ".partial"
-	dst, err := os.OpenFile(partial, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
-	if err != nil {
-		return "", fmt.Errorf("create cache target: %w", err)
-	}
-
-	if _, err := io.Copy(dst, src); err != nil {
-		_ = dst.Close()
-		_ = os.Remove(partial)
-		return "", fmt.Errorf("copy bootstrap gguf: %w", err)
-	}
-	if err := dst.Sync(); err != nil {
-		_ = dst.Close()
-		_ = os.Remove(partial)
-		return "", fmt.Errorf("fsync bootstrap gguf: %w", err)
-	}
-	if err := dst.Close(); err != nil {
-		_ = os.Remove(partial)
-		return "", fmt.Errorf("close bootstrap gguf: %w", err)
-	}
-	if err := os.Rename(partial, finalPath); err != nil {
-		_ = os.Remove(partial)
-		return "", fmt.Errorf("atomic rename bootstrap gguf: %w", err)
-	}
-	logger.Info("bootstrap gguf imported", "path", finalPath)
-	return finalPath, nil
-}
-
-// findCachedGGUF looks for a previously-downloaded .gguf under the standard
-// cache layout produced by DownloadGGUF. Returns "" on any miss (including
-// IO errors) so the caller proceeds to the download path.
-func findCachedGGUF(cacheDir, repo string) string {
-	safeRepo := strings.ReplaceAll(repo, "/", "__")
-	dir := cacheDir + string(os.PathSeparator) + safeRepo
-	entries, err := os.ReadDir(dir)
-	if err != nil {
-		return ""
-	}
-	for _, e := range entries {
-		if e.IsDir() {
-			continue
-		}
-		name := e.Name()
-		if len(name) > 5 && strings.EqualFold(name[len(name)-5:], ".gguf") {
-			return dir + string(os.PathSeparator) + name
-		}
-	}
-	return ""
+	return cur, nil
 }
diff --git a/server/internal/httpapi/admin_server.go b/server/internal/httpapi/admin_server.go
index b579bc3..2b38384 100644
--- a/server/internal/httpapi/admin_server.go
+++ b/server/internal/httpapi/admin_server.go
@@ -263,15 +263,15 @@ func (s *Server) GetSidecarStatus(w http.ResponseWriter, r *http.Request) {
 
 	body := map[string]any{
 		"state":             st.State,
-		"ready":             st.Ready,
+		"ready":             st.State == "running",
 		"in_flight":         st.InFlight,
 		"restart_in_flight": restartInFlight.Load(),
 	}
 	if st.PID > 0 {
 		body["pid"] = st.PID
 	}
-	if st.Uptime > 0 {
-		body["uptime_seconds"] = int(st.Uptime.Seconds())
+	if st.UptimeSeconds > 0 {
+		body["uptime_seconds"] = st.UptimeSeconds
 	}
 	if st.Model != "" {
 		body["model"] = st.Model

From 5621461dc064630113713dc602682ed8733a3b18 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 13:35:14 +0100
Subject: [PATCH 02/34] feat(embeddings): admin switch endpoint +
 provider-aware status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add migration 12 to extend runtime_settings with two columns:
embedding_provider (kind selector) and embedding_provider_config
(per-provider JSON blob). Idempotent ALTER TABLE; API keys are not
persisted — providers read them live from env vars named in the
blob.

Add internal/embeddingscfg, a thin persistence layer over the two
new columns. Wire it into main.go's boot path: read the persisted
selection, seed the row from env-derived ollama defaults on fresh
installs, then hand the resolved Provider to embeddings.Service via
the new NewWithProvider constructor.

Add embeddings.Service.SwitchProvider — build new + Start + drain
queue + atomic swap + background Stop of the old. Used by the new
admin endpoints:

  GET  /api/v1/admin/embedding-providers          list kinds + schemas + env-key readiness
  GET  /api/v1/admin/embedding-providers/active   current kind + persisted config
  PUT  /api/v1/admin/embedding-providers/active   atomic switch
  POST /api/v1/admin/embedding-providers/{kind}/test pre-save sanity check

Extend /api/v1/status with embedding_provider and
embedding_provider_manages_process so the dashboard footer can
render the provider name as the label and a permanent green dot
for HTTP-only providers that have no managed process to die.

Behaviour unchanged for existing ollama installs.
---
 server/cmd/cix-server/main.go                 |  46 +++-
 server/internal/db/db.go                      |  43 ++++
 server/internal/db/schema.go                  |  11 +-
 server/internal/embeddings/service.go         | 106 +++++++-
 .../internal/embeddingscfg/embeddingscfg.go   | 118 +++++++++
 server/internal/httpapi/admin_embeddings.go   | 241 ++++++++++++++++++
 server/internal/httpapi/router.go             |  15 ++
 server/internal/httpapi/server.go             |  54 ++--
 8 files changed, 609 insertions(+), 25 deletions(-)
 create mode 100644 server/internal/embeddingscfg/embeddingscfg.go
 create mode 100644 server/internal/httpapi/admin_embeddings.go

diff --git a/server/cmd/cix-server/main.go b/server/cmd/cix-server/main.go
index c0b9697..06c0f40 100644
--- a/server/cmd/cix-server/main.go
+++ b/server/cmd/cix-server/main.go
@@ -21,6 +21,8 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/config"
 	"github.com/dvcdsys/code-index/server/internal/db"
 	"github.com/dvcdsys/code-index/server/internal/embeddings"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
 	"github.com/dvcdsys/code-index/server/internal/githubapi"
 	"github.com/dvcdsys/code-index/server/internal/githubtokens"
 	"github.com/dvcdsys/code-index/server/internal/groups"
@@ -169,7 +171,48 @@ func run() error {
 	// window for the HF download path on cold cache.
 	startupCtx, startupCancel := context.WithTimeout(context.Background(),
 		time.Duration(cfg.LlamaStartupSec)*time.Second+30*time.Second)
-	embedSvc, err := embeddings.New(startupCtx, cfg, logger)
+
+	// Bootstrap pluggable-provider selection. If the runtime_settings row
+	// has no embedding_provider yet (fresh install or pre-migration DB),
+	// seed it with the env-derived ollama config and use that for boot.
+	// Otherwise the persisted blob is authoritative — env vars (other
+	// than API-key envs that providers read live) are ignored.
+	embedCfgStore := embeddingscfg.New(database)
+	persistedProv, hasProv, err := embedCfgStore.Get(context.Background())
+	if err != nil {
+		startupCancel()
+		return fmt.Errorf("load embedding provider config: %w", err)
+	}
+	if !hasProv && cfg.EmbeddingsEnabled {
+		seed, serr := embeddings.BuildOllamaConfigFromEnv(cfg)
+		if serr != nil {
+			startupCancel()
+			return fmt.Errorf("seed embedding provider config: %w", serr)
+		}
+		if serr := embedCfgStore.Save(context.Background(),
+			embeddingscfg.Snapshot{Kind: "ollama", Config: seed}, ""); serr != nil {
+			logger.Warn("could not seed embedding provider config (continuing on env)", "err", serr)
+		}
+		persistedProv = embeddingscfg.Snapshot{Kind: "ollama", Config: seed}
+		hasProv = true
+	}
+
+	var embedSvc *embeddings.Service
+	if !cfg.EmbeddingsEnabled || !hasProv {
+		// Legacy / disabled path — fall back to env-only ollama wiring.
+		embedSvc, err = embeddings.New(startupCtx, cfg, logger)
+	} else {
+		prov, perr := provider.Build(startupCtx, persistedProv.Kind, persistedProv.Config, embeddings.EnvSecrets(), logger)
+		if perr != nil {
+			startupCancel()
+			return fmt.Errorf("build %s provider: %w", persistedProv.Kind, perr)
+		}
+		if perr := prov.Start(startupCtx); perr != nil {
+			startupCancel()
+			return fmt.Errorf("start %s provider: %w", persistedProv.Kind, perr)
+		}
+		embedSvc = embeddings.NewWithProvider(cfg, prov, logger)
+	}
 	startupCancel()
 	if err != nil {
 		return fmt.Errorf("embeddings: %w", err)
@@ -408,6 +451,7 @@ func run() error {
 		VectorStore:       vs,
 		Indexer:           idx,
 		RuntimeCfg:        rcfg,
+		EmbeddingsCfg:     embedCfgStore,
 		VersionCheck:      vcSvc,
 		Workspaces:        wsSvc,
 		GithubTokens:      ghSvc,
diff --git a/server/internal/db/db.go b/server/internal/db/db.go
index 3e2bd8a..ac443a2 100644
--- a/server/internal/db/db.go
+++ b/server/internal/db/db.go
@@ -64,6 +64,7 @@ var registeredMigrations = []migration{
 	{9, "git_repos_polling", func(db *sql.DB, _ OpenOptions) error { return migrateGitReposPolling(db) }},
 	{10, "auth_groups_ownership", func(db *sql.DB, _ OpenOptions) error { return migrateAuthGroupsOwnership(db) }},
 	{11, "project_machine_identity", func(db *sql.DB, _ OpenOptions) error { return migrateProjectMachineIdentity(db) }},
+	{12, "embedding_provider", func(db *sql.DB, _ OpenOptions) error { return migrateEmbeddingProvider(db) }},
 }
 
 // DriverName is the registered database/sql driver name for modernc.org/sqlite.
@@ -720,6 +721,48 @@ func migratePathHash(db *sql.DB) error {
 	return nil
 }
 
+// migrateEmbeddingProvider adds the pluggable-provider columns to
+// runtime_settings:
+//   - embedding_provider TEXT — kind selector ("ollama"/"openai"/"voyage")
+//   - embedding_provider_config TEXT — provider-specific JSON blob
+//
+// Only the schema changes: no values are written, so existing rows hold
+// NULL in both columns. Idempotent — PRAGMA table_info is read first
+// and each ALTER runs only when its column is absent.
+func migrateEmbeddingProvider(db *sql.DB) error {
+	rows, err := db.Query(`PRAGMA table_info(runtime_settings)`)
+	if err != nil {
+		return fmt.Errorf("table_info runtime_settings: %w", err)
+	}
+	defer rows.Close() // covers the early returns; a no-op once Next has exhausted the set
+	have := map[string]bool{}
+	for rows.Next() {
+		var (
+			cid, notnull, pk int
+			name, typ        string
+			dflt             sql.NullString
+		)
+		if err := rows.Scan(&cid, &name, &typ, &notnull, &dflt, &pk); err != nil {
+			return fmt.Errorf("scan table_info runtime_settings: %w", err)
+		}
+		have[name] = true
+	}
+	if err := rows.Err(); err != nil { // Next() == false also signals an aborted iteration
+		return fmt.Errorf("iterate table_info runtime_settings: %w", err)
+	}
+	if !have["embedding_provider"] {
+		if _, err := db.Exec(`ALTER TABLE runtime_settings ADD COLUMN embedding_provider TEXT`); err != nil {
+			return fmt.Errorf("add embedding_provider column: %w", err)
+		}
+	}
+	if !have["embedding_provider_config"] {
+		if _, err := db.Exec(`ALTER TABLE runtime_settings ADD COLUMN embedding_provider_config TEXT`); err != nil {
+			return fmt.Errorf("add embedding_provider_config column: %w", err)
+		}
+	}
+	return nil
+}
+
 // migrateIndexedWithModel adds projects.indexed_with_model to pre-PR-E
 // databases. Idempotent: PRAGMA table_info first; ALTER only if absent. Rows
 // stay NULL — the dashboard treats NULL as "indexed before drift tracking
diff --git a/server/internal/db/schema.go b/server/internal/db/schema.go
index cbe6646..791fd64 100644
--- a/server/internal/db/schema.go
+++ b/server/internal/db/schema.go
@@ -167,7 +167,16 @@ CREATE TABLE IF NOT EXISTS runtime_settings (
     max_embedding_concurrency INTEGER,
     llama_batch_size INTEGER,
     updated_at TEXT NOT NULL,
-    updated_by TEXT
+    updated_by TEXT,
+    -- Pluggable embedding provider (added in migration 12).
+    -- embedding_provider selects the active backend kind; if NULL the
+    -- server falls back to the env/recommended ollama defaults so old
+    -- installs stay on llama-server until the admin picks otherwise.
+    embedding_provider TEXT,
+    -- embedding_provider_config holds the provider-specific config as
+    -- a JSON blob (shape varies by provider). API keys are NEVER stored
+    -- here — providers read them live from env vars named in this blob.
+    embedding_provider_config TEXT
 );
 
 -- Workspaces group indexed projects (rows in the projects table,
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index 64288a2..27a5732 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -48,10 +48,10 @@ type Service struct {
 	current provider.Provider
 }
 
-// New constructs a Service. If cfg.EmbeddingsEnabled is false it returns
-// a disabled Service. Otherwise it builds the configured provider
-// (ollama only for now — the persisted-provider machinery lands in a
-// later phase) and blocks until Start succeeds.
+// New constructs a Service from the env-derived config. The legacy
+// entry point: builds an ollama provider with the env-supplied
+// defaults and blocks until Start succeeds. main.go calls this only on
+// the legacy/disabled path; otherwise it uses NewWithProvider.
 //
 // ctx governs startup only; Stop has its own context.
 func New(ctx context.Context, cfg *config.Config, logger *slog.Logger) (*Service, error) {
@@ -79,6 +79,104 @@ func New(ctx context.Context, cfg *config.Config, logger *slog.Logger) (*Service
 	}, nil
 }
 
+// NewWithProvider constructs a Service around an already-built
+// Provider. Used by main.go's boot path: it reads the persisted
+// provider snapshot, calls provider.Build, then hands the result to
+// this constructor. The Provider must already be Start()-ed.
+func NewWithProvider(cfg *config.Config, prov provider.Provider, logger *slog.Logger) *Service {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if !cfg.EmbeddingsEnabled {
+		return &Service{cfg: cfg, logger: logger, disabled: true}
+	}
+	return &Service{
+		cfg:     cfg,
+		logger:  logger,
+		queue:   NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second),
+		current: prov,
+	}
+}
+
+// BuildOllamaConfigFromEnv produces the ollama provider config blob
+// derived from env (used by main.go to seed the persisted row on
+// first boot and by tests that want a "live env-default" snapshot).
+func BuildOllamaConfigFromEnv(cfg *config.Config) ([]byte, error) {
+	c := ollama.Config{
+		Model:         cfg.EmbeddingModel,
+		GGUFPath:      cfg.GGUFPath,
+		CacheDir:      cfg.GGUFCacheDir,
+		BootstrapPath: cfg.BootstrapGGUFPath,
+		BinDir:        cfg.LlamaBinDir,
+		SocketPath:    cfg.LlamaSocketPath,
+		Transport:     cfg.LlamaTransport,
+		CtxSize:       cfg.LlamaCtxSize,
+		NGpuLayers:    cfg.LlamaNGpuLayers,
+		NThreads:      cfg.LlamaNThreads,
+		BatchSize:     cfg.LlamaBatchSize,
+		StartupSec:    cfg.LlamaStartupSec,
+	}
+	return json.Marshal(c)
+}
+
+// EnvSecrets returns the production SecretLookup: os.LookupEnv. main.go
+// and the admin handlers pass it to provider.Build / Service.SwitchProvider.
+func EnvSecrets() provider.SecretLookup { return envSecrets }
+
+// SwitchProvider replaces the active provider. Steps:
+//  1. Build the new provider from kind + cfgBytes.
+//  2. Start it (validates config / connectivity).
+//  3. Drain the queue (block new acquires, wait up to 30s).
+//  4. Swap current to new under the mutex, then resume the queue.
+//  5. Stop the old provider on a separate goroutine so a slow SIGTERM
+//     does not hold the admin request.
+//
+// If step 1 or 2 fails, the old provider stays active and the error
+// is returned to the caller. If step 3 times out we proceed anyway,
+// favouring availability over a clean hand-off. Returns ErrDisabled
+// when called on a nil or disabled Service.
+func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []byte) error {
+	if s == nil || s.disabled {
+		return ErrDisabled
+	}
+
+	newProv, err := provider.Build(ctx, kind, cfgBytes, envSecrets, s.logger)
+	if err != nil {
+		return fmt.Errorf("build %s provider: %w", kind, err)
+	}
+	if err := newProv.Start(ctx); err != nil {
+		return fmt.Errorf("start %s provider: %w", kind, err)
+	}
+
+	s.queue.BlockNew()
+	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
+	if derr := s.queue.WaitDrain(drainCtx); derr != nil {
+		s.logger.Warn("embeddings: drain timed out during switch; proceeding anyway",
+			"in_flight", s.queue.InFlight(), "err", derr,
+		)
+	}
+	drainCancel()
+	// Swap while the queue is still blocked; resuming first would let a newly admitted call reach the old provider.
+	s.mu.Lock()
+	old := s.current
+	s.current = newProv
+	s.mu.Unlock()
+	s.queue.Resume()
+
+	if old != nil {
+		go func(p provider.Provider) {
+			stopCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+			defer cancel()
+			if err := p.Stop(stopCtx); err != nil {
+				s.logger.Warn("embeddings: old provider Stop returned error",
+					"kind", p.Kind(), "err", err)
+			}
+		}(old)
+	}
+	s.logger.Info("embeddings: switched provider", "kind", kind, "id", newProv.ID())
+	return nil
+}
+
 // buildOllamaFromConfig assembles an ollama.Provider out of the env-
 // derived *config.Config. Bridges the legacy bootstrap path until the
 // provider config persists into runtime_settings (Phase 6).
diff --git a/server/internal/embeddingscfg/embeddingscfg.go b/server/internal/embeddingscfg/embeddingscfg.go
new file mode 100644
index 0000000..0e869ef
--- /dev/null
+++ b/server/internal/embeddingscfg/embeddingscfg.go
@@ -0,0 +1,118 @@
+// Package embeddingscfg persists the pluggable-provider selection +
+// per-provider config blob in runtime_settings. It sits alongside
+// runtimecfg (which still owns the ollama-tuning subset for backward
+// compat) but exposes a separate Service so the admin endpoints have
+// a single clean surface for provider switching.
+//
+// Resolution flow at boot:
+//
+//	row.embedding_provider IS NULL  → seed from CIX_EMBEDDING_PROVIDER
+//	                                  (default "ollama") + env-derived
+//	                                  ollama config blob, write to DB.
+//	row.embedding_provider IS NOT NULL → load from DB verbatim. Env
+//	                                     vars (other than API-key envs
+//	                                     read live by providers) are
+//	                                     ignored — DB is authoritative.
+package embeddingscfg
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+)
+
+// Snapshot is what's currently persisted. Returned by Get; supplied
+// to Service.Save for atomic updates.
+type Snapshot struct {
+	Kind   string          // "ollama" | "openai" | "voyage"
+	Config json.RawMessage // provider-specific blob; opaque to this package
+}
+
+// Service is the persistence-only layer. Stateless aside from the
+// *sql.DB it wraps.
+type Service struct {
+	db *sql.DB
+}
+
+// New constructs a Service over the given *sql.DB.
+func New(db *sql.DB) *Service { return &Service{db: db} }
+
+// Get returns the persisted provider selection. (Snapshot{}, false, nil)
+// means the runtime_settings row has no provider yet — the caller
+// should seed it from env.
+func (s *Service) Get(ctx context.Context) (Snapshot, bool, error) {
+	var (
+		kind sql.NullString
+		cfg  sql.NullString
+	)
+	err := s.db.QueryRowContext(ctx, `
+		SELECT embedding_provider, embedding_provider_config
+		FROM runtime_settings WHERE id = 1
+	`).Scan(&kind, &cfg)
+	if errors.Is(err, sql.ErrNoRows) {
+		return Snapshot{}, false, nil
+	}
+	if err != nil {
+		return Snapshot{}, false, fmt.Errorf("select embedding_provider: %w", err)
+	}
+	if !kind.Valid || kind.String == "" {
+		return Snapshot{}, false, nil
+	}
+	snap := Snapshot{Kind: kind.String}
+	if cfg.Valid && cfg.String != "" {
+		snap.Config = json.RawMessage(cfg.String)
+	}
+	return snap, true, nil
+}
+
+// Save persists kind + config bytes. Inserts the row when absent
+// (CHECK(id=1) keeps it single-row). updated_by labels the actor for
+// audit; pass "" when the change is server-internal (bootstrap seed).
+func (s *Service) Save(ctx context.Context, snap Snapshot, updatedBy string) error {
+	if snap.Kind == "" {
+		return errors.New("embeddingscfg: kind is required")
+	}
+	if len(snap.Config) > 0 {
+		if !json.Valid(snap.Config) {
+			return errors.New("embeddingscfg: config is not valid JSON")
+		}
+	}
+	now := time.Now().UTC().Format(time.RFC3339Nano)
+
+	// Try UPDATE first; if no rows affected the row doesn't exist yet
+	// and we INSERT with the required CHECK(id=1).
+	res, err := s.db.ExecContext(ctx, `
+		UPDATE runtime_settings
+		SET embedding_provider = ?, embedding_provider_config = ?,
+		    updated_at = ?, updated_by = ?
+		WHERE id = 1
+	`, snap.Kind, string(snap.Config), now, nullableUpdater(updatedBy))
+	if err != nil {
+		return fmt.Errorf("update embedding_provider: %w", err)
+	}
+	n, _ := res.RowsAffected()
+	if n > 0 {
+		return nil
+	}
+
+	_, err = s.db.ExecContext(ctx, `
+		INSERT INTO runtime_settings (
+			id, embedding_provider, embedding_provider_config,
+			updated_at, updated_by
+		) VALUES (1, ?, ?, ?, ?)
+	`, snap.Kind, string(snap.Config), now, nullableUpdater(updatedBy))
+	if err != nil {
+		return fmt.Errorf("insert embedding_provider: %w", err)
+	}
+	return nil
+}
+
+func nullableUpdater(v string) any {
+	if v == "" {
+		return nil
+	}
+	return v
+}
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
new file mode 100644
index 0000000..fd7d795
--- /dev/null
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -0,0 +1,241 @@
+// admin_embeddings.go implements the pluggable-embedding-provider
+// admin surface:
+//
+//	GET  /api/v1/admin/embedding-providers          — registered kinds + schemas + env-key readiness
+//	GET  /api/v1/admin/embedding-providers/active   — currently active kind + persisted config
+//	PUT  /api/v1/admin/embedding-providers/active   — atomic switch (validate → persist → swap)
+//	POST /api/v1/admin/embedding-providers/{kind}/test — pre-save sanity check using submitted config
+//
+// All routes are admin-only (mustBeAdmin). The handlers below are
+// mounted directly onto the chi router in router.go — they are not
+// part of the OpenAPI-generated handler set so the openapi.yaml /
+// regenerated openapi.gen.go can be updated independently.
+package httpapi
+
+import (
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"os"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
+	"github.com/go-chi/chi/v5"
+)
+
+// providerInfoPayload is the per-kind entry in GET /embedding-providers.
+type providerInfoPayload struct {
+	Kind       string             `json:"kind"`        // registered provider kind
+	Schema     json.RawMessage    `json:"schema"`      // the factory's SchemaJSON() output, passed through as-is
+	SecretEnvs []secretEnvPayload `json:"secret_envs"` // env-var names the factory reports, with presence flags
+}
+
+// secretEnvPayload tells the dashboard which env-var names a provider
+// reads and whether they are currently set on the server. Used to
+// render the "set CIX_VOYAGE_API_KEY before saving" banner.
+type secretEnvPayload struct {
+	Name string `json:"name"`
+	Set  bool   `json:"set"`
+}
+
+// activeProviderPayload is the GET /embedding-providers/active body.
+// Config is the persisted JSON blob, returned verbatim — providers
+// store env-key NAMES rather than values, so this is safe to render
+// to admin clients.
+type activeProviderPayload struct {
+	Kind   string          `json:"kind"`
+	Config json.RawMessage `json:"config"`
+	// ID is Provider.ID() — surfaced so the UI can compare against
+	// each project's indexed_with_model and render the stale-model
+	// badge without going through /status.
+	ID string `json:"id"`
+}
+
+// switchProviderRequest is the PUT /embedding-providers/active body.
+type switchProviderRequest struct {
+	Kind   string          `json:"kind"`
+	Config json.RawMessage `json:"config"`
+}
+
+// testProviderResponse is what /test returns on success.
+type testProviderResponse struct {
+	OK        bool `json:"ok"`
+	Dimension int  `json:"dimension,omitempty"`
+}
+
+// ListEmbeddingProviders — GET /api/v1/admin/embedding-providers.
+func (s *Server) ListEmbeddingProviders(w http.ResponseWriter, r *http.Request) {
+	if _, ok := s.mustBeAdmin(w, r); !ok {
+		return
+	}
+	kinds := provider.Kinds()
+	out := make([]providerInfoPayload, 0, len(kinds))
+	for _, kind := range kinds {
+		f, ok := provider.Lookup(kind)
+		if !ok {
+			continue
+		}
+		envs := f.SecretEnvVars()
+		envPayload := make([]secretEnvPayload, 0, len(envs))
+		for _, name := range envs {
+			_, present := os.LookupEnv(name)
+			envPayload = append(envPayload, secretEnvPayload{Name: name, Set: present})
+		}
+		out = append(out, providerInfoPayload{
+			Kind:       kind,
+			Schema:     f.SchemaJSON(),
+			SecretEnvs: envPayload,
+		})
+	}
+	writeJSON(w, http.StatusOK, map[string]any{"providers": out})
+}
+
+// GetActiveEmbeddingProvider — GET /api/v1/admin/embedding-providers/active.
+func (s *Server) GetActiveEmbeddingProvider(w http.ResponseWriter, r *http.Request) {
+	if _, ok := s.mustBeAdmin(w, r); !ok {
+		return
+	}
+	embedSvc, ok := s.Deps.EmbeddingSvc.(*embeddings.Service)
+	if !ok || embedSvc == nil {
+		writeError(w, http.StatusServiceUnavailable, "embeddings service not available")
+		return
+	}
+	if s.Deps.EmbeddingsCfg == nil {
+		writeError(w, http.StatusServiceUnavailable, "embedding provider store not available")
+		return
+	}
+	snap, has, err := s.Deps.EmbeddingsCfg.Get(r.Context())
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, "load active provider: "+err.Error())
+		return
+	}
+	if !has {
+		// No persisted row → report the live provider's kind; Config
+		// stays nil and is serialised as JSON null. This handles
+		// installs where the DB hasn't been seeded yet.
+		snap = embeddingscfg.Snapshot{Kind: embedSvc.CurrentKind()}
+	}
+	writeJSON(w, http.StatusOK, activeProviderPayload{
+		Kind:   snap.Kind,
+		Config: snap.Config,
+		ID:     embedSvc.EmbeddingModel(),
+	})
+}
+
+// SwitchEmbeddingProvider — PUT /api/v1/admin/embedding-providers/active.
+//
+// Builds + Starts the requested provider and swaps the live Service
+// over; only a successful swap is persisted. If the switch fails, the
+// running provider and the persisted selection both stay untouched.
+func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request) {
+	user, ok := s.mustBeAdmin(w, r)
+	if !ok {
+		return
+	}
+	embedSvc, ok := s.Deps.EmbeddingSvc.(*embeddings.Service)
+	if !ok || embedSvc == nil {
+		writeError(w, http.StatusServiceUnavailable, "embeddings service not available")
+		return
+	}
+	if s.Deps.EmbeddingsCfg == nil {
+		writeError(w, http.StatusServiceUnavailable, "embedding provider store not available")
+		return
+	}
+
+	body, err := io.ReadAll(io.LimitReader(r.Body, 64*1024))
+	if err != nil {
+		writeError(w, http.StatusBadRequest, "read body: "+err.Error())
+		return
+	}
+	var req switchProviderRequest
+	if err := json.Unmarshal(body, &req); err != nil {
+		writeError(w, http.StatusBadRequest, "decode body: "+err.Error())
+		return
+	}
+	if req.Kind == "" {
+		writeError(w, http.StatusBadRequest, "kind is required")
+		return
+	}
+	if _, ok := provider.Lookup(req.Kind); !ok {
+		writeError(w, http.StatusBadRequest, "unknown provider kind: "+req.Kind)
+		return
+	}
+	if len(req.Config) == 0 {
+		writeError(w, http.StatusBadRequest, "config is required")
+		return
+	}
+
+	// Swap BEFORE persisting: SwitchProvider builds and Starts the
+	// candidate, so a config that cannot start never reaches the DB
+	// (boot aborts when the persisted provider fails to build or start).
+	if err := embedSvc.SwitchProvider(r.Context(), req.Kind, req.Config); err != nil {
+		writeError(w, http.StatusInternalServerError, "switch provider: "+err.Error())
+		return
+	}
+
+	if err := s.Deps.EmbeddingsCfg.Save(r.Context(), embeddingscfg.Snapshot{
+		Kind:   req.Kind,
+		Config: req.Config,
+	}, user.User.ID); err != nil {
+		writeError(w, http.StatusInternalServerError, "provider switched but not persisted: "+err.Error())
+		return
+	}
+
+	writeJSON(w, http.StatusAccepted, activeProviderPayload{
+		Kind:   req.Kind,
+		Config: req.Config,
+		ID:     embedSvc.EmbeddingModel(),
+	})
+}
+
+// TestEmbeddingProvider — POST /api/v1/admin/embedding-providers/{kind}/test.
+//
+// Builds a throw-away provider from the submitted config (without
+// persisting it), Starts it, then Stops it. Returns the detected
+// dimension and a success flag, or a typed error message the
+// dashboard can render verbatim.
+func (s *Server) TestEmbeddingProvider(w http.ResponseWriter, r *http.Request) {
+	if _, ok := s.mustBeAdmin(w, r); !ok {
+		return
+	}
+	kind := chi.URLParam(r, "kind")
+	if kind == "" {
+		writeError(w, http.StatusBadRequest, "kind is required")
+		return
+	}
+	if _, ok := provider.Lookup(kind); !ok {
+		writeError(w, http.StatusBadRequest, "unknown provider kind: "+kind)
+		return
+	}
+
+	body, err := io.ReadAll(io.LimitReader(r.Body, 64*1024))
+	if err != nil {
+		writeError(w, http.StatusBadRequest, "read body: "+err.Error())
+		return
+	}
+	if len(body) == 0 {
+		writeError(w, http.StatusBadRequest, "config body is required")
+		return
+	}
+
+	prov, err := provider.Build(r.Context(), kind, body, embeddings.EnvSecrets(), s.Deps.Logger)
+	if err != nil {
+		writeError(w, http.StatusBadRequest, "build provider: "+err.Error())
+		return
+	}
+	if err := prov.Start(r.Context()); err != nil {
+		// Distinguish missing-key from other failures — the dashboard
+		// renders these differently (banner vs error toast).
+		if errors.Is(err, provider.ErrMissingAPIKey) {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		writeError(w, http.StatusBadGateway, err.Error())
+		return
+	}
+	dim := prov.Dimension()
+	_ = prov.Stop(r.Context())
+	writeJSON(w, http.StatusOK, testProviderResponse{OK: true, Dimension: dim})
+}
diff --git a/server/internal/httpapi/router.go b/server/internal/httpapi/router.go
index 2be7bf5..664cdbd 100644
--- a/server/internal/httpapi/router.go
+++ b/server/internal/httpapi/router.go
@@ -13,6 +13,7 @@ import (
 
 	"github.com/dvcdsys/code-index/server/internal/apikeys"
 	"github.com/dvcdsys/code-index/server/internal/embeddings"
+	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
 	"github.com/dvcdsys/code-index/server/internal/gitrepos"
 	"github.com/dvcdsys/code-index/server/internal/githubtokens"
 	"github.com/dvcdsys/code-index/server/internal/groups"
@@ -81,6 +82,11 @@ type Deps struct {
 	// RuntimeCfg backs the dashboard's /admin/runtime-config endpoints. Nil
 	// in router-only tests; admin handlers return 503 when absent.
 	RuntimeCfg *runtimecfg.Service
+	// EmbeddingsCfg persists the pluggable-provider selection + config
+	// blob in runtime_settings. Read by the /embedding-providers admin
+	// handlers; nil in router-only tests (those handlers 503 when
+	// absent).
+	EmbeddingsCfg *embeddingscfg.Service
 	// VersionCheck polls GitHub for newer server releases. Nil = feature
 	// off; GetStatus then omits the version-check fields entirely.
 	VersionCheck *versioncheck.Service
@@ -184,5 +190,14 @@ func NewRouter(d Deps) http.Handler {
 	// one chi route per OpenAPI operation, dispatching to Server methods.
 	openapi.HandlerFromMux(srv, r)
 
+	// Embedding-provider admin endpoints — mounted directly because
+	// they are not yet in openapi.yaml. The handlers each gate on
+	// mustBeAdmin; nothing reaches them without an admin session /
+	// API key.
+	r.Get("/api/v1/admin/embedding-providers", srv.ListEmbeddingProviders)
+	r.Get("/api/v1/admin/embedding-providers/active", srv.GetActiveEmbeddingProvider)
+	r.Put("/api/v1/admin/embedding-providers/active", srv.SwitchEmbeddingProvider)
+	r.Post("/api/v1/admin/embedding-providers/{kind}/test", srv.TestEmbeddingProvider)
+
 	return r
 }
diff --git a/server/internal/httpapi/server.go b/server/internal/httpapi/server.go
index 6030acf..6aba3e0 100644
--- a/server/internal/httpapi/server.go
+++ b/server/internal/httpapi/server.go
@@ -72,31 +72,47 @@ func (s *Server) GetStatus(w http.ResponseWriter, r *http.Request) {
 		_ = s.Deps.DB.QueryRowContext(r.Context(),
 			`SELECT COUNT(*) FROM index_runs WHERE status = 'running'`).Scan(&activeJobs)
 	}
+	// Provider-aware status. Footer uses these fields:
+	//   embedding_provider — active provider kind ("ollama"/"openai"/"voyage")
+	//   embedding_provider_manages_process — true only for ollama; the
+	//     footer renders a red/green dot based on liveness when true,
+	//     and a permanent green dot otherwise (HTTP-only providers
+	//     have no managed process to "die").
+	//   embedding_model — Provider.ID() of the live active provider
+	//   model_loaded — true when the active provider reports Ready;
+	//     for non-managed providers we don't ping per /status poll so
+	//     it stays true unless the env-key check inside Ready fails.
+	providerKind := ""
+	managesProcess := false
 	modelLoaded := false
-	if s.Deps.EmbeddingSvc != nil {
-		readyCtx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
-		modelLoaded = s.Deps.EmbeddingSvc.Ready(readyCtx) == nil
-		cancel()
-	}
-	// PR-E — embedding_model must reflect the LIVE config (after any
-	// dashboard runtime override + restart), not the boot-time value
-	// stamped into Deps. Fall back to Deps when the service is a fake or
-	// disabled, so test fixtures still get a stable string.
 	model := s.Deps.EmbeddingModel
 	if es, ok := s.Deps.EmbeddingSvc.(*embeddings.Service); ok && es != nil {
-		if cfg := es.Config(); cfg != nil && cfg.EmbeddingModel != "" {
-			model = cfg.EmbeddingModel
+		providerKind = es.CurrentKind()
+		if id := es.EmbeddingModel(); id != "" {
+			model = id
 		}
+		st := es.Status()
+		managesProcess = st.ManagesProcess
+		readyCtx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
+		modelLoaded = es.Ready(readyCtx) == nil
+		cancel()
+	} else if s.Deps.EmbeddingSvc != nil {
+		// Test fakes that only satisfy EmbeddingsQuerier.
+		readyCtx, cancel := context.WithTimeout(r.Context(), 500*time.Millisecond)
+		modelLoaded = s.Deps.EmbeddingSvc.Ready(readyCtx) == nil
+		cancel()
 	}
 	resp := map[string]any{
-		"status":               "ok",
-		"backend":              s.Deps.Backend,
-		"server_version":       s.Deps.ServerVersion,
-		"api_version":          s.Deps.APIVersion,
-		"model_loaded":         modelLoaded,
-		"embedding_model":      model,
-		"projects":             projectCount,
-		"active_indexing_jobs": activeJobs,
+		"status":                              "ok",
+		"backend":                             s.Deps.Backend,
+		"server_version":                      s.Deps.ServerVersion,
+		"api_version":                         s.Deps.APIVersion,
+		"model_loaded":                        modelLoaded,
+		"embedding_model":                     model,
+		"embedding_provider":                  providerKind,
+		"embedding_provider_manages_process":  managesProcess,
+		"projects":                            projectCount,
+		"active_indexing_jobs":                activeJobs,
 	}
 	// Version-check fields — folded in only when the service is wired.
 	// `update_available` is always present (false when unknown) so the

From faca60b31fd2a62e348fe6a4e196b6565cda146b Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 13:37:52 +0100
Subject: [PATCH 03/34] test(embeddings): cover openai/voyage providers + admin
 endpoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provider-level: stub httptest.Server-based tests for the openai and
voyage clients. Cover request shape, response ordering, HTTP-error
surfacing, missing-API-key behavior, ID() fingerprinting, Voyage
input_type query/document selection, and the int8 dequantize path
(127 → ~1.0, -127 → ~-1.0, etc).

Admin handler: gating tests for the new endpoints — admin sees all
three kinds in the list with non-empty schemas, viewer gets 403,
unknown kind is rejected, /test with a missing API key returns 400.
---
 .../embeddings/provider/openai/openai_test.go | 157 ++++++++++++++++++
 .../embeddings/provider/voyage/voyage_test.go | 133 +++++++++++++++
 .../internal/httpapi/admin_embeddings_test.go | 129 ++++++++++++++
 3 files changed, 419 insertions(+)
 create mode 100644 server/internal/embeddings/provider/openai/openai_test.go
 create mode 100644 server/internal/embeddings/provider/voyage/voyage_test.go
 create mode 100644 server/internal/httpapi/admin_embeddings_test.go

diff --git a/server/internal/embeddings/provider/openai/openai_test.go b/server/internal/embeddings/provider/openai/openai_test.go
new file mode 100644
index 0000000..b1d7d4a
--- /dev/null
+++ b/server/internal/embeddings/provider/openai/openai_test.go
@@ -0,0 +1,157 @@
+package openai
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// fixedSecrets returns a SecretLookup that resolves a single
+// (key, value) pair. Used by tests that don't want to touch
+// os.Setenv (which would race other parallel tests).
+func fixedSecrets(key, value string) provider.SecretLookup {
+	return func(name string) (string, bool) {
+		if name == key {
+			return value, true
+		}
+		return "", false
+	}
+}
+
+// stubServer returns an httptest.Server that answers every POST
+// /v1/embeddings hit with the given status and body. Only the first
+// request body is recorded, on the returned channel, for assertions.
+func stubServer(t *testing.T, status int, body string) (*httptest.Server, <-chan []byte) {
+	t.Helper()
+	gotBody := make(chan []byte, 1)
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost || !strings.HasSuffix(r.URL.Path, "/v1/embeddings") {
+			http.NotFound(w, r)
+			return
+		}
+		if got := r.Header.Get("Authorization"); !strings.HasPrefix(got, "Bearer ") {
+			t.Errorf("missing Bearer auth header; got %q", got)
+		}
+		raw, _ := io.ReadAll(r.Body)
+		select {
+		case gotBody <- raw:
+		default:
+		}
+		w.WriteHeader(status)
+		_, _ = io.WriteString(w, body)
+	}))
+	t.Cleanup(srv.Close)
+	return srv, gotBody
+}
+
+func TestEmbedDocumentsBatch(t *testing.T) {
+	srv, gotBody := stubServer(t, http.StatusOK, `{
+		"data": [
+			{"index": 1, "embedding": [0.2, 0.3]},
+			{"index": 0, "embedding": [0.1, 0.4]}
+		]
+	}`)
+	p := New(Config{
+		BaseURL:   srv.URL,
+		Model:     "text-embedding-3-small",
+		APIKeyEnv: "TEST_KEY",
+	}, fixedSecrets("TEST_KEY", "sk-test"), nil)
+
+	vecs, err := p.EmbedDocuments(context.Background(), []string{"first", "second"})
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	// Result must be in input order even though the server returned them swapped.
+	if vecs[0][0] != 0.1 || vecs[1][0] != 0.2 {
+		t.Fatalf("ordering wrong: got %v", vecs)
+	}
+
+	var req embedRequest
+	if err := json.Unmarshal(<-gotBody, &req); err != nil {
+		t.Fatalf("decode request: %v", err)
+	}
+	if req.Model != "text-embedding-3-small" {
+		t.Errorf("model %q", req.Model)
+	}
+	if len(req.Input) != 2 || req.Input[0] != "first" {
+		t.Errorf("input %v", req.Input)
+	}
+}
+
+func TestEmbedDocumentsHTTPError(t *testing.T) {
+	srv, _ := stubServer(t, http.StatusUnauthorized, `{"error":"bad key"}`)
+	p := New(Config{
+		BaseURL:   srv.URL,
+		Model:     "m",
+		APIKeyEnv: "K",
+	}, fixedSecrets("K", "v"), nil)
+
+	_, err := p.EmbedDocuments(context.Background(), []string{"x"})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	if !strings.Contains(err.Error(), "401") {
+		t.Errorf("error should surface status code: %v", err)
+	}
+}
+
+func TestMissingAPIKey(t *testing.T) {
+	p := New(Config{
+		BaseURL:   "http://unused",
+		Model:     "m",
+		APIKeyEnv: "MISSING_VAR",
+	}, fixedSecrets("OTHER", "v"), nil)
+
+	_, err := p.EmbedDocuments(context.Background(), []string{"x"})
+	if !errors.Is(err, provider.ErrMissingAPIKey) {
+		t.Fatalf("expected ErrMissingAPIKey, got %v", err)
+	}
+
+	st := p.Status()
+	if st.State != provider.StateFailed {
+		t.Errorf("Status state %q, expected failed", st.State)
+	}
+}
+
+func TestIDFingerprint(t *testing.T) {
+	cases := []struct {
+		cfg  Config
+		want string
+	}{
+		{Config{Model: "m"}, "openai:m"},
+		{Config{Model: "m", Dimensions: 512}, "openai:m:512"},
+	}
+	for _, tc := range cases {
+		p := New(tc.cfg, fixedSecrets("", ""), nil)
+		if got := p.ID(); got != tc.want {
+			t.Errorf("ID() = %q, want %q", got, tc.want)
+		}
+	}
+}
+
+func TestEmbedDocumentsSendsDimensions(t *testing.T) {
+	srv, gotBody := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [0.1]}]
+	}`)
+	p := New(Config{
+		BaseURL:    srv.URL,
+		Model:      "m",
+		APIKeyEnv:  "K",
+		Dimensions: 256,
+	}, fixedSecrets("K", "v"), nil)
+	if _, err := p.EmbedDocuments(context.Background(), []string{"x"}); err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	var req embedRequest
+	_ = json.Unmarshal(<-gotBody, &req)
+	if req.Dimensions != 256 {
+		t.Errorf("dimensions should be 256, got %d", req.Dimensions)
+	}
+}
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
new file mode 100644
index 0000000..40949b1
--- /dev/null
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -0,0 +1,133 @@
+package voyage
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+func fixedSecrets(key, value string) provider.SecretLookup {
+	return func(name string) (string, bool) {
+		if name == key {
+			return value, true
+		}
+		return "", false
+	}
+}
+
+func stubServer(t *testing.T, status int, body string) (*httptest.Server, <-chan []byte) {
+	t.Helper()
+	got := make(chan []byte, 1)
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if !strings.HasSuffix(r.URL.Path, "/v1/embeddings") {
+			http.NotFound(w, r)
+			return
+		}
+		raw, _ := io.ReadAll(r.Body)
+		select {
+		case got <- raw:
+		default:
+		}
+		w.WriteHeader(status)
+		_, _ = io.WriteString(w, body)
+	}))
+	t.Cleanup(srv.Close)
+	return srv, got
+}
+
+func TestEmbedQuerySendsInputTypeQuery(t *testing.T) {
+	srv, gotBody := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [0.1, 0.2]}],
+		"model": "voyage-code-3",
+		"usage": {"total_tokens": 3}
+	}`)
+	p := New(Config{
+		BaseURL:         srv.URL,
+		APIKeyEnv:       "K",
+		Model:           "voyage-code-3",
+		OutputDimension: 1024,
+		OutputDtype:     DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+
+	if _, err := p.EmbedQuery(context.Background(), "where is X"); err != nil {
+		t.Fatalf("EmbedQuery: %v", err)
+	}
+	var req embedRequest
+	_ = json.Unmarshal(<-gotBody, &req)
+	if req.InputType != "query" {
+		t.Errorf("input_type %q; expected query", req.InputType)
+	}
+	if req.OutputDimension != 1024 {
+		t.Errorf("output_dimension %d", req.OutputDimension)
+	}
+}
+
+func TestEmbedDocumentsSendsInputTypeDocument(t *testing.T) {
+	srv, gotBody := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [0.1]}],
+		"usage": {"total_tokens": 1}
+	}`)
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m", OutputDtype: DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+	_, _ = p.EmbedDocuments(context.Background(), []string{"x"})
+	var req embedRequest
+	_ = json.Unmarshal(<-gotBody, &req)
+	if req.InputType != "document" {
+		t.Errorf("input_type %q; expected document", req.InputType)
+	}
+}
+
+func TestInt8Dequantize(t *testing.T) {
+	// int8 vector [127, -127, 0, 64] dequantized to float ~ [1.0, -1.0, 0.0, ~0.504]
+	srv, _ := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [127, -127, 0, 64]}],
+		"usage": {"total_tokens": 1}
+	}`)
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m", OutputDtype: DtypeInt8,
+	}, fixedSecrets("K", "v"), nil)
+	vecs, err := p.EmbedDocuments(context.Background(), []string{"x"})
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if len(vecs) != 1 || len(vecs[0]) != 4 {
+		t.Fatalf("shape wrong: %v", vecs)
+	}
+	v := vecs[0]
+	if v[0] < 0.999 || v[1] > -0.999 || v[2] != 0 || v[3] < 0.50 || v[3] > 0.51 {
+		t.Errorf("dequantized values out of range: %v", v)
+	}
+}
+
+func TestIDFingerprintIncludesAll(t *testing.T) {
+	p := New(Config{
+		Model: "voyage-code-3", APIKeyEnv: "K",
+		OutputDimension: 1024, OutputDtype: DtypeInt8,
+	}, fixedSecrets("K", "v"), nil)
+	want := "voyage:voyage-code-3:1024:int8"
+	if got := p.ID(); got != want {
+		t.Errorf("ID() = %q, want %q", got, want)
+	}
+}
+
+func TestUsageDecodesWithoutPromptTokens(t *testing.T) {
+	// Voyage's usage object lacks prompt_tokens — make sure decode doesn't error.
+	srv, _ := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [0.1]}],
+		"model": "voyage-3",
+		"usage": {"total_tokens": 7}
+	}`)
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m", OutputDtype: DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+	if _, err := p.EmbedDocuments(context.Background(), []string{"x"}); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+}
diff --git a/server/internal/httpapi/admin_embeddings_test.go b/server/internal/httpapi/admin_embeddings_test.go
new file mode 100644
index 0000000..45d7aa6
--- /dev/null
+++ b/server/internal/httpapi/admin_embeddings_test.go
@@ -0,0 +1,129 @@
+package httpapi
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
+)
+
+// TestListEmbeddingProviders_AdminSeesAllThree confirms the registered
+// kinds (ollama, openai, voyage) show up with non-empty schemas. The
+// secret-env readiness flags are decoded but not asserted on.
+func TestListEmbeddingProviders_AdminSeesAllThree(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := adminCookie(t, f)
+
+	req := withCookie(httptest.NewRequest(http.MethodGet, "/api/v1/admin/embedding-providers", nil), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusOK {
+		t.Fatalf("status = %d (%s)", rr.Code, rr.Body.String())
+	}
+
+	var body struct {
+		Providers []struct {
+			Kind       string          `json:"kind"`
+			Schema     json.RawMessage `json:"schema"`
+			SecretEnvs []struct {
+				Name string `json:"name"`
+				Set  bool   `json:"set"`
+			} `json:"secret_envs"`
+		} `json:"providers"`
+	}
+	if err := json.Unmarshal(rr.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode: %v", err)
+	}
+	got := map[string]bool{}
+	for _, p := range body.Providers {
+		got[p.Kind] = true
+		if len(p.Schema) == 0 {
+			t.Errorf("kind %s: empty schema", p.Kind)
+		}
+	}
+	for _, want := range []string{"ollama", "openai", "voyage"} {
+		if !got[want] {
+			t.Errorf("missing kind %q in providers list", want)
+		}
+	}
+}
+
+func TestListEmbeddingProviders_ViewerForbidden(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := viewerCookie(t, f)
+
+	req := withCookie(httptest.NewRequest(http.MethodGet, "/api/v1/admin/embedding-providers", nil), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusForbidden {
+		t.Fatalf("status = %d, want 403 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
+func TestSwitchEmbeddingProvider_RejectsUnknownKind(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := adminCookie(t, f)
+
+	body, _ := json.Marshal(map[string]any{
+		"kind":   "nonexistent",
+		"config": map[string]any{},
+	})
+	req := withCookie(httptest.NewRequest(http.MethodPut,
+		"/api/v1/admin/embedding-providers/active", bytes.NewReader(body)), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	// EmbeddingSvc is nil in this fixture. The handler should still reject
+	// the unknown kind; if it checks for the missing service first it
+	// returns 503 instead. Accept either, as long as it is not 200/202.
+	if rr.Code == http.StatusOK || rr.Code == http.StatusAccepted {
+		t.Fatalf("unknown kind accepted: status %d (%s)", rr.Code, rr.Body.String())
+	}
+}
+
+func TestTestEmbeddingProvider_BadKind(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := adminCookie(t, f)
+
+	body := []byte(`{}`)
+	req := withCookie(httptest.NewRequest(http.MethodPost,
+		"/api/v1/admin/embedding-providers/garbage/test", bytes.NewReader(body)), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusBadRequest {
+		t.Fatalf("status = %d, want 400 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
+func TestTestEmbeddingProvider_MissingAPIKey(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := adminCookie(t, f)
+
+	// Submit a voyage config naming an env var that almost certainly isn't
+	// set in CI. Expect a 400 with ErrMissingAPIKey wrapped in the body.
+	body, _ := json.Marshal(map[string]any{
+		"api_key_env":      "CIX_TEST_MISSING_VOYAGE_KEY_XYZ",
+		"model":            "voyage-3",
+		"output_dimension": 1024,
+		"output_dtype":     "float",
+	})
+	req := withCookie(httptest.NewRequest(http.MethodPost,
+		"/api/v1/admin/embedding-providers/voyage/test", bytes.NewReader(body)), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusBadRequest {
+		t.Fatalf("status = %d, want 400 (body=%s)", rr.Code, rr.Body.String())
+	}
+}

From 6bfd81767ae3c928a7d88651710dc8a81b3ea8a4 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 13:48:03 +0100
Subject: [PATCH 04/34] feat(dashboard): pluggable embedding-provider UI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add EmbeddingProviderSection with a kind dropdown and per-provider
forms. Three hardcoded React components (OpenAIProviderForm,
VoyageProviderForm, ollama tuning kept in the existing sections)
render below the dropdown. Voyage form covers model picker, output
dimension (256/512/1024/2048), output dtype (float | int8), and
truncation toggle. Each form surfaces a red banner when the
referenced API-key env var is not set on the server and disables
Save accordingly — keys themselves are never stored in the
dashboard or DB.

New hooks: useEmbeddingProviders, useActiveProvider, useTestProvider,
useSwitchProvider. The save flow is test → switch → toast → invalidate
runtime-model + sidecar-status caches so the footer and project cards
update immediately.

Footer: label is now the active provider kind ("ollama"/"openai"/
"voyage") instead of hardcoded "llama". Dot is green/red for ollama
(real liveness signal from /health), permanently green for HTTP-only
providers (no managed process to die — failures surface at search
time with diagnostics).

ServerPage: ollama-specific sections (EmbeddingModelSection,
RuntimeParamsSection, SidecarSection) render only when the active
provider is ollama. Otherwise the provider form above is the only
edit surface.

openapi.yaml: extend StatusResponse with embedding_provider +
embedding_provider_manages_process; add the four new
/admin/embedding-providers endpoints and their schemas.
---
 doc/openapi.yaml                              | 240 +++++++++++++++++-
 server/dashboard/src/api/types.ts             |  10 +
 server/dashboard/src/app/Footer.tsx           |  42 ++-
 server/dashboard/src/lib/useServerStatus.ts   |   5 +
 .../src/modules/server/ServerPage.tsx         |  57 +++--
 server/dashboard/src/modules/server/hooks.ts  |  58 +++++
 .../sections/EmbeddingProviderSection.tsx     | 234 +++++++++++++++++
 .../sections/providers/OpenAIProviderForm.tsx | 124 +++++++++
 .../sections/providers/VoyageProviderForm.tsx | 141 ++++++++++
 9 files changed, 881 insertions(+), 30 deletions(-)
 create mode 100644 server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
 create mode 100644 server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
 create mode 100644 server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx

diff --git a/doc/openapi.yaml b/doc/openapi.yaml
index 06fac13..e9943b0 100644
--- a/doc/openapi.yaml
+++ b/doc/openapi.yaml
@@ -515,6 +515,131 @@ paths:
         "403":
           $ref: "#/components/responses/Forbidden"
 
+  /api/v1/admin/embedding-providers:
+    get:
+      operationId: listEmbeddingProviders
+      tags: [admin]
+      summary: List registered embedding-provider kinds (admin only)
+      description: |
+        Returns one entry per registered provider kind with its config
+        schema and the names of the env vars it reads for credentials,
+        plus whether those env vars are currently set on the server.
+        The dashboard uses this to render the kind dropdown, the
+        per-kind form, and the "set CIX_VOYAGE_API_KEY before saving"
+        banner when a key is missing.
+      responses:
+        "200":
+          description: List of registered providers
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/EmbeddingProviderList"
+        "401":
+          $ref: "#/components/responses/Unauthorized"
+        "403":
+          $ref: "#/components/responses/Forbidden"
+
+  /api/v1/admin/embedding-providers/active:
+    get:
+      operationId: getActiveEmbeddingProvider
+      tags: [admin]
+      summary: Get the currently active embedding provider (admin only)
+      description: |
+        Returns the persisted provider selection (kind + JSON config
+        blob) and the live `Provider.ID()` fingerprint. API keys are
+        never persisted, so the config blob carries env-var NAMES
+        only — safe to surface to admin clients verbatim.
+      responses:
+        "200":
+          description: Currently active provider
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ActiveEmbeddingProvider"
+        "401":
+          $ref: "#/components/responses/Unauthorized"
+        "403":
+          $ref: "#/components/responses/Forbidden"
+        "503":
+          description: Embeddings service not wired (e.g. CIX_EMBEDDINGS_ENABLED=false)
+    put:
+      operationId: switchEmbeddingProvider
+      tags: [admin]
+      summary: Switch to a different embedding provider (admin only)
+      description: |
+        Atomic switch. The server validates the submitted config, persists
+        it, then swaps the live Service over (drains the queue first).
+        On any error the existing provider stays untouched.
+
+        Switching changes the active `Provider.ID()` fingerprint; every
+        project's `indexed_with_model` becomes stale and the next
+        clone job per project triggers a full reindex
+        (`mode=full, reason=model-change`).
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/SwitchEmbeddingProviderRequest"
+      responses:
+        "202":
+          description: Switch accepted; new provider is live
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ActiveEmbeddingProvider"
+        "400":
+          $ref: "#/components/responses/BadRequest"
+        "401":
+          $ref: "#/components/responses/Unauthorized"
+        "403":
+          $ref: "#/components/responses/Forbidden"
+
+  /api/v1/admin/embedding-providers/{kind}/test:
+    post:
+      operationId: testEmbeddingProvider
+      tags: [admin]
+      summary: Validate an embedding-provider config without persisting (admin only)
+      description: |
+        Builds a throw-away provider from the submitted config, calls
+        Start (one short embed for HTTP providers; spawning a child for
+        ollama), then Stops it. Returns the detected dimension and an
+        ok flag. Use this from the dashboard before calling PUT
+        /embedding-providers/active so the admin sees an actionable
+        error (bad key, wrong URL, missing env var) before the swap.
+      parameters:
+        - name: kind
+          in: path
+          required: true
+          schema:
+            type: string
+            enum: [ollama, openai, voyage]
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              description: Provider-specific config blob (shape varies by kind).
+              additionalProperties: true
+      responses:
+        "200":
+          description: Connect test succeeded
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/TestEmbeddingProviderResponse"
+        "400":
+          $ref: "#/components/responses/BadRequest"
+        "401":
+          $ref: "#/components/responses/Unauthorized"
+        "403":
+          $ref: "#/components/responses/Forbidden"
+        "502":
+          description: |
+            Connect test failed against the upstream service (auth
+            rejected, network unreachable, etc).
+
   /api/v1/api-keys:
     get:
       operationId: listApiKeys
@@ -2723,6 +2848,12 @@ components:
         application/json:
           schema:
             $ref: "#/components/schemas/Error"
+    BadRequest:
+      description: Invalid request (missing fields, unknown kind, bad config)
+      content:
+        application/json:
+          schema:
+            $ref: "#/components/schemas/Error"
     InternalError:
       description: Unhandled server error
       content:
@@ -3079,6 +3210,94 @@ components:
           type: string
           description: The CIX_GGUF_CACHE_DIR that was scanned. Empty list with non-empty cache_dir = no .gguf files found.
 
+    EmbeddingProviderList:
+      type: object
+      required: [providers]
+      properties:
+        providers:
+          type: array
+          items:
+            $ref: "#/components/schemas/EmbeddingProviderInfo"
+
+    EmbeddingProviderInfo:
+      type: object
+      required: [kind, schema, secret_envs]
+      properties:
+        kind:
+          type: string
+          enum: [ollama, openai, voyage]
+        schema:
+          type: object
+          description: |
+            ConfigSchema as JSON — describes the form fields the
+            provider accepts. Shape is `{fields: [{name, label, kind,
+            required, default, enum, description}]}`. Hardcoded React
+            forms ignore this; it's exposed for external tooling.
+          additionalProperties: true
+        secret_envs:
+          type: array
+          description: |
+            Env-var names this provider reads for credentials, with a
+            flag telling whether each is currently set on the server.
+            Used by the dashboard to render the missing-key banner
+            before save.
+          items:
+            $ref: "#/components/schemas/EmbeddingProviderSecretEnv"
+
+    EmbeddingProviderSecretEnv:
+      type: object
+      required: [name, set]
+      properties:
+        name:
+          type: string
+          description: Env-var name (e.g. `CIX_VOYAGE_API_KEY`).
+        set:
+          type: boolean
+          description: True when the env var is present (and non-empty) on the server.
+
+    ActiveEmbeddingProvider:
+      type: object
+      required: [kind, id]
+      properties:
+        kind:
+          type: string
+          enum: [ollama, openai, voyage]
+        id:
+          type: string
+          description: |
+            `Provider.ID()` fingerprint, e.g. `voyage:voyage-code-3:1024:float`.
+            Matches `embedding_model` on /status.
+        config:
+          type: object
+          description: |
+            Persisted provider config blob. Shape varies by kind; API
+            keys are NOT stored — only env-var names are.
+          additionalProperties: true
+
+    SwitchEmbeddingProviderRequest:
+      type: object
+      required: [kind, config]
+      properties:
+        kind:
+          type: string
+          enum: [ollama, openai, voyage]
+        config:
+          type: object
+          additionalProperties: true
+
+    TestEmbeddingProviderResponse:
+      type: object
+      required: [ok]
+      properties:
+        ok:
+          type: boolean
+        dimension:
+          type: integer
+          minimum: 0
+          description: |
+            Embedding dimension as reported by the provider after
+            Start. 0 when the provider learns the dimension lazily.
+
     Session:
       type: object
       required: [id, created_at, expires_at, last_seen_at, is_current]
@@ -3226,7 +3445,26 @@ components:
             False when the sidecar is starting or has crashed.
         embedding_model:
           type: string
-          description: Hugging Face model id (e.g. `awhiteside/CodeRankEmbed-Q8_0-GGUF`).
+          description: |
+            Active provider fingerprint — formerly the HuggingFace repo id,
+            now `Provider.ID()` e.g. `ollama:CodeRankEmbed`,
+            `openai:text-embedding-3-small`, `voyage:voyage-code-3:1024:float`.
+            Used by the dashboard to compare against each project's
+            `indexed_with_model` and render the stale-model badge.
+        embedding_provider:
+          type: string
+          description: |
+            Active provider kind: `ollama`, `openai`, or `voyage`. Empty
+            when the embedding service is disabled or the fake fixtures
+            substitute a non-Service implementation.
+        embedding_provider_manages_process:
+          type: boolean
+          description: |
+            True when the active provider owns a managed child process
+            (currently only `ollama`). The footer renders a green/red
+            liveness dot when true, and a permanent green dot otherwise
+            (HTTP-only providers have no process to die — Ready failures
+            surface at request time, not on every footer poll).
         projects:
           type: integer
           minimum: 0
diff --git a/server/dashboard/src/api/types.ts b/server/dashboard/src/api/types.ts
index d9b4b6a..a1d8707 100644
--- a/server/dashboard/src/api/types.ts
+++ b/server/dashboard/src/api/types.ts
@@ -74,3 +74,13 @@ export type SidecarStatus = components['schemas']['SidecarStatus'];
 export type ModelEntry = components['schemas']['ModelEntry'];
 export type ModelList = components['schemas']['ModelList'];
 export type RestartAccepted = components['schemas']['RestartAccepted'];
+
+export type EmbeddingProviderInfo = components['schemas']['EmbeddingProviderInfo'];
+export type EmbeddingProviderSecretEnv = components['schemas']['EmbeddingProviderSecretEnv'];
+export type EmbeddingProviderList = components['schemas']['EmbeddingProviderList'];
+export type ActiveEmbeddingProvider = components['schemas']['ActiveEmbeddingProvider'];
+export type SwitchEmbeddingProviderRequest = components['schemas']['SwitchEmbeddingProviderRequest'];
+export type TestEmbeddingProviderResponse = components['schemas']['TestEmbeddingProviderResponse'];
+
+// Provider kind union — the dashboard uses this in form-state discriminants.
+export type EmbeddingProviderKind = 'ollama' | 'openai' | 'voyage';
diff --git a/server/dashboard/src/app/Footer.tsx b/server/dashboard/src/app/Footer.tsx
index f772808..10d812b 100644
--- a/server/dashboard/src/app/Footer.tsx
+++ b/server/dashboard/src/app/Footer.tsx
@@ -5,31 +5,51 @@ import { cn } from '@/lib/cn';
 
 // Footer spans the full width below the sidebar + main pane. Reads
 // from the shared /status query (polled every 30 s) — server version
-// on the left, llama sidecar liveness dot on the right. The "llama"
-// label links to /server (admin-only page); viewers see plain text
-// since the route isn't mounted for them.
+// on the left, embedding-provider indicator on the right.
+//
+// The label is the active provider kind ("ollama" / "openai" /
+// "voyage") and the dot logic depends on whether the provider
+// manages a child process:
+//   ollama (manages_process=true): green when /health alive, red
+//     otherwise — real liveness signal.
+//   openai / voyage (manages_process=false): permanently green.
+//     We don't ping remote APIs on every footer poll; failures
+//     surface at search/embed time with diagnostics.
+//
+// The provider name links to /server (admin-only page); viewers see
+// plain text since the route isn't mounted for them.
 export function Footer() {
   const { data, isLoading } = useServerStatus();
   const { user } = useAuth();
   const version = data?.server_version ?? 'dev';
+  const providerKind = data?.embedding_provider ?? '';
+  const managesProcess = data?.embedding_provider_manages_process === true;
   const alive = data?.model_loaded === true;
   const isAdmin = user?.role === 'admin';
 
   const dotClass = isLoading
     ? 'bg-muted-foreground/40'
-    : alive
-      ? 'bg-emerald-500'
-      : 'bg-red-500';
+    : managesProcess
+      ? alive
+        ? 'bg-emerald-500'
+        : 'bg-red-500'
+      : 'bg-emerald-500';
   const dotTitle = isLoading
-    ? 'Checking sidecar status…'
-    : alive
-      ? 'Sidecar is alive'
-      : 'Sidecar is not responding';
+    ? 'Checking embedding provider status…'
+    : managesProcess
+      ? alive
+        ? 'Ollama sidecar is alive'
+        : 'Ollama sidecar is not responding'
+      : providerKind
+        ? `${providerKind} backend (no managed process)`
+        : 'Embedding backend';
+
+  const label = providerKind || 'embeddings';
 
   const indicator = (
     <>
       <span className={cn('h-2 w-2 rounded-full', dotClass)} aria-hidden />
-      <span>llama</span>
+      <span>{label}</span>
     </>
   );
 
diff --git a/server/dashboard/src/lib/useServerStatus.ts b/server/dashboard/src/lib/useServerStatus.ts
index 049bbce..78f5480 100644
--- a/server/dashboard/src/lib/useServerStatus.ts
+++ b/server/dashboard/src/lib/useServerStatus.ts
@@ -5,6 +5,11 @@ interface StatusPayload {
   server_version: string;
   embedding_model: string;
   model_loaded: boolean;
+  // Pluggable-provider fields (server >= migration 12). Present on
+  // every fresh-built server; an older server omits them, so they
+  // may be undefined while a rolling upgrade is in progress.
+  embedding_provider?: string;
+  embedding_provider_manages_process?: boolean;
   // Version-check fields are present only when the server has the
   // versioncheck service wired (see CIX_VERSION_CHECK_ENABLED).
   update_available?: boolean;
diff --git a/server/dashboard/src/modules/server/ServerPage.tsx b/server/dashboard/src/modules/server/ServerPage.tsx
index 8481ae7..0b49241 100644
--- a/server/dashboard/src/modules/server/ServerPage.tsx
+++ b/server/dashboard/src/modules/server/ServerPage.tsx
@@ -6,11 +6,18 @@ import type { RuntimeConfig, RuntimeConfigUpdate } from '@/api/types';
 import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
 import { Button } from '@/ui/button';
 import { Skeleton } from '@/ui/skeleton';
-import { useRestartSidecar, useRuntimeConfig, useSidecarStatus, useUpdateRuntimeConfig } from './hooks';
+import {
+  useActiveProvider,
+  useRestartSidecar,
+  useRuntimeConfig,
+  useSidecarStatus,
+  useUpdateRuntimeConfig,
+} from './hooks';
 import { EmbeddingModelSection } from './sections/EmbeddingModelSection';
 import { RuntimeParamsSection } from './sections/RuntimeParamsSection';
 import { SidecarSection } from './sections/SidecarSection';
 import { AdvancedSection } from './sections/AdvancedSection';
+import { EmbeddingProviderSection } from './sections/EmbeddingProviderSection';
 import { SaveAndRestartDialog } from './components/SaveAndRestartDialog';
 
 interface Draft {
@@ -62,6 +69,7 @@ export default function ServerPage() {
   const status = useSidecarStatus();
   const update = useUpdateRuntimeConfig();
   const restart = useRestartSidecar();
+  const activeProvider = useActiveProvider();
 
   const [draft, setDraft] = useState<Draft | null>(null);
   const [confirmOpen, setConfirmOpen] = useState(false);
@@ -158,23 +166,36 @@ export default function ServerPage() {
         </Alert>
       ) : null}
 
-      <EmbeddingModelSection
-        config={cfg.data}
-        draftModel={draft.embedding_model}
-        onDraftChange={(v) => setDraft({ ...draft, embedding_model: v })}
-      />
-
-      <RuntimeParamsSection
-        config={cfg.data}
-        draftCtx={draft.llama_ctx_size}
-        draftGpuLayers={draft.llama_n_gpu_layers}
-        draftThreads={draft.llama_n_threads}
-        onDraftCtx={(n) => setDraft({ ...draft, llama_ctx_size: n })}
-        onDraftGpuLayers={(n) => setDraft({ ...draft, llama_n_gpu_layers: n })}
-        onDraftThreads={(n) => setDraft({ ...draft, llama_n_threads: n })}
-      />
-
-      <SidecarSection />
+      <EmbeddingProviderSection />
+
+      {/*
+        Ollama-specific tuning + sidecar status — rendered only when the
+        active provider is ollama. For openai/voyage these sections do
+        not apply: there's no GGUF, no llama-server child to restart,
+        no GPU layers / threads. The provider form above is the only
+        edit surface in that case.
+      */}
+      {activeProvider.data?.kind === 'ollama' ? (
+        <>
+          <EmbeddingModelSection
+            config={cfg.data}
+            draftModel={draft.embedding_model}
+            onDraftChange={(v) => setDraft({ ...draft, embedding_model: v })}
+          />
+
+          <RuntimeParamsSection
+            config={cfg.data}
+            draftCtx={draft.llama_ctx_size}
+            draftGpuLayers={draft.llama_n_gpu_layers}
+            draftThreads={draft.llama_n_threads}
+            onDraftCtx={(n) => setDraft({ ...draft, llama_ctx_size: n })}
+            onDraftGpuLayers={(n) => setDraft({ ...draft, llama_n_gpu_layers: n })}
+            onDraftThreads={(n) => setDraft({ ...draft, llama_n_threads: n })}
+          />
+
+          <SidecarSection />
+        </>
+      ) : null}
 
       <AdvancedSection
         config={cfg.data}
diff --git a/server/dashboard/src/modules/server/hooks.ts b/server/dashboard/src/modules/server/hooks.ts
index fb1e715..26b29d7 100644
--- a/server/dashboard/src/modules/server/hooks.ts
+++ b/server/dashboard/src/modules/server/hooks.ts
@@ -1,17 +1,23 @@
 import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
 import { api } from '@/api/client';
 import type {
+  ActiveEmbeddingProvider,
+  EmbeddingProviderList,
   ModelList,
   RestartAccepted,
   RuntimeConfig,
   RuntimeConfigUpdate,
   SidecarStatus,
+  SwitchEmbeddingProviderRequest,
+  TestEmbeddingProviderResponse,
 } from '@/api/types';
 
 export const serverKeys = {
   runtimeConfig: ['server', 'runtime-config'] as const,
   sidecarStatus: ['server', 'sidecar-status'] as const,
   models: ['server', 'models'] as const,
+  embeddingProviders: ['server', 'embedding-providers'] as const,
+  activeProvider: ['server', 'embedding-provider', 'active'] as const,
 };
 
 export function useRuntimeConfig() {
@@ -77,3 +83,55 @@ export function useGGUFModels() {
     staleTime: 60_000,
   });
 }
+
+// useEmbeddingProviders returns the list of registered provider
+// kinds, their schemas, and which API-key env vars are currently set
+// on the server. Not polled: stale after 30s and refetched on the next
+// mount or refocus, so a newly set env var flips the banner without a hard reload.
+export function useEmbeddingProviders() {
+  return useQuery({
+    queryKey: serverKeys.embeddingProviders,
+    queryFn: ({ signal }) =>
+      api.get<EmbeddingProviderList>('/admin/embedding-providers', { signal }),
+    staleTime: 30_000,
+  });
+}
+
+// useActiveProvider returns the persisted active provider + config.
+// Invalidated by useSwitchProvider on success.
+export function useActiveProvider() {
+  return useQuery({
+    queryKey: serverKeys.activeProvider,
+    queryFn: ({ signal }) =>
+      api.get<ActiveEmbeddingProvider>('/admin/embedding-providers/active', { signal }),
+  });
+}
+
+// useTestProvider calls /test for a given kind+config. Doesn't
+// touch the active state on the server.
+export function useTestProvider(kind: string) {
+  return useMutation({
+    mutationFn: (config: Record<string, unknown>) =>
+      api.post<TestEmbeddingProviderResponse>(
+        `/admin/embedding-providers/${encodeURIComponent(kind)}/test`,
+        config
+      ),
+  });
+}
+
+// useSwitchProvider PUTs the new selection. On success: invalidate
+// the active-provider cache, the /status cache (footer indicator),
+// and the sidecar-status cache (the latter goes to "n/a" for
+// non-ollama providers).
+export function useSwitchProvider() {
+  const qc = useQueryClient();
+  return useMutation({
+    mutationFn: (req: SwitchEmbeddingProviderRequest) =>
+      api.put<ActiveEmbeddingProvider>('/admin/embedding-providers/active', req),
+    onSuccess: () => {
+      qc.invalidateQueries({ queryKey: serverKeys.activeProvider });
+      qc.invalidateQueries({ queryKey: serverKeys.sidecarStatus });
+      qc.invalidateQueries({ queryKey: ['runtime-model'] });
+    },
+  });
+}
diff --git a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
new file mode 100644
index 0000000..b4ed212
--- /dev/null
+++ b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
@@ -0,0 +1,234 @@
+import { useEffect, useMemo, useState } from 'react';
+import { AlertCircle, CheckCircle2, Loader2, Save } from 'lucide-react';
+import { toast } from 'sonner';
+import { ApiError } from '@/api/client';
+import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
+import { Button } from '@/ui/button';
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/ui/card';
+import { Label } from '@/ui/label';
+import type { EmbeddingProviderKind, EmbeddingProviderSecretEnv } from '@/api/types';
+import {
+  useActiveProvider,
+  useEmbeddingProviders,
+  useSwitchProvider,
+  useTestProvider,
+} from '../hooks';
+import { OpenAIProviderForm, type OpenAIConfig, defaultOpenAIConfig } from './providers/OpenAIProviderForm';
+import { VoyageProviderForm, type VoyageConfig, defaultVoyageConfig } from './providers/VoyageProviderForm';
+
+// EmbeddingProviderSection wraps the provider-kind dropdown + the
+// per-kind form. The ollama-specific sections (EmbeddingModelSection,
+// RuntimeParamsSection, SidecarSection) stay rendered by the parent
+// ServerPage when the active kind is "ollama" — switching to a remote
+// provider hides them in ServerPage by checking activeProvider.kind.
+//
+// Save flow:
+//   1. POST /admin/embedding-providers/{kind}/test with the draft.
+//   2. On success → PUT /admin/embedding-providers/active.
+//   3. Surface toast + invalidate caches so the footer / sidecar
+//      cards update immediately.
+//
+// API keys are never stored on the server: configs only carry the
+// NAME of the env var that holds the key. When the relevant env var
+// is missing the form renders a red banner and the Save button is
+// disabled.
+export function EmbeddingProviderSection() {
+  const providers = useEmbeddingProviders();
+  const active = useActiveProvider();
+  const switchMut = useSwitchProvider();
+
+  const [draftKind, setDraftKind] = useState<EmbeddingProviderKind>('ollama');
+  const [openAIDraft, setOpenAIDraft] = useState<OpenAIConfig>(defaultOpenAIConfig);
+  const [voyageDraft, setVoyageDraft] = useState<VoyageConfig>(defaultVoyageConfig);
+
+  // When the persisted active provider loads / changes (e.g. after a
+  // successful switch), reset the drafts so the form mirrors what is
+  // live. Selecting a different kind in the dropdown only changes the
+  // form being rendered — it does NOT mutate the underlying drafts
+  // until the admin clicks Save.
+  useEffect(() => {
+    const data = active.data;
+    if (!data?.kind) return;
+    setDraftKind(data.kind as EmbeddingProviderKind);
+    const cfg = (data.config ?? {}) as Record<string, unknown>;
+    if (data.kind === 'openai') {
+      setOpenAIDraft({
+        base_url: String(cfg.base_url ?? defaultOpenAIConfig.base_url),
+        model: String(cfg.model ?? defaultOpenAIConfig.model),
+        api_key_env: String(cfg.api_key_env ?? defaultOpenAIConfig.api_key_env),
+        dimensions: typeof cfg.dimensions === 'number' ? cfg.dimensions : undefined,
+      });
+    }
+    if (data.kind === 'voyage') {
+      setVoyageDraft({
+        model: String(cfg.model ?? defaultVoyageConfig.model),
+        api_key_env: String(cfg.api_key_env ?? defaultVoyageConfig.api_key_env),
+        output_dimension: Number(cfg.output_dimension ?? defaultVoyageConfig.output_dimension),
+        output_dtype:
+          (cfg.output_dtype as 'float' | 'int8') ?? defaultVoyageConfig.output_dtype,
+        truncation: cfg.truncation !== false,
+      });
+    }
+  }, [active.data]);
+
+  // Lookup the env-key readiness for the currently selected kind so
+  // the relevant form can render a "set CIX_VOYAGE_API_KEY before
+  // saving" banner without each form duplicating the query.
+  const envsForKind = useMemo<EmbeddingProviderSecretEnv[]>(() => {
+    if (!providers.data) return [];
+    return providers.data.providers.find((p) => p.kind === draftKind)?.secret_envs ?? [];
+  }, [providers.data, draftKind]);
+
+  const test = useTestProvider(draftKind);
+
+  if (providers.isLoading || active.isLoading) {
+    return (
+      <Card>
+        <CardHeader>
+          <CardTitle>Embedding provider</CardTitle>
+        </CardHeader>
+        <CardContent>
+          <div className="text-sm text-muted-foreground">Loading providers…</div>
+        </CardContent>
+      </Card>
+    );
+  }
+  if (providers.error || !providers.data || active.error) {
+    return (
+      <Alert variant="destructive">
+        <AlertCircle className="h-4 w-4" />
+        <AlertTitle>Could not load embedding providers</AlertTitle>
+        <AlertDescription>
+          {String(providers.error ?? active.error ?? 'unknown error')}
+        </AlertDescription>
+      </Alert>
+    );
+  }
+
+  // Build the current draft config blob for the selected kind.
+  const draftConfig: Record<string, unknown> = (() => {
+    switch (draftKind) {
+      case 'openai':
+        return { ...openAIDraft };
+      case 'voyage':
+        return { ...voyageDraft };
+      case 'ollama':
+        // Ollama keeps using the existing per-field sections — for
+        // this composite section, switching TO ollama submits the
+        // already-persisted blob unchanged. The admin tunes ollama
+        // knobs via the sections below.
+        return (active.data?.kind === 'ollama' && active.data.config) || {};
+    }
+  })();
+
+  // Validation: we let the backend's /test endpoint be the source of
+  // truth, but disable the Save button locally when an obviously
+  // required field is empty or a referenced env var is missing.
+  const allEnvsSet = envsForKind.every((e) => e.set);
+  const localValid = (() => {
+    if (draftKind === 'openai') {
+      return !!openAIDraft.base_url && !!openAIDraft.model && !!openAIDraft.api_key_env;
+    }
+    if (draftKind === 'voyage') {
+      return !!voyageDraft.model && !!voyageDraft.api_key_env;
+    }
+    return true; // ollama is edited via the lower sections
+  })();
+
+  const canSave = localValid && allEnvsSet && !switchMut.isPending && !test.isPending;
+  const dirty = draftKind !== active.data?.kind || (() => {
+    // Compare the JSON blob shallowly so we know whether anything
+    // changed in the per-kind form.
+    if (draftKind === 'ollama') return false;
+    const a = JSON.stringify(active.data?.config ?? {});
+    const b = JSON.stringify(draftConfig);
+    return a !== b;
+  })();
+
+  async function onSave() {
+    try {
+      await test.mutateAsync(draftConfig);
+      await switchMut.mutateAsync({ kind: draftKind, config: draftConfig });
+      toast.success(`Switched to ${draftKind}`, {
+        description: 'Every project will get a Stale-model badge until reindex.',
+      });
+    } catch (e) {
+      const detail = e instanceof ApiError ? e.detail : String(e);
+      toast.error('Provider switch failed', { description: detail });
+    }
+  }
+
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle className="flex items-center gap-2">
+          Embedding provider
+          {active.data?.kind ? (
+            <span className="rounded-md bg-muted px-2 py-0.5 text-xs font-mono">
+              {active.data.kind}
+            </span>
+          ) : null}
+        </CardTitle>
+        <CardDescription>
+          Choose where embeddings are computed. Switching providers triggers a
+          full reindex per project on the next clone job — every project's
+          stored model fingerprint becomes stale.
+        </CardDescription>
+      </CardHeader>
+      <CardContent className="space-y-5">
+        <div className="space-y-1.5">
+          <Label htmlFor="provider-kind">Provider</Label>
+          <select
+            id="provider-kind"
+            value={draftKind}
+            onChange={(e) => setDraftKind(e.target.value as EmbeddingProviderKind)}
+            className="block w-full rounded-md border bg-background px-3 py-2 text-sm sm:max-w-xs"
+          >
+            <option value="ollama">Ollama sidecar (local llama-server)</option>
+            <option value="openai">OpenAI-compatible (/v1/embeddings)</option>
+            <option value="voyage">Voyage AI</option>
+          </select>
+        </div>
+
+        {draftKind === 'openai' ? (
+          <OpenAIProviderForm
+            value={openAIDraft}
+            onChange={setOpenAIDraft}
+            secretEnvs={envsForKind}
+          />
+        ) : null}
+        {draftKind === 'voyage' ? (
+          <VoyageProviderForm
+            value={voyageDraft}
+            onChange={setVoyageDraft}
+            secretEnvs={envsForKind}
+          />
+        ) : null}
+        {draftKind === 'ollama' ? (
+          <div className="rounded-md border border-dashed bg-muted/30 p-3 text-xs text-muted-foreground">
+            Ollama tuning (model picker, ctx, GPU layers, sidecar status) is
+            configured in the sections below.
+          </div>
+        ) : null}
+
+        {draftKind !== 'ollama' ? (
+          <div className="flex items-center gap-2 pt-2">
+            <Button onClick={onSave} disabled={!canSave || !dirty}>
+              {switchMut.isPending || test.isPending ? (
+                <Loader2 className="mr-1 h-4 w-4 animate-spin" />
+              ) : (
+                <Save className="mr-1 h-4 w-4" />
+              )}
+              Save &amp; switch
+            </Button>
+            {test.isSuccess && !switchMut.isPending ? (
+              <span className="flex items-center gap-1 text-xs text-emerald-700">
+                <CheckCircle2 className="h-3 w-3" /> Last test ok
+              </span>
+            ) : null}
+          </div>
+        ) : null}
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
new file mode 100644
index 0000000..6c34189
--- /dev/null
+++ b/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
@@ -0,0 +1,124 @@
+import { AlertTriangle } from 'lucide-react';
+import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
+import { Input } from '@/ui/input';
+import { Label } from '@/ui/label';
+import type { EmbeddingProviderSecretEnv } from '@/api/types';
+
+// OpenAIConfig mirrors the openai provider's persisted config blob
+// shape (see server/internal/embeddings/provider/openai/openai.go).
+export interface OpenAIConfig {
+  base_url: string;
+  model: string;
+  api_key_env: string;
+  dimensions?: number;
+}
+
+export const defaultOpenAIConfig: OpenAIConfig = {
+  base_url: 'https://api.openai.com',
+  model: 'text-embedding-3-small',
+  api_key_env: 'CIX_OPENAI_API_KEY',
+};
+
+interface Props {
+  value: OpenAIConfig;
+  onChange: (next: OpenAIConfig) => void;
+  secretEnvs: EmbeddingProviderSecretEnv[];
+}
+
+// Common OpenAI-compatible model picker entries. Free-text input
+// stays the source of truth (any string is valid for self-hosted
+// servers); these are just suggestions.
+const SUGGESTED_MODELS = [
+  'text-embedding-3-small',
+  'text-embedding-3-large',
+  'text-embedding-ada-002',
+];
+
+export function OpenAIProviderForm({ value, onChange, secretEnvs }: Props) {
+  const apiKeyEnv = secretEnvs.find((e) => e.name === value.api_key_env);
+  const apiKeyMissing = apiKeyEnv != null && !apiKeyEnv.set;
+
+  return (
+    <div className="space-y-4">
+      <div className="space-y-1.5">
+        <Label htmlFor="openai-base-url">Base URL</Label>
+        <Input
+          id="openai-base-url"
+          value={value.base_url}
+          onChange={(e) => onChange({ ...value, base_url: e.target.value })}
+          placeholder="https://api.openai.com"
+        />
+        <p className="text-xs text-muted-foreground">
+          Server origin without the trailing <code>/v1</code>. Works for OpenAI
+          proper, vLLM, TEI, LocalAI, Ollama's openai endpoint, and any other
+          OpenAI-compatible /v1/embeddings server.
+        </p>
+      </div>
+
+      <div className="space-y-1.5">
+        <Label htmlFor="openai-model">Model</Label>
+        <Input
+          id="openai-model"
+          list="openai-model-suggestions"
+          value={value.model}
+          onChange={(e) => onChange({ ...value, model: e.target.value })}
+        />
+        <datalist id="openai-model-suggestions">
+          {SUGGESTED_MODELS.map((m) => (
+            <option key={m} value={m} />
+          ))}
+        </datalist>
+        <p className="text-xs text-muted-foreground">
+          For self-hosted servers use whichever model name the server expects.
+        </p>
+      </div>
+
+      <div className="space-y-1.5">
+        <Label htmlFor="openai-dim">Dimensions (optional)</Label>
+        <Input
+          id="openai-dim"
+          type="number"
+          min={0}
+          value={value.dimensions ?? ''}
+          onChange={(e) =>
+            onChange({
+              ...value,
+              dimensions: e.target.value === '' ? undefined : Number(e.target.value),
+            })
+          }
+          placeholder="(server default)"
+        />
+        <p className="text-xs text-muted-foreground">
+          Matryoshka shrink for <code>text-embedding-3-*</code>. Leave empty
+          to use the model's native dimension.
+        </p>
+      </div>
+
+      <div className="space-y-1.5">
+        <Label htmlFor="openai-key-env">API key env var</Label>
+        <Input
+          id="openai-key-env"
+          value={value.api_key_env}
+          onChange={(e) => onChange({ ...value, api_key_env: e.target.value })}
+          placeholder="CIX_OPENAI_API_KEY"
+        />
+        <p className="text-xs text-muted-foreground">
+          The dashboard never stores the key itself. The server reads this
+          env var live on every embed call.
+        </p>
+      </div>
+
+      {apiKeyMissing ? (
+        <Alert variant="destructive">
+          <AlertTriangle className="h-4 w-4" />
+          <AlertTitle>API key env var is not set</AlertTitle>
+          <AlertDescription>
+            Export <code>{value.api_key_env}</code> on the server (compose,
+            portainer, systemd, …) and restart the container before saving.
+            Calls would fail until the key becomes available.
+          </AlertDescription>
+        </Alert>
+      ) : null}
+    </div>
+  );
+}
diff --git a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
new file mode 100644
index 0000000..e0ad2e2
--- /dev/null
+++ b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
@@ -0,0 +1,141 @@
+import { AlertTriangle } from 'lucide-react';
+import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
+import { Input } from '@/ui/input';
+import { Label } from '@/ui/label';
+import { Switch } from '@/ui/switch';
+import type { EmbeddingProviderSecretEnv } from '@/api/types';
+
+// VoyageConfig mirrors the voyage provider's persisted config blob
+// shape (see server/internal/embeddings/provider/voyage/voyage.go).
+export interface VoyageConfig {
+  model: string;
+  api_key_env: string;
+  output_dimension: number;
+  output_dtype: 'float' | 'int8';
+  truncation: boolean;
+}
+
+export const defaultVoyageConfig: VoyageConfig = {
+  model: 'voyage-code-3',
+  api_key_env: 'CIX_VOYAGE_API_KEY',
+  output_dimension: 1024,
+  output_dtype: 'float',
+  truncation: true,
+};
+
+interface Props {
+  value: VoyageConfig;
+  onChange: (next: VoyageConfig) => void;
+  secretEnvs: EmbeddingProviderSecretEnv[];
+}
+
+const MODELS = [
+  'voyage-code-3',
+  'voyage-3-large',
+  'voyage-3',
+  'voyage-3-lite',
+  'voyage-code-2',
+];
+
+const DIMENSIONS = [256, 512, 1024, 2048];
+
+export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
+  const apiKeyEnv = secretEnvs.find((e) => e.name === value.api_key_env);
+  const apiKeyMissing = apiKeyEnv != null && !apiKeyEnv.set;
+
+  return (
+    <div className="space-y-4">
+      <div className="space-y-1.5">
+        <Label htmlFor="voyage-model">Model</Label>
+        <select
+          id="voyage-model"
+          value={value.model}
+          onChange={(e) => onChange({ ...value, model: e.target.value })}
+          className="block w-full rounded-md border bg-background px-3 py-2 text-sm sm:max-w-sm"
+        >
+          {MODELS.map((m) => (
+            <option key={m} value={m}>
+              {m}
+            </option>
+          ))}
+        </select>
+      </div>
+
+      <div className="grid gap-4 sm:grid-cols-2">
+        <div className="space-y-1.5">
+          <Label htmlFor="voyage-dim">Output dimension (Matryoshka)</Label>
+          <select
+            id="voyage-dim"
+            value={String(value.output_dimension)}
+            onChange={(e) => onChange({ ...value, output_dimension: Number(e.target.value) })}
+            className="block w-full rounded-md border bg-background px-3 py-2 text-sm"
+          >
+            {DIMENSIONS.map((d) => (
+              <option key={d} value={d}>
+                {d}
+              </option>
+            ))}
+          </select>
+          <p className="text-xs text-muted-foreground">
+            Changing this triggers a full reindex per project.
+          </p>
+        </div>
+
+        <div className="space-y-1.5">
+          <Label htmlFor="voyage-dtype">Output dtype</Label>
+          <select
+            id="voyage-dtype"
+            value={value.output_dtype}
+            onChange={(e) =>
+              onChange({ ...value, output_dtype: e.target.value as 'float' | 'int8' })
+            }
+            className="block w-full rounded-md border bg-background px-3 py-2 text-sm"
+          >
+            <option value="float">float (default)</option>
+            <option value="int8">int8 (dequantized server-side)</option>
+          </select>
+          <p className="text-xs text-muted-foreground">
+            <code>binary</code> / <code>ubinary</code> are not supported — the
+            vector store has no hamming-distance search.
+          </p>
+        </div>
+      </div>
+
+      <div className="flex items-center gap-3">
+        <Switch
+          id="voyage-truncation"
+          checked={value.truncation}
+          onCheckedChange={(c) => onChange({ ...value, truncation: c === true })}
+        />
+        <Label htmlFor="voyage-truncation" className="cursor-pointer">
+          Truncate over-length inputs server-side
+        </Label>
+      </div>
+
+      <div className="space-y-1.5">
+        <Label htmlFor="voyage-key-env">API key env var</Label>
+        <Input
+          id="voyage-key-env"
+          value={value.api_key_env}
+          onChange={(e) => onChange({ ...value, api_key_env: e.target.value })}
+          placeholder="CIX_VOYAGE_API_KEY"
+        />
+        <p className="text-xs text-muted-foreground">
+          The dashboard never stores the key — only this env-var name.
+        </p>
+      </div>
+
+      {apiKeyMissing ? (
+        <Alert variant="destructive">
+          <AlertTriangle className="h-4 w-4" />
+          <AlertTitle>API key env var is not set</AlertTitle>
+          <AlertDescription>
+            Export <code>{value.api_key_env}</code> on the server and restart
+            the container before saving. Voyage API calls would fail until
+            the key becomes available.
+          </AlertDescription>
+        </Alert>
+      ) : null}
+    </div>
+  );
+}

From d57e2ae30c134be8062514b80e9a6f7266af883d Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 13:55:26 +0100
Subject: [PATCH 05/34] docs(compose): document optional remote-provider env
 vars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add commented examples for CIX_OPENAI_API_KEY and CIX_VOYAGE_API_KEY
to both compose files. Default behavior is unchanged — first boot
seeds the ollama provider from the existing CIX_EMBEDDING_MODEL +
CIX_LLAMA_* vars. Switching to a remote provider is a dashboard
action; the env var only needs to be exported when the operator
intends to use OpenAI-compatible or Voyage backends.
---
 docker-compose.cuda.yml | 19 +++++++++++++++++++
 docker-compose.yml      | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/docker-compose.cuda.yml b/docker-compose.cuda.yml
index a40032a..84268f9 100644
--- a/docker-compose.cuda.yml
+++ b/docker-compose.cuda.yml
@@ -45,6 +45,25 @@ services:
       # touches the source again. Subsequent boots find the file in cache
       # and ignore the env. See volumes block below for an example bind.
       - CIX_BOOTSTRAP_GGUF_PATH=${CIX_BOOTSTRAP_GGUF_PATH:-}
+      # ── Pluggable embedding providers (added in migration 12) ──
+      # The active provider is selected from the dashboard
+      # (/dashboard/server → Embedding provider). On first boot with an
+      # empty runtime_settings row, cix-server seeds the ollama provider
+      # using the CIX_EMBEDDING_MODEL + CIX_LLAMA_* vars above — so the
+      # default deployment is unchanged.
+      #
+      # To use a remote provider (OpenAI-compatible or Voyage AI) you
+      # MUST export the API-key env var below. The server reads it on
+      # every embed call; cix-server NEVER persists API keys in the DB,
+      # only the env-var NAME a provider should look up. Switching
+      # providers triggers a full reindex per project on the next clone
+      # job (existing model-change pipeline).
+      #
+      # OpenAI-compatible (api.openai.com, vLLM, TEI, LocalAI, …):
+      # - CIX_OPENAI_API_KEY=${CIX_OPENAI_API_KEY:-}
+      #
+      # Voyage AI:
+      # - CIX_VOYAGE_API_KEY=${CIX_VOYAGE_API_KEY:-}
       - NVIDIA_VISIBLE_DEVICES=all
     volumes:
       # Operator-managed bind for sqlite + chroma so backups and inspection
diff --git a/docker-compose.yml b/docker-compose.yml
index e758c30..e22b5eb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -45,6 +45,25 @@ services:
       # touches the source again. Subsequent boots find the file in cache
       # and ignore the env. See volumes block below for an example bind.
       - CIX_BOOTSTRAP_GGUF_PATH=${CIX_BOOTSTRAP_GGUF_PATH:-}
+      # ── Pluggable embedding providers (added in migration 12) ──
+      # The active provider is selected from the dashboard
+      # (/dashboard/server → Embedding provider). On first boot with an
+      # empty runtime_settings row, cix-server seeds the ollama provider
+      # using the CIX_EMBEDDING_MODEL + CIX_LLAMA_* vars above — so the
+      # default deployment is unchanged.
+      #
+      # To use a remote provider (OpenAI-compatible or Voyage AI) you
+      # MUST export the API-key env var below. The server reads it on
+      # every embed call; cix-server NEVER persists API keys in the DB,
+      # only the env-var NAME a provider should look up. Switching
+      # providers triggers a full reindex per project on the next clone
+      # job (existing model-change pipeline).
+      #
+      # OpenAI-compatible (api.openai.com, vLLM, TEI, LocalAI, …):
+      # - CIX_OPENAI_API_KEY=${CIX_OPENAI_API_KEY:-}
+      #
+      # Voyage AI:
+      # - CIX_VOYAGE_API_KEY=${CIX_VOYAGE_API_KEY:-}
     volumes:
       # Operator-managed bind for sqlite + chroma so backups and inspection
       # are one `cd` away on the host. The CPU image runs as

From cd1dea6d2e18d6278980765bce2bd25c1a22777a Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 14:48:36 +0100
Subject: [PATCH 06/34] fix(dashboard): hide all ollama-specific sections on
 remote providers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous gate covered EmbeddingModelSection, RuntimeParamsSection,
and SidecarSection but left AdvancedSection (batch size / concurrency)
visible — and also exposed the ollama-only Save & Restart header
button regardless of provider. Switch to /status as the single
source of truth for the live active kind (already polled by the
footer; no extra request) and gate AdvancedSection plus the
Save & Restart button on it. Update the header copy to explain
what's editable per provider type.
---
 .../src/modules/server/ServerPage.tsx         | 66 +++++++++++--------
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/server/dashboard/src/modules/server/ServerPage.tsx b/server/dashboard/src/modules/server/ServerPage.tsx
index 0b49241..294bf5e 100644
--- a/server/dashboard/src/modules/server/ServerPage.tsx
+++ b/server/dashboard/src/modules/server/ServerPage.tsx
@@ -6,8 +6,8 @@ import type { RuntimeConfig, RuntimeConfigUpdate } from '@/api/types';
 import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
 import { Button } from '@/ui/button';
 import { Skeleton } from '@/ui/skeleton';
+import { useServerStatus } from '@/lib/useServerStatus';
 import {
-  useActiveProvider,
   useRestartSidecar,
   useRuntimeConfig,
   useSidecarStatus,
@@ -69,7 +69,14 @@ export default function ServerPage() {
   const status = useSidecarStatus();
   const update = useUpdateRuntimeConfig();
   const restart = useRestartSidecar();
-  const activeProvider = useActiveProvider();
+  // /status is shared with the footer (already polled every 30s) and
+  // its embedding_provider field reflects the LIVE active provider —
+  // the right signal for "should we show ollama sections?". We default
+  // to true while it loads so the page doesn't flash empty between
+  // mount and the first /status response.
+  const serverStatus = useServerStatus();
+  const activeKind = serverStatus.data?.embedding_provider ?? 'ollama';
+  const showOllamaSections = activeKind === 'ollama';
 
   const [draft, setDraft] = useState<Draft | null>(null);
   const [confirmOpen, setConfirmOpen] = useState(false);
@@ -140,18 +147,20 @@ export default function ServerPage() {
         <div>
           <h1 className="text-2xl font-semibold tracking-tight">Server</h1>
           <p className="text-sm text-muted-foreground">
-            Embedding model, indexing parameters, sidecar lifecycle. Saved
-            overrides land in the database and are reapplied on the next
-            sidecar restart — env vars stay as bootstrap defaults.
+            {showOllamaSections
+              ? 'Embedding provider + model, indexing parameters, sidecar lifecycle. Saved overrides land in the database and are reapplied on the next sidecar restart — env vars stay as bootstrap defaults.'
+              : 'Embedding provider selection. For remote providers (OpenAI-compatible, Voyage) all tuning lives inside the provider form below — there is no sidecar to restart and no GPU / batch knobs.'}
           </p>
         </div>
-        <Button
-          onClick={() => setConfirmOpen(true)}
-          disabled={!dirty || isPending || disabled}
-        >
-          {isPending ? <Loader2 className="mr-1 h-4 w-4 animate-spin" /> : <Save className="mr-1 h-4 w-4" />}
-          Save &amp; Restart
-        </Button>
+        {showOllamaSections ? (
+          <Button
+            onClick={() => setConfirmOpen(true)}
+            disabled={!dirty || isPending || disabled}
+          >
+            {isPending ? <Loader2 className="mr-1 h-4 w-4 animate-spin" /> : <Save className="mr-1 h-4 w-4" />}
+            Save &amp; Restart
+          </Button>
+        ) : null}
       </header>
 
       {disabled ? (
@@ -169,13 +178,18 @@ export default function ServerPage() {
       <EmbeddingProviderSection />
 
       {/*
-        Ollama-specific tuning + sidecar status — rendered only when the
-        active provider is ollama. For openai/voyage these sections do
-        not apply: there's no GGUF, no llama-server child to restart,
-        no GPU layers / threads. The provider form above is the only
-        edit surface in that case.
+        Ollama-specific cards — rendered only when the active provider
+        is ollama. For openai/voyage these sections do not apply:
+        there's no GGUF, no llama-server child to restart, no GPU
+        layers / threads, no batch size knob. The provider form above
+        is the only edit surface in that case.
+
+        Concurrency lives inside AdvancedSection together with the
+        ollama-only batch size — for v1 we hide the whole card on
+        remote providers. A follow-up may split concurrency into a
+        provider-agnostic card if operators ask for it.
       */}
-      {activeProvider.data?.kind === 'ollama' ? (
+      {showOllamaSections ? (
         <>
           <EmbeddingModelSection
             config={cfg.data}
@@ -194,17 +208,17 @@ export default function ServerPage() {
           />
 
           <SidecarSection />
+
+          <AdvancedSection
+            config={cfg.data}
+            draftConcurrency={draft.max_embedding_concurrency}
+            draftBatch={draft.llama_batch_size}
+            onDraftConcurrency={(n) => setDraft({ ...draft, max_embedding_concurrency: n })}
+            onDraftBatch={(n) => setDraft({ ...draft, llama_batch_size: n })}
+          />
         </>
       ) : null}
 
-      <AdvancedSection
-        config={cfg.data}
-        draftConcurrency={draft.max_embedding_concurrency}
-        draftBatch={draft.llama_batch_size}
-        onDraftConcurrency={(n) => setDraft({ ...draft, max_embedding_concurrency: n })}
-        onDraftBatch={(n) => setDraft({ ...draft, llama_batch_size: n })}
-      />
-
       <SaveAndRestartDialog
         open={confirmOpen}
         onOpenChange={(next) => (!isPending ? setConfirmOpen(next) : null)}

From 0ad5dc30fe2d0144a3ec5c5a37e7db10d1e727e8 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 14:59:31 +0100
Subject: [PATCH 07/34] feat(embeddings): expose concurrency cap on remote
 providers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Service-level embedding queue depth (max_embedding_concurrency)
applies to every provider — it caps how many /v1/embeddings POSTs
the server runs in parallel. OpenAI and Voyage both accept
concurrent requests natively (subject to their account-level rate
limits). The previous UI hid this setting behind the ollama-only
block.

Backend Service.Restart is now provider-aware. When a remote
(HTTP-only) provider is active, submitting a runtime config change
only rebuilds the queue (if needed) and stores the new config
snapshot; the provider itself stays in place. The previous code
unconditionally called buildOllamaFromConfig, which would have
silently re-spawned llama-server on top of a live voyage/openai
provider — that bug is fixed here.

Dashboard rearrangement:
  - AdvancedSection renamed to Throughput in the UI, always visible.
    Receives isOllama: when false the llama-batch field is hidden;
    concurrency is shown regardless.
  - Save & Restart button on ServerPage stays visible for all
    providers; label switches to "Save" when the active provider
    has no managed sidecar to restart.
  - Header copy updated to explain the new layout.
---
 .../src/modules/server/ServerPage.tsx         | 44 ++++++-----
 .../server/sections/AdvancedSection.tsx       | 75 ++++++++++++-------
 server/internal/embeddings/service.go         | 50 +++++++++++--
 3 files changed, 113 insertions(+), 56 deletions(-)

diff --git a/server/dashboard/src/modules/server/ServerPage.tsx b/server/dashboard/src/modules/server/ServerPage.tsx
index 294bf5e..8346cb5 100644
--- a/server/dashboard/src/modules/server/ServerPage.tsx
+++ b/server/dashboard/src/modules/server/ServerPage.tsx
@@ -148,19 +148,17 @@ export default function ServerPage() {
           <h1 className="text-2xl font-semibold tracking-tight">Server</h1>
           <p className="text-sm text-muted-foreground">
             {showOllamaSections
-              ? 'Embedding provider + model, indexing parameters, sidecar lifecycle. Saved overrides land in the database and are reapplied on the next sidecar restart — env vars stay as bootstrap defaults.'
-              : 'Embedding provider selection. For remote providers (OpenAI-compatible, Voyage) all tuning lives inside the provider form below — there is no sidecar to restart and no GPU / batch knobs.'}
+              ? 'Embedding provider + model, indexing parameters, sidecar lifecycle, throughput. Saved overrides land in the database and are reapplied on the next sidecar restart — env vars stay as bootstrap defaults.'
+              : 'Embedding provider + concurrency. For remote providers (OpenAI-compatible, Voyage) the per-provider form above is the main edit surface; this page also exposes the server-wide concurrency cap that all providers honour.'}
           </p>
         </div>
-        {showOllamaSections ? (
-          <Button
-            onClick={() => setConfirmOpen(true)}
-            disabled={!dirty || isPending || disabled}
-          >
-            {isPending ? <Loader2 className="mr-1 h-4 w-4 animate-spin" /> : <Save className="mr-1 h-4 w-4" />}
-            Save &amp; Restart
-          </Button>
-        ) : null}
+        <Button
+          onClick={() => setConfirmOpen(true)}
+          disabled={!dirty || isPending || disabled}
+        >
+          {isPending ? <Loader2 className="mr-1 h-4 w-4 animate-spin" /> : <Save className="mr-1 h-4 w-4" />}
+          {showOllamaSections ? 'Save & Restart' : 'Save'}
+        </Button>
       </header>
 
       {disabled ? (
@@ -208,17 +206,25 @@ export default function ServerPage() {
           />
 
           <SidecarSection />
-
-          <AdvancedSection
-            config={cfg.data}
-            draftConcurrency={draft.max_embedding_concurrency}
-            draftBatch={draft.llama_batch_size}
-            onDraftConcurrency={(n) => setDraft({ ...draft, max_embedding_concurrency: n })}
-            onDraftBatch={(n) => setDraft({ ...draft, llama_batch_size: n })}
-          />
         </>
       ) : null}
 
+      {/*
+        Throughput / concurrency — always visible. The queue concurrency
+        is the Service-level cap on parallel /v1/embeddings POSTs and
+        applies to every provider (ollama, openai, voyage all accept
+        concurrent requests). The llama batch field inside the card
+        is gated on isOllama.
+      */}
+      <AdvancedSection
+        config={cfg.data}
+        draftConcurrency={draft.max_embedding_concurrency}
+        draftBatch={draft.llama_batch_size}
+        onDraftConcurrency={(n) => setDraft({ ...draft, max_embedding_concurrency: n })}
+        onDraftBatch={(n) => setDraft({ ...draft, llama_batch_size: n })}
+        isOllama={showOllamaSections}
+      />
+
       <SaveAndRestartDialog
         open={confirmOpen}
         onOpenChange={(next) => (!isPending ? setConfirmOpen(next) : null)}
diff --git a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
index 1854ea7..47acd5f 100644
--- a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
+++ b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
@@ -11,6 +11,12 @@ interface Props {
   draftBatch: number;
   onDraftConcurrency: (n: number) => void;
   onDraftBatch: (n: number) => void;
+  // isOllama controls whether the llama-only batch-size field is
+  // rendered. Concurrency (the Service-level queue depth) applies to
+  // every provider — caps how many parallel /v1/embeddings POSTs go
+  // out, which OpenAI / Voyage both honour natively — and is shown
+  // regardless.
+  isOllama: boolean;
 }
 
 // AdvancedSection: throughput-tuning fields most operators won't touch.
@@ -22,6 +28,7 @@ export function AdvancedSection({
   draftBatch,
   onDraftConcurrency,
   onDraftBatch,
+  isOllama,
 }: Props) {
   const concId = useId();
   const batchId = useId();
@@ -31,13 +38,17 @@ export function AdvancedSection({
   return (
     <Card>
       <CardHeader>
-        <CardTitle>Advanced</CardTitle>
-        <CardDescription>Throughput tuning. Leave at recommended unless you have a specific reason.</CardDescription>
+        <CardTitle>Throughput</CardTitle>
+        <CardDescription>
+          Concurrency caps how many <code>/v1/embeddings</code> POSTs the
+          server runs in parallel against the active provider — applies to
+          every backend. Llama batch is sidecar-only.
+        </CardDescription>
       </CardHeader>
       <CardContent>
-        <details className="group">
+        <details className="group" open>
           <summary className="cursor-pointer text-sm font-medium text-muted-foreground hover:text-foreground">
-            Show advanced tunables
+            Show throughput tunables
           </summary>
           <div className="mt-4 space-y-5">
             <div className="space-y-1.5">
@@ -60,35 +71,41 @@ export function AdvancedSection({
                 className="max-w-xs"
               />
               <p className="text-xs text-muted-foreground">
-                Concurrent /v1/embeddings calls allowed against the sidecar. 1 = strictly sequential.
-                Recommended: <code>{rec?.max_embedding_concurrency ?? 1}</code>.
+                Maximum in-flight embed requests across the whole server.
+                1 = strictly sequential. OpenAI and Voyage both accept
+                concurrent requests, but their account-level rate limits
+                still apply — start low (e.g. 2) and raise it if you
+                don't see 429s. Recommended:{' '}
+                <code>{rec?.max_embedding_concurrency ?? 1}</code>.
               </p>
             </div>
 
-            <div className="space-y-1.5">
-              <div className="flex items-center justify-between gap-2">
-                <Label htmlFor={batchId} className="font-medium">
-                  Llama batch size
-                  <span className="ml-2 font-normal text-muted-foreground text-xs">(llama_batch_size, -b)</span>
-                </Label>
-                <SourcePill source={src?.llama_batch_size} />
+            {isOllama ? (
+              <div className="space-y-1.5">
+                <div className="flex items-center justify-between gap-2">
+                  <Label htmlFor={batchId} className="font-medium">
+                    Llama batch size
+                    <span className="ml-2 font-normal text-muted-foreground text-xs">(llama_batch_size, -b)</span>
+                  </Label>
+                  <SourcePill source={src?.llama_batch_size} />
+                </div>
+                <Input
+                  id={batchId}
+                  type="number"
+                  min={1}
+                  value={Number.isFinite(draftBatch) ? draftBatch : 0}
+                  onChange={(e) => {
+                    const n = parseInt(e.target.value, 10);
+                    onDraftBatch(Number.isFinite(n) ? n : 0);
+                  }}
+                  className="max-w-xs"
+                />
+                <p className="text-xs text-muted-foreground">
+                  Logical batch passed to llama-server (-b). 0 = match context window.
+                  Recommended: <code>{rec?.llama_batch_size ?? 'ctx'}</code>.
+                </p>
               </div>
-              <Input
-                id={batchId}
-                type="number"
-                min={1}
-                value={Number.isFinite(draftBatch) ? draftBatch : 0}
-                onChange={(e) => {
-                  const n = parseInt(e.target.value, 10);
-                  onDraftBatch(Number.isFinite(n) ? n : 0);
-                }}
-                className="max-w-xs"
-              />
-              <p className="text-xs text-muted-foreground">
-                Logical batch passed to llama-server (-b). 0 = match context window.
-                Recommended: <code>{rec?.llama_batch_size ?? 'ctx'}</code>.
-              </p>
-            </div>
+            ) : null}
           </div>
         </details>
       </CardContent>
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index 27a5732..a63e414 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -303,10 +303,22 @@ func (s *Service) EmbeddingModel() string {
 	return cur.ID()
 }
 
-// Restart preserves the legacy admin /sidecar/restart contract: drain
-// the queue, swap in a freshly-built provider with the supplied cfg,
-// Start it. Currently only supports the ollama provider; openai/voyage
-// callers use SwitchProvider directly.
+// Restart applies runtime-config changes to the live Service.
+//
+// Provider-aware:
+//   - When the active provider is ollama, this is the legacy
+//     "respawn the sidecar with new flags" path: drain queue, build
+//     a new ollama provider with cfg, stop the old child, start the
+//     new one.
+//   - When the active provider is HTTP-only (openai / voyage), there
+//     is no sidecar to respawn. The only runtime-config field that
+//     still applies is max_embedding_concurrency (the Service-level
+//     queue depth). We rebuild the queue if it changed and leave the
+//     provider untouched.
+//
+// SwitchProvider is the right call for swapping the active provider
+// itself; Restart only touches knobs that the runtime_config row
+// owns.
 func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 	if s == nil || s.disabled {
 		return ErrDisabled
@@ -328,14 +340,36 @@ func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 		s.queue = NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second)
 	}
 
+	// Snapshot the live provider's kind under the read lock — we don't
+	// want to swap an ollama for a voyage just because the runtime-
+	// config form was submitted.
+	s.mu.RLock()
+	curKind := ""
+	if s.current != nil {
+		curKind = s.current.Kind()
+	}
+	s.mu.RUnlock()
+
+	if curKind != provider.KindOllama {
+		// HTTP-only provider: queue (re)built above is all there is to
+		// do. The cfg blob is persisted by the caller; we just stash
+		// the new *config.Config snapshot so subsequent /status / cfg
+		// reads return the live values.
+		s.mu.Lock()
+		s.cfg = cfg
+		s.mu.Unlock()
+		s.logger.Info("embeddings: restart applied to remote provider (queue only)",
+			"kind", curKind, "concurrency", cfg.MaxEmbeddingConcurrency,
+		)
+		return nil
+	}
+
+	// Ollama path: rebuild + respawn the sidecar with the supplied
+	// llama tuning fields.
 	newProv, err := buildOllamaFromConfig(cfg, s.logger)
 	if err != nil {
 		return fmt.Errorf("rebuild ollama provider: %w", err)
 	}
-
-	// Stop the old, start the new. On Start failure leave current==nil
-	// so subsequent calls fail fast with ErrSupervisor — the operator
-	// then re-Restart with corrected config.
 	s.mu.Lock()
 	old := s.current
 	s.current = nil

From 5c75d5f1914051c0b5e522e99ccd1d1fa0a39cca Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 15:06:21 +0100
Subject: [PATCH 08/34] docs(dashboard): clarify batch vs concurrency in
 Throughput card
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The indexer already sends all chunks of one file as a batched POST
(input: [chunk1, chunk2, ...]); concurrency caps how many such
batched POSTs run in parallel. The previous copy only mentioned
"parallel" without explaining that batching is the inner loop,
which caused confusion when picking values.

Also call out the unsplit Voyage limits (voyage-code-3 = 128
inputs per request) so operators know to keep files under that
threshold until the per-provider auto-split lands.
---
 .../server/sections/AdvancedSection.tsx       | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
index 47acd5f..7169e12 100644
--- a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
+++ b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
@@ -40,9 +40,10 @@ export function AdvancedSection({
       <CardHeader>
         <CardTitle>Throughput</CardTitle>
         <CardDescription>
-          Concurrency caps how many <code>/v1/embeddings</code> POSTs the
-          server runs in parallel against the active provider — applies to
-          every backend. Llama batch is sidecar-only.
+          The indexer sends all chunks of one file in a single batched POST
+          (<code>{'{"input": [chunk1, chunk2, ...]}'}</code>). Concurrency
+          here caps how many such batched POSTs run in parallel — applies
+          to every backend. Llama batch (below) is sidecar-only.
         </CardDescription>
       </CardHeader>
       <CardContent>
@@ -71,11 +72,16 @@ export function AdvancedSection({
                 className="max-w-xs"
               />
               <p className="text-xs text-muted-foreground">
-                Maximum in-flight embed requests across the whole server.
-                1 = strictly sequential. OpenAI and Voyage both accept
-                concurrent requests, but their account-level rate limits
-                still apply — start low (e.g. 2) and raise it if you
-                don't see 429s. Recommended:{' '}
+                Maximum batched <code>/v1/embeddings</code> POSTs in flight
+                across the whole server (each POST already carries one
+                file's chunks as a batch). 1 = strictly sequential. OpenAI
+                and Voyage both accept concurrent requests, but their
+                account-level rate limits still apply — start low (e.g. 2)
+                and raise it if you don't see 429s. Voyage per-request
+                batch limits (<code>voyage-code-3</code> = 128 inputs,
+                <code>voyage-3*</code> = 1000) are not split automatically
+                yet — keep files under that limit or expect 422s.
+                Recommended:{' '}
                 <code>{rec?.max_embedding_concurrency ?? 1}</code>.
               </p>
             </div>

From 5d05f991f40ad8aa9de6fe42e523c735700b277f Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 15:10:40 +0100
Subject: [PATCH 09/34] feat(embeddings): auto-split oversize batches in HTTP
 providers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Voyage caps inputs per /v1/embeddings POST at 128 for voyage-code-*
models (1000 for voyage-3*). OpenAI proper caps at 2048. The
previous implementation forwarded the caller's slice as-is, so a
single file with 200 chunks would 422 against voyage-code-3.

Both HTTP providers now slice oversized inputs into sequential
sub-batches inside EmbedDocuments under the same Service queue slot
(no extra concurrency consumed) and concatenate results in input
order. The split is transparent to callers — the indexer keeps
sending one batched POST per file and providers handle the rest.

Tests assert two POSTs for a 200-item Voyage call (128 + 72) and
a 3000-item OpenAI call (2048 + 952).

Dashboard copy updated: throughput card no longer warns about
unsplit limits since the safety net is in place.
---
 .../server/sections/AdvancedSection.tsx       |  9 ++--
 .../embeddings/provider/openai/openai.go      | 25 ++++++++-
 .../embeddings/provider/openai/openai_test.go | 44 ++++++++++++++++
 .../embeddings/provider/voyage/voyage.go      | 30 ++++++++++-
 .../embeddings/provider/voyage/voyage_test.go | 51 +++++++++++++++++++
 5 files changed, 152 insertions(+), 7 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
index 7169e12..7a7932d 100644
--- a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
+++ b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
@@ -77,11 +77,10 @@ export function AdvancedSection({
                 file's chunks as a batch). 1 = strictly sequential. OpenAI
                 and Voyage both accept concurrent requests, but their
                 account-level rate limits still apply — start low (e.g. 2)
-                and raise it if you don't see 429s. Voyage per-request
-                batch limits (<code>voyage-code-3</code> = 128 inputs,
-                <code>voyage-3*</code> = 1000) are not split automatically
-                yet — keep files under that limit or expect 422s.
-                Recommended:{' '}
+                and raise it if you don't see 429s. Per-request batch
+                limits (Voyage <code>voyage-code-*</code> = 128, OpenAI
+                = 2048) are split server-side under one queue slot, so
+                oversized files are safe. Recommended:{' '}
                 <code>{rec?.max_embedding_concurrency ?? 1}</code>.
               </p>
             </div>
diff --git a/server/internal/embeddings/provider/openai/openai.go b/server/internal/embeddings/provider/openai/openai.go
index a213c98..e968750 100644
--- a/server/internal/embeddings/provider/openai/openai.go
+++ b/server/internal/embeddings/provider/openai/openai.go
@@ -28,6 +28,14 @@ type Config struct {
 	Dimensions int    `json:"dimensions,omitempty"`
 }
 
+// maxBatchSize caps how many inputs we send in a single
+// /v1/embeddings POST. OpenAI proper accepts up to 2048 inputs
+// per request for text-embedding-3-*; self-hosted clones (vLLM,
+// TEI, LocalAI) may be tighter but rarely lower than that. The
+// split is transparent to callers — same queue slot, sequential
+// sub-batches.
+const maxBatchSize = 2048
+
 // Provider is the openai-compatible HTTP client wrapped behind the
 // provider.Provider interface.
 type Provider struct {
@@ -132,7 +140,22 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	return p.embed(ctx, texts)
+	if len(texts) <= maxBatchSize {
+		return p.embed(ctx, texts)
+	}
+	out := make([][]float32, 0, len(texts))
+	for i := 0; i < len(texts); i += maxBatchSize {
+		end := i + maxBatchSize
+		if end > len(texts) {
+			end = len(texts)
+		}
+		part, err := p.embed(ctx, texts[i:end])
+		if err != nil {
+			return nil, fmt.Errorf("openai: sub-batch [%d:%d]: %w", i, end, err)
+		}
+		out = append(out, part...)
+	}
+	return out, nil
 }
 
 // TokenizeAndEmbed falls back to EmbedDocuments — generic openai-style
diff --git a/server/internal/embeddings/provider/openai/openai_test.go b/server/internal/embeddings/provider/openai/openai_test.go
index b1d7d4a..27a7efb 100644
--- a/server/internal/embeddings/provider/openai/openai_test.go
+++ b/server/internal/embeddings/provider/openai/openai_test.go
@@ -136,6 +136,50 @@ func TestIDFingerprint(t *testing.T) {
 	}
 }
 
+// TestEmbedDocumentsSplitsOversizeBatch covers the transparent
+// per-provider split: OpenAI proper accepts up to 2048 inputs per
+// /v1/embeddings POST. A 3000-item EmbedDocuments call must produce
+// TWO POSTs (2048 + 952) and return all 3000 vectors in input order.
+func TestEmbedDocumentsSplitsOversizeBatch(t *testing.T) {
+	posts := 0
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		posts++
+		raw, _ := io.ReadAll(r.Body)
+		var req embedRequest
+		_ = json.Unmarshal(raw, &req)
+		if len(req.Input) > 2048 {
+			t.Errorf("POST #%d carried %d inputs, expected <= 2048", posts, len(req.Input))
+		}
+		items := make([]map[string]any, len(req.Input))
+		for i := range req.Input {
+			items[i] = map[string]any{"index": i, "embedding": []float32{float32(i)}}
+		}
+		body, _ := json.Marshal(map[string]any{"data": items})
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write(body)
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, Model: "text-embedding-3-small", APIKeyEnv: "K",
+	}, fixedSecrets("K", "v"), nil)
+
+	texts := make([]string, 3000)
+	for i := range texts {
+		texts[i] = "chunk"
+	}
+	vecs, err := p.EmbedDocuments(context.Background(), texts)
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if got := len(vecs); got != 3000 {
+		t.Fatalf("got %d vectors, want 3000", got)
+	}
+	if posts != 2 {
+		t.Errorf("expected 2 POSTs (2048 + 952), got %d", posts)
+	}
+}
+
 func TestEmbedDocumentsSendsDimensions(t *testing.T) {
 	srv, gotBody := stubServer(t, http.StatusOK, `{
 		"data": [{"index": 0, "embedding": [0.1]}]
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index 1d0e276..19d6f07 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -42,6 +42,16 @@ const (
 	DtypeInt8  = "int8"
 )
 
+// maxBatchSize caps how many inputs we send in a single
+// /v1/embeddings POST. Voyage's per-request limits depend on the
+// model — voyage-code-3 and voyage-code-2 cap at 128; voyage-3*
+// models accept up to 1000. We pick the conservative floor so a
+// single constant works across all supported models. EmbedDocuments
+// transparently splits oversize inputs into sequential sub-batches
+// under the same queue slot so the caller never sees 422 Request
+// Too Large from a large file's chunks.
+const maxBatchSize = 128
+
 // Config is the persisted shape of the voyage provider's config blob.
 type Config struct {
 	BaseURL         string `json:"base_url,omitempty"`
@@ -150,7 +160,25 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	return p.embed(ctx, texts, "document")
+	if len(texts) <= maxBatchSize {
+		return p.embed(ctx, texts, "document")
+	}
+	// Oversize input — split into sequential sub-batches. The Service
+	// queue holds a single slot for the whole call, so concurrency
+	// semantics are preserved (no extra slots consumed).
+	out := make([][]float32, 0, len(texts))
+	for i := 0; i < len(texts); i += maxBatchSize {
+		end := i + maxBatchSize
+		if end > len(texts) {
+			end = len(texts)
+		}
+		part, err := p.embed(ctx, texts[i:end], "document")
+		if err != nil {
+			return nil, fmt.Errorf("voyage: sub-batch [%d:%d]: %w", i, end, err)
+		}
+		out = append(out, part...)
+	}
+	return out, nil
 }
 
 func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 40949b1..bb319a7 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -117,6 +117,57 @@ func TestIDFingerprintIncludesAll(t *testing.T) {
 	}
 }
 
+// TestEmbedDocumentsSplitsOversizeBatch covers the transparent
+// per-provider split: Voyage's voyage-code-* models cap at 128
+// inputs/request, so a 200-item EmbedDocuments call must produce
+// TWO POSTs (128 + 72) and return all 200 vectors in input order.
+func TestEmbedDocumentsSplitsOversizeBatch(t *testing.T) {
+	posts := 0
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		posts++
+		// Echo back as many embeddings as the request contained so
+		// the caller's input ↔ vector mapping is verifiable.
+		raw, _ := io.ReadAll(r.Body)
+		var req embedRequest
+		_ = json.Unmarshal(raw, &req)
+		if len(req.Input) > 128 {
+			t.Errorf("POST #%d carried %d inputs, expected <= 128", posts, len(req.Input))
+		}
+		items := make([]map[string]any, len(req.Input))
+		for i := range req.Input {
+			items[i] = map[string]any{"index": i, "embedding": []float32{float32(i)}}
+		}
+		body, _ := json.Marshal(map[string]any{
+			"data":  items,
+			"model": req.Model,
+			"usage": map[string]int{"total_tokens": 1},
+		})
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write(body)
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3",
+		OutputDimension: 0, OutputDtype: DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+
+	texts := make([]string, 200)
+	for i := range texts {
+		texts[i] = "chunk"
+	}
+	vecs, err := p.EmbedDocuments(context.Background(), texts)
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if got := len(vecs); got != 200 {
+		t.Fatalf("got %d vectors, want 200", got)
+	}
+	if posts != 2 {
+		t.Errorf("expected 2 POSTs (128 + 72), got %d", posts)
+	}
+}
+
 func TestUsageDecodesWithoutPromptTokens(t *testing.T) {
 	// Voyage's usage object lacks prompt_tokens — make sure decode doesn't error.
 	srv, _ := stubServer(t, http.StatusOK, `{

From d9b5d8ef1df8e5521672d269c847fe48193ee43b Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 15:21:53 +0100
Subject: [PATCH 10/34] docs(dashboard): document upstream rate-limit handling
 per provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an informational banner to the Voyage form pointing operators
to the Voyage dashboard (to add a payment method) and explaining
the free-tier 3 RPM / 10K TPM ceiling — enough for a smoke test,
but the indexer will saturate it on any real repo. Suggest
concurrency=1 as the workaround for free-tier users.

Add a one-paragraph note to the OpenAI form about the current
no-retry policy: 429s are surfaced as-is; users should lower
concurrency or upgrade their account tier. Self-hosted servers
typically don't need this.

Decision (per discussion): no client-side retry / token-bucket
implementation. Industry tools like LangChain do retry, but against
hard tier limits (3 RPM) retrying buys very little — files that fail
are simply left for the next clone job, which retries idempotently.
Keeping the provider HTTP layer simple was preferred.
---
 .../sections/providers/OpenAIProviderForm.tsx |  8 +++++++
 .../sections/providers/VoyageProviderForm.tsx | 24 ++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
index 6c34189..12ff44a 100644
--- a/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
+++ b/server/dashboard/src/modules/server/sections/providers/OpenAIProviderForm.tsx
@@ -119,6 +119,14 @@ export function OpenAIProviderForm({ value, onChange, secretEnvs }: Props) {
           </AlertDescription>
         </Alert>
       ) : null}
+
+      <p className="text-xs text-muted-foreground">
+        Rate-limit handling: the server forwards the upstream HTTP status
+        as-is — there is no retry-with-backoff yet. If you hit 429s, lower
+        the concurrency in the Throughput card below or pick an account
+        tier with higher RPM. Self-hosted servers (vLLM, TEI, LocalAI)
+        typically don't rate-limit at all.
+      </p>
     </div>
   );
 }
diff --git a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
index e0ad2e2..91ad140 100644
--- a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
+++ b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
@@ -1,4 +1,4 @@
-import { AlertTriangle } from 'lucide-react';
+import { AlertTriangle, Info } from 'lucide-react';
 import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
 import { Input } from '@/ui/input';
 import { Label } from '@/ui/label';
@@ -45,6 +45,28 @@ export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
 
   return (
     <div className="space-y-4">
+      <Alert>
+        <Info className="h-4 w-4" />
+        <AlertTitle>Rate limits</AlertTitle>
+        <AlertDescription>
+          Voyage's free tier is capped at <strong>3 requests/minute</strong> and
+          10K tokens/minute — usable for a smoke test, but the indexer will
+          burst past it on any real repo and start returning 429s. For real
+          usage{' '}
+          <a
+            href="https://dashboard.voyageai.com/"
+            target="_blank"
+            rel="noreferrer noopener"
+            className="underline"
+          >
+            add a payment method
+          </a>{' '}
+          on the Voyage dashboard. On the free tier you can still index by
+          setting <strong>concurrency = 1</strong> in the Throughput card
+          below and accepting roughly 3 files/minute throughput.
+        </AlertDescription>
+      </Alert>
+
       <div className="space-y-1.5">
         <Label htmlFor="voyage-model">Model</Label>
         <select

From 4f667582ba2c6c1d5cd0f828ae6f1d363c2d914f Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 15:23:21 +0100
Subject: [PATCH 11/34] docs(dashboard): cost / rate-limit callout above
 provider dropdown

Surface the per-provider cost expectation before the admin picks
anything. The callout sits inside EmbeddingProviderSection and is
visible regardless of which provider is currently active or
selected in the dropdown:

- Ollama: free, local.
- OpenAI-compatible: paid on api.openai.com, free on self-hosted.
- Voyage: paid plan strongly recommended (free tier 3 RPM = unusable).

Dropdown option labels also annotated with (free) / (paid) so the
expectation is visible even when the callout is scrolled past.
---
 .../sections/EmbeddingProviderSection.tsx     | 50 +++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
index b4ed212..aa0e20a 100644
--- a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
+++ b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
@@ -1,5 +1,5 @@
 import { useEffect, useMemo, useState } from 'react';
-import { AlertCircle, CheckCircle2, Loader2, Save } from 'lucide-react';
+import { AlertCircle, CheckCircle2, Info, Loader2, Save } from 'lucide-react';
 import { toast } from 'sonner';
 import { ApiError } from '@/api/client';
 import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
@@ -176,6 +176,48 @@ export function EmbeddingProviderSection() {
         </CardDescription>
       </CardHeader>
       <CardContent className="space-y-5">
+        <Alert>
+          <Info className="h-4 w-4" />
+          <AlertTitle>Cost & rate limits — read before picking</AlertTitle>
+          <AlertDescription>
+            <ul className="ml-4 mt-1 list-disc space-y-1 text-sm">
+              <li>
+                <strong>Ollama</strong> — free, runs the llama-server sidecar
+                locally on this machine's CPU/GPU. No external API, no rate
+                limits, no API keys.
+              </li>
+              <li>
+                <strong>OpenAI-compatible</strong> — pay-as-you-go on{' '}
+                <a
+                  href="https://platform.openai.com/account/billing"
+                  target="_blank"
+                  rel="noreferrer noopener"
+                  className="underline"
+                >
+                  api.openai.com
+                </a>{' '}
+                (account billing required) or free against your own
+                self-hosted vLLM / TEI / LocalAI instance.
+              </li>
+              <li>
+                <strong>Voyage AI</strong> — paid plan strongly recommended.
+                The{' '}
+                <a
+                  href="https://dashboard.voyageai.com/"
+                  target="_blank"
+                  rel="noreferrer noopener"
+                  className="underline"
+                >
+                  free tier
+                </a>{' '}
+                is capped at 3 RPM / 10K TPM — fine for a smoke test, not
+                usable for indexing a real repo. Add a payment method
+                before pointing the indexer at it.
+              </li>
+            </ul>
+          </AlertDescription>
+        </Alert>
+
         <div className="space-y-1.5">
           <Label htmlFor="provider-kind">Provider</Label>
           <select
@@ -184,9 +226,9 @@ export function EmbeddingProviderSection() {
             onChange={(e) => setDraftKind(e.target.value as EmbeddingProviderKind)}
             className="block w-full rounded-md border bg-background px-3 py-2 text-sm sm:max-w-xs"
           >
-            <option value="ollama">Ollama sidecar (local llama-server)</option>
-            <option value="openai">OpenAI-compatible (/v1/embeddings)</option>
-            <option value="voyage">Voyage AI</option>
+            <option value="ollama">Ollama sidecar (local llama-server, free)</option>
+            <option value="openai">OpenAI-compatible (/v1/embeddings, paid)</option>
+            <option value="voyage">Voyage AI (paid plan recommended)</option>
           </select>
         </div>
 

From 2febfdd5c2cabfe17acdba062371dc9c6830425a Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 17:31:43 +0100
Subject: [PATCH 12/34] fix(embeddings): switch back to ollama from remote
 provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EmbeddingProviderSection dropdown allowed picking "ollama" while
a remote provider was active, but the Save button was conditionally
hidden (draftKind !== 'ollama') so the switch never actually fired.
Even if it had, the backend SwitchEmbeddingProvider rejected empty
or {} configs — and the ollama-specific fields (GGUF model, ctx,
GPU layers, sidecar paths) aren't part of this card's form to begin
with, so the dashboard had nothing valid to send.

Fix on both sides:

  Backend admin_embeddings.go: when kind == "ollama" and the
  submitted config is empty / {} / null, synthesize the full config
  from the live runtime-cfg snapshot applied on top of the env
  defaults via BuildOllamaConfigFromEnv. This is the same path the
  bootstrap seed in main.go uses, so the resulting ollama provider
  config is identical to what a fresh install would have.

  Dashboard EmbeddingProviderSection.tsx: always send {} for
  ollama-switches and always show the Save button (label switches
  to "Save & switch to {kind}" when the kind has changed). Skip
  the /test pre-check for ollama since {} would fail factory
  validation. When the kind has changed, the ollama hint above the
  button now explains that switching back will restart the sidecar
  with the current runtime config + trigger a full reindex per
  project.
---
 .../sections/EmbeddingProviderSection.tsx     | 78 ++++++++++++-------
 server/internal/httpapi/admin_embeddings.go   | 40 +++++++++-
 2 files changed, 86 insertions(+), 32 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
index aa0e20a..fdfff1c 100644
--- a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
+++ b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
@@ -106,6 +106,11 @@ export function EmbeddingProviderSection() {
   }
 
   // Build the current draft config blob for the selected kind.
+  // For ollama we always send an empty object — the backend's
+  // SwitchEmbeddingProvider handler synthesizes a complete ollama
+  // config from runtime-cfg + env on receipt, because the
+  // ollama-specific tuning fields (GGUF model, ctx, GPU layers,
+  // sidecar paths) are not part of this card's form.
   const draftConfig: Record<string, unknown> = (() => {
     switch (draftKind) {
       case 'openai':
@@ -113,11 +118,7 @@ export function EmbeddingProviderSection() {
       case 'voyage':
         return { ...voyageDraft };
       case 'ollama':
-        // Ollama keeps using the existing per-field sections — for
-        // this composite section, switching TO ollama submits the
-        // already-persisted blob unchanged. The admin tunes ollama
-        // knobs via the sections below.
-        return (active.data?.kind === 'ollama' && active.data.config) || {};
+        return {};
     }
   })();
 
@@ -136,9 +137,12 @@ export function EmbeddingProviderSection() {
   })();
 
   const canSave = localValid && allEnvsSet && !switchMut.isPending && !test.isPending;
-  const dirty = draftKind !== active.data?.kind || (() => {
-    // Compare the JSON blob shallowly so we know whether anything
-    // changed in the per-kind form.
+  // Dirty when the kind has changed; for remote providers also dirty
+  // when the per-kind form differs from what's persisted. Ollama-
+  // is-ollama is never dirty (form has no editable fields here —
+  // those live in the sections below).
+  const kindChanged = draftKind !== active.data?.kind;
+  const dirty = kindChanged || (() => {
     if (draftKind === 'ollama') return false;
     const a = JSON.stringify(active.data?.config ?? {});
     const b = JSON.stringify(draftConfig);
@@ -147,7 +151,15 @@ export function EmbeddingProviderSection() {
 
   async function onSave() {
     try {
-      await test.mutateAsync(draftConfig);
+      // Skip the /test pre-check when switching to ollama — the
+      // backend builds the full config from runtime-cfg + env on
+      // receipt, so the client's empty {} can't be tested as-is
+      // (would fail factory validation: model is required).
+      // Ollama config correctness will be exercised by Start()
+      // inside SwitchProvider anyway.
+      if (draftKind !== 'ollama') {
+        await test.mutateAsync(draftConfig);
+      }
       await switchMut.mutateAsync({ kind: draftKind, config: draftConfig });
       toast.success(`Switched to ${draftKind}`, {
         description: 'Every project will get a Stale-model badge until reindex.',
@@ -248,28 +260,38 @@ export function EmbeddingProviderSection() {
         ) : null}
         {draftKind === 'ollama' ? (
           <div className="rounded-md border border-dashed bg-muted/30 p-3 text-xs text-muted-foreground">
-            Ollama tuning (model picker, ctx, GPU layers, sidecar status) is
-            configured in the sections below.
+            {kindChanged ? (
+              <>
+                Switching back to Ollama will restart the llama-server
+                sidecar with the current model + tuning from the runtime
+                config (see the sections below). After the switch, every
+                project will need to be reindexed (full reindex on the
+                next clone job).
+              </>
+            ) : (
+              <>
+                Ollama tuning (model picker, ctx, GPU layers, sidecar
+                status) is configured in the sections below.
+              </>
+            )}
           </div>
         ) : null}
 
-        {draftKind !== 'ollama' ? (
-          <div className="flex items-center gap-2 pt-2">
-            <Button onClick={onSave} disabled={!canSave || !dirty}>
-              {switchMut.isPending || test.isPending ? (
-                <Loader2 className="mr-1 h-4 w-4 animate-spin" />
-              ) : (
-                <Save className="mr-1 h-4 w-4" />
-              )}
-              Save &amp; switch
-            </Button>
-            {test.isSuccess && !switchMut.isPending ? (
-              <span className="flex items-center gap-1 text-xs text-emerald-700">
-                <CheckCircle2 className="h-3 w-3" /> Last test ok
-              </span>
-            ) : null}
-          </div>
-        ) : null}
+        <div className="flex items-center gap-2 pt-2">
+          <Button onClick={onSave} disabled={!canSave || !dirty}>
+            {switchMut.isPending || test.isPending ? (
+              <Loader2 className="mr-1 h-4 w-4 animate-spin" />
+            ) : (
+              <Save className="mr-1 h-4 w-4" />
+            )}
+            {kindChanged ? `Save & switch to ${draftKind}` : 'Save & switch'}
+          </Button>
+          {test.isSuccess && !switchMut.isPending && draftKind !== 'ollama' ? (
+            <span className="flex items-center gap-1 text-xs text-emerald-700">
+              <CheckCircle2 className="h-3 w-3" /> Last test ok
+            </span>
+          ) : null}
+        </div>
       </CardContent>
     </Card>
   );
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index fd7d795..981b31c 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -162,7 +162,39 @@ func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request)
 		writeError(w, http.StatusBadRequest, "unknown provider kind: "+req.Kind)
 		return
 	}
-	if len(req.Config) == 0 {
+
+	// Special case for ollama: the per-kind form in the dashboard
+	// does NOT carry ollama tuning fields (those live in the
+	// runtime-config sections + env). When the admin switches back
+	// to ollama from a remote provider, the dashboard sends an empty
+	// blob; we synthesize the config here from the live runtime-cfg
+	// snapshot applied to the env-derived defaults so the next
+	// Start() has everything it needs (model, GGUF cache dir, llama
+	// bin dir, transport, …).
+	cfgBytes := req.Config
+	if req.Kind == provider.KindOllama && (len(cfgBytes) == 0 || string(cfgBytes) == "{}" || string(cfgBytes) == "null") {
+		envCfg := embedSvc.Config()
+		if envCfg == nil {
+			writeError(w, http.StatusInternalServerError, "ollama config: live cfg unavailable")
+			return
+		}
+		// Merge the latest runtime-cfg overrides on top of env so a
+		// recent PUT /admin/runtime-config (which doesn't auto-apply
+		// while a remote provider is active) takes effect on switch.
+		if s.Deps.RuntimeCfg != nil {
+			snap, snapErr := s.Deps.RuntimeCfg.Get(r.Context())
+			if snapErr == nil {
+				snap.ApplyTo(envCfg)
+			}
+		}
+		built, buildErr := embeddings.BuildOllamaConfigFromEnv(envCfg)
+		if buildErr != nil {
+			writeError(w, http.StatusInternalServerError, "build ollama config: "+buildErr.Error())
+			return
+		}
+		cfgBytes = built
+	}
+	if len(cfgBytes) == 0 {
 		writeError(w, http.StatusBadRequest, "config is required")
 		return
 	}
@@ -172,20 +204,20 @@ func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request)
 	// container restart) reads the new row and tries again.
 	if err := s.Deps.EmbeddingsCfg.Save(r.Context(), embeddingscfg.Snapshot{
 		Kind:   req.Kind,
-		Config: req.Config,
+		Config: cfgBytes,
 	}, user.User.ID); err != nil {
 		writeError(w, http.StatusInternalServerError, "persist provider: "+err.Error())
 		return
 	}
 
-	if err := embedSvc.SwitchProvider(r.Context(), req.Kind, req.Config); err != nil {
+	if err := embedSvc.SwitchProvider(r.Context(), req.Kind, cfgBytes); err != nil {
 		writeError(w, http.StatusInternalServerError, "switch provider: "+err.Error())
 		return
 	}
 
 	writeJSON(w, http.StatusAccepted, activeProviderPayload{
 		Kind:   req.Kind,
-		Config: req.Config,
+		Config: cfgBytes,
 		ID:     embedSvc.EmbeddingModel(),
 	})
 }

From 5ad3e58c83eb2bdbb346ef1520fca7a4e9f41fee Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 26 May 2026 18:04:44 +0100
Subject: [PATCH 13/34] fix(embeddings): write Provider.ID() to
 indexed_with_model + migrate legacy rows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two coupled bugs were causing every project to show a "stale model"
badge forever even after reindex:

1. The indexer wrote a BARE model name to projects.indexed_with_model
   at FinishIndexing because Service.embeddingModel was set from
   cfg.EmbeddingModel ONCE at boot. The drift detector and dashboard
   on the read side were comparing against the live Provider.ID()
   which is now PREFIXED ("ollama:<model>") after the pluggable-
   provider refactor — so writes never matched reads, and reindex
   couldn't clear the badge.

2. Pre-existing rows (projects indexed under the old code path) hold
   the bare model name. Without backfill they'd stay marked stale
   even after the indexer fix, because their own row never matches
   the prefixed live ID until the next reindex.

Fixes:

Indexer:
  - Add SetEmbeddingModelLookup(func() string) — binds the indexer
    to a live function (typically embeddings.Service.EmbeddingModel).
  - FinishIndexing now writes Service.EmbeddingModel() which prefers
    the live lookup, so a runtime provider switch (PUT /admin/
    embedding-providers/active) takes effect on the next reindex
    without a process restart.
  - Static SetEmbeddingModel(string) kept for tests that don't wire
    a live Service.

main.go: wire idx.SetEmbeddingModelLookup(embedSvc.EmbeddingModel).

Migration 13 (indexed_with_model_provider_prefix): one-time UPDATE
that prepends "ollama:" to every non-NULL, non-empty
projects.indexed_with_model value that contains no ":" anywhere.
Safe because pre-refactor there was no non-ollama backend.
Idempotent (rows containing ":" are skipped).

Tested: TestMigrate_IndexedWithModelProviderPrefix covers the three
expected cases — bare gets prefixed, already-prefixed left alone,
NULL left alone.
---
 server/cmd/cix-server/main.go      | 12 +++--
 server/internal/db/db.go           | 33 ++++++++++++
 server/internal/db/db_test.go      | 85 ++++++++++++++++++++++++++++++
 server/internal/indexer/indexer.go | 43 ++++++++++++---
 4 files changed, 163 insertions(+), 10 deletions(-)

diff --git a/server/cmd/cix-server/main.go b/server/cmd/cix-server/main.go
index 06c0f40..bb18a6b 100644
--- a/server/cmd/cix-server/main.go
+++ b/server/cmd/cix-server/main.go
@@ -250,9 +250,15 @@ func run() error {
 
 	idx := indexer.New(database, vs, embedSvc, logger)
 	idx.SetEmbedIncludePath(cfg.EmbedIncludePath)
-	// PR-E — record the active embedding model on every indexed project so the
-	// dashboard can highlight stale vectors when the runtime model changes.
-	idx.SetEmbeddingModel(cfg.EmbeddingModel)
+	// Record the active embedding model on every indexed project so the
+	// dashboard can highlight stale vectors when the runtime provider /
+	// model changes. Wire it as a live lookup so a runtime provider
+	// switch (PUT /admin/embedding-providers/active) is reflected in the
+	// next FinishIndexing without a process restart — the indexer reads
+	// embedSvc.EmbeddingModel() at write time and that returns the active
+	// Provider.ID() ("ollama:<model>" / "voyage:..."), matching what the
+	// drift-detector and dashboard compare against.
+	idx.SetEmbeddingModelLookup(embedSvc.EmbeddingModel)
 	if cfg.EmbedIncludePath {
 		logger.Info("embedding format: path-aware preamble enabled (CIX_EMBED_INCLUDE_PATH=true) — full reindex required if upgrading")
 	}
diff --git a/server/internal/db/db.go b/server/internal/db/db.go
index ac443a2..17aea33 100644
--- a/server/internal/db/db.go
+++ b/server/internal/db/db.go
@@ -65,6 +65,7 @@ var registeredMigrations = []migration{
 	{10, "auth_groups_ownership", func(db *sql.DB, _ OpenOptions) error { return migrateAuthGroupsOwnership(db) }},
 	{11, "project_machine_identity", func(db *sql.DB, _ OpenOptions) error { return migrateProjectMachineIdentity(db) }},
 	{12, "embedding_provider", func(db *sql.DB, _ OpenOptions) error { return migrateEmbeddingProvider(db) }},
+	{13, "indexed_with_model_provider_prefix", func(db *sql.DB, _ OpenOptions) error { return migrateIndexedWithModelProviderPrefix(db) }},
 }
 
 // DriverName is the registered database/sql driver name for modernc.org/sqlite.
@@ -763,6 +764,38 @@ func migrateEmbeddingProvider(db *sql.DB) error {
 	return nil
 }
 
+// migrateIndexedWithModelProviderPrefix backfills projects indexed
+// before the pluggable-provider refactor (migration 12). Pre-refactor
+// the indexer wrote a bare model name like
+// "awhiteside/CodeRankEmbed-Q8_0-GGUF"; post-refactor it writes the
+// fully-qualified Provider.ID() of the form "ollama:<model>". Without
+// this migration every legacy project would show a "stale model"
+// badge forever because the bare string never matches the live
+// "ollama:<model>" and a reindex would *still* write the new prefixed
+// form — leaving every UN-reindexed project flagged falsely.
+//
+// Heuristic: rows whose value contains no ":" predate the prefix
+// convention. Prepend "ollama:" — safe because pre-refactor there
+// was no other embedding backend; every legacy row was produced by
+// the in-process llama-server sidecar.
+//
+// Idempotent: rows already containing ":" are left alone, so
+// re-running this migration (or running it against a DB that was
+// already partially upgraded) is a no-op.
+func migrateIndexedWithModelProviderPrefix(db *sql.DB) error {
+	_, err := db.Exec(`
+		UPDATE projects
+		SET indexed_with_model = 'ollama:' || indexed_with_model
+		WHERE indexed_with_model IS NOT NULL
+		  AND indexed_with_model != ''
+		  AND instr(indexed_with_model, ':') = 0
+	`)
+	if err != nil {
+		return fmt.Errorf("backfill indexed_with_model prefix: %w", err)
+	}
+	return nil
+}
+
 // migrateIndexedWithModel adds projects.indexed_with_model to pre-PR-E
 // databases. Idempotent: PRAGMA table_info first; ALTER only if absent. Rows
 // stay NULL — the dashboard treats NULL as "indexed before drift tracking
diff --git a/server/internal/db/db_test.go b/server/internal/db/db_test.go
index 4b484a1..94ba6d6 100644
--- a/server/internal/db/db_test.go
+++ b/server/internal/db/db_test.go
@@ -210,6 +210,91 @@ func TestOpenMigratesPreEDB(t *testing.T) {
 	}
 }
 
+// TestMigrate_IndexedWithModelProviderPrefix covers the backfill that
+// the pluggable-provider PR adds: legacy rows whose indexed_with_model
+// is a bare model name ("awhiteside/CodeRankEmbed-Q8_0-GGUF") must
+// be rewritten to the prefixed form ("ollama:awhiteside/...") so the
+// drift-detector and dashboard see a match with the live Provider.ID().
+// Rows that already contain ":" (any prefixed form) must be left
+// untouched — important for idempotency and for DBs that were partially
+// upgraded before this migration shipped.
+func TestMigrate_IndexedWithModelProviderPrefix(t *testing.T) {
+	tmp := filepath.Join(t.TempDir(), "indexed-prefix.db")
+
+	// Stage a minimal projects table at the migration-12 layout (i.e.
+	// indexed_with_model column already exists) so we exercise just
+	// the prefix backfill without crossing other migrations' concerns.
+	seed, err := sql.Open(DriverName, "file:"+tmp)
+	if err != nil {
+		t.Fatalf("seed Open: %v", err)
+	}
+	if _, err := seed.Exec(`CREATE TABLE projects (
+		host_path TEXT PRIMARY KEY,
+		container_path TEXT NOT NULL,
+		languages TEXT DEFAULT '[]',
+		settings TEXT DEFAULT '{}',
+		stats TEXT DEFAULT '{}',
+		status TEXT DEFAULT 'created',
+		created_at TEXT NOT NULL,
+		updated_at TEXT NOT NULL,
+		last_indexed_at TEXT,
+		path_hash TEXT,
+		indexed_with_model TEXT
+	)`); err != nil {
+		t.Fatalf("seed CREATE TABLE: %v", err)
+	}
+	rows := []struct {
+		host, model string
+	}{
+		{"/legacy/bare", "awhiteside/CodeRankEmbed-Q8_0-GGUF"},      // should get "ollama:" prefix
+		{"/already/prefixed", "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF"}, // untouched
+		{"/already/voyage", "voyage:voyage-code-3:1024:float"},          // untouched
+	}
+	for _, r := range rows {
+		if _, err := seed.Exec(
+			`INSERT INTO projects (host_path, container_path, created_at, updated_at, path_hash, indexed_with_model)
+			 VALUES (?, ?, '2024-01-01', '2024-01-01', ?, ?)`,
+			r.host, r.host, r.host, r.model,
+		); err != nil {
+			t.Fatalf("seed INSERT %s: %v", r.host, err)
+		}
+	}
+	// Row with NULL model should also be left alone (legacy pre-PR-E projects).
+	if _, err := seed.Exec(
+		`INSERT INTO projects (host_path, container_path, created_at, updated_at, path_hash)
+		 VALUES ('/legacy/null', '/legacy/null', '2024-01-01', '2024-01-01', 'null')`,
+	); err != nil {
+		t.Fatalf("seed INSERT null: %v", err)
+	}
+	seed.Close()
+
+	database, err := Open(tmp)
+	if err != nil {
+		t.Fatalf("Open migrates DB: %v", err)
+	}
+	defer database.Close()
+	defer os.Remove(tmp)
+
+	expectations := map[string]sql.NullString{
+		"/legacy/bare":      {String: "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF", Valid: true},
+		"/already/prefixed": {String: "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF", Valid: true},
+		"/already/voyage":   {String: "voyage:voyage-code-3:1024:float", Valid: true},
+		"/legacy/null":      {Valid: false},
+	}
+	for host, want := range expectations {
+		var got sql.NullString
+		if err := database.QueryRow(
+			`SELECT indexed_with_model FROM projects WHERE host_path = ?`, host,
+		).Scan(&got); err != nil {
+			t.Fatalf("select %s: %v", host, err)
+		}
+		if got.Valid != want.Valid || got.String != want.String {
+			t.Errorf("%s: indexed_with_model = %v (valid=%v), want %v (valid=%v)",
+				host, got.String, got.Valid, want.String, want.Valid)
+		}
+	}
+}
+
 // TestOpenMigratesPreM9DB simulates a pre-m9 database (git_repos without the
 // polling columns — i.e. pre git_repos_polling migration) and verifies Open
 // adds them + the scheduler index without crashing, and that an existing row
diff --git a/server/internal/indexer/indexer.go b/server/internal/indexer/indexer.go
index 1d0d008..93eefe5 100644
--- a/server/internal/indexer/indexer.go
+++ b/server/internal/indexer/indexer.go
@@ -115,6 +115,14 @@ type Service struct {
 	// SetEmbeddingModel from main; empty string keeps the column NULL so
 	// unit tests that skip the setter don't need to know about drift.
 	embeddingModel string
+
+	// embeddingModelLookup, when non-nil, takes precedence over the static
+	// embeddingModel string above. Used by main.go to bind the indexer
+	// to a live function (embeddings.Service.EmbeddingModel) so a provider
+	// switch made at runtime is reflected in the next FinishIndexing write
+	// without requiring a process restart. Tests typically use the static
+	// SetEmbeddingModel API and leave this nil.
+	embeddingModelLookup func() string
 }
 
 // New constructs a Service. All deps are required except logger (falls back to
@@ -151,17 +159,34 @@ func (s *Service) SetEmbedIncludePath(v bool) {
 // projects.indexed_with_model at FinishIndexing. Called from main once the
 // runtime config is resolved; empty string disables the write (the column
 // stays NULL — desired for tests that don't care about drift tracking).
+//
+// In production this is superseded by SetEmbeddingModelLookup, which binds
+// the indexer to a live function so provider switches at runtime take
+// effect without a process restart. The static setter remains for tests.
 func (s *Service) SetEmbeddingModel(model string) {
 	s.embeddingModel = model
 }
 
-// EmbeddingModel returns the identifier most recently passed to
-// SetEmbeddingModel. Used by callers (repojobs) that need to compare
-// the live model against projects.indexed_with_model to decide whether
-// an incremental reindex is safe (same model = vectors comparable) or
-// whether a full reindex is required (model change = embedding-space
-// drift, all vectors must be regenerated).
+// SetEmbeddingModelLookup binds the indexer to a live function returning
+// the current Provider.ID() — typically embeddings.Service.EmbeddingModel.
+// When set, this takes precedence over SetEmbeddingModel so a runtime
+// provider switch (admin PUT /admin/embedding-providers/active) flows into
+// the next FinishIndexing write without a process restart.
+func (s *Service) SetEmbeddingModelLookup(fn func() string) {
+	s.embeddingModelLookup = fn
+}
+
+// EmbeddingModel returns the current embedding-model fingerprint. Prefers
+// the live lookup when one is bound (production); falls back to the static
+// string set via SetEmbeddingModel (tests). Used by callers (repojobs) that
+// need to compare the live model against projects.indexed_with_model to
+// decide whether an incremental reindex is safe (same model = vectors
+// comparable) or whether a full reindex is required (model change =
+// embedding-space drift, all vectors must be regenerated).
 func (s *Service) EmbeddingModel() string {
+	if s.embeddingModelLookup != nil {
+		return s.embeddingModelLookup()
+	}
 	return s.embeddingModel
 }
 
@@ -757,13 +782,17 @@ func (s *Service) FinishIndexing(
 	// projects whose vectors were produced under a different model than the
 	// one currently loaded in the sidecar. NULLIF keeps the column NULL when
 	// SetEmbeddingModel was never called (tests / pre-PR-E codepaths).
+	// Reads through EmbeddingModel() so live provider switches (set via
+	// SetEmbeddingModelLookup) are honoured at write time — the value goes
+	// into the row in its prefixed form ("ollama:<model>" / "voyage:..."),
+	// matching the format the drift-detector and dashboard compare against.
 	if _, err := s.db.ExecContext(ctx,
 		`UPDATE projects
 		 SET stats = ?, languages = ?, status = 'indexed',
 		     last_indexed_at = ?, updated_at = ?,
 		     indexed_with_model = NULLIF(?, '')
 		 WHERE host_path = ?`,
-		statsJSON, langsJSON, now, now, s.embeddingModel, projectPath,
+		statsJSON, langsJSON, now, now, s.EmbeddingModel(), projectPath,
 	); err != nil {
 		return "", 0, 0, fmt.Errorf("update project stats: %w", err)
 	}

From 042e69859cda765a78ffcdab5992e915cb725e75 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 11:50:03 +0100
Subject: [PATCH 14/34] fix(voyage): token-aware batch splitting +
 observability logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Voyage's /v1/embeddings caps each request at 120K tokens, separate
from the 128-input cap voyage-code-* models enforce. A single large
file (e.g. bench/results/reference_embeddings.json at 187K tokens)
sat under the input-count limit but blew past the token limit,
producing a 400 from the server with the message:

  "The max allowed tokens per submitted batch is 120000. Your batch
   has 187609 tokens after truncation."

The provider now plans batches against BOTH limits. planBatches
walks the input list and closes the current batch whenever either
limit would be exceeded:

  - inputs in batch >= 128 (voyage-code-* model cap), OR
  - estimated tokens + next text > 100_000 (our target, 17% under
    Voyage's 120K hard cap to leave headroom for estimation error).

Token estimation: bytes ÷ 3, conservative for code (typically 3-4
chars/token) and even safer for UTF-8 multi-byte input (Cyrillic
comments etc. count as more bytes per character). The estimate errs
high for typical input, so batches close early rather than hit a 400.

Why not an SDK: investigated the community Go SDK at
github.com/austinfhunter/voyageai — it's a thin HTTP wrapper that
does NOT handle token batching, int8 dequant, or retry strategy.
Switching would lose our existing int8 support and add a third-
party dependency for no real benefit.

Observability: structured slog.Info at the split decision logs the
batch counts and limits; slog.Debug logs each sub-batch POST with
input count + estimated token cost. Operators can grep
"voyage: splitting batch" to see batch activity and
"voyage: sub-batch POST" to follow individual requests.

Tests cover:
  - planBatches splits a 300K-byte text + smalls into multiple
    batches respecting the 100K token budget.
  - 200 small texts still split at 128 + 72 (legacy count behavior).
  - End-to-end EmbedDocuments produces multiple POSTs when token
    budget is the binding constraint.

Dashboard Throughput card copy updated to mention both limits.
---
 .../server/sections/AdvancedSection.tsx       |  9 +-
 .../embeddings/provider/voyage/voyage.go      | 99 +++++++++++++++++--
 .../embeddings/provider/voyage/voyage_test.go | 93 +++++++++++++++++
 3 files changed, 188 insertions(+), 13 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
index 7a7932d..4bbb908 100644
--- a/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
+++ b/server/dashboard/src/modules/server/sections/AdvancedSection.tsx
@@ -77,10 +77,11 @@ export function AdvancedSection({
                 file's chunks as a batch). 1 = strictly sequential. OpenAI
                 and Voyage both accept concurrent requests, but their
                 account-level rate limits still apply — start low (e.g. 2)
-                and raise it if you don't see 429s. Per-request batch
-                limits (Voyage <code>voyage-code-*</code> = 128, OpenAI
-                = 2048) are split server-side under one queue slot, so
-                oversized files are safe. Recommended:{' '}
+                and raise it if you don't see 429s. Per-request limits
+                (Voyage <code>voyage-code-*</code>: 128 inputs <em>or</em>
+                ~100K tokens; OpenAI: 2048 inputs) are split server-side
+                under one queue slot, so oversized files are safe.
+                Recommended:{' '}
                 <code>{rec?.max_embedding_concurrency ?? 1}</code>.
               </p>
             </div>
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index 19d6f07..af31383 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -52,6 +52,25 @@ const (
 // Too Large from a large file's chunks.
 const maxBatchSize = 128
 
+// maxTokensPerBatch caps the ESTIMATED token budget per /v1/embeddings
+// POST. Voyage's actual hard limit (observed in 400 error messages) is
+// 120000 tokens; we target 100000 to leave 17% headroom for the
+// byte→token estimation error. When a file's chunks sum above this
+// budget the batch is split further, regardless of count.
+const maxTokensPerBatch = 100_000
+
+// bytesPerToken is a conservative chars-per-token heuristic used to
+// estimate the request's token cost without a real tokenizer. Voyage
+// does not publish their tokenizer for client-side use; empirically
+// code averages ~3–4 chars/token and English prose ~4. We use 3 so
+// the estimate errs high for typical input (we split sooner than the
+// upstream limit); it is a heuristic, not a guaranteed upper bound.
+//
+// len() in Go returns BYTE length, not rune count, so multi-byte
+// UTF-8 input (Cyrillic comments, CJK) gets further over-counted —
+// also safe.
+const bytesPerToken = 3
+
 // Config is the persisted shape of the voyage provider's config blob.
 type Config struct {
 	BaseURL         string `json:"base_url,omitempty"`
@@ -160,27 +179,89 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	if len(texts) <= maxBatchSize {
-		return p.embed(ctx, texts, "document")
+	batches := planBatches(texts)
+	if len(batches) == 1 {
+		return p.embed(ctx, batches[0], "document")
 	}
 	// Oversize input — split into sequential sub-batches. The Service
 	// queue holds a single slot for the whole call, so concurrency
 	// semantics are preserved (no extra slots consumed).
+	p.logger.Info("voyage: splitting batch",
+		"model", p.cfg.Model,
+		"total_inputs", len(texts),
+		"sub_batches", len(batches),
+		"limit_inputs", maxBatchSize,
+		"limit_tokens", maxTokensPerBatch,
+	)
 	out := make([][]float32, 0, len(texts))
-	for i := 0; i < len(texts); i += maxBatchSize {
-		end := i + maxBatchSize
-		if end > len(texts) {
-			end = len(texts)
-		}
-		part, err := p.embed(ctx, texts[i:end], "document")
+	offset := 0
+	for i, batch := range batches {
+		p.logger.Debug("voyage: sub-batch POST",
+			"index", i+1,
+			"of", len(batches),
+			"inputs", len(batch),
+			"est_tokens", sumEstimateTokens(batch),
+		)
+		part, err := p.embed(ctx, batch, "document")
 		if err != nil {
-			return nil, fmt.Errorf("voyage: sub-batch [%d:%d]: %w", i, end, err)
+			return nil, fmt.Errorf("voyage: sub-batch %d/%d (offset=%d, inputs=%d, ~%d tokens): %w",
+				i+1, len(batches), offset, len(batch), sumEstimateTokens(batch), err)
 		}
 		out = append(out, part...)
+		offset += len(batch)
 	}
 	return out, nil
 }
 
+// planBatches groups texts into sub-batches that each respect BOTH
+// the input-count cap (maxBatchSize) and the token-budget cap
+// (maxTokensPerBatch), preserving input order. A single text whose
+// estimate alone exceeds the token budget still gets a batch of its
+// own (the len(current) > 0 guard never emits an empty batch); if
+// Voyage rejects that request, the error reaches the caller, which
+// points at the upstream chunker letting through an over-long chunk.
+func planBatches(texts []string) [][]string {
+	if len(texts) == 0 {
+		return nil
+	}
+	var batches [][]string
+	var current []string
+	currentTokens := 0
+	for _, t := range texts {
+		est := estimateTokens(t)
+		// Close the current batch when adding this text would exceed
+		// either limit (and the batch already has something to send).
+		if len(current) > 0 && (len(current) >= maxBatchSize || currentTokens+est > maxTokensPerBatch) {
+			batches = append(batches, current)
+			current = nil
+			currentTokens = 0
+		}
+		current = append(current, t)
+		currentTokens += est
+	}
+	if len(current) > 0 {
+		batches = append(batches, current)
+	}
+	return batches
+}
+
+// estimateTokens returns a conservative estimate of the token cost of
+// one text: byte length divided by bytesPerToken, rounded UP so a short
+// non-empty text never counts as 0 tokens (floor division would).
+func estimateTokens(s string) int {
+	return (len(s) + bytesPerToken - 1) / bytesPerToken
+}
+
+// sumEstimateTokens sums estimateTokens over a slice. Cheap; used in
+// log lines so an operator can see the per-batch cost.
+func sumEstimateTokens(texts []string) int {
+	n := 0
+	for _, t := range texts {
+		n += estimateTokens(t)
+	}
+	return n
+}
+
 func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error) {
 	return p.EmbedDocuments(ctx, texts)
 }
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index bb319a7..1641008 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -7,6 +7,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"strings"
+	"sync/atomic"
 	"testing"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
@@ -168,6 +169,98 @@ func TestEmbedDocumentsSplitsOversizeBatch(t *testing.T) {
 	}
 }
 
+// TestPlanBatches_SplitsByTokenBudget covers the second cap on per-
+// request batch size: even when input count is under maxBatchSize,
+// Voyage hard-limits the request to 120K tokens. Our estimator uses
+// 3 bytes/token so a 300_000-byte text estimates to 100_000 tokens,
+// hitting the budget exactly. Mixing one huge text with several
+// smaller ones should produce multiple batches.
+func TestPlanBatches_SplitsByTokenBudget(t *testing.T) {
+	big := strings.Repeat("x", 300_000) // ~100_000 est tokens
+	small := "tiny"
+	texts := []string{big, small, small, small, small, small}
+
+	batches := planBatches(texts)
+	if len(batches) < 2 {
+		t.Fatalf("expected at least 2 batches, got %d", len(batches))
+	}
+
+	got := 0
+	for _, b := range batches {
+		got += len(b)
+		if est := sumEstimateTokens(b); est > maxTokensPerBatch && len(b) > 1 {
+			t.Errorf("batch with %d inputs exceeds token budget: ~%d tokens > %d",
+				len(b), est, maxTokensPerBatch)
+		}
+	}
+	if got != len(texts) {
+		t.Errorf("inputs lost across batches: got %d, want %d", got, len(texts))
+	}
+}
+
+// TestPlanBatches_RespectsCountCap verifies the legacy 128-input
+// cap is still enforced when token estimates wouldn't trigger a
+// split. 200 small texts → at least 2 batches (128 + 72).
+func TestPlanBatches_RespectsCountCap(t *testing.T) {
+	texts := make([]string, 200)
+	for i := range texts {
+		texts[i] = "chunk"
+	}
+	batches := planBatches(texts)
+	if len(batches) != 2 {
+		t.Fatalf("expected 2 batches (128 + 72), got %d", len(batches))
+	}
+	if len(batches[0]) != maxBatchSize {
+		t.Errorf("first batch has %d inputs, want %d", len(batches[0]), maxBatchSize)
+	}
+	if len(batches[1]) != 72 {
+		t.Errorf("second batch has %d inputs, want 72", len(batches[1]))
+	}
+}
+
+// TestEmbedDocumentsSplitsByTokenBudget exercises the end-to-end
+// flow: an oversize batch should turn into multiple POSTs to the
+// upstream server, even when the input count alone wouldn't trigger
+// the count-based split.
+func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
+	var hits int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&hits, 1)
+		raw, _ := io.ReadAll(r.Body)
+		var req embedRequest
+		_ = json.Unmarshal(raw, &req)
+		items := make([]map[string]any, len(req.Input))
+		for i := range req.Input {
+			items[i] = map[string]any{"index": i, "embedding": []float32{0.1}}
+		}
+		body, _ := json.Marshal(map[string]any{
+			"data":  items,
+			"model": req.Model,
+			"usage": map[string]int{"total_tokens": 1},
+		})
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write(body)
+	}))
+	t.Cleanup(srv.Close)
+
+	// Two big texts ~100K est tokens each — should produce >= 2 POSTs.
+	big := strings.Repeat("x", 300_000)
+	texts := []string{big, big}
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+	vecs, err := p.EmbedDocuments(context.Background(), texts)
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if got := len(vecs); got != 2 {
+		t.Fatalf("got %d vectors, want 2", got)
+	}
+	if atomic.LoadInt32(&hits) < 2 {
+		t.Errorf("expected at least 2 POSTs due to token-budget split, got %d", hits)
+	}
+}
+
 func TestUsageDecodesWithoutPromptTokens(t *testing.T) {
 	// Voyage's usage object lacks prompt_tokens — make sure decode doesn't error.
 	srv, _ := stubServer(t, http.StatusOK, `{

From f6c0d227e2b53f5c3a4335843347e4bfcd41cfe7 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 12:29:38 +0100
Subject: [PATCH 15/34] feat(voyage): documented rate-limit table surfaced in
 dashboard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Voyage AI does NOT expose rate limits via API — confirmed by checking
https://docs.voyageai.com/docs/rate-limits and the /reference/rate-limits
page. All limit data lives in their public docs and web dashboard.

So instead of a fake "cache + refresh" mechanism, we ship a hardcoded
snapshot from the docs:

  internal/embeddings/provider/voyage/limits.go
    KnownModelLimits — per-model Tier 1 RPM / TPM + per-request
                       input and token caps for every Voyage model
                       the dashboard offers.
    KnownLimitsSource — attribution string with the docs URL and
                        snapshot date ("2026-05-27") so an operator
                        can tell when to consult upstream for an
                        update.

GET /api/v1/admin/embedding-providers gains a documented_limits
field on the voyage entry. Schema documented in openapi.yaml as
DocumentedEmbeddingLimits / DocumentedModelLimit.

Dashboard VoyageProviderForm renders a compact card under the model
dropdown:
  - Active model row showing RPM / TPM / inputs-per-req / tokens-per-req
  - Tier multiplier note (×2 / ×3) since Voyage scales by lifetime
    spend and we can't detect the operator's tier
  - <details> expander with all models in the snapshot, current model
    highlighted
  - Footnote with the source URL + snapshot date
  - Button linking to the Voyage dashboard for live tier confirmation

No "Refresh" button: it would re-fetch the same hardcoded constant,
giving a false impression of "sync my plan from here". A reload after
upgrading cix-server gets the new snapshot — the source footnote
tells the operator when to do that.
---
 doc/openapi.yaml                              |  50 +++++++
 server/dashboard/src/api/types.ts             |   2 +
 .../sections/EmbeddingProviderSection.tsx     |  10 ++
 .../sections/providers/VoyageProviderForm.tsx | 134 +++++++++++++++++-
 .../embeddings/provider/voyage/limits.go      | 110 ++++++++++++++
 server/internal/httpapi/admin_embeddings.go   |  32 ++++-
 6 files changed, 333 insertions(+), 5 deletions(-)
 create mode 100644 server/internal/embeddings/provider/voyage/limits.go

diff --git a/doc/openapi.yaml b/doc/openapi.yaml
index e9943b0..84fa003 100644
--- a/doc/openapi.yaml
+++ b/doc/openapi.yaml
@@ -3243,6 +3243,56 @@ components:
             before save.
           items:
             $ref: "#/components/schemas/EmbeddingProviderSecretEnv"
+        documented_limits:
+          description: |
+            Informational snapshot of the provider's published rate
+            limits. Voyage doesn't expose limits via API so we ship
+            a hardcoded table sourced from their public docs. Absent
+            when the provider has no documented limits to show.
+          $ref: "#/components/schemas/DocumentedEmbeddingLimits"
+
+    DocumentedEmbeddingLimits:
+      type: object
+      required: [source, models]
+      properties:
+        source:
+          type: string
+          description: |
+            Free-form attribution string with the docs URL and the
+            date the snapshot was taken. The dashboard renders it as
+            a footnote so the operator knows when to consult upstream
+            docs for a newer version.
+        models:
+          type: array
+          items:
+            $ref: "#/components/schemas/DocumentedModelLimit"
+
+    DocumentedModelLimit:
+      type: object
+      required: [model, tier1_rpm, tier1_tpm, max_inputs_per_request, max_tokens_per_request]
+      properties:
+        model:
+          type: string
+          description: API model name; matches the values offered in the provider form.
+        tier1_rpm:
+          type: integer
+          minimum: 0
+          description: Tier 1 RPM (multiply by 2 for Tier 2, 3 for Tier 3).
+        tier1_tpm:
+          type: integer
+          minimum: 0
+          description: Tier 1 TPM (multiply by 2 for Tier 2, 3 for Tier 3).
+        max_inputs_per_request:
+          type: integer
+          minimum: 0
+          description: Per-request input array cap (not tier-dependent).
+        max_tokens_per_request:
+          type: integer
+          minimum: 0
+          description: Per-request total token cap (not tier-dependent).
+        notes:
+          type: string
+          description: Free-form notes the dashboard renders verbatim (optional).
 
     EmbeddingProviderSecretEnv:
       type: object
diff --git a/server/dashboard/src/api/types.ts b/server/dashboard/src/api/types.ts
index a1d8707..9a08ab8 100644
--- a/server/dashboard/src/api/types.ts
+++ b/server/dashboard/src/api/types.ts
@@ -81,6 +81,8 @@ export type EmbeddingProviderList = components['schemas']['EmbeddingProviderList
 export type ActiveEmbeddingProvider = components['schemas']['ActiveEmbeddingProvider'];
 export type SwitchEmbeddingProviderRequest = components['schemas']['SwitchEmbeddingProviderRequest'];
 export type TestEmbeddingProviderResponse = components['schemas']['TestEmbeddingProviderResponse'];
+export type DocumentedEmbeddingLimits = components['schemas']['DocumentedEmbeddingLimits'];
+export type DocumentedModelLimit = components['schemas']['DocumentedModelLimit'];
 
 // Provider kind union — the dashboard uses this in form-state discriminants.
 export type EmbeddingProviderKind = 'ollama' | 'openai' | 'voyage';
diff --git a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
index fdfff1c..4063673 100644
--- a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
+++ b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
@@ -79,6 +79,15 @@ export function EmbeddingProviderSection() {
     return providers.data.providers.find((p) => p.kind === draftKind)?.secret_envs ?? [];
   }, [providers.data, draftKind]);
 
+  // Documented rate limits (Voyage only). Sourced from the
+  // /admin/embedding-providers payload's documented_limits field; nil
+  // for ollama/openai. The Voyage form renders the active model's
+  // entry + an expander with the full table.
+  const limitsForKind = useMemo(() => {
+    if (!providers.data) return undefined;
+    return providers.data.providers.find((p) => p.kind === draftKind)?.documented_limits;
+  }, [providers.data, draftKind]);
+
   const test = useTestProvider(draftKind);
 
   if (providers.isLoading || active.isLoading) {
@@ -256,6 +265,7 @@ export function EmbeddingProviderSection() {
             value={voyageDraft}
             onChange={setVoyageDraft}
             secretEnvs={envsForKind}
+            documentedLimits={limitsForKind}
           />
         ) : null}
         {draftKind === 'ollama' ? (
diff --git a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
index 91ad140..5b5251f 100644
--- a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
+++ b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
@@ -1,9 +1,13 @@
-import { AlertTriangle, Info } from 'lucide-react';
+import { AlertTriangle, ExternalLink, Info } from 'lucide-react';
 import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
 import { Input } from '@/ui/input';
 import { Label } from '@/ui/label';
 import { Switch } from '@/ui/switch';
-import type { EmbeddingProviderSecretEnv } from '@/api/types';
+import type {
+  DocumentedEmbeddingLimits,
+  DocumentedModelLimit,
+  EmbeddingProviderSecretEnv,
+} from '@/api/types';
 
 // VoyageConfig mirrors the voyage provider's persisted config blob
 // shape (see server/internal/embeddings/provider/voyage/voyage.go).
@@ -27,6 +31,126 @@ interface Props {
   value: VoyageConfig;
   onChange: (next: VoyageConfig) => void;
   secretEnvs: EmbeddingProviderSecretEnv[];
+  // documentedLimits is the hardcoded snapshot of Voyage's published
+  // rate limits sourced from their public docs (Voyage has no API
+  // endpoint to fetch limits). Undefined when the server is older
+  // than the limits-table feature; the form silently degrades.
+  documentedLimits?: DocumentedEmbeddingLimits;
+}
+
+// formatNumber prints a number in thousands shorthand: 3000000 → "3M",
+// 16000000 → "16M", 2000 → "2K". Used to keep the limits card compact.
+function formatNumber(n: number): string {
+  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(n % 1_000_000 === 0 ? 0 : 1)}M`;
+  if (n >= 1_000) return `${(n / 1_000).toFixed(n % 1_000 === 0 ? 0 : 1)}K`;
+  return String(n);
+}
+
+// ActiveModelLimitsCard renders the documented rate-limit row for the
+// model currently selected in the form. The whole table lives behind a
+// "Show all models" details expander so the form stays compact by
+// default.
+function ActiveModelLimitsCard({
+  limits,
+  selectedModel,
+}: {
+  limits: DocumentedEmbeddingLimits;
+  selectedModel: string;
+}) {
+  const active = limits.models.find((m) => m.model === selectedModel);
+  return (
+    <div className="rounded-md border bg-muted/30 p-3 text-xs">
+      <div className="flex items-start justify-between gap-2">
+        <div>
+          <div className="font-medium text-foreground">
+            Documented rate limits for <code>{selectedModel}</code>{' '}
+            <span className="font-normal text-muted-foreground">(Tier 1 baseline)</span>
+          </div>
+          {active ? (
+            <LimitsRow l={active} />
+          ) : (
+            <div className="mt-1 text-muted-foreground">
+              No entry for this model in our snapshot — consult the dashboard.
+            </div>
+          )}
+        </div>
+        <a
+          href="https://dashboard.voyageai.com/"
+          target="_blank"
+          rel="noreferrer noopener"
+          className="inline-flex items-center gap-1 rounded-md border px-2 py-1 text-xs hover:bg-accent"
+          title="Voyage doesn't expose limits via API — open the dashboard to check your live tier"
+        >
+          Open Voyage dashboard <ExternalLink className="h-3 w-3" />
+        </a>
+      </div>
+
+      {/* Tier multiplier note. Voyage scales limits with lifetime spend:
+          ≥$100 → Tier 2 (×2), ≥$1000 → Tier 3 (×3). We can't detect
+          the operator's tier (no API), so we just state the rule and
+          let them mentally apply the multiplier. */}
+      <div className="mt-2 text-muted-foreground">
+        Multiply by <strong className="text-foreground">×2</strong> at Tier 2
+        (≥&nbsp;$100 paid lifetime) and <strong className="text-foreground">×3</strong> at
+        Tier 3 (≥&nbsp;$1000). Voyage doesn't expose your current tier via
+        API — check the dashboard to confirm.
+      </div>
+      <details className="mt-3 group">
+        <summary className="cursor-pointer text-muted-foreground hover:text-foreground">
+          Show all models in this snapshot
+        </summary>
+        <table className="mt-2 w-full text-xs">
+          <thead>
+            <tr className="text-left text-muted-foreground">
+              <th className="pr-3 font-normal">Model</th>
+              <th className="pr-3 font-normal">RPM (T1)</th>
+              <th className="pr-3 font-normal">TPM (T1)</th>
+              <th className="pr-3 font-normal">Inputs/req</th>
+              <th className="pr-3 font-normal">Tokens/req</th>
+            </tr>
+          </thead>
+          <tbody>
+            {limits.models.map((m) => (
+              <tr key={m.model} className={m.model === selectedModel ? 'font-medium' : ''}>
+                <td className="pr-3"><code>{m.model}</code></td>
+                <td className="pr-3">{formatNumber(m.tier1_rpm)}</td>
+                <td className="pr-3">{formatNumber(m.tier1_tpm)}</td>
+                <td className="pr-3">{m.max_inputs_per_request}</td>
+                <td className="pr-3">{formatNumber(m.max_tokens_per_request)}</td>
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </details>
+      <p className="mt-3 text-[10px] text-muted-foreground">{limits.source}</p>
+    </div>
+  );
+}
+
+function LimitsRow({ l }: { l: DocumentedModelLimit }) {
+  return (
+    <div className="mt-2 grid grid-cols-2 gap-x-4 gap-y-1 text-foreground sm:grid-cols-4">
+      <div>
+        <div className="text-muted-foreground">RPM (Tier 1)</div>
+        <div>{formatNumber(l.tier1_rpm)}</div>
+      </div>
+      <div>
+        <div className="text-muted-foreground">TPM (Tier 1)</div>
+        <div>{formatNumber(l.tier1_tpm)}</div>
+      </div>
+      <div>
+        <div className="text-muted-foreground">Inputs / req</div>
+        <div>{l.max_inputs_per_request}</div>
+      </div>
+      <div>
+        <div className="text-muted-foreground">Tokens / req</div>
+        <div>{formatNumber(l.max_tokens_per_request)}</div>
+      </div>
+      {l.notes ? (
+        <div className="col-span-full text-muted-foreground">{l.notes}</div>
+      ) : null}
+    </div>
+  );
 }
 
 const MODELS = [
@@ -39,7 +163,7 @@ const MODELS = [
 
 const DIMENSIONS = [256, 512, 1024, 2048];
 
-export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
+export function VoyageProviderForm({ value, onChange, secretEnvs, documentedLimits }: Props) {
   const apiKeyEnv = secretEnvs.find((e) => e.name === value.api_key_env);
   const apiKeyMissing = apiKeyEnv != null && !apiKeyEnv.set;
 
@@ -83,6 +207,10 @@ export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
         </select>
       </div>
 
+      {documentedLimits ? (
+        <ActiveModelLimitsCard limits={documentedLimits} selectedModel={value.model} />
+      ) : null}
+
       <div className="grid gap-4 sm:grid-cols-2">
         <div className="space-y-1.5">
           <Label htmlFor="voyage-dim">Output dimension (Matryoshka)</Label>
diff --git a/server/internal/embeddings/provider/voyage/limits.go b/server/internal/embeddings/provider/voyage/limits.go
new file mode 100644
index 0000000..ec40f7f
--- /dev/null
+++ b/server/internal/embeddings/provider/voyage/limits.go
@@ -0,0 +1,110 @@
+package voyage
+
+// ModelLimits captures the documented per-model rate-limit ceiling
+// for one model at Tier 1 (the baseline tier). Voyage does NOT expose
+// these values via API — they live only in the public docs at
+// https://docs.voyageai.com/docs/rate-limits and in the web dashboard
+// under Settings → Rate Limits.
+//
+// Tier 2 (>= $100 paid lifetime) and Tier 3 (>= $1000) multiply the
+// RPM / TPM ceilings by 2x and 3x respectively; per-request limits
+// (inputs / tokens) are not tier-dependent.
+//
+// We surface these numbers in the dashboard so an operator can sanity
+// check the configured concurrency against their plan without bouncing
+// to a separate tab. They are NOT enforced client-side — Voyage rejects
+// over-limit traffic with 429 / 400 which the server already surfaces.
+type ModelLimits struct {
+	// Model is the API model name (matches Config.Model).
+	Model string `json:"model"`
+
+	// Tier1RPM / Tier1TPM are the per-minute caps at the baseline
+	// (free-but-billable) tier. Multiply by 2 for Tier 2, 3 for Tier 3.
+	Tier1RPM int `json:"tier1_rpm"`
+	Tier1TPM int `json:"tier1_tpm"`
+
+	// MaxInputsPerRequest is the hard cap on `input` array length per
+	// /v1/embeddings POST. voyage-code-* models cap at 128; voyage-3*
+	// models accept up to 1000.
+	MaxInputsPerRequest int `json:"max_inputs_per_request"`
+
+	// MaxTokensPerRequest is the hard cap on total tokens in a single
+	// /v1/embeddings POST after Voyage's own tokenizer pass. 120000
+	// for all current text models; we keep it per-model in case Voyage
+	// differentiates later.
+	MaxTokensPerRequest int `json:"max_tokens_per_request"`
+
+	// Notes is free-form text the dashboard renders verbatim. Used to
+	// flag oddities (e.g. "TPM doubled in v3.5", "free tier overrides
+	// these values to 3/10K").
+	Notes string `json:"notes,omitempty"`
+}
+
+// KnownLimitsSource describes where the values below came from. The
+// dashboard surfaces this string so an operator can tell when the
+// snapshot was taken (and whether they should consult the docs for a
+// newer version).
+const KnownLimitsSource = "Voyage public docs at https://docs.voyageai.com/docs/rate-limits (snapshot 2026-05-27). Free tier is capped at 3 RPM / 10K TPM regardless of model."
+
+// KnownModelLimits is the hardcoded snapshot of documented Voyage
+// rate limits. Keys match the model strings the dashboard offers in
+// the provider dropdown. Add new models here when Voyage publishes
+// them and bump KnownLimitsSource's snapshot date.
+var KnownModelLimits = map[string]ModelLimits{
+	"voyage-code-3": {
+		Model: "voyage-code-3", Tier1RPM: 2000, Tier1TPM: 3_000_000,
+		MaxInputsPerRequest: 128, MaxTokensPerRequest: 120_000,
+		Notes: "Optimized for code retrieval.",
+	},
+	"voyage-code-2": {
+		Model: "voyage-code-2", Tier1RPM: 2000, Tier1TPM: 3_000_000,
+		MaxInputsPerRequest: 128, MaxTokensPerRequest: 120_000,
+		Notes: "Predecessor of voyage-code-3.",
+	},
+	"voyage-3-large": {
+		Model: "voyage-3-large", Tier1RPM: 2000, Tier1TPM: 3_000_000,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
+		Notes: "General-purpose multilingual.",
+	},
+	"voyage-3": {
+		Model: "voyage-3", Tier1RPM: 2000, Tier1TPM: 3_000_000,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
+	},
+	"voyage-3-lite": {
+		Model: "voyage-3-lite", Tier1RPM: 2000, Tier1TPM: 3_000_000,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
+		Notes: "Smaller, cheaper than voyage-3.",
+	},
+	"voyage-3.5": {
+		Model: "voyage-3.5", Tier1RPM: 2000, Tier1TPM: 8_000_000,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
+		Notes: "Higher TPM than voyage-3.",
+	},
+	"voyage-3.5-lite": {
+		Model: "voyage-3.5-lite", Tier1RPM: 2000, Tier1TPM: 16_000_000,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
+		Notes: "Highest TPM among lite models.",
+	},
+}
+
+// LimitsList returns the known limits as a slice in dashboard order
+// (see the order list below). Convenience for the admin endpoint.
+func LimitsList() []ModelLimits {
+	out := make([]ModelLimits, 0, len(KnownModelLimits))
+	// Preserve dashboard order (matches the factory's model enum).
+	order := []string{"voyage-code-3", "voyage-3-large", "voyage-3", "voyage-3-lite", "voyage-code-2", "voyage-3.5", "voyage-3.5-lite"}
+	seen := map[string]bool{}
+	for _, m := range order {
+		if l, ok := KnownModelLimits[m]; ok {
+			out = append(out, l)
+			seen[m] = true
+		}
+	}
+	// Extras missing from order go last, in unspecified (map) order.
+	for k, v := range KnownModelLimits {
+		if !seen[k] {
+			out = append(out, v)
+		}
+	}
+	return out
+}
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index 981b31c..02ca8af 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -21,6 +21,7 @@ import (
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings"
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider/voyage"
 	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
 	"github.com/go-chi/chi/v5"
 )
@@ -30,6 +31,21 @@ type providerInfoPayload struct {
 	Kind       string              `json:"kind"`
 	Schema     json.RawMessage     `json:"schema"`
 	SecretEnvs []secretEnvPayload  `json:"secret_envs"`
+	// DocumentedLimits is an informational snapshot of the provider's
+	// published rate limits. Voyage doesn't expose limits via API so we
+	// ship a hardcoded table sourced from their public docs. Nil when
+	// the provider has no documented limits to show (ollama: local, no
+	// upstream limits; openai: limits vary per organization and aren't
+	// public per-model).
+	DocumentedLimits *documentedLimitsPayload `json:"documented_limits,omitempty"`
+}
+
+// documentedLimitsPayload wraps the per-model limits table plus the
+// source string. The dashboard renders Source as a footnote so the
+// operator can tell when our snapshot was last refreshed.
+type documentedLimitsPayload struct {
+	Source string             `json:"source"`
+	Models []voyage.ModelLimits `json:"models"`
 }
 
 // secretEnvPayload tells the dashboard which env-var names a provider
@@ -83,11 +99,23 @@ func (s *Server) ListEmbeddingProviders(w http.ResponseWriter, r *http.Request)
 			_, present := os.LookupEnv(name)
 			envPayload = append(envPayload, secretEnvPayload{Name: name, Set: present})
 		}
-		out = append(out, providerInfoPayload{
+		info := providerInfoPayload{
 			Kind:       kind,
 			Schema:     f.SchemaJSON(),
 			SecretEnvs: envPayload,
-		})
+		}
+		// Voyage is the only provider with a hardcoded limits table —
+		// Voyage publishes per-model RPM/TPM in their docs but has no
+		// API endpoint to fetch them. The table is sourced from
+		// voyage.KnownModelLimits; see that file's doc-comment for
+		// the snapshot date and a link.
+		if kind == provider.KindVoyage {
+			info.DocumentedLimits = &documentedLimitsPayload{
+				Source: voyage.KnownLimitsSource,
+				Models: voyage.LimitsList(),
+			}
+		}
+		out = append(out, info)
 	}
 	writeJSON(w, http.StatusOK, map[string]any{"providers": out})
 }

From a2993e8debe23185bc5799bfb5077f18f88705ed Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 12:56:38 +0100
Subject: [PATCH 16/34] feat(voyage): operator-configurable rate limits with
 token-bucket throttle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the hardcoded "documented limits" snapshot card with editable
input fields for RPM, TPM, max-inputs-per-request, and max-tokens-
per-request. Voyage doesn't expose limits via API (confirmed against
docs.voyageai.com/docs/rate-limits) — the operator copies their
tier's numbers from the Voyage dashboard, the indexer throttles
itself accordingly.

Backend (provider/voyage):

  Config gains four new fields, all optional:
    RateLimitRPM         — caps requests/minute via token-bucket
    RateLimitTPM         — caps tokens/minute via token-bucket
    MaxInputsPerRequest  — overrides the static 128 default
    MaxTokensPerRequest  — overrides the static 100K default

  Two rate.Limiter instances are wired into Provider when the
  corresponding RPM/TPM > 0; embed() waits on both before each POST
  (ctx-aware — server shutdown drains cleanly). nil limiters short-
  circuit when the operator leaves the fields empty, matching prior
  behaviour (only react to upstream 429/400).

  planBatches takes maxInputs+maxTokens args instead of consts, so
  the per-request caps come from cfg.

Backend (admin_embeddings, openapi.yaml, limits.go):
  Drop the documented_limits payload + voyage.KnownModelLimits
  table — the dashboard no longer renders a "cached snapshot",
  values come straight from the operator instead. Less code, fewer
  things to keep in sync.

Frontend:
  VoyageProviderForm: replace the read-only "Documented limits"
  card with a 2x2 grid of number inputs (RPM, TPM, max-inputs,
  max-tokens). Hint paragraph above the grid still mentions the
  public-docs baseline so the operator has a starting point, plus
  a link to the Voyage dashboard. Empty = no client-side
  enforcement (per-field).

  EmbeddingProviderSection: drop the documentedLimits prop +
  useMemo; populate the four new VoyageConfig fields from the
  persisted blob when loading the active provider.

Tests:
  - TestRateLimitRPMThrottlesRequests: 120 RPM with two sequential
    EmbedQuery calls — second waits ~500ms on the limiter.
  - TestRateLimitTPMThrottlesTokens: 600K TPM with two ~60K-token
    POSTs — second waits ~2s for the bucket to refill.
  - planBatches tests updated to pass explicit caps.
---
 doc/openapi.yaml                              |  50 ----
 server/dashboard/src/api/types.ts             |   2 -
 .../sections/EmbeddingProviderSection.tsx     |  16 +-
 .../sections/providers/VoyageProviderForm.tsx | 237 +++++++-----------
 server/go.mod                                 |   1 +
 server/go.sum                                 |   2 +
 .../embeddings/provider/voyage/limits.go      | 110 --------
 .../embeddings/provider/voyage/voyage.go      | 140 ++++++++---
 .../embeddings/provider/voyage/voyage_test.go |  92 ++++++-
 server/internal/httpapi/admin_embeddings.go   |  38 +--
 10 files changed, 309 insertions(+), 379 deletions(-)
 delete mode 100644 server/internal/embeddings/provider/voyage/limits.go

diff --git a/doc/openapi.yaml b/doc/openapi.yaml
index 84fa003..e9943b0 100644
--- a/doc/openapi.yaml
+++ b/doc/openapi.yaml
@@ -3243,56 +3243,6 @@ components:
             before save.
           items:
             $ref: "#/components/schemas/EmbeddingProviderSecretEnv"
-        documented_limits:
-          description: |
-            Informational snapshot of the provider's published rate
-            limits. Voyage doesn't expose limits via API so we ship
-            a hardcoded table sourced from their public docs. Absent
-            when the provider has no documented limits to show.
-          $ref: "#/components/schemas/DocumentedEmbeddingLimits"
-
-    DocumentedEmbeddingLimits:
-      type: object
-      required: [source, models]
-      properties:
-        source:
-          type: string
-          description: |
-            Free-form attribution string with the docs URL and the
-            date the snapshot was taken. The dashboard renders it as
-            a footnote so the operator knows when to consult upstream
-            docs for a newer version.
-        models:
-          type: array
-          items:
-            $ref: "#/components/schemas/DocumentedModelLimit"
-
-    DocumentedModelLimit:
-      type: object
-      required: [model, tier1_rpm, tier1_tpm, max_inputs_per_request, max_tokens_per_request]
-      properties:
-        model:
-          type: string
-          description: API model name; matches the values offered in the provider form.
-        tier1_rpm:
-          type: integer
-          minimum: 0
-          description: Tier 1 RPM (multiply by 2 for Tier 2, 3 for Tier 3).
-        tier1_tpm:
-          type: integer
-          minimum: 0
-          description: Tier 1 TPM (multiply by 2 for Tier 2, 3 for Tier 3).
-        max_inputs_per_request:
-          type: integer
-          minimum: 0
-          description: Per-request input array cap (not tier-dependent).
-        max_tokens_per_request:
-          type: integer
-          minimum: 0
-          description: Per-request total token cap (not tier-dependent).
-        notes:
-          type: string
-          description: Free-form notes the dashboard renders verbatim (optional).
 
     EmbeddingProviderSecretEnv:
       type: object
diff --git a/server/dashboard/src/api/types.ts b/server/dashboard/src/api/types.ts
index 9a08ab8..a1d8707 100644
--- a/server/dashboard/src/api/types.ts
+++ b/server/dashboard/src/api/types.ts
@@ -81,8 +81,6 @@ export type EmbeddingProviderList = components['schemas']['EmbeddingProviderList
 export type ActiveEmbeddingProvider = components['schemas']['ActiveEmbeddingProvider'];
 export type SwitchEmbeddingProviderRequest = components['schemas']['SwitchEmbeddingProviderRequest'];
 export type TestEmbeddingProviderResponse = components['schemas']['TestEmbeddingProviderResponse'];
-export type DocumentedEmbeddingLimits = components['schemas']['DocumentedEmbeddingLimits'];
-export type DocumentedModelLimit = components['schemas']['DocumentedModelLimit'];
 
 // Provider kind union — the dashboard uses this in form-state discriminants.
 export type EmbeddingProviderKind = 'ollama' | 'openai' | 'voyage';
diff --git a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
index 4063673..5469fef 100644
--- a/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
+++ b/server/dashboard/src/modules/server/sections/EmbeddingProviderSection.tsx
@@ -67,6 +67,12 @@ export function EmbeddingProviderSection() {
         output_dtype:
           (cfg.output_dtype as 'float' | 'int8') ?? defaultVoyageConfig.output_dtype,
         truncation: cfg.truncation !== false,
+        rate_limit_rpm: typeof cfg.rate_limit_rpm === 'number' ? cfg.rate_limit_rpm : undefined,
+        rate_limit_tpm: typeof cfg.rate_limit_tpm === 'number' ? cfg.rate_limit_tpm : undefined,
+        max_inputs_per_request:
+          typeof cfg.max_inputs_per_request === 'number' ? cfg.max_inputs_per_request : undefined,
+        max_tokens_per_request:
+          typeof cfg.max_tokens_per_request === 'number' ? cfg.max_tokens_per_request : undefined,
       });
     }
   }, [active.data]);
@@ -79,15 +85,6 @@ export function EmbeddingProviderSection() {
     return providers.data.providers.find((p) => p.kind === draftKind)?.secret_envs ?? [];
   }, [providers.data, draftKind]);
 
-  // Documented rate limits (Voyage only). Sourced from the
-  // /admin/embedding-providers payload's documented_limits field; nil
-  // for ollama/openai. The Voyage form renders the active model's
-  // entry + an expander with the full table.
-  const limitsForKind = useMemo(() => {
-    if (!providers.data) return undefined;
-    return providers.data.providers.find((p) => p.kind === draftKind)?.documented_limits;
-  }, [providers.data, draftKind]);
-
   const test = useTestProvider(draftKind);
 
   if (providers.isLoading || active.isLoading) {
@@ -265,7 +262,6 @@ export function EmbeddingProviderSection() {
             value={voyageDraft}
             onChange={setVoyageDraft}
             secretEnvs={envsForKind}
-            documentedLimits={limitsForKind}
           />
         ) : null}
         {draftKind === 'ollama' ? (
diff --git a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
index 5b5251f..c6f3b6e 100644
--- a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
+++ b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
@@ -3,11 +3,7 @@ import { Alert, AlertDescription, AlertTitle } from '@/ui/alert';
 import { Input } from '@/ui/input';
 import { Label } from '@/ui/label';
 import { Switch } from '@/ui/switch';
-import type {
-  DocumentedEmbeddingLimits,
-  DocumentedModelLimit,
-  EmbeddingProviderSecretEnv,
-} from '@/api/types';
+import type { EmbeddingProviderSecretEnv } from '@/api/types';
 
 // VoyageConfig mirrors the voyage provider's persisted config blob
 // shape (see server/internal/embeddings/provider/voyage/voyage.go).
@@ -17,6 +13,15 @@ export interface VoyageConfig {
   output_dimension: number;
   output_dtype: 'float' | 'int8';
   truncation: boolean;
+
+  // Operator-supplied caps. Unset/0 rate limits = no client-side
+  // throttling (the server only reacts to upstream 429/400); unset/0
+  // per-request caps = server-side defaults. Copied by hand from the
+  // operator's Voyage dashboard (Voyage has no API for limits).
+  rate_limit_rpm?: number;
+  rate_limit_tpm?: number;
+  max_inputs_per_request?: number;
+  max_tokens_per_request?: number;
 }
 
 export const defaultVoyageConfig: VoyageConfig = {
@@ -31,126 +36,6 @@ interface Props {
   value: VoyageConfig;
   onChange: (next: VoyageConfig) => void;
   secretEnvs: EmbeddingProviderSecretEnv[];
-  // documentedLimits is the hardcoded snapshot of Voyage's published
-  // rate limits sourced from their public docs (Voyage has no API
-  // endpoint to fetch limits). Undefined when the server is older
-  // than the limits-table feature; the form silently degrades.
-  documentedLimits?: DocumentedEmbeddingLimits;
-}
-
-// formatNumber prints a number in thousands shorthand: 3000000 → "3M",
-// 16000000 → "16M", 2000 → "2K". Used to keep the limits card compact.
-function formatNumber(n: number): string {
-  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(n % 1_000_000 === 0 ? 0 : 1)}M`;
-  if (n >= 1_000) return `${(n / 1_000).toFixed(n % 1_000 === 0 ? 0 : 1)}K`;
-  return String(n);
-}
-
-// ActiveModelLimitsCard renders the documented rate-limit row for the
-// model currently selected in the form. The whole table lives behind a
-// "Show all models" details expander so the form stays compact by
-// default.
-function ActiveModelLimitsCard({
-  limits,
-  selectedModel,
-}: {
-  limits: DocumentedEmbeddingLimits;
-  selectedModel: string;
-}) {
-  const active = limits.models.find((m) => m.model === selectedModel);
-  return (
-    <div className="rounded-md border bg-muted/30 p-3 text-xs">
-      <div className="flex items-start justify-between gap-2">
-        <div>
-          <div className="font-medium text-foreground">
-            Documented rate limits for <code>{selectedModel}</code>{' '}
-            <span className="font-normal text-muted-foreground">(Tier 1 baseline)</span>
-          </div>
-          {active ? (
-            <LimitsRow l={active} />
-          ) : (
-            <div className="mt-1 text-muted-foreground">
-              No entry for this model in our snapshot — consult the dashboard.
-            </div>
-          )}
-        </div>
-        <a
-          href="https://dashboard.voyageai.com/"
-          target="_blank"
-          rel="noreferrer noopener"
-          className="inline-flex items-center gap-1 rounded-md border px-2 py-1 text-xs hover:bg-accent"
-          title="Voyage doesn't expose limits via API — open the dashboard to check your live tier"
-        >
-          Open Voyage dashboard <ExternalLink className="h-3 w-3" />
-        </a>
-      </div>
-
-      {/* Tier multiplier note. Voyage scales limits with lifetime spend:
-          ≥$100 → Tier 2 (×2), ≥$1000 → Tier 3 (×3). We can't detect
-          the operator's tier (no API), so we just state the rule and
-          let them mentally apply the multiplier. */}
-      <div className="mt-2 text-muted-foreground">
-        Multiply by <strong className="text-foreground">×2</strong> at Tier 2
-        (≥&nbsp;$100 paid lifetime) and <strong className="text-foreground">×3</strong> at
-        Tier 3 (≥&nbsp;$1000). Voyage doesn't expose your current tier via
-        API — check the dashboard to confirm.
-      </div>
-      <details className="mt-3 group">
-        <summary className="cursor-pointer text-muted-foreground hover:text-foreground">
-          Show all models in this snapshot
-        </summary>
-        <table className="mt-2 w-full text-xs">
-          <thead>
-            <tr className="text-left text-muted-foreground">
-              <th className="pr-3 font-normal">Model</th>
-              <th className="pr-3 font-normal">RPM (T1)</th>
-              <th className="pr-3 font-normal">TPM (T1)</th>
-              <th className="pr-3 font-normal">Inputs/req</th>
-              <th className="pr-3 font-normal">Tokens/req</th>
-            </tr>
-          </thead>
-          <tbody>
-            {limits.models.map((m) => (
-              <tr key={m.model} className={m.model === selectedModel ? 'font-medium' : ''}>
-                <td className="pr-3"><code>{m.model}</code></td>
-                <td className="pr-3">{formatNumber(m.tier1_rpm)}</td>
-                <td className="pr-3">{formatNumber(m.tier1_tpm)}</td>
-                <td className="pr-3">{m.max_inputs_per_request}</td>
-                <td className="pr-3">{formatNumber(m.max_tokens_per_request)}</td>
-              </tr>
-            ))}
-          </tbody>
-        </table>
-      </details>
-      <p className="mt-3 text-[10px] text-muted-foreground">{limits.source}</p>
-    </div>
-  );
-}
-
-function LimitsRow({ l }: { l: DocumentedModelLimit }) {
-  return (
-    <div className="mt-2 grid grid-cols-2 gap-x-4 gap-y-1 text-foreground sm:grid-cols-4">
-      <div>
-        <div className="text-muted-foreground">RPM (Tier 1)</div>
-        <div>{formatNumber(l.tier1_rpm)}</div>
-      </div>
-      <div>
-        <div className="text-muted-foreground">TPM (Tier 1)</div>
-        <div>{formatNumber(l.tier1_tpm)}</div>
-      </div>
-      <div>
-        <div className="text-muted-foreground">Inputs / req</div>
-        <div>{l.max_inputs_per_request}</div>
-      </div>
-      <div>
-        <div className="text-muted-foreground">Tokens / req</div>
-        <div>{formatNumber(l.max_tokens_per_request)}</div>
-      </div>
-      {l.notes ? (
-        <div className="col-span-full text-muted-foreground">{l.notes}</div>
-      ) : null}
-    </div>
-  );
 }
 
 const MODELS = [
@@ -163,7 +48,16 @@ const MODELS = [
 
 const DIMENSIONS = [256, 512, 1024, 2048];
 
-export function VoyageProviderForm({ value, onChange, secretEnvs, documentedLimits }: Props) {
+// numberOrUndef parses a number input. Empty, NaN, negative and
+// fractional values → undefined ("unset", no client-side enforcement);
+// the server-side Config fields are integers, so 1.5 must not be sent.
+function numberOrUndef(v: string): number | undefined {
+  if (v.trim() === '') return undefined;
+  const n = Number(v);
+  return Number.isSafeInteger(n) && n >= 0 ? n : undefined;
+}
+
+export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
   const apiKeyEnv = secretEnvs.find((e) => e.name === value.api_key_env);
   const apiKeyMissing = apiKeyEnv != null && !apiKeyEnv.set;
 
@@ -171,23 +65,22 @@ export function VoyageProviderForm({ value, onChange, secretEnvs, documentedLimi
     <div className="space-y-4">
       <Alert>
         <Info className="h-4 w-4" />
-        <AlertTitle>Rate limits</AlertTitle>
+        <AlertTitle>Rate limits — fill in from your Voyage dashboard</AlertTitle>
         <AlertDescription>
-          Voyage's free tier is capped at <strong>3 requests/minute</strong> and
-          10K tokens/minute — usable for a smoke test, but the indexer will
-          burst past it on any real repo and start returning 429s. For real
-          usage{' '}
+          Voyage doesn't expose per-account rate limits via API, so the
+          server can't fetch yours automatically. Open the{' '}
           <a
             href="https://dashboard.voyageai.com/"
             target="_blank"
             rel="noreferrer noopener"
-            className="underline"
+            className="inline-flex items-center gap-1 underline"
           >
-            add a payment method
+            Voyage dashboard <ExternalLink className="h-3 w-3" />
           </a>{' '}
-          on the Voyage dashboard. On the free tier you can still index by
-          setting <strong>concurrency = 1</strong> in the Throughput card
-          below and accepting roughly 3 files/minute throughput.
+          → Rate Limits, copy your tier's numbers into the fields below,
+          and the indexer will throttle itself accordingly via a
+          token-bucket. Leave all four blank to disable client-side
+          throttling (the server will only react to upstream 429/400).
         </AlertDescription>
       </Alert>
 
@@ -207,10 +100,6 @@ export function VoyageProviderForm({ value, onChange, secretEnvs, documentedLimi
         </select>
       </div>
 
-      {documentedLimits ? (
-        <ActiveModelLimitsCard limits={documentedLimits} selectedModel={value.model} />
-      ) : null}
-
       <div className="grid gap-4 sm:grid-cols-2">
         <div className="space-y-1.5">
           <Label htmlFor="voyage-dim">Output dimension (Matryoshka)</Label>
@@ -251,6 +140,74 @@ export function VoyageProviderForm({ value, onChange, secretEnvs, documentedLimi
         </div>
       </div>
 
+      {/* Rate-limit fields. All four optional. Baseline values in the
+          hint paragraph below mirror the public docs; the operator
+          should override per their actual tier on the Voyage dashboard. */}
+      <fieldset className="space-y-3 rounded-md border bg-muted/20 p-3">
+        <legend className="px-1 text-sm font-medium">Rate limits (from your Voyage dashboard)</legend>
+        <p className="text-xs text-muted-foreground">
+          Public-docs Tier 1 baseline (multiply by ×2 / ×3 for Tier 2 / Tier
+          3 spend):{' '}
+          <code>voyage-code-*</code> = 2000 RPM / 3M TPM / 128 inputs /
+          120K tokens per request.{' '}
+          <code>voyage-3*</code> = 2000 RPM / 3–16M TPM / 1000 inputs /
+          120K tokens per request. Free tier = 3 RPM / 10K TPM regardless
+          of model.
+        </p>
+        <div className="grid gap-3 sm:grid-cols-2">
+          <div className="space-y-1">
+            <Label htmlFor="voyage-rpm">Requests per minute (RPM)</Label>
+            <Input
+              id="voyage-rpm"
+              type="number"
+              min={0}
+              placeholder="e.g. 2000 (Tier 1 baseline)"
+              value={value.rate_limit_rpm ?? ''}
+              onChange={(e) => onChange({ ...value, rate_limit_rpm: numberOrUndef(e.target.value) })}
+            />
+          </div>
+          <div className="space-y-1">
+            <Label htmlFor="voyage-tpm">Tokens per minute (TPM)</Label>
+            <Input
+              id="voyage-tpm"
+              type="number"
+              min={0}
+              placeholder="e.g. 3000000"
+              value={value.rate_limit_tpm ?? ''}
+              onChange={(e) => onChange({ ...value, rate_limit_tpm: numberOrUndef(e.target.value) })}
+            />
+          </div>
+          <div className="space-y-1">
+            <Label htmlFor="voyage-max-inputs">Max inputs per request</Label>
+            <Input
+              id="voyage-max-inputs"
+              type="number"
+              min={0}
+              placeholder="128 for code-*, 1000 for voyage-3*"
+              value={value.max_inputs_per_request ?? ''}
+              onChange={(e) => onChange({ ...value, max_inputs_per_request: numberOrUndef(e.target.value) })}
+            />
+          </div>
+          <div className="space-y-1">
+            <Label htmlFor="voyage-max-tokens">Max tokens per request</Label>
+            <Input
+              id="voyage-max-tokens"
+              type="number"
+              min={0}
+              placeholder="e.g. 100000 (Voyage hard cap 120K)"
+              value={value.max_tokens_per_request ?? ''}
+              onChange={(e) => onChange({ ...value, max_tokens_per_request: numberOrUndef(e.target.value) })}
+            />
+          </div>
+        </div>
+        <p className="text-[10px] text-muted-foreground">
+          Empty = no client-side enforcement for that field. RPM/TPM
+          empty means the indexer doesn't throttle itself (you'll see
+          429s on overflow); per-request fields empty fall back to safe
+          defaults (128 inputs / ~100K tokens).
+        </p>
+      </fieldset>
+
       <div className="flex items-center gap-3">
         <Switch
           id="voyage-truncation"
diff --git a/server/go.mod b/server/go.mod
index 0913a64..d3cf3be 100644
--- a/server/go.mod
+++ b/server/go.mod
@@ -12,6 +12,7 @@ require (
 	github.com/philippgille/chromem-go v0.7.0
 	golang.org/x/crypto v0.52.0
 	golang.org/x/sync v0.20.0
+	golang.org/x/time v0.15.0
 	modernc.org/sqlite v1.34.1
 )
 
diff --git a/server/go.sum b/server/go.sum
index 57f0c75..8b5046d 100644
--- a/server/go.sum
+++ b/server/go.sum
@@ -235,6 +235,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
 golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
+golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
+golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
diff --git a/server/internal/embeddings/provider/voyage/limits.go b/server/internal/embeddings/provider/voyage/limits.go
deleted file mode 100644
index ec40f7f..0000000
--- a/server/internal/embeddings/provider/voyage/limits.go
+++ /dev/null
@@ -1,110 +0,0 @@
-package voyage
-
-// ModelLimits captures the documented per-model rate-limit ceiling
-// for one model at Tier 1 (the baseline tier). Voyage does NOT expose
-// these values via API — they live only in the public docs at
-// https://docs.voyageai.com/docs/rate-limits and in the web dashboard
-// under Settings → Rate Limits.
-//
-// Tier 2 (>= $100 paid lifetime) and Tier 3 (>= $1000) multiply the
-// RPM / TPM ceilings by 2x and 3x respectively; per-request limits
-// (inputs / tokens) are not tier-dependent.
-//
-// We surface these numbers in the dashboard so an operator can sanity
-// check the configured concurrency against their plan without bouncing
-// to a separate tab. They are NOT enforced client-side — Voyage rejects
-// over-limit traffic with 429 / 400 which the server already surfaces.
-type ModelLimits struct {
-	// Model is the API model name (matches Config.Model).
-	Model string `json:"model"`
-
-	// Tier1RPM / Tier1TPM are the per-minute caps at the baseline
-	// (free-but-billable) tier. Multiply by 2 for Tier 2, 3 for Tier 3.
-	Tier1RPM int `json:"tier1_rpm"`
-	Tier1TPM int `json:"tier1_tpm"`
-
-	// MaxInputsPerRequest is the hard cap on `input` array length per
-	// /v1/embeddings POST. voyage-code-* models cap at 128; voyage-3*
-	// models accept up to 1000.
-	MaxInputsPerRequest int `json:"max_inputs_per_request"`
-
-	// MaxTokensPerRequest is the hard cap on total tokens in a single
-	// /v1/embeddings POST after Voyage's own tokenizer pass. 120000
-	// for all current text models; we keep it per-model in case Voyage
-	// differentiates later.
-	MaxTokensPerRequest int `json:"max_tokens_per_request"`
-
-	// Notes is free-form text the dashboard renders verbatim. Used to
-	// flag oddities (e.g. "TPM doubled in v3.5", "free tier overrides
-	// these values to 3/10K").
-	Notes string `json:"notes,omitempty"`
-}
-
-// KnownLimitsSource describes where the values below came from. The
-// dashboard surfaces this string so an operator can tell when the
-// snapshot was taken (and whether they should consult the docs for a
-// newer version).
-const KnownLimitsSource = "Voyage public docs at https://docs.voyageai.com/docs/rate-limits (snapshot 2026-05-27). Free tier is capped at 3 RPM / 10K TPM regardless of model."
-
-// KnownModelLimits is the hardcoded snapshot of documented Voyage
-// rate limits. Keys match the model strings the dashboard offers in
-// the provider dropdown. Add new models here when Voyage publishes
-// them and bump KnownLimitsSource's snapshot date.
-var KnownModelLimits = map[string]ModelLimits{
-	"voyage-code-3": {
-		Model: "voyage-code-3", Tier1RPM: 2000, Tier1TPM: 3_000_000,
-		MaxInputsPerRequest: 128, MaxTokensPerRequest: 120_000,
-		Notes: "Optimized for code retrieval.",
-	},
-	"voyage-code-2": {
-		Model: "voyage-code-2", Tier1RPM: 2000, Tier1TPM: 3_000_000,
-		MaxInputsPerRequest: 128, MaxTokensPerRequest: 120_000,
-		Notes: "Predecessor of voyage-code-3.",
-	},
-	"voyage-3-large": {
-		Model: "voyage-3-large", Tier1RPM: 2000, Tier1TPM: 3_000_000,
-		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
-		Notes: "General-purpose multilingual.",
-	},
-	"voyage-3": {
-		Model: "voyage-3", Tier1RPM: 2000, Tier1TPM: 3_000_000,
-		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
-	},
-	"voyage-3-lite": {
-		Model: "voyage-3-lite", Tier1RPM: 2000, Tier1TPM: 3_000_000,
-		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
-		Notes: "Smaller, cheaper than voyage-3.",
-	},
-	"voyage-3.5": {
-		Model: "voyage-3.5", Tier1RPM: 2000, Tier1TPM: 8_000_000,
-		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
-		Notes: "Higher TPM than voyage-3.",
-	},
-	"voyage-3.5-lite": {
-		Model: "voyage-3.5-lite", Tier1RPM: 2000, Tier1TPM: 16_000_000,
-		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 120_000,
-		Notes: "Highest TPM among lite models.",
-	},
-}
-
-// LimitsList returns the known limits as a slice, ordered by model
-// name for deterministic JSON. Convenience for the admin endpoint.
-func LimitsList() []ModelLimits {
-	out := make([]ModelLimits, 0, len(KnownModelLimits))
-	// Preserve dashboard order (matches the factory's model enum).
-	order := []string{"voyage-code-3", "voyage-3-large", "voyage-3", "voyage-3-lite", "voyage-code-2", "voyage-3.5", "voyage-3.5-lite"}
-	seen := map[string]bool{}
-	for _, m := range order {
-		if l, ok := KnownModelLimits[m]; ok {
-			out = append(out, l)
-			seen[m] = true
-		}
-	}
-	// Any extras not in the canonical order go to the tail.
-	for k, v := range KnownModelLimits {
-		if !seen[k] {
-			out = append(out, v)
-		}
-	}
-	return out
-}
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index af31383..dbcb616 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -30,6 +30,8 @@ import (
 	"strconv"
 	"time"
 
+	"golang.org/x/time/rate"
+
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
 
@@ -42,22 +44,19 @@ const (
 	DtypeInt8  = "int8"
 )
 
-// maxBatchSize caps how many inputs we send in a single
-// /v1/embeddings POST. Voyage's per-request limits depend on the
-// model — voyage-code-3 and voyage-code-2 cap at 128; voyage-3*
-// models accept up to 1000. We pick the conservative floor so a
-// single constant works across all supported models. EmbedDocuments
-// transparently splits oversize inputs into sequential sub-batches
-// under the same queue slot so the caller never sees 422 Request
-// Too Large from a large file's chunks.
-const maxBatchSize = 128
-
-// maxTokensPerBatch caps the ESTIMATED token budget per /v1/embeddings
-// POST. Voyage's actual hard limit (observed in 400 error messages) is
-// 120000 tokens; we target 100000 to leave 17% headroom for the
-// byte→token estimation error. When a file's chunks sum above this
-// budget the batch is split further, regardless of count.
-const maxTokensPerBatch = 100_000
+// defaultMaxBatchSize is the static safe default for inputs per POST
+// when the operator has not configured an explicit MaxInputsPerRequest
+// in the provider config. Voyage's voyage-code-* models cap at 128;
+// voyage-3* accept up to 1000. We pick the lower bound so a single
+// default works across all models without 422s.
+const defaultMaxBatchSize = 128
+
+// defaultMaxTokensPerBatch is the static safe default for total
+// estimated tokens per POST when the operator has not configured an
+// explicit MaxTokensPerRequest. Voyage's hard limit (observed in 400
+// responses) is 120K; we target 100K to leave 17% headroom for the
+// byte→token estimation error.
+const defaultMaxTokensPerBatch = 100_000
 
 // bytesPerToken is a conservative chars-per-token heuristic used to
 // estimate the request's token cost without a real tokenizer. Voyage
@@ -79,6 +78,44 @@ type Config struct {
 	OutputDimension int    `json:"output_dimension,omitempty"`
 	OutputDtype     string `json:"output_dtype,omitempty"`
 	Truncation      bool   `json:"truncation,omitempty"`
+
+	// RateLimitRPM caps requests-per-minute the provider will emit.
+	// 0 = no client-side throttling (rely on Voyage to 429 us). When
+	// >0, a token-bucket waits before each POST so we don't exceed
+	// the configured rate. The operator sets this from the Voyage
+	// dashboard's "Rate Limits" page to match their account tier.
+	RateLimitRPM int `json:"rate_limit_rpm,omitempty"`
+
+	// RateLimitTPM caps tokens-per-minute (estimated, summed across
+	// all in-flight + recent requests). 0 = no throttling.
+	RateLimitTPM int `json:"rate_limit_tpm,omitempty"`
+
+	// MaxInputsPerRequest overrides defaultMaxBatchSize. 0 = use
+	// the default (128, safe for voyage-code-*). Operators running
+	// only voyage-3* may bump this to 1000 for fewer round-trips.
+	MaxInputsPerRequest int `json:"max_inputs_per_request,omitempty"`
+
+	// MaxTokensPerRequest overrides defaultMaxTokensPerBatch. 0 =
+	// use the default (100K with 20K headroom from Voyage's 120K
+	// hard cap).
+	MaxTokensPerRequest int `json:"max_tokens_per_request,omitempty"`
+}
+
+// maxBatchSize reports how many inputs one POST may carry: the
+// operator's MaxInputsPerRequest when positive, else the default.
+func (c *Config) maxBatchSize() int {
+	if override := c.MaxInputsPerRequest; override > 0 {
+		return override
+	}
+	return defaultMaxBatchSize
+}
+
+// maxTokensPerBatch picks MaxTokensPerRequest when positive, else the default.
+func (c *Config) maxTokensPerBatch() int {
+	if override := c.MaxTokensPerRequest; override > 0 {
+		return override
+	}
+	return defaultMaxTokensPerBatch
 }
 
 // Provider is the Voyage HTTP client.
@@ -87,6 +124,18 @@ type Provider struct {
 	logger  *slog.Logger
 	secrets provider.SecretLookup
 	http    *http.Client
+
+	// reqLimiter caps requests-per-minute when cfg.RateLimitRPM > 0.
+	// nil when no throttling is configured. Token-bucket with burst
+	// = 1 — we don't allow client-side bursts, since the upstream
+	// budget is a sliding minute and bursting saves nothing.
+	reqLimiter *rate.Limiter
+
+	// tokenLimiter caps tokens-per-minute when cfg.RateLimitTPM > 0.
+	// Burst is set to maxTokensPerBatch so a single full-budget POST
+	// can pass even when the bucket is otherwise empty (we'd just
+	// wait longer afterward). nil when no throttling.
+	tokenLimiter *rate.Limiter
 }
 
 // New constructs the Provider. Does not contact the endpoint.
@@ -100,12 +149,23 @@ func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provid
 	if cfg.OutputDtype == "" {
 		cfg.OutputDtype = DtypeFloat
 	}
-	return &Provider{
+	p := &Provider{
 		cfg:     cfg,
 		logger:  logger,
 		secrets: secrets,
 		http:    &http.Client{Timeout: 60 * time.Second},
 	}
+	// Convert RPM/TPM to per-second token-bucket rates. burst on the
+	// request bucket is 1 (one request worth of "credit"); burst on
+	// the token bucket equals one full POST so we don't deadlock a
+	// legitimate big batch.
+	if cfg.RateLimitRPM > 0 {
+		p.reqLimiter = rate.NewLimiter(rate.Limit(float64(cfg.RateLimitRPM)/60.0), 1)
+	}
+	if cfg.RateLimitTPM > 0 {
+		p.tokenLimiter = rate.NewLimiter(rate.Limit(float64(cfg.RateLimitTPM)/60.0), cfg.maxTokensPerBatch())
+	}
+	return p
 }
 
 func (p *Provider) Kind() string { return provider.KindVoyage }
@@ -179,7 +239,7 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	batches := planBatches(texts)
+	batches := planBatches(texts, p.cfg.maxBatchSize(), p.cfg.maxTokensPerBatch())
 	if len(batches) == 1 {
 		return p.embed(ctx, batches[0], "document")
 	}
@@ -190,8 +250,8 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 		"model", p.cfg.Model,
 		"total_inputs", len(texts),
 		"sub_batches", len(batches),
-		"limit_inputs", maxBatchSize,
-		"limit_tokens", maxTokensPerBatch,
+		"limit_inputs", p.cfg.maxBatchSize(),
+		"limit_tokens", p.cfg.maxTokensPerBatch(),
 	)
 	out := make([][]float32, 0, len(texts))
 	offset := 0
@@ -214,13 +274,17 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 }
 
 // planBatches groups texts into sub-batches that each respect BOTH
-// the input-count cap (maxBatchSize) and the token-budget cap
-// (maxTokensPerBatch). A single text that on its own exceeds the
-// token budget is placed in its own batch — Voyage will then 400
-// with a clear "tokens after truncation" message and the caller
-// surfaces that to the operator (indicates the chunker upstream let
-// through an over-long chunk).
-func planBatches(texts []string) [][]string {
+// the input-count cap and the token-budget cap. A single text that
+// on its own exceeds the token budget is placed in its own batch —
+// Voyage will then 400 with a clear "tokens after truncation"
+// message and the caller surfaces that to the operator (indicates
+// the chunker upstream let through an over-long chunk).
+//
+// maxInputs and maxTokens come from the live Provider.cfg so the
+// operator can override them via the admin form when their tier or
+// chosen model allows a higher cap (e.g. voyage-3-large at 1000
+// inputs/POST instead of 128).
+func planBatches(texts []string, maxInputs, maxTokens int) [][]string {
 	if len(texts) == 0 {
 		return nil
 	}
@@ -231,7 +295,7 @@ func planBatches(texts []string) [][]string {
 		est := estimateTokens(t)
 		// Close the current batch when adding this text would exceed
 		// either limit (and the batch already has something to send).
-		if len(current) > 0 && (len(current) >= maxBatchSize || currentTokens+est > maxTokensPerBatch) {
+		if len(current) > 0 && (len(current) >= maxInputs || currentTokens+est > maxTokens) {
 			batches = append(batches, current)
 			current = nil
 			currentTokens = 0
@@ -299,6 +363,26 @@ func (p *Provider) embed(ctx context.Context, texts []string, inputType string)
 		return nil, fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
 	}
 
+	// Wait on the operator-configured rate-limit token-buckets before
+	// hitting the wire. Both reservations honour ctx cancellation so
+	// a server shutdown / drain doesn't strand callers in Wait().
+	if p.reqLimiter != nil {
+		if err := p.reqLimiter.Wait(ctx); err != nil {
+			return nil, fmt.Errorf("voyage: request-rate wait: %w", err)
+		}
+	}
+	if p.tokenLimiter != nil {
+		est := sumEstimateTokens(texts)
+		if est > p.tokenLimiter.Burst() {
+			est = p.tokenLimiter.Burst()
+		}
+		if est > 0 {
+			if err := p.tokenLimiter.WaitN(ctx, est); err != nil {
+				return nil, fmt.Errorf("voyage: token-rate wait (~%d tokens): %w", est, err)
+			}
+		}
+	}
+
 	body, err := json.Marshal(embedRequest{
 		Input:           texts,
 		Model:           p.cfg.Model,
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 1641008..124d3b3 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"sync/atomic"
 	"testing"
+	"time"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
@@ -180,7 +181,7 @@ func TestPlanBatches_SplitsByTokenBudget(t *testing.T) {
 	small := "tiny"
 	texts := []string{big, small, small, small, small, small}
 
-	batches := planBatches(texts)
+	batches := planBatches(texts, defaultMaxBatchSize, defaultMaxTokensPerBatch)
 	if len(batches) < 2 {
 		t.Fatalf("expected at least 2 batches, got %d", len(batches))
 	}
@@ -188,9 +189,9 @@ func TestPlanBatches_SplitsByTokenBudget(t *testing.T) {
 	got := 0
 	for _, b := range batches {
 		got += len(b)
-		if est := sumEstimateTokens(b); est > maxTokensPerBatch && len(b) > 1 {
+		if est := sumEstimateTokens(b); est > defaultMaxTokensPerBatch && len(b) > 1 {
 			t.Errorf("batch with %d inputs exceeds token budget: ~%d tokens > %d",
-				len(b), est, maxTokensPerBatch)
+				len(b), est, defaultMaxTokensPerBatch)
 		}
 	}
 	if got != len(texts) {
@@ -206,12 +207,12 @@ func TestPlanBatches_RespectsCountCap(t *testing.T) {
 	for i := range texts {
 		texts[i] = "chunk"
 	}
-	batches := planBatches(texts)
+	batches := planBatches(texts, defaultMaxBatchSize, defaultMaxTokensPerBatch)
 	if len(batches) != 2 {
 		t.Fatalf("expected 2 batches (128 + 72), got %d", len(batches))
 	}
-	if len(batches[0]) != maxBatchSize {
-		t.Errorf("first batch has %d inputs, want %d", len(batches[0]), maxBatchSize)
+	if len(batches[0]) != defaultMaxBatchSize {
+		t.Errorf("first batch has %d inputs, want %d", len(batches[0]), defaultMaxBatchSize)
 	}
 	if len(batches[1]) != 72 {
 		t.Errorf("second batch has %d inputs, want 72", len(batches[1]))
@@ -261,6 +262,85 @@ func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
 	}
 }
 
+// TestRateLimitRPMThrottlesRequests verifies that when the operator
+// configures RateLimitRPM, the provider actually waits between
+// requests. 120 RPM = 1 request per 500ms (burst of 1), so 2
+// sequential requests on a fresh limiter take ~500ms total.
+func TestRateLimitRPMThrottlesRequests(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"data":[{"index":0,"embedding":[0.1]}],"usage":{"total_tokens":1}}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	// 120 RPM → 2 req/s → second call must wait ~500ms (after the
+	// burst-1 bucket drained on the first call).
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-3", OutputDtype: DtypeFloat,
+		RateLimitRPM: 120,
+	}, fixedSecrets("K", "v"), nil)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	// First call: instant (burst available).
+	if _, err := p.EmbedQuery(ctx, "first"); err != nil {
+		t.Fatalf("first EmbedQuery: %v", err)
+	}
+	// Second call: must wait. We don't measure precisely because rate
+	// limiter has sub-millisecond timing variance; ≥ 300ms is enough
+	// to know the throttle fired (well above any test-runtime noise).
+	start := time.Now()
+	if _, err := p.EmbedQuery(ctx, "second"); err != nil {
+		t.Fatalf("second EmbedQuery: %v", err)
+	}
+	elapsed := time.Since(start)
+	if elapsed < 300*time.Millisecond {
+		t.Errorf("expected second call to wait for RPM limiter (>= 300ms); elapsed=%s", elapsed)
+	}
+}
+
+// TestRateLimitTPMThrottlesTokens verifies the token-budget bucket
+// also forces a wait when consumption exceeds the per-minute rate.
+// 600K TPM = 10K tokens/s, burst = maxTokensPerBatch (100K). Sending
+// two batches of 60K tokens each should make the second wait while
+// the bucket refills.
+func TestRateLimitTPMThrottlesTokens(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"data":[{"index":0,"embedding":[0.1]}],"usage":{"total_tokens":1}}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-3", OutputDtype: DtypeFloat,
+		// burst = maxTokensPerBatch (100K), refill rate 600K/min = 10K/s.
+		RateLimitTPM: 600_000,
+	}, fixedSecrets("K", "v"), nil)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	// 180K bytes ≈ 60K est tokens — 60% of the 100K burst budget.
+	big := strings.Repeat("x", 180_000)
+
+	// First call drains 60K of the 100K-burst bucket: instant.
+	if _, err := p.EmbedQuery(ctx, big); err != nil {
+		t.Fatalf("first: %v", err)
+	}
+	// Second call wants another 60K but only 40K is left; needs to
+	// wait for 20K to refill at 10K/s = ~2s.
+	start := time.Now()
+	if _, err := p.EmbedQuery(ctx, big); err != nil {
+		t.Fatalf("second: %v", err)
+	}
+	elapsed := time.Since(start)
+	// Lower bound 1.5s — leaves margin for rate-limiter clock granularity.
+	if elapsed < 1500*time.Millisecond {
+		t.Errorf("expected second call to wait for TPM limiter (>= 1.5s); elapsed=%s", elapsed)
+	}
+}
+
 func TestUsageDecodesWithoutPromptTokens(t *testing.T) {
 	// Voyage's usage object lacks prompt_tokens — make sure decode doesn't error.
 	srv, _ := stubServer(t, http.StatusOK, `{
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index 02ca8af..29d5c7f 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -21,31 +21,15 @@ import (
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings"
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
-	"github.com/dvcdsys/code-index/server/internal/embeddings/provider/voyage"
 	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
 	"github.com/go-chi/chi/v5"
 )
 
 // providerInfoPayload is the per-kind entry in GET /embedding-providers.
 type providerInfoPayload struct {
-	Kind       string              `json:"kind"`
-	Schema     json.RawMessage     `json:"schema"`
-	SecretEnvs []secretEnvPayload  `json:"secret_envs"`
-	// DocumentedLimits is an informational snapshot of the provider's
-	// published rate limits. Voyage doesn't expose limits via API so we
-	// ship a hardcoded table sourced from their public docs. Nil when
-	// the provider has no documented limits to show (ollama: local, no
-	// upstream limits; openai: limits vary per organization and aren't
-	// public per-model).
-	DocumentedLimits *documentedLimitsPayload `json:"documented_limits,omitempty"`
-}
-
-// documentedLimitsPayload wraps the per-model limits table plus the
-// source string. The dashboard renders Source as a footnote so the
-// operator can tell when our snapshot was last refreshed.
-type documentedLimitsPayload struct {
-	Source string             `json:"source"`
-	Models []voyage.ModelLimits `json:"models"`
+	Kind       string             `json:"kind"`
+	Schema     json.RawMessage    `json:"schema"`
+	SecretEnvs []secretEnvPayload `json:"secret_envs"`
 }
 
 // secretEnvPayload tells the dashboard which env-var names a provider
@@ -99,23 +83,11 @@ func (s *Server) ListEmbeddingProviders(w http.ResponseWriter, r *http.Request)
 			_, present := os.LookupEnv(name)
 			envPayload = append(envPayload, secretEnvPayload{Name: name, Set: present})
 		}
-		info := providerInfoPayload{
+		out = append(out, providerInfoPayload{
 			Kind:       kind,
 			Schema:     f.SchemaJSON(),
 			SecretEnvs: envPayload,
-		}
-		// Voyage is the only provider with a hardcoded limits table —
-		// Voyage publishes per-model RPM/TPM in their docs but has no
-		// API endpoint to fetch them. The table is sourced from
-		// voyage.KnownModelLimits; see that file's doc-comment for
-		// the snapshot date and a link.
-		if kind == provider.KindVoyage {
-			info.DocumentedLimits = &documentedLimitsPayload{
-				Source: voyage.KnownLimitsSource,
-				Models: voyage.LimitsList(),
-			}
-		}
-		out = append(out, info)
+		})
 	}
 	writeJSON(w, http.StatusOK, map[string]any{"providers": out})
 }

From c357488d881a4d30da5b4e140bc412528ea30d9a Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 13:02:36 +0100
Subject: [PATCH 17/34] fix(voyage): adaptive bisect on "batch too large" 400
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two gaps in the prior token-aware planBatches were leaking real
400s to the indexer:

1. Our estimator (bytes/3) is conservative for prose but can
   under-count for dense code where 1 token ≈ 2 bytes. An estimated
   100K could become an actual 150K → POST → 400.

2. planBatches puts a single input larger than the cap in its own
   batch (no choice — splitting the text would corrupt the chunk).
   That POST would 400 unconditionally.

Both surfaced as the user-reported error:
  "The max allowed tokens per submitted batch is 120000.
   Your batch has 187609 tokens after truncation."

Fix: a new embedWithAdaptiveSplit wraps each embed() call. On a
400 whose message matches voyageBatchTooLargeRegex, it bisects the
batch in half and retries both halves — recursively, so a real
tokenizer miscalculation that's ~2x our estimate self-corrects in
a single extra round-trip. EmbedQuery and EmbedDocuments both go
through this path now.

Single-chunk edge: when the offending batch is already 1 input,
there is nothing to bisect. The provider returns a clear error
pointing the operator at the chunker upstream:

  "voyage: a single chunk produced 187609 tokens (cap 120000).
   Reduce the indexer's max chunk size or switch to a model with
   a higher per-request cap"

This is the right place to fail — silently dropping a chunk would
hurt search quality without telling anyone; truncating it would
change vector semantics; retrying forever would hang the indexer.

parseBatchTooLarge extracts (cap, actual) from Voyage's stable
error message so logs surface both numbers, letting an operator
diagnose persistent estimator drift (e.g. "real tokens always
1.5x our estimate → tighten MaxTokensPerRequest in the form").

Tests:
  - EmbedDocuments with 4 inputs and a stub that rejects any
    batch >1 input ends up running 4 singleton POSTs after
    bisection.
  - Single oversize input returns the clean error.
  - parseBatchTooLarge covers happy path + non-match cases (429,
    different 400 reasons).
---
 .../embeddings/provider/voyage/voyage.go      | 109 +++++++++++++++---
 .../embeddings/provider/voyage/voyage_test.go | 104 +++++++++++++++++
 2 files changed, 198 insertions(+), 15 deletions(-)

diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index dbcb616..2ad64fa 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -27,6 +27,7 @@ import (
 	"io"
 	"log/slog"
 	"net/http"
+	"regexp"
 	"strconv"
 	"time"
 
@@ -35,6 +36,33 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
 
+// voyageBatchTooLargeRegex matches Voyage's per-batch token-limit
+// 400 response so the caller can react adaptively. Voyage's message
+// is fairly stable:
+//   "The max allowed tokens per submitted batch is 120000.
+//    Your batch has 187609 tokens after truncation."
+// We capture both numbers; the actual count drives how aggressively
+// the caller bisects.
+var voyageBatchTooLargeRegex = regexp.MustCompile(
+	`max allowed tokens per submitted batch is (\d+).*Your batch has (\d+) tokens`,
+)
+
+// parseBatchTooLarge extracts the (limit, actual) token counts from a
+// Voyage "batch too large" 400 message. It returns (0, 0, false) when
+// errMsg does not match or either number does not fit in an int.
+func parseBatchTooLarge(errMsg string) (limit, actual int, ok bool) {
+	m := voyageBatchTooLargeRegex.FindStringSubmatch(errMsg)
+	if len(m) < 3 {
+		return 0, 0, false
+	}
+	c, err1 := strconv.Atoi(m[1])
+	a, err2 := strconv.Atoi(m[2])
+	if err1 != nil || err2 != nil {
+		return 0, 0, false
+	}
+	return c, a, true
+}
+
 // DefaultBaseURL is the public Voyage AI embeddings endpoint origin.
 const DefaultBaseURL = "https://api.voyageai.com"
 
@@ -228,7 +256,7 @@ func (p *Provider) Status() provider.Status {
 }
 
 func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
-	vecs, err := p.embed(ctx, []string{query}, "query")
+	vecs, err := p.embedWithAdaptiveSplit(ctx, []string{query}, "query")
 	if err != nil {
 		return nil, err
 	}
@@ -240,19 +268,15 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 		return nil, nil
 	}
 	batches := planBatches(texts, p.cfg.maxBatchSize(), p.cfg.maxTokensPerBatch())
-	if len(batches) == 1 {
-		return p.embed(ctx, batches[0], "document")
-	}
-	// Oversize input — split into sequential sub-batches. The Service
-	// queue holds a single slot for the whole call, so concurrency
-	// semantics are preserved (no extra slots consumed).
-	p.logger.Info("voyage: splitting batch",
-		"model", p.cfg.Model,
-		"total_inputs", len(texts),
-		"sub_batches", len(batches),
-		"limit_inputs", p.cfg.maxBatchSize(),
-		"limit_tokens", p.cfg.maxTokensPerBatch(),
-	)
+	if len(batches) > 1 {
+		p.logger.Info("voyage: splitting batch",
+			"model", p.cfg.Model,
+			"total_inputs", len(texts),
+			"sub_batches", len(batches),
+			"limit_inputs", p.cfg.maxBatchSize(),
+			"limit_tokens", p.cfg.maxTokensPerBatch(),
+		)
+	}
 	out := make([][]float32, 0, len(texts))
 	offset := 0
 	for i, batch := range batches {
@@ -262,7 +286,7 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 			"inputs", len(batch),
 			"est_tokens", sumEstimateTokens(batch),
 		)
-		part, err := p.embed(ctx, batch, "document")
+		part, err := p.embedWithAdaptiveSplit(ctx, batch, "document")
 		if err != nil {
 			return nil, fmt.Errorf("voyage: sub-batch %d/%d (offset=%d, inputs=%d, ~%d tokens): %w",
 				i+1, len(batches), offset, len(batch), sumEstimateTokens(batch), err)
@@ -273,6 +297,61 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	return out, nil
 }
 
+// embedWithAdaptiveSplit wraps embed() with a defensive bisect-on-400
+// retry. Our token estimator (bytes/3) is conservative for prose but
+// not always accurate for dense code; Voyage's real tokenizer can
+// charge more than we predict. On a "batch too large" 400 we split
+// the batch in half and retry both halves recursively. When the
+// batch is already a single input and STILL too large there's
+// nothing to bisect — we return a clear error pointing the operator
+// at the chunker upstream (each chunk should fit voyage's per-input
+// limits; if it doesn't, the chunker let through an over-long unit).
+func (p *Provider) embedWithAdaptiveSplit(ctx context.Context, texts []string, inputType string) ([][]float32, error) {
+	vecs, err := p.embed(ctx, texts, inputType)
+	if err == nil {
+		return vecs, nil
+	}
+	limit, actual, ok := parseBatchTooLarge(err.Error())
+	if !ok {
+		// Different error class (auth, network, rate-limit, …) —
+		// surface as-is, retry would not help.
+		return nil, err
+	}
+	if len(texts) <= 1 {
+		// A single chunk on its own exceeds Voyage's hard cap.
+		// We CAN'T split the text — that would corrupt the
+		// semantic unit the indexer chose. Tell the operator
+		// where to fix it instead.
+		return nil, fmt.Errorf(
+			"voyage: a single chunk produced %d tokens (cap %d). Reduce the indexer's max chunk size or switch to a model with a higher per-request cap: %w",
+			actual, limit, err,
+		)
+	}
+	// Bisect — the caller's batch had multiple inputs whose real
+	// token sum exceeded the cap. Logging deliberately captures
+	// the cap+actual so the operator can spot a pattern (e.g.
+	// estimator consistently off by 1.5x) without grepping the
+	// raw error.
+	mid := len(texts) / 2
+	p.logger.Warn("voyage: batch too large — bisecting and retrying",
+		"inputs", len(texts),
+		"est_tokens", sumEstimateTokens(texts),
+		"voyage_actual_tokens", actual,
+		"voyage_cap", limit,
+		"left_half", mid,
+		"right_half", len(texts)-mid,
+	)
+	left, err := p.embedWithAdaptiveSplit(ctx, texts[:mid], inputType)
+	if err != nil {
+		return nil, err
+	}
+	right, err := p.embedWithAdaptiveSplit(ctx, texts[mid:], inputType)
+	if err != nil {
+		return nil, err
+	}
+	return append(left, right...), nil
+}
+
 // planBatches groups texts into sub-batches that each respect BOTH
 // the input-count cap and the token-budget cap. A single text that
 // on its own exceeds the token budget is placed in its own batch —
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 124d3b3..cb4011f 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -262,6 +262,110 @@ func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
 	}
 }
 
+// TestEmbedDocuments_BisectsOnBatchTooLarge covers the adaptive
+// recovery path: when Voyage returns 400 "max allowed tokens per
+// submitted batch is 120000. Your batch has N tokens" the provider
+// splits the batch in half and retries. The stub here rejects every
+// POST that carries more than 1 input; the provider must bisect
+// down to single-input POSTs and succeed.
+func TestEmbedDocuments_BisectsOnBatchTooLarge(t *testing.T) {
+	var posts int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&posts, 1)
+		raw, _ := io.ReadAll(r.Body)
+		var req embedRequest
+		_ = json.Unmarshal(raw, &req)
+		// Reject any batch with > 1 input — forces the provider to
+		// bisect all the way to singletons.
+		if len(req.Input) > 1 {
+			w.WriteHeader(http.StatusBadRequest)
+			_, _ = w.Write([]byte(`{"detail":"Request failed. The max allowed tokens per submitted batch is 120000. Your batch has 200000 tokens after truncation."}`))
+			return
+		}
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte(`{"data":[{"index":0,"embedding":[0.1]}],"usage":{"total_tokens":1}}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
+		// Pretend our config-time cap is high enough to NOT split via
+		// planBatches; we want to exercise the runtime 400 bisect path.
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 10_000_000,
+	}, fixedSecrets("K", "v"), nil)
+
+	vecs, err := p.EmbedDocuments(context.Background(), []string{"a", "b", "c", "d"})
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if len(vecs) != 4 {
+		t.Fatalf("got %d vectors, want 4", len(vecs))
+	}
+	// One initial rejected POST (4 inputs), two more halves rejected
+	// (2 + 2), four singleton POSTs that succeed. Total = 7.
+	if got := atomic.LoadInt32(&posts); got < 7 {
+		t.Errorf("expected at least 7 POSTs (rejected + bisected + singletons), got %d", got)
+	}
+}
+
+// TestEmbedDocuments_SingleInputTooLargeFailsClean covers the
+// "no split possible" case: when a SINGLE chunk produces more
+// tokens than the upstream cap, we cannot bisect further (would
+// corrupt the chunk). The provider returns a clear error that
+// points the operator at the chunker rather than retrying forever.
+func TestEmbedDocuments_SingleInputTooLargeFailsClean(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusBadRequest)
+		_, _ = w.Write([]byte(`{"detail":"Request failed. The max allowed tokens per submitted batch is 120000. Your batch has 187609 tokens after truncation."}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
+		MaxInputsPerRequest: 1000, MaxTokensPerRequest: 10_000_000,
+	}, fixedSecrets("K", "v"), nil)
+
+	_, err := p.EmbedDocuments(context.Background(), []string{"one big chunk"})
+	if err == nil {
+		t.Fatal("expected error, got nil")
+	}
+	msg := err.Error()
+	if !strings.Contains(msg, "single chunk") || !strings.Contains(msg, "187609 tokens") {
+		t.Errorf("error should mention single-chunk + actual token count: %q", msg)
+	}
+	if !strings.Contains(msg, "Reduce the indexer's max chunk size") {
+		t.Errorf("error should hint at upstream chunker fix: %q", msg)
+	}
+}
+
+func TestParseBatchTooLarge(t *testing.T) {
+	cases := []struct {
+		msg              string
+		wantCap, wantAct int
+		wantOK           bool
+	}{
+		{ // happy path: both numbers are extracted from the 400 body
+			"voyage: status 400: {\"detail\":\"Request failed. The max allowed tokens per submitted batch is 120000. Your batch has 187609 tokens after truncation.\"}",
+			120000, 187609, true,
+		},
+		{ // rate-limit error: not a batch-size message
+			"voyage: status 429: rate limited",
+			0, 0, false,
+		},
+		{ // a 400 with an unrelated reason must not match
+			"voyage: status 400: model not found",
+			0, 0, false,
+		},
+	}
+	for _, tc := range cases {
+		gotCap, gotAct, ok := parseBatchTooLarge(tc.msg)
+		if ok != tc.wantOK || gotCap != tc.wantCap || gotAct != tc.wantAct {
+			t.Errorf("parseBatchTooLarge(%q) = (%d, %d, %v), want (%d, %d, %v)",
+				tc.msg, gotCap, gotAct, ok, tc.wantCap, tc.wantAct, tc.wantOK)
+		}
+	}
+}
+
 // TestRateLimitRPMThrottlesRequests verifies that when the operator
 // configures RateLimitRPM, the provider actually waits between
 // requests. 120 RPM = 1 request per 500ms (burst of 1), so 2

From e25634cdf059c739c7cd07af2a8246e6a8a3c0c5 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 13:04:07 +0100
Subject: [PATCH 18/34] docs(voyage form): explain the per-input truncation
 toggle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "Truncate over-length inputs server-side" switch maps to
Voyage's truncation API parameter. It only affects what happens
when a SINGLE chunk exceeds the model's per-input context window
(e.g. 32K tokens for voyage-code-3) — totally separate from the
120K-tokens-per-batch cap our adaptive bisect already handles.

Before: bare label, no hint, easy to confuse with the batch cap.
Now: a paragraph under the switch explains the trade-off (silent
truncation vs explicit 400) and explicitly notes the separation
from the batch limit.
---
 .../sections/providers/VoyageProviderForm.tsx | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
index c6f3b6e..38ab3c4 100644
--- a/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
+++ b/server/dashboard/src/modules/server/sections/providers/VoyageProviderForm.tsx
@@ -208,15 +208,29 @@ export function VoyageProviderForm({ value, onChange, secretEnvs }: Props) {
         </p>
       </fieldset>
 
-      <div className="flex items-center gap-3">
-        <Switch
-          id="voyage-truncation"
-          checked={value.truncation}
-          onCheckedChange={(c) => onChange({ ...value, truncation: c === true })}
-        />
-        <Label htmlFor="voyage-truncation" className="cursor-pointer">
-          Truncate over-length inputs server-side
-        </Label>
+      <div className="space-y-1.5">
+        <div className="flex items-center gap-3">
+          <Switch
+            id="voyage-truncation"
+            checked={value.truncation}
+            onCheckedChange={(c) => onChange({ ...value, truncation: c === true })}
+          />
+          <Label htmlFor="voyage-truncation" className="cursor-pointer">
+            Truncate over-length inputs server-side
+          </Label>
+        </div>
+        <p className="text-xs text-muted-foreground">
+          Controls Voyage's per-input behaviour when a single chunk
+          exceeds the model's context window (e.g. 32K tokens for
+          voyage-code-3). ON (default): Voyage silently truncates the
+          chunk and embeds the truncated version — you always get a
+          vector, but content past the cap is lost from the
+          embedding. OFF: Voyage returns 400 on over-long inputs so
+          the operator can shorten the indexer's chunk size or pick
+          a model with a larger context. Unrelated to the
+          120K-tokens-per-batch cap (which our adaptive bisect
+          handles separately).
+        </p>
       </div>
 
       <div className="space-y-1.5">

From c5b5a90ccda632ead15719633d563c654d3107b5 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 13:09:57 +0100
Subject: [PATCH 19/34] feat(voyage): byte-window sliding split for oversize
 inputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Voyage's truncation:true silently drops content past the model's
per-input context window (32K tokens for voyage-code-3). For long
chunks that means the tail never makes it into the embedding —
search quality suffers without any signal at index time.

The ollama provider already has a sliding-window pipeline for the
same problem (TokenizeAndEmbed: tokenize → split at CLS+SEP
boundaries → embed all windows → average vectors). Mirror that for
voyage, byte-aligned instead of token-aligned since Voyage has no
public /tokenize endpoint.

Added:
  - Config.MaxInputBytes (default 30_000) — caps per-input byte
    length. Sized so worst-case 1 byte/token dense code still
    fits voyage-code-3's 32K window with margin.
  - splitOversizeInput(text, maxBytes) — slices at rune-aligned
    boundaries so we never cut a UTF-8 multi-byte sequence in
    half.
  - embedAndAverage(ctx, texts, inputType) — three-phase pipeline:
      Phase 1: expand each oversize input into N windows; remember
               spans (which window indices map to which original).
      Phase 2: planBatches + adaptive-bisect POST chain on the
               expanded slice (existing logic, unchanged).
      Phase 3: average sub-window vectors back to one per original
               input. Single-window inputs pass through unchanged
               (fast path, no allocations).
  - EmbedQuery and EmbedDocuments both go through embedAndAverage
    now.

Tests:
  - splitOversizeInput: under-cap singleton, ASCII over-cap 3-way
    split with byte-perfect rejoin, UTF-8 Cyrillic split that
    keeps every window valid UTF-8.
  - EmbedDocuments_AveragesOversizeInputWindows: stub returns
    [1.0, 2.0, 3.0] for the 3 windows of a 250-byte input under
    MaxInputBytes=100; provider must return averaged [2.0].
  - Existing TPM throttling test now sets MaxInputBytes=1M to
    keep its single 180K-byte input from being auto-split (that
    test exercises the rate-limit bucket, not the window-split).
---
 .../embeddings/provider/voyage/voyage.go      | 173 +++++++++++++++++-
 .../embeddings/provider/voyage/voyage_test.go | 101 ++++++++++
 2 files changed, 267 insertions(+), 7 deletions(-)

diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index 2ad64fa..f759e47 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -30,6 +30,7 @@ import (
 	"regexp"
 	"strconv"
 	"time"
+	"unicode/utf8"
 
 	"golang.org/x/time/rate"
 
@@ -86,6 +87,20 @@ const defaultMaxBatchSize = 128
 // byte→token estimation error.
 const defaultMaxTokensPerBatch = 100_000
 
+// defaultMaxInputBytes caps the byte-length of any SINGLE input
+// (one chunk) before it goes to Voyage. When a chunk exceeds this
+// the provider splits it into non-overlapping byte windows and
+// averages the resulting per-window vectors — same pattern as the
+// ollama provider's TokenizeAndEmbed, but byte-based here because
+// Voyage doesn't expose a tokenize endpoint.
+//
+// Sized for voyage-code-3's 32K-token per-input context window
+// with headroom: worst-case dense code can hit ~1 byte / token,
+// so 30K bytes ≈ 30K tokens, leaving the model 2K tokens of
+// margin. Prose typically has ~4 bytes / token, so a 30K-byte
+// English passage is ~7.5K tokens — well under the cap.
+const defaultMaxInputBytes = 30_000
+
 // bytesPerToken is a conservative chars-per-token heuristic used to
 // estimate the request's token cost without a real tokenizer. Voyage
 // does not publish their tokenizer for client-side use; empirically
@@ -127,6 +142,16 @@ type Config struct {
 	// use the default (100K with 20K headroom from Voyage's 120K
 	// hard cap).
 	MaxTokensPerRequest int `json:"max_tokens_per_request,omitempty"`
+
+	// MaxInputBytes caps the byte-length of any SINGLE input before
+	// the provider splits it into byte-aligned windows + averages
+	// the resulting vectors. 0 → use defaultMaxInputBytes (sized
+	// for voyage-code-3's 32K-token per-input context window with
+	// margin). The operator only needs to override when running a
+	// model with a substantially larger context (e.g. future
+	// voyage-* with 64K context) or a different bytes-per-token
+	// regime (heavily non-ASCII content).
+	MaxInputBytes int `json:"max_input_bytes,omitempty"`
 }
 
 // maxBatchSize returns the effective per-POST input cap: explicit
@@ -146,6 +171,62 @@ func (c *Config) maxTokensPerBatch() int {
 	return defaultMaxTokensPerBatch
 }
 
+// maxInputBytes returns the effective per-input byte cap (defines
+// when splitOversizeInput kicks in).
+func (c *Config) maxInputBytes() int {
+	if c.MaxInputBytes > 0 {
+		return c.MaxInputBytes
+	}
+	return defaultMaxInputBytes
+}
+
+// splitOversizeInput slices text into non-overlapping byte windows
+// no larger than maxBytes each, aligned to UTF-8 rune boundaries so
+// we never cut a multi-byte character mid-sequence. Returns text
+// unsplit, as a single-element slice, when it already fits or when
+// maxBytes <= 0; windows are substrings, so no text is copied.
+//
+// Why byte-based rather than token-aligned: Voyage doesn't expose a
+// /tokenize endpoint we can call client-side. The ollama provider
+// gets to split at exact token boundaries (CLS + content_window +
+// SEP) because llama-server tokenises for us. Voyage's real
+// tokenizer is opaque, so we approximate with bytes. The adaptive
+// bisect on 400 (see embedWithAdaptiveSplit) is the safety net
+// when this approximation under-counts.
+func splitOversizeInput(text string, maxBytes int) []string {
+	if maxBytes <= 0 || len(text) <= maxBytes {
+		return []string{text}
+	}
+	var windows []string
+	start := 0
+	for start < len(text) {
+		end := start + maxBytes
+		if end >= len(text) {
+			windows = append(windows, text[start:])
+			break
+		}
+		// Walk backward to the nearest rune-start byte so we never
+		// split a multi-byte UTF-8 character in the middle.
+		// utf8.RuneStart returns true for ASCII (single-byte) and
+		// for the leading byte of a multi-byte sequence.
+		for end > start && !utf8.RuneStart(text[end]) {
+			end--
+		}
+		if end == start {
+			// Degenerate: maxBytes < the length of the next rune.
+			// Cut at the original boundary to make progress; the
+			// resulting partial codepoint is still bytes Voyage
+			// can tokenise (just less meaningfully). In practice
+			// maxBytes is in the tens of thousands so this branch
+			// is unreachable on real input.
+			end = start + maxBytes
+		}
+		windows = append(windows, text[start:end])
+		start = end
+	}
+	return windows
+}
+
 // Provider is the Voyage HTTP client.
 type Provider struct {
 	cfg     Config
@@ -256,7 +337,7 @@ func (p *Provider) Status() provider.Status {
 }
 
 func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
-	vecs, err := p.embedWithAdaptiveSplit(ctx, []string{query}, "query")
+	vecs, err := p.embedAndAverage(ctx, []string{query}, "query")
 	if err != nil {
 		return nil, err
 	}
@@ -267,17 +348,66 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	batches := planBatches(texts, p.cfg.maxBatchSize(), p.cfg.maxTokensPerBatch())
+	return p.embedAndAverage(ctx, texts, "document")
+}
+
+// embedAndAverage is the per-input sliding-window pipeline (mirrors
+// ollama.Provider.TokenizeAndEmbed, but byte-based since Voyage has
+// no tokenize endpoint we can hit):
+//
+//  1. Walk every input. If its byte-length exceeds maxInputBytes,
+//     split at rune-aligned boundaries into N non-overlapping
+//     windows. Remember which original each window belongs to via
+//     a (start, length) span table.
+//  2. Send the expanded slice through planBatches → POST chains
+//     with the same adaptive-bisect-on-400 behaviour as before.
+//  3. Reassemble: for inputs that produced multiple windows,
+//     average the per-window vectors back to a single vector.
+//     For inputs that fit unchanged, the vector passes through.
+//
+// The averaging step is what keeps tail content of an over-long
+// chunk from being dropped (which is what truncation:true would do
+// upstream). The trade-off is N times more POST/token cost per
+// such chunk, but oversize chunks are rare on well-chunked
+// indexes — the indexer should already be cutting at function /
+// class boundaries.
+func (p *Provider) embedAndAverage(ctx context.Context, texts []string, inputType string) ([][]float32, error) {
+	maxIn := p.cfg.maxInputBytes()
+
+	// Phase 1: expand oversize inputs into windows; track spans.
+	type span struct{ start, length int }
+	spans := make([]span, len(texts))
+	var expanded []string
+	totalSplits := 0
+	for i, t := range texts {
+		windows := splitOversizeInput(t, maxIn)
+		spans[i] = span{start: len(expanded), length: len(windows)}
+		expanded = append(expanded, windows...)
+		if len(windows) > 1 {
+			totalSplits += len(windows)
+		}
+	}
+	if totalSplits > 0 {
+		p.logger.Info("voyage: oversize inputs split into byte-windows",
+			"original_inputs", len(texts),
+			"total_windows", len(expanded),
+			"split_windows", totalSplits,
+			"max_input_bytes", maxIn,
+		)
+	}
+
+	// Phase 2: batch + POST as before, on the expanded slice.
+	batches := planBatches(expanded, p.cfg.maxBatchSize(), p.cfg.maxTokensPerBatch())
 	if len(batches) > 1 {
 		p.logger.Info("voyage: splitting batch",
 			"model", p.cfg.Model,
-			"total_inputs", len(texts),
+			"total_inputs", len(expanded),
 			"sub_batches", len(batches),
 			"limit_inputs", p.cfg.maxBatchSize(),
 			"limit_tokens", p.cfg.maxTokensPerBatch(),
 		)
 	}
-	out := make([][]float32, 0, len(texts))
+	allVecs := make([][]float32, 0, len(expanded))
 	offset := 0
 	for i, batch := range batches {
 		p.logger.Debug("voyage: sub-batch POST",
@@ -286,15 +416,44 @@ func (p *Provider) EmbedDocuments(ctx context.Context, texts []string) ([][]floa
 			"inputs", len(batch),
 			"est_tokens", sumEstimateTokens(batch),
 		)
-		part, err := p.embedWithAdaptiveSplit(ctx, batch, "document")
+		part, err := p.embedWithAdaptiveSplit(ctx, batch, inputType)
 		if err != nil {
 			return nil, fmt.Errorf("voyage: sub-batch %d/%d (offset=%d, inputs=%d, ~%d tokens): %w",
 				i+1, len(batches), offset, len(batch), sumEstimateTokens(batch), err)
 		}
-		out = append(out, part...)
+		allVecs = append(allVecs, part...)
 		offset += len(batch)
 	}
-	return out, nil
+
+	// Phase 3: reassemble — average sub-window vectors back to one
+	// vector per original input. Vectors that came through alone
+	// (one-window inputs) pass through unchanged.
+	if totalSplits == 0 {
+		// Fast path: no oversize inputs, allVecs already maps 1:1
+		// to texts.
+		return allVecs, nil
+	}
+	result := make([][]float32, len(texts))
+	for i, sp := range spans {
+		if sp.length == 1 {
+			result[i] = allVecs[sp.start]
+			continue
+		}
+		dim := len(allVecs[sp.start])
+		avg := make([]float32, dim)
+		for k := 0; k < sp.length; k++ {
+			v := allVecs[sp.start+k]
+			for d := range avg {
+				avg[d] += v[d]
+			}
+		}
+		n := float32(sp.length)
+		for d := range avg {
+			avg[d] /= n
+		}
+		result[i] = avg
+	}
+	return result, nil
 }
 
 // embedWithAdaptiveSplit wraps embed() with a defensive bisect-on-400
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index cb4011f..9646ec4 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -10,6 +10,7 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
+	"unicode/utf8"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
@@ -262,6 +263,102 @@ func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
 	}
 }
 
+// TestSplitOversizeInput exercises the byte-window splitter
+// directly. Covers: small inputs pass through as a singleton
+// slice; oversize ASCII split into N windows; UTF-8 boundary
+// respected (no codepoint cut in half).
+func TestSplitOversizeInput(t *testing.T) {
+	t.Run("under cap returns singleton", func(t *testing.T) {
+		got := splitOversizeInput("hello world", 100)
+		if len(got) != 1 || got[0] != "hello world" {
+			t.Errorf("got %v, want [hello world]", got)
+		}
+	})
+
+	t.Run("ASCII over cap splits to N windows", func(t *testing.T) {
+		text := strings.Repeat("x", 250)
+		got := splitOversizeInput(text, 100)
+		if len(got) != 3 {
+			t.Fatalf("got %d windows, want 3", len(got))
+		}
+		joined := strings.Join(got, "")
+		if joined != text {
+			t.Errorf("rejoin mismatch: got len=%d, want len=%d", len(joined), len(text))
+		}
+		for i, w := range got {
+			if len(w) > 100 {
+				t.Errorf("window %d has %d bytes, want <= 100", i, len(w))
+			}
+		}
+	})
+
+	t.Run("UTF-8 multi-byte boundary respected", func(t *testing.T) {
+		// 100 Cyrillic letters (2 bytes each in UTF-8) = 200 bytes.
+		// Split cap = 50 bytes. If we split naively at byte 50 we'd
+		// cut a 2-byte rune in half — utf8.ValidString would fail.
+		text := strings.Repeat("щ", 100)
+		got := splitOversizeInput(text, 50)
+		for i, w := range got {
+			if !utf8.ValidString(w) {
+				t.Errorf("window %d is not valid UTF-8: bytes=%v", i, []byte(w))
+			}
+		}
+		// Rejoin must reproduce the original byte-for-byte.
+		if joined := strings.Join(got, ""); joined != text {
+			t.Errorf("rejoin mismatch: lost %d bytes", len(text)-len(joined))
+		}
+	})
+}
+
+// TestEmbedDocuments_AveragesOversizeInputWindows covers the
+// end-to-end sliding-window behaviour: a 250-byte input with
+// MaxInputBytes=100 must be sent as 3 window inputs (batched into
+// one POST here); the returned vector must be the element-wise mean
+// of the 3 window vectors. Mirrors ollama's TokenizeAndEmbed.
+func TestEmbedDocuments_AveragesOversizeInputWindows(t *testing.T) {
+	var postCount int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt32(&postCount, 1)
+		raw, _ := io.ReadAll(r.Body)
+		var req embedRequest
+		_ = json.Unmarshal(raw, &req)
+		// Return a different constant vector per window position so
+		// the average is easy to assert.
+		items := make([]map[string]any, len(req.Input))
+		for i := range req.Input {
+			// Each window's vector = [i+1.0] so 3 windows give
+			// [1.0, 2.0, 3.0] → average 2.0.
+			items[i] = map[string]any{"index": i, "embedding": []float32{float32(i + 1)}}
+		}
+		body, _ := json.Marshal(map[string]any{
+			"data":  items,
+			"model": req.Model,
+			"usage": map[string]int{"total_tokens": 1},
+		})
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write(body)
+	}))
+	t.Cleanup(srv.Close)
+
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
+		MaxInputBytes: 100,
+	}, fixedSecrets("K", "v"), nil)
+
+	text := strings.Repeat("x", 250) // → 3 windows of 100/100/50 bytes
+	vecs, err := p.EmbedDocuments(context.Background(), []string{text})
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if len(vecs) != 1 {
+		t.Fatalf("got %d vectors, want 1 (averaged)", len(vecs))
+	}
+	// Stub returned [1.0, 2.0, 3.0] for the three windows → mean is 2.0.
+	if got := vecs[0][0]; got < 1.99 || got > 2.01 {
+		t.Errorf("averaged vector[0] = %v, want ~2.0", got)
+	}
+}
+
 // TestEmbedDocuments_BisectsOnBatchTooLarge covers the adaptive
 // recovery path: when Voyage returns 400 "max allowed tokens per
 // submitted batch is 120000. Your batch has N tokens" the provider
@@ -420,6 +517,10 @@ func TestRateLimitTPMThrottlesTokens(t *testing.T) {
 		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-3", OutputDtype: DtypeFloat,
 		// burst = maxTokensPerBatch (100K), refill rate 600K/min = 10K/s.
 		RateLimitTPM: 600_000,
+		// Disable the per-input byte-window split for this test —
+		// we want to send the whole oversized input in one POST so
+		// the token-budget bucket actually sees ~60K tokens at once.
+		MaxInputBytes: 1_000_000,
 	}, fixedSecrets("K", "v"), nil)
 
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)

From 33fd2d12d9a9202ebf5348a7ed68d597aacad80d Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 13:18:56 +0100
Subject: [PATCH 20/34] fix(voyage): align token estimator with Voyage's own /5
 heuristic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per Voyage's own docs (https://docs.voyageai.com/docs/tokenization):

  "one word roughly corresponds to 1.2 - 1.5 tokens on average;
   for rough estimates divide character count by 5"

Our previous bytesPerToken=3 over-counted for everything except the
densest code, causing planBatches to split into more sub-batches
than necessary. Switching to bytesPerToken=5 matches Voyage's
published rule of thumb and reduces spurious splits on ordinary
content.

The trade-off: dense code can run hotter (1 token ≈ 2-3 bytes
empirically), so our estimate may UNDER-count for very tight code
and let planBatches pack inputs tighter than the real cap permits.
That's fine — embedWithAdaptiveSplit detects the resulting 400
"max tokens per batch" response and bisects the batch in half. We
trade a few extra round-trips on the worst-case dense files for
materially fewer splits on the common case.

Better long-term path is to load Voyage's real HF tokenizer
(publicly available at huggingface.co/voyageai/voyage-code-3 as a
standard tokenizer.json), but that requires a CGO Rust dep
(daulet/tokenizers) — deferred to a follow-up that justifies the
Docker / distroless work.

Tests:
  - TestPlanBatches_SplitsByTokenBudget: bumped big text from
    500K → 600K bytes so it exceeds the 100K-token cap at the
    new divisor.
  - TestEmbedDocuments_SplitsByTokenBudget: bumped from 300K → 500K
    and disabled per-input window split (MaxInputBytes=1M) to keep
    the test focused on the batch-token cap.
  - TestRateLimitTPMThrottlesTokens: bumped 180K → 300K so the
    per-call cost still hits ~60K tokens at the new divisor.
---
 .../embeddings/provider/voyage/voyage.go      | 27 ++++++++++++-------
 .../embeddings/provider/voyage/voyage_test.go | 18 ++++++++-----
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index f759e47..d58694a 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -101,17 +101,26 @@ const defaultMaxTokensPerBatch = 100_000
 // English passage is ~7.5K tokens — well under the cap.
 const defaultMaxInputBytes = 30_000
 
-// bytesPerToken is a conservative chars-per-token heuristic used to
-// estimate the request's token cost without a real tokenizer. Voyage
-// does not publish their tokenizer for client-side use; empirically
-// code averages ~3–4 chars/token and English prose ~4. We use 3 to
-// over-count the cost (safe upper bound — we'll split sooner than the
-// upstream limit, never later).
+// bytesPerToken is the chars-per-token heuristic used to estimate
+// token cost without a real tokenizer. Voyage's own docs at
+// https://docs.voyageai.com/docs/tokenization recommend "dividing
+// character count by 5" as the rough rule of thumb (they publish
+// real HF tokenizers on huggingface.co/voyageai/voyage-* but
+// pulling one in here would require a CGO Rust dep — deferred).
+//
+// 5 is calibrated for prose; dense source code can run hotter
+// (1 token ≈ 2–3 bytes), which would make this estimator UNDER-
+// count for code. That's OK: planBatches may then pack inputs
+// tighter than the real cap permits, but embedWithAdaptiveSplit
+// detects the resulting 400 ("max tokens per batch") and bisects.
+// We trade a few extra round-trips on dense files for far fewer
+// spurious splits on ordinary content.
 //
 // len() in Go returns BYTE length, not rune count, so multi-byte
-// UTF-8 input (Cyrillic comments, CJK) gets further over-counted —
-// also safe.
-const bytesPerToken = 3
+// UTF-8 input (Cyrillic comments, CJK) gets over-counted relative
+// to Voyage's character-based heuristic — safe direction (more
+// splits, never fewer).
+const bytesPerToken = 5
 
 // Config is the persisted shape of the voyage provider's config blob.
 type Config struct {
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 9646ec4..2a69d35 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -174,11 +174,11 @@ func TestEmbedDocumentsSplitsOversizeBatch(t *testing.T) {
 // TestPlanBatches_SplitsByTokenBudget covers the second cap on per-
 // request batch size: even when input count is under maxBatchSize,
 // Voyage hard-limits the request to 120K tokens. Our estimator uses
-// 3 bytes/token so a 300_000-byte text estimates to 100_000 tokens,
-// hitting the budget exactly. Mixing one huge text with several
+// bytesPerToken=5 so a 600_000-byte text estimates to 120_000 tokens,
+// strictly above the 100K budget. Mixing one huge text with several
 // smaller ones should produce multiple batches.
 func TestPlanBatches_SplitsByTokenBudget(t *testing.T) {
-	big := strings.Repeat("x", 300_000) // ~100_000 est tokens
+	big := strings.Repeat("x", 600_000) // ~120_000 est tokens at bytesPerToken=5
 	small := "tiny"
 	texts := []string{big, small, small, small, small, small}
 
@@ -245,11 +245,15 @@ func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
 	}))
 	t.Cleanup(srv.Close)
 
-	// Two big texts ~100K est tokens each — should produce >= 2 POSTs.
-	big := strings.Repeat("x", 300_000)
+	// Two big texts ~100K est tokens each (at bytesPerToken=5) →
+	// should produce >= 2 POSTs. MaxInputBytes set high so the
+	// per-input sliding-window split doesn't trigger; we want to
+	// exercise the batch-level token cap specifically.
+	big := strings.Repeat("x", 500_000)
 	texts := []string{big, big}
 	p := New(Config{
 		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
+		MaxInputBytes: 1_000_000,
 	}, fixedSecrets("K", "v"), nil)
 	vecs, err := p.EmbedDocuments(context.Background(), texts)
 	if err != nil {
@@ -526,8 +530,8 @@ func TestRateLimitTPMThrottlesTokens(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
 
-	// 180K bytes ≈ 60K est tokens — half the burst budget.
-	big := strings.Repeat("x", 180_000)
+	// 300K bytes ≈ 60K est tokens (bytesPerToken=5) — half the burst budget.
+	big := strings.Repeat("x", 300_000)
 
 	// First call drains 60K of the 100K-burst bucket: instant.
 	if _, err := p.EmbedQuery(ctx, big); err != nil {

From 4264d67ef722cb480e2ec924b55b5de1502bc96f Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Wed, 27 May 2026 13:30:58 +0100
Subject: [PATCH 21/34] fix(voyage): tighten token estimator to bytes/2 for
 code workloads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /5 heuristic comes from Voyage's English-prose docs, but cix is
primarily a code-indexer and dense source code is much denser per
token. Production logs against voyage-code-3 show ~1.4 bytes/token
on tight Go files — a 64-input batch estimating 51K tokens actually
charged 187K, triggering Voyage's 120K hard-cap 400 and forcing the
bisect safety net to fire on every batch even with a correctly
configured rate-limits section.

bytesPerToken=2 is far closer to code reality (the densest files, at
~1.4 bytes/token, are still under-counted by ~1.4x) and over-estimates
prose by ~2.5x (more round-trips, never a 400). The
embedWithAdaptiveSplit bisect stays as the safety net for that residue.

Tests adjusted for the new divisor (240K/200K/120K byte text sizes
where the original called for 600K/500K/300K under /5).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../embeddings/provider/voyage/voyage.go      | 32 +++++++++++--------
 .../embeddings/provider/voyage/voyage_test.go | 12 +++----
 2 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index d58694a..f661116 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -104,23 +104,27 @@ const defaultMaxInputBytes = 30_000
 // bytesPerToken is the chars-per-token heuristic used to estimate
 // token cost without a real tokenizer. Voyage's own docs at
 // https://docs.voyageai.com/docs/tokenization recommend "dividing
-// character count by 5" as the rough rule of thumb (they publish
-// real HF tokenizers on huggingface.co/voyageai/voyage-* but
-// pulling one in here would require a CGO Rust dep — deferred).
+// character count by 5" for English prose, but cix is primarily a
+// CODE-indexing workload and dense source code runs much hotter:
+// production logs against voyage-code-3 show 1 token ≈ 1.4 bytes
+// for tight Go/Rust files, so a /5 estimate under-counts by
+// ~3.6×. That's exactly the kind of error that ships an
+// estimated-51K-token batch into a real 187K-token POST and
+// triggers Voyage's 120K hard-cap 400.
 //
-// 5 is calibrated for prose; dense source code can run hotter
-// (1 token ≈ 2–3 bytes), which would make this estimator UNDER-
-// count for code. That's OK: planBatches may then pack inputs
-// tighter than the real cap permits, but embedWithAdaptiveSplit
-// detects the resulting 400 ("max tokens per batch") and bisects.
-// We trade a few extra round-trips on dense files for far fewer
-// spurious splits on ordinary content.
+// 2 is the safer baseline: it still under-counts the densest code
+// (~1.4×) but over-estimates prose by ~2.5× (fewer inputs packed
+// per batch — more round-trips, never a 400). The
+// embedWithAdaptiveSplit bisect remains as a residual safety net
+// for outliers (Voyage publishes real HF tokenizers on
+// huggingface.co/voyageai/voyage-* but pulling one in here would
+// require a CGO Rust dep — deferred to a follow-up).
 //
 // len() in Go returns BYTE length, not rune count, so multi-byte
 // UTF-8 input (Cyrillic comments, CJK) gets over-counted relative
 // to Voyage's character-based heuristic — safe direction (more
 // splits, never fewer).
-const bytesPerToken = 5
+const bytesPerToken = 2
 
 // Config is the persisted shape of the voyage provider's config blob.
 type Config struct {
@@ -466,9 +470,9 @@ func (p *Provider) embedAndAverage(ctx context.Context, texts []string, inputTyp
 }
 
 // embedWithAdaptiveSplit wraps embed() with a defensive bisect-on-400
-// loop. Our token estimator (bytes/3) is conservative for prose but
-// not always accurate for dense code; Voyage's real tokenizer can
-// charge more than we predict. On a "batch too large" 400 we split
+// loop. Our byte→token estimator (see bytesPerToken) cannot match
+// Voyage's real tokenizer exactly; pathological inputs may still
+// overflow the per-batch cap. On a "batch too large" 400 we split
 // the batch in half and retry both halves recursively. When the
 // batch is already a single input and STILL too large there's
 // nothing to bisect — we return a clear error pointing the operator
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 2a69d35..542b1dc 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -174,11 +174,11 @@ func TestEmbedDocumentsSplitsOversizeBatch(t *testing.T) {
 // TestPlanBatches_SplitsByTokenBudget covers the second cap on per-
 // request batch size: even when input count is under maxBatchSize,
 // Voyage hard-limits the request to 120K tokens. Our estimator uses
-// bytesPerToken=5 so a 600_000-byte text estimates to 120_000 tokens,
+// bytesPerToken=2 so a 240_000-byte text estimates to 120_000 tokens,
 // strictly above the 100K budget. Mixing one huge text with several
 // smaller ones should produce multiple batches.
 func TestPlanBatches_SplitsByTokenBudget(t *testing.T) {
-	big := strings.Repeat("x", 600_000) // ~120_000 est tokens at bytesPerToken=5
+	big := strings.Repeat("x", 240_000) // ~120_000 est tokens at bytesPerToken=2
 	small := "tiny"
 	texts := []string{big, small, small, small, small, small}
 
@@ -245,11 +245,11 @@ func TestEmbedDocumentsSplitsByTokenBudget(t *testing.T) {
 	}))
 	t.Cleanup(srv.Close)
 
-	// Two big texts ~100K est tokens each (at bytesPerToken=5) →
+	// Two big texts ~100K est tokens each (at bytesPerToken=2) →
 	// should produce >= 2 POSTs. MaxInputBytes set high so the
 	// per-input sliding-window split doesn't trigger; we want to
 	// exercise the batch-level token cap specifically.
-	big := strings.Repeat("x", 500_000)
+	big := strings.Repeat("x", 200_000)
 	texts := []string{big, big}
 	p := New(Config{
 		BaseURL: srv.URL, APIKeyEnv: "K", Model: "voyage-code-3", OutputDtype: DtypeFloat,
@@ -530,8 +530,8 @@ func TestRateLimitTPMThrottlesTokens(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
 
-	// 300K bytes ≈ 60K est tokens (bytesPerToken=5) — half the burst budget.
-	big := strings.Repeat("x", 300_000)
+	// 120K bytes ≈ 60K est tokens (bytesPerToken=2) — half the burst budget.
+	big := strings.Repeat("x", 120_000)
 
 	// First call drains 60K of the 100K-burst bucket: instant.
 	if _, err := p.EmbedQuery(ctx, big); err != nil {

From 061a12a32118e862977e9802a066e4337d8e9ce6 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 21:28:04 +0100
Subject: [PATCH 22/34] =?UTF-8?q?fix(server):=20unify=20storage=20paths=20?=
 =?UTF-8?q?=E2=80=94=20permanent=20system=20DB=20+=20per-provider=20vector?=
 =?UTF-8?q?=20namespace?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The storage layout derived both the SQLite path and the chroma dir from
the embedding model name (a Python-era artefact ported 1:1). That was
safe only while the model was a fixed env var. Runtime provider switching
turned it into a critical bug: switching to a higher-dim provider (e.g.
voyage 2048) wrote vectors into the previous provider's collection,
breaking search with "vectors must have the same length". It was also a
footgun: a model change spawned a parallel system DB with empty accounts.

Two-plane design:
- System DB is ONE permanent, model-INDEPENDENT file at cfg.SQLitePath
  (accounts + catalog + parsed code). The parsed-code tables are
  model-independent; vector search never reads them.
- The chroma vector store is namespaced by the ACTIVE provider identity
  via provider.StorageSlug(Provider.ID()), so vectors of different
  dimensions never share a collection. Switching back reuses the prior
  namespace without a reindex.

Changes:
- provider.StorageSlug: deterministic, idempotent filesystem slug from a
  Provider.ID() fingerprint.
- config: drop DynamicSQLitePath/DynamicChromaPersistDir; add
  ChromaDirForSlug; keep ModelSafeName + LegacyDynamicSQLitePath solely
  for the one-time adoption migration.
- internal/storage (new): boot-time, idempotent file migrations —
  AdoptLegacyModelDB (checkpoint-then-rename the legacy per-model DB onto
  the canonical path; fossil moved aside to *.pre-unify-*) and
  PrefixLegacyChromaDirs (rename pre-unification ollama dirs to the
  unified scheme; canonicalises the legacy suffix through StorageSlug so
  model names with '.'/':' still match the path the server opens — no
  silent orphaning).
- db.HasTables: helper to distinguish a real unified DB from a fossil.
- vectorstore.Holder: concurrency-safe swappable *Store wrapper +
  Interface satisfied by both *Store and *Holder; indexer/httpapi/repojobs
  hold the Interface.
- embeddings.Service.SwitchProvider now reopens the vector store under the
  new identity slug and atomically swaps it into the Holder (live, no
  restart); reopen failure keeps the old store and surfaces a loud error.
- main.go/server.go: orchestrate boot migrations, Holder wiring, and
  identity-namespaced storage display.

One-shot migration shims (ModelSafeName, LegacyDynamicSQLitePath,
AdoptLegacyModelDB, PrefixLegacyChromaDirs and their call sites) are
marked "LEGACY-MIGRATION (remove next release)" for deletion once all
deployments have booted on the unified layout.

Tests: StorageSlug (incl. special-char canonicalisation), dbmigrate
(adopt/fossil-aside/WAL-drain/idempotency), chromamigrate (strict
normalize/no-clobber/idempotency), Holder (-race swap-under-read), live
reopen (swap/failure-keeps-old/unwired-noop). go build/vet/test ./...
green; -race on vectorstore green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 server/cmd/cix-server/main.go                 |  68 +++++--
 server/internal/config/config.go              |  38 ++--
 server/internal/config/config_test.go         |  10 +-
 server/internal/db/db.go                      |  43 +++++
 .../internal/embeddings/provider/provider.go  |  36 ++++
 .../embeddings/provider/storageslug_test.go   |  38 ++++
 server/internal/embeddings/service.go         | 102 +++++++++-
 .../embeddings/switch_provider_test.go        | 138 ++++++++++++++
 server/internal/httpapi/router.go             |   6 +-
 server/internal/httpapi/search.go             |   2 +-
 server/internal/httpapi/server.go             |   8 +-
 server/internal/indexer/indexer.go            |   4 +-
 server/internal/repojobs/repojobs.go          |   2 +-
 server/internal/storage/chromamigrate.go      | 111 +++++++++++
 server/internal/storage/chromamigrate_test.go | 122 ++++++++++++
 server/internal/storage/dbmigrate.go          | 145 ++++++++++++++
 server/internal/storage/dbmigrate_test.go     | 180 ++++++++++++++++++
 server/internal/vectorstore/holder.go         | 122 ++++++++++++
 server/internal/vectorstore/holder_test.go    | 113 +++++++++++
 server/internal/vectorstore/store.go          |   2 +-
 20 files changed, 1249 insertions(+), 41 deletions(-)
 create mode 100644 server/internal/embeddings/provider/storageslug_test.go
 create mode 100644 server/internal/embeddings/switch_provider_test.go
 create mode 100644 server/internal/storage/chromamigrate.go
 create mode 100644 server/internal/storage/chromamigrate_test.go
 create mode 100644 server/internal/storage/dbmigrate.go
 create mode 100644 server/internal/storage/dbmigrate_test.go
 create mode 100644 server/internal/vectorstore/holder.go
 create mode 100644 server/internal/vectorstore/holder_test.go

diff --git a/server/cmd/cix-server/main.go b/server/cmd/cix-server/main.go
index bb18a6b..34ec35d 100644
--- a/server/cmd/cix-server/main.go
+++ b/server/cmd/cix-server/main.go
@@ -25,8 +25,8 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/embeddingscfg"
 	"github.com/dvcdsys/code-index/server/internal/githubapi"
 	"github.com/dvcdsys/code-index/server/internal/githubtokens"
-	"github.com/dvcdsys/code-index/server/internal/groups"
 	"github.com/dvcdsys/code-index/server/internal/gitrepos"
+	"github.com/dvcdsys/code-index/server/internal/groups"
 	"github.com/dvcdsys/code-index/server/internal/httpapi"
 	"github.com/dvcdsys/code-index/server/internal/indexer"
 	"github.com/dvcdsys/code-index/server/internal/jobs"
@@ -35,6 +35,7 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/runtimecfg"
 	"github.com/dvcdsys/code-index/server/internal/secrets"
 	"github.com/dvcdsys/code-index/server/internal/sessions"
+	"github.com/dvcdsys/code-index/server/internal/storage"
 	"github.com/dvcdsys/code-index/server/internal/tunnelcfg"
 	"github.com/dvcdsys/code-index/server/internal/tunnels"
 	"github.com/dvcdsys/code-index/server/internal/users"
@@ -116,7 +117,16 @@ func run() error {
 	chunker.Configure(cfg.Languages)
 	logger.Info("chunker languages configured", "active", chunker.SupportedLanguages())
 
-	dbPath := cfg.DynamicSQLitePath()
+	// The system DB is model-INDEPENDENT (one permanent file at
+	// cfg.SQLitePath holding accounts + catalog + parsed code). Older
+	// builds suffixed the model name onto the path; adopt any such legacy
+	// per-model file as the canonical system DB (idempotent, one-time).
+	// LEGACY-MIGRATION (remove next release): drop this adoption call once
+	// all deployments have booted on the unified layout.
+	if err := storage.AdoptLegacyModelDB(cfg.SQLitePath, cfg.LegacyDynamicSQLitePath(), logger); err != nil {
+		return fmt.Errorf("adopt legacy system db: %w", err)
+	}
+	dbPath := cfg.SQLitePath
 	logger.Info("opening database", "path", dbPath)
 	database, err := db.OpenWith(db.OpenOptions{
 		Path:    dbPath,
@@ -156,14 +166,10 @@ func run() error {
 		"batch", cfg.LlamaBatchSize,
 		"sources", snap.Source,
 	)
-	// DynamicSQLitePath embeds ModelSafeName(); if the dashboard switched the
-	// model, the storage path resolved a moment ago is for the OLD model. The
-	// already-opened DB is still correct (it's the OLD model's state) but the
-	// chroma vectorstore opened below needs to honour the NEW model. Recompute
-	// dbPath only matters if we want to re-open under the new model — for PR-E
-	// we deliberately keep the old DB so historical projects keep their
-	// indexed_with_model and the dashboard can show the drift. Sidecar +
-	// vectorstore use the new model.
+	// The system DB is model-independent (opened above at cfg.SQLitePath).
+	// Only the chroma vector store is namespaced per embedding identity —
+	// it is opened below once the active provider is known, using
+	// provider.StorageSlug(Provider.ID()).
 
 	// Embeddings service. When disabled we still build the value so router
 	// wiring stays consistent — Service methods return ErrDisabled in that case.
@@ -234,21 +240,49 @@ func run() error {
 		}
 	}()
 
+	// Prefix legacy un-prefixed chroma dirs (pre-unification ollama-only
+	// builds) to the unified "<base>_ollama_<model>" naming so existing
+	// ollama vectors are reused under the new identity-namespaced scheme
+	// without a reindex. Idempotent; runs once per legacy dir.
+	// LEGACY-MIGRATION (remove next release): drop this prefixing call once
+	// all deployments have booted on the unified layout.
+	if err := storage.PrefixLegacyChromaDirs(cfg.ChromaPersistDir, logger); err != nil {
+		logger.Warn("could not migrate legacy chroma dirs (continuing)", "err", err)
+	}
+
+	// The vector store is namespaced by the ACTIVE provider identity slug
+	// so vectors of different dimensions never share a collection.
+	chromaSlug := embedSvc.StorageSlug()
+	if chromaSlug == "" {
+		// Embeddings disabled / provider not built: deterministic ollama-
+		// shaped fallback so toggling embeddings on/off doesn't move dirs.
+		chromaSlug = provider.StorageSlug("ollama:" + cfg.EmbeddingModel)
+	}
+	chromaDir := cfg.ChromaDirForSlug(chromaSlug)
+
 	// Detect and back up a legacy ChromaDB layout left by the Python server.
-	if backed, bErr := vectorstore.DetectLegacyAndBackup(cfg.DynamicChromaPersistDir()); bErr != nil {
+	if backed, bErr := vectorstore.DetectLegacyAndBackup(chromaDir); bErr != nil {
 		logger.Warn("could not back up legacy chroma dir", "err", bErr)
 	} else if backed {
 		logger.Warn("legacy chroma layout detected — backed up; re-run cix init to reindex")
 	}
 
-	// Vector store (chromem-go). Lives under the dynamic chroma persist dir so
-	// the path includes the model-safe name, matching Python parity.
-	vs, err := vectorstore.Open(cfg.DynamicChromaPersistDir())
+	vs, err := vectorstore.Open(chromaDir)
 	if err != nil {
 		return fmt.Errorf("open vectorstore: %w", err)
 	}
+	// Wrap in a swappable Holder shared by indexer / repojobs / httpapi so
+	// a runtime provider switch can reopen the store under a new namespace.
+	vsHolder := vectorstore.NewHolder(vs)
+	// Wire the live-reopen path used by SwitchProvider.
+	embedSvc.AttachVectorStore(
+		vsHolder,
+		cfg.ChromaDirForSlug,
+		vectorstore.Open,
+		func() error { return storage.PrefixLegacyChromaDirs(cfg.ChromaPersistDir, logger) },
+	)
 
-	idx := indexer.New(database, vs, embedSvc, logger)
+	idx := indexer.New(database, vsHolder, embedSvc, logger)
 	idx.SetEmbedIncludePath(cfg.EmbedIncludePath)
 	// Record the active embedding model on every indexed project so the
 	// dashboard can highlight stale vectors when the runtime provider /
@@ -332,7 +366,7 @@ func run() error {
 		GitRepos:                   grSvc,
 		GithubTokens:               ghSvc,
 		Indexer:                    idx,
-		VectorStore:                vs,
+		VectorStore:                vsHolder,
 		DataDir:                    cfg.WorkspacesDataDir,
 		Logger:                     logger,
 		DefaultPollIntervalSeconds: int(cfg.DefaultPollInterval.Seconds()),
@@ -454,7 +488,7 @@ func run() error {
 		Sessions:          sessSvc,
 		APIKeys:           akSvc,
 		EmbeddingSvc:      embedSvc,
-		VectorStore:       vs,
+		VectorStore:       vsHolder,
 		Indexer:           idx,
 		RuntimeCfg:        rcfg,
 		EmbeddingsCfg:     embedCfgStore,
diff --git a/server/internal/config/config.go b/server/internal/config/config.go
index 3a938de..e5de89b 100644
--- a/server/internal/config/config.go
+++ b/server/internal/config/config.go
@@ -19,7 +19,7 @@ import (
 // default was a Python-FastAPI parallel-rollout carry-over; the Python
 // backend was archived 2026-04 and the parity is no longer meaningful.
 type Config struct {
-	APIKey                  string
+	APIKey string
 	// AuthDisabled, when true, makes the server skip the API-key check on
 	// every endpoint. Off by default — must be turned on EXPLICITLY via
 	// CIX_AUTH_DISABLED=true (and also requires CIX_API_KEY to be empty).
@@ -176,24 +176,41 @@ type Config struct {
 }
 
 // ModelSafeName returns the embedding model name normalised for use inside
-// filesystem paths. Matches Settings.model_safe_name in api/app/config.py.
+// filesystem paths. Originally mirrored Settings.model_safe_name in the
+// archived Python backend; now used ONLY to reconstruct the legacy
+// per-model SQLite filename during the one-time DB adoption migration
+// (internal/storage.AdoptLegacyModelDB). It no longer drives any live
+// storage path — the system DB is model-independent and the vector store
+// is namespaced by provider.StorageSlug(Provider.ID()).
+//
+// LEGACY-MIGRATION (remove next release): this and LegacyDynamicSQLitePath
+// exist solely for the one-time storage-unification adoption. Once every
+// deployment has booted on the unified layout, delete both along with
+// storage.AdoptLegacyModelDB and its call in cmd/cix-server/main.go.
 func (c *Config) ModelSafeName() string {
 	s := strings.ReplaceAll(c.EmbeddingModel, "/", "_")
 	s = strings.ReplaceAll(s, "-", "_")
 	return strings.ToLower(s)
 }
 
-// DynamicSQLitePath returns the SQLite path with the model-safe name suffixed
-// before the extension. Matches Settings.dynamic_sqlite_path in Python.
-func (c *Config) DynamicSQLitePath() string {
+// LegacyDynamicSQLitePath reconstructs the OLD per-model SQLite filename
+// (<base>_<ModelSafeName>.db) that pre-unification builds wrote to. It is
+// used solely by the boot-time DB adoption migration to locate the file
+// to adopt as the new model-independent system DB; no live code path
+// should depend on it.
+func (c *Config) LegacyDynamicSQLitePath() string {
 	ext := filepath.Ext(c.SQLitePath)
 	base := strings.TrimSuffix(c.SQLitePath, ext)
 	return fmt.Sprintf("%s_%s%s", base, c.ModelSafeName(), ext)
 }
 
-// DynamicChromaPersistDir matches Settings.dynamic_chroma_persist_dir.
-func (c *Config) DynamicChromaPersistDir() string {
-	return fmt.Sprintf("%s_%s", c.ChromaPersistDir, c.ModelSafeName())
+// ChromaDirForSlug returns the on-disk vector-store directory for a given
+// embedding-identity slug (see provider.StorageSlug). The slug namespaces
+// the chroma persist dir so vectors of different dimensions never share a
+// collection. The slug is computed by the caller from the ACTIVE
+// provider's ID(), keeping the model identity out of config.
+func (c *Config) ChromaDirForSlug(slug string) string {
+	return fmt.Sprintf("%s_%s", c.ChromaPersistDir, slug)
 }
 
 // Load reads CIX_* environment variables and returns a populated Config.
@@ -345,7 +362,6 @@ func Load() (*Config, error) {
 
 	c.VersionCheckRepo = getenv("CIX_VERSION_CHECK_REPO", "dvcdsys/code-index")
 
-
 	c.SecretKey = getenv("CIX_SECRET_KEY", "")
 	c.SecretKeyFile = getenv("CIX_SECRET_KEYFILE", "")
 	c.SecretsDataDir = getenv("CIX_SECRETS_DATA_DIR", filepath.Dir(c.SQLitePath))
@@ -463,8 +479,8 @@ func defaultDataDir() string {
 }
 
 // defaultSQLitePath resolves the local SQLite database path under the
-// platform data dir. The `_` suffix from DynamicSQLitePath is appended at
-// query time, not here.
+// platform data dir. This is the literal, model-independent system DB
+// path the server opens (no model suffix is appended any more).
 func defaultSQLitePath() string {
 	return filepath.Join(defaultDataDir(), "sqlite", "projects.db")
 }
diff --git a/server/internal/config/config_test.go b/server/internal/config/config_test.go
index a09ffda..560920a 100644
--- a/server/internal/config/config_test.go
+++ b/server/internal/config/config_test.go
@@ -107,8 +107,14 @@ func TestLoadOverrides(t *testing.T) {
 	if got := c.ModelSafeName(); got != "test_model_name" {
 		t.Errorf("ModelSafeName = %q", got)
 	}
-	if got := c.DynamicSQLitePath(); got != "/tmp/test_test_model_name.db" {
-		t.Errorf("DynamicSQLitePath = %q", got)
+	// LegacyDynamicSQLitePath still reconstructs the OLD per-model filename
+	// (used only by the boot-time adoption migration).
+	if got := c.LegacyDynamicSQLitePath(); got != "/tmp/test_test_model_name.db" {
+		t.Errorf("LegacyDynamicSQLitePath = %q", got)
+	}
+	// ChromaDirForSlug suffixes the chroma base with the given identity slug.
+	if got := c.ChromaDirForSlug("voyage_voyage_code_3_2048_float"); got != c.ChromaPersistDir+"_voyage_voyage_code_3_2048_float" {
+		t.Errorf("ChromaDirForSlug = %q", got)
 	}
 }
 
diff --git a/server/internal/db/db.go b/server/internal/db/db.go
index 17aea33..7a3e677 100644
--- a/server/internal/db/db.go
+++ b/server/internal/db/db.go
@@ -140,6 +140,49 @@ func OpenWith(opts OpenOptions) (*sql.DB, error) {
 	return db, nil
 }
 
+// HasTables reports whether the SQLite database at path contains ALL of
+// the named tables. It opens the file read-write (so any pending WAL is
+// recovered cleanly) with a busy timeout, runs NO migrations, and closes
+// before returning. A missing file is not an error — it returns
+// (false, nil). Used by the boot-time DB adoption migration
+// (internal/storage) to tell a real unified system DB (has both
+// schema_migrations and users) apart from a pre-auth fossil that merely
+// happens to occupy the target path.
+func HasTables(path string, names ...string) (bool, error) {
+	if path == "" || len(names) == 0 {
+		return false, nil
+	}
+	if _, err := os.Stat(path); err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return false, nil
+		}
+		return false, fmt.Errorf("stat %s: %w", path, err)
+	}
+	dsn, err := buildDSN(path)
+	if err != nil {
+		return false, err
+	}
+	sdb, err := sql.Open(DriverName, dsn)
+	if err != nil {
+		return false, fmt.Errorf("open %s: %w", path, err)
+	}
+	defer sdb.Close()
+	sdb.SetMaxOpenConns(1)
+	for _, name := range names {
+		var got string
+		err := sdb.QueryRow(
+			`SELECT name FROM sqlite_master WHERE type='table' AND name = ?`, name,
+		).Scan(&got)
+		if errors.Is(err, sql.ErrNoRows) {
+			return false, nil
+		}
+		if err != nil {
+			return false, fmt.Errorf("check table %q in %s: %w", name, path, err)
+		}
+	}
+	return true, nil
+}
+
 // applyMigrations runs every entry in registeredMigrations whose version is
 // greater than the current high-water mark in schema_migrations. Each
 // successful migration records a (version, name, applied_at) row so the
diff --git a/server/internal/embeddings/provider/provider.go b/server/internal/embeddings/provider/provider.go
index a41e46a..0584bbc 100644
--- a/server/internal/embeddings/provider/provider.go
+++ b/server/internal/embeddings/provider/provider.go
@@ -20,6 +20,7 @@ package provider
 import (
 	"context"
 	"errors"
+	"strings"
 )
 
 // Kind enumerates the built-in provider kinds. New kinds are added by
@@ -174,6 +175,41 @@ var ErrMissingAPIKey = errors.New("provider: required API key env var is not set
 // the process is restarted. Caller maps to HTTP 503 without retry.
 var ErrUnrecoverable = errors.New("provider: unrecoverable failure")
 
+// StorageSlug turns a Provider.ID() fingerprint into a filesystem-safe
+// slug used to namespace the on-disk vector store directory, so each
+// distinct embedding identity (kind + model + dim + dtype) gets its own
+// chroma collection space. Switching providers therefore never mixes
+// vectors of different dimensions in one collection, and switching back
+// reuses the prior namespace without a reindex.
+//
+// Rules: lowercase, then replace every rune outside [a-z0-9_] (including
+// '/', '-', ':') with '_'. Deliberately a pure per-rune map — no
+// run-collapsing or trimming — so the transform is deterministic and
+// idempotent. (It is not injective: e.g. "a:b" and "a-b" both map to
+// "a_b", so two IDs differing only in separator runes would share one
+// namespace. This is accepted on the assumption that real Provider.ID()
+// strings for a given kind never differ only in a separator.) Examples:
+//
+//	"voyage:voyage-code-3:2048:float"            → "voyage_voyage_code_3_2048_float"
+//	"ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF"  → "ollama_awhiteside_coderankembed_q8_0_gguf"
+//	"openai:text-embedding-3-large:256"          → "openai_text_embedding_3_large_256"
+//
+// An empty ID yields an empty slug; callers guard against that.
+func StorageSlug(id string) string {
+	lower := strings.ToLower(id)
+	var b strings.Builder
+	b.Grow(len(lower))
+	for _, r := range lower {
+		switch {
+		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '_':
+			b.WriteRune(r)
+		default:
+			b.WriteByte('_')
+		}
+	}
+	return b.String()
+}
+
 // SecretLookup resolves an env-var name to its current value at the
 // moment of the call. Implementations must return (value, true) when
 // the env var is set (even if empty), and ("", false) when it is
diff --git a/server/internal/embeddings/provider/storageslug_test.go b/server/internal/embeddings/provider/storageslug_test.go
new file mode 100644
index 0000000..803ed61
--- /dev/null
+++ b/server/internal/embeddings/provider/storageslug_test.go
@@ -0,0 +1,38 @@
+package provider
+
+import "testing"
+
+func TestStorageSlug(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		{"voyage:voyage-code-3:2048:float", "voyage_voyage_code_3_2048_float"},
+		{"ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF", "ollama_awhiteside_coderankembed_q8_0_gguf"},
+		{"openai:text-embedding-3-large:256", "openai_text_embedding_3_large_256"},
+		{"OpenAI:Foo.Bar", "openai_foo_bar"},     // mixed case + dot
+		{"a b", "a_b"},                           // space
+		{"", ""},                                 // empty
+		{"already_safe_123", "already_safe_123"}, // identity for safe chars
+	}
+	for _, tc := range cases {
+		if got := StorageSlug(tc.in); got != tc.want {
+			t.Errorf("StorageSlug(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
+
+// TestStorageSlugIdempotent ensures slugging an already-slugged string is
+// a no-op (the chroma migration relies on this so re-running never double-
+// transforms a name).
+func TestStorageSlugIdempotent(t *testing.T) {
+	for _, in := range []string{
+		"voyage:voyage-code-3:2048:float",
+		"ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF",
+	} {
+		once := StorageSlug(in)
+		twice := StorageSlug(once)
+		if once != twice {
+			t.Errorf("StorageSlug not idempotent: %q -> %q -> %q", in, once, twice)
+		}
+	}
+}
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index a63e414..655292f 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -13,7 +13,7 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/config"
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider/ollama"
-
+	"github.com/dvcdsys/code-index/server/internal/vectorstore"
 
 	// Blank imports trigger each provider package's init() which
 	// registers a Factory in the registry. Service builds the active
@@ -46,6 +46,65 @@ type Service struct {
 	// at the queue layer, but mu makes the swap itself atomic).
 	mu      sync.RWMutex
 	current provider.Provider
+
+	// Vector-store reopen hooks, wired by main.go via AttachVectorStore.
+	// When set, SwitchProvider reopens the vector store under the new
+	// provider's identity slug and atomically swaps it into vsHolder, so
+	// a runtime provider switch moves to a dimension-isolated namespace
+	// without a process restart. All four are nil in tests that don't
+	// exercise the reopen path (SwitchProvider then only swaps the
+	// provider, matching the pre-unification behaviour).
+	vsHolder  *vectorstore.Holder
+	vsDirFor  func(slug string) string                     // cfg.ChromaDirForSlug
+	vsOpener  func(dir string) (*vectorstore.Store, error) // vectorstore.Open
+	vsMigrate func() error                                 // legacy chroma-dir prefix migration (idempotent)
+}
+
+// AttachVectorStore wires the live vector-store reopen path used by
+// SwitchProvider. main.go calls it once after constructing the Service
+// and the shared Holder:
+//
+//	dirFor  — cfg.ChromaDirForSlug (maps a StorageSlug to an on-disk dir)
+//	opener  — vectorstore.Open
+//	migrate — optional idempotent legacy-dir migration run before each
+//	          reopen (lets a switch back to ollama on a pre-unification
+//	          box adopt its renamed dir without a restart); may be nil
+//
+// Passing the formula (dirFor) and opener as funcs keeps embeddings free
+// of a hard dependency on config path layout and avoids an
+// embeddings→storage import for the migration hook.
+func (s *Service) AttachVectorStore(
+	holder *vectorstore.Holder,
+	dirFor func(slug string) string,
+	opener func(dir string) (*vectorstore.Store, error),
+	migrate func() error,
+) {
+	if s == nil {
+		return
+	}
+	s.mu.Lock()
+	s.vsHolder = holder
+	s.vsDirFor = dirFor
+	s.vsOpener = opener
+	s.vsMigrate = migrate
+	s.mu.Unlock()
+}
+
+// StorageSlug returns the filesystem slug of the ACTIVE provider's
+// identity (provider.StorageSlug(current.ID())), or "" when s is nil,
+// disabled, or has no provider yet. main.go uses it at boot to pick the
+// chroma directory; the project-detail handler uses it to display it.
+func (s *Service) StorageSlug() string {
+	if s == nil || s.disabled {
+		return ""
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		return ""
+	}
+	return provider.StorageSlug(cur.ID())
 }
 
 // New constructs a Service from the env-derived config. The legacy
@@ -174,6 +233,47 @@ func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []by
 		}(old)
 	}
 	s.logger.Info("embeddings: switched provider", "kind", kind, "id", newProv.ID())
+
+	// Reopen the vector store under the new provider's identity slug so
+	// its (possibly different-dimension) vectors land in their own
+	// namespace instead of colliding with the previous provider's
+	// collection. The provider has ALREADY been swapped above; if the
+	// reopen fails we do NOT roll it back (the new provider is live for
+	// embedding) — we keep the holder on the old store and surface a
+	// loud error so the operator restarts. Without the reopen, the next
+	// reindex would write new-dim vectors into the old dir.
+	if err := s.reopenVectorStore(newProv); err != nil {
+		return err
+	}
+	return nil
+}
+
+// reopenVectorStore opens a fresh *vectorstore.Store under the directory
+// derived from prov's identity slug and atomically swaps it into the
+// shared Holder. No-op when AttachVectorStore was never called (tests).
+func (s *Service) reopenVectorStore(prov provider.Provider) error {
+	s.mu.RLock()
+	holder, dirFor, opener, migrate := s.vsHolder, s.vsDirFor, s.vsOpener, s.vsMigrate
+	s.mu.RUnlock()
+	if holder == nil || dirFor == nil || opener == nil {
+		return nil // reopen path not wired (e.g. unit tests)
+	}
+	if migrate != nil {
+		// Idempotent legacy-dir prefixing — lets a switch back to ollama
+		// on a pre-unification box adopt its renamed dir without restart.
+		if err := migrate(); err != nil {
+			s.logger.Warn("embeddings: chroma legacy-dir migration failed during switch (continuing)", "err", err)
+		}
+	}
+	dir := dirFor(provider.StorageSlug(prov.ID()))
+	newStore, err := opener(dir)
+	if err != nil {
+		s.logger.Error("embeddings: provider switched but vector store reopen failed; keeping previous store until restart",
+			"dir", dir, "err", err)
+		return fmt.Errorf("reopen vector store at %s: %w", dir, err)
+	}
+	holder.Swap(newStore)
+	s.logger.Info("embeddings: vector store reopened under new provider namespace", "dir", dir)
 	return nil
 }
 
diff --git a/server/internal/embeddings/switch_provider_test.go b/server/internal/embeddings/switch_provider_test.go
new file mode 100644
index 0000000..f3ccd78
--- /dev/null
+++ b/server/internal/embeddings/switch_provider_test.go
@@ -0,0 +1,138 @@
+package embeddings
+
+import (
+	"context"
+	"errors"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+	"github.com/dvcdsys/code-index/server/internal/vectorstore"
+)
+
+// fakeProv is a minimal provider.Provider for exercising the vector-store
+// reopen path; only ID() is consulted by reopenVectorStore / StorageSlug.
+type fakeProv struct{ id string }
+
+func (f fakeProv) Kind() string                                          { return "fake" }
+func (f fakeProv) ID() string                                            { return f.id }
+func (f fakeProv) Dimension() int                                        { return 0 }
+func (f fakeProv) SupportsTokenize() bool                                { return false }
+func (f fakeProv) Start(context.Context) error                           { return nil }
+func (f fakeProv) Stop(context.Context) error                            { return nil }
+func (f fakeProv) Ready(context.Context) error                           { return nil }
+func (f fakeProv) Status() provider.Status                               { return provider.Status{} }
+func (f fakeProv) EmbedQuery(context.Context, string) ([]float32, error) { return nil, nil }
+func (f fakeProv) EmbedDocuments(context.Context, []string) ([][]float32, error) {
+	return nil, nil
+}
+func (f fakeProv) TokenizeAndEmbed(context.Context, []string) ([][]float32, error) {
+	return nil, nil
+}
+
+func quiet() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
+
+func TestServiceStorageSlug(t *testing.T) {
+	s := &Service{logger: quiet(), current: fakeProv{id: "voyage:voyage-code-3:2048:float"}}
+	if got := s.StorageSlug(); got != "voyage_voyage_code_3_2048_float" {
+		t.Errorf("StorageSlug = %q", got)
+	}
+	// Disabled / no provider → empty.
+	if got := (&Service{logger: quiet(), disabled: true}).StorageSlug(); got != "" {
+		t.Errorf("disabled StorageSlug = %q, want empty", got)
+	}
+	if got := (&Service{logger: quiet()}).StorageSlug(); got != "" {
+		t.Errorf("nil-provider StorageSlug = %q, want empty", got)
+	}
+}
+
+func TestReopenVectorStore_SwapsToNewNamespace(t *testing.T) {
+	dir := t.TempDir()
+	const project = "/proj"
+
+	// Initial store has one chunk for the project.
+	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := initial.UpsertChunks(context.Background(), project,
+		[]vectorstore.Chunk{{Content: "x", FilePath: "a.go", StartLine: 1, EndLine: 1, Language: "go"}},
+		[][]float32{{1, 0}}); err != nil {
+		t.Fatal(err)
+	}
+	holder := vectorstore.NewHolder(initial)
+	if holder.Count(project) != 1 {
+		t.Fatalf("precondition: initial holder count != 1")
+	}
+
+	s := &Service{logger: quiet()}
+	s.AttachVectorStore(
+		holder,
+		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		vectorstore.Open,
+		nil,
+	)
+
+	// Switch to a new identity → reopen into a fresh, empty namespace.
+	if err := s.reopenVectorStore(fakeProv{id: "voyage:voyage-code-3:2048:float"}); err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+	if got := holder.Count(project); got != 0 {
+		t.Errorf("after reopen Count = %d, want 0 (new empty namespace)", got)
+	}
+	// New dir created on disk; old dir still present (reuse on switch back).
+	if !dirExists(filepath.Join(dir, "chroma_voyage_voyage_code_3_2048_float")) {
+		t.Errorf("new chroma dir should exist")
+	}
+	if !dirExists(filepath.Join(dir, "chroma_ollama_m")) {
+		t.Errorf("old chroma dir should be preserved")
+	}
+}
+
+func TestReopenVectorStore_OpenerFailureKeepsOldStore(t *testing.T) {
+	dir := t.TempDir()
+	const project = "/proj"
+	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := initial.UpsertChunks(context.Background(), project,
+		[]vectorstore.Chunk{{Content: "x", FilePath: "a.go", StartLine: 1, EndLine: 1, Language: "go"}},
+		[][]float32{{1, 0}}); err != nil {
+		t.Fatal(err)
+	}
+	holder := vectorstore.NewHolder(initial)
+
+	s := &Service{logger: quiet()}
+	s.AttachVectorStore(
+		holder,
+		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		func(string) (*vectorstore.Store, error) { return nil, errors.New("boom") },
+		nil,
+	)
+
+	err = s.reopenVectorStore(fakeProv{id: "voyage:m:2048:float"})
+	if err == nil {
+		t.Fatal("expected reopen error")
+	}
+	// Holder must still serve the OLD store (no Swap on failure).
+	if got := holder.Count(project); got != 1 {
+		t.Errorf("after failed reopen Count = %d, want 1 (old store retained)", got)
+	}
+}
+
+func TestReopenVectorStore_NoopWhenUnwired(t *testing.T) {
+	// A Service without AttachVectorStore must not panic / error.
+	s := &Service{logger: quiet()}
+	if err := s.reopenVectorStore(fakeProv{id: "voyage:m:2048:float"}); err != nil {
+		t.Errorf("unwired reopen should be a no-op, got %v", err)
+	}
+}
+
+func dirExists(path string) bool {
+	st, err := os.Stat(path)
+	return err == nil && st.IsDir()
+}
diff --git a/server/internal/httpapi/router.go b/server/internal/httpapi/router.go
index 664cdbd..59fad07 100644
--- a/server/internal/httpapi/router.go
+++ b/server/internal/httpapi/router.go
@@ -74,8 +74,10 @@ type Deps struct {
 	// tests). Phase 5 uses it for semantic search.
 	EmbeddingSvc EmbeddingsQuerier
 	// VectorStore is the chromem-go backed vector store (Phase 4). Nil-safe:
-	// semantic search returns empty results when absent.
-	VectorStore *vectorstore.Store
+	// semantic search returns empty results when absent. Typed as the
+	// vectorstore.Interface so production can supply a *vectorstore.Holder
+	// (swappable on provider switch) while tests pass a raw *Store.
+	VectorStore vectorstore.Interface
 	// Indexer drives the three-phase index protocol (Phase 5). Nil-safe: the
 	// indexing endpoints return 503 when absent.
 	Indexer *indexer.Service
diff --git a/server/internal/httpapi/search.go b/server/internal/httpapi/search.go
index 62242f8..060354d 100644
--- a/server/internal/httpapi/search.go
+++ b/server/internal/httpapi/search.go
@@ -374,7 +374,7 @@ func groupByFile(items []searchResultItem) []fileGroupResult {
 // applyPostLangFilter=true).
 func fetchVectorResults(
 	ctx context.Context,
-	store *vectorstore.Store,
+	store vectorstore.Interface,
 	projectPath string,
 	qEmb []float32,
 	n int,
diff --git a/server/internal/httpapi/server.go b/server/internal/httpapi/server.go
index 6aba3e0..b773579 100644
--- a/server/internal/httpapi/server.go
+++ b/server/internal/httpapi/server.go
@@ -253,7 +253,7 @@ func (s *Server) enrichProjectStorage(out *openapi.Project, p *projects.Project)
 	if cfg == nil {
 		return
 	}
-	sqlitePath := cfg.DynamicSQLitePath()
+	sqlitePath := cfg.SQLitePath
 	if sqlitePath != "" {
 		out.SqlitePath = ptrString(sqlitePath)
 		if info, err := os.Stat(sqlitePath); err == nil {
@@ -261,9 +261,11 @@ func (s *Server) enrichProjectStorage(out *openapi.Project, p *projects.Project)
 			out.SqliteSizeBytes = &sz
 		}
 	}
-	if cfg.ChromaPersistDir != "" {
+	// Chroma dir is namespaced by the ACTIVE provider's identity slug, so
+	// the displayed path tracks whatever provider is live now.
+	if slug := es.StorageSlug(); cfg.ChromaPersistDir != "" && slug != "" {
 		col := vectorstore.CollectionName(p.HostPath)
-		dir := filepath.Join(cfg.DynamicChromaPersistDir(), col)
+		dir := filepath.Join(cfg.ChromaDirForSlug(slug), col)
 		out.ChromaPath = ptrString(dir)
 		if sz, ok := dirSizeBytes(dir); ok {
 			out.ChromaSizeBytes = &sz
diff --git a/server/internal/indexer/indexer.go b/server/internal/indexer/indexer.go
index 93eefe5..c08afb1 100644
--- a/server/internal/indexer/indexer.go
+++ b/server/internal/indexer/indexer.go
@@ -90,7 +90,7 @@ type TokenAwareEmbedder interface {
 // Service owns sessions and wires dependencies for the three-phase protocol.
 type Service struct {
 	db     *sql.DB
-	vs     *vectorstore.Store
+	vs     vectorstore.Interface
 	emb    Embedder
 	logger *slog.Logger
 
@@ -127,7 +127,7 @@ type Service struct {
 
 // New constructs a Service. All deps are required except logger (falls back to
 // slog.Default).
-func New(db *sql.DB, vs *vectorstore.Store, emb Embedder, logger *slog.Logger) *Service {
+func New(db *sql.DB, vs vectorstore.Interface, emb Embedder, logger *slog.Logger) *Service {
 	if logger == nil {
 		logger = slog.Default()
 	}
diff --git a/server/internal/repojobs/repojobs.go b/server/internal/repojobs/repojobs.go
index 3bbbda9..a957fc8 100644
--- a/server/internal/repojobs/repojobs.go
+++ b/server/internal/repojobs/repojobs.go
@@ -114,7 +114,7 @@ type Deps struct {
 	GitRepos     *gitrepos.Service
 	GithubTokens *githubtokens.Service
 	Indexer      *indexer.Service
-	VectorStore  *vectorstore.Store
+	VectorStore  vectorstore.Interface
 	DataDir      string // root for cloned repos: <DataDir>/repos/<path_hash>/
 	Logger       *slog.Logger
 	// DefaultPollIntervalSeconds / MinPollIntervalSeconds resolve the poll
diff --git a/server/internal/storage/chromamigrate.go b/server/internal/storage/chromamigrate.go
new file mode 100644
index 0000000..6d99185
--- /dev/null
+++ b/server/internal/storage/chromamigrate.go
@@ -0,0 +1,111 @@
+package storage
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
+)
+
+// knownProviderPrefixes are the StorageSlug prefixes a vector-store dir
+// gets under the unified scheme (provider.StorageSlug of an ID always
+// starts with "<kind>_"). A legacy dir lacking any of these was produced
+// by the old model-only naming, which only ever ran the ollama sidecar.
+var knownProviderPrefixes = []string{"ollama_", "openai_", "voyage_"}
+
+// PrefixLegacyChromaDirs renames legacy, un-prefixed vector-store
+// directories to the unified "ollama_"-prefixed form so existing ollama
+// vectors survive the switch to provider-identity namespacing WITHOUT a
+// reindex. Heuristic: a dir named "<base>_<X>" where X carries no known
+// provider prefix was written by the pre-unification ollama-only build,
+// so its true identity is "ollama:<model>" → "<base>_ollama_<slug(X)>".
+//
+// The legacy suffix X was produced by the old Config.ModelSafeName (which
+// only mapped '/' and '-' to '_'), but the running server resolves the
+// dir via provider.StorageSlug, which maps EVERY non-[a-z0-9_] rune. For
+// model names containing characters the two normalizers treat differently
+// (e.g. a '.' or ':' in "nomic-embed-text:v1.5"), a naive "<base>_ollama_"+X
+// would NOT equal the dir the server opens, silently orphaning the vectors.
+// We therefore re-run the suffix through provider.StorageSlug. This is
+// exact for ALL models because StorageSlug(ModelSafeName(m)) ==
+// StorageSlug(m): ModelSafeName only collapses a subset ('/','-') of the
+// runes StorageSlug collapses, and '_' is preserved by StorageSlug, so the
+// two compose to the same canonical form.
+//
+// chromaBase is cfg.ChromaPersistDir (e.g. ".../chroma"); legacy dirs sit
+// next to it as ".../chroma_<slug>". The scan covers ALL such dirs, not
+// just the active provider's: the active provider may be voyage (no legacy
+// dir to migrate) while the operator's ollama vectors wait under the
+// un-prefixed name for a future switch back.
+//
+// Idempotent: already-prefixed dirs are skipped; a rename whose target
+// already exists is skipped with a warning (never clobbers).
+//
+// LEGACY-MIGRATION (remove next release): one-time prefixing shim for
+// pre-unification ollama-only chroma dirs. Once every deployment has
+// booted on the unified layout, delete this function and its calls in
+// cmd/cix-server/main.go and embeddings.Service (the AttachVectorStore
+// migrate hook).
+func PrefixLegacyChromaDirs(chromaBase string, logger *slog.Logger) error {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if chromaBase == "" {
+		return nil
+	}
+	parent := filepath.Dir(chromaBase)
+	prefix := filepath.Base(chromaBase) + "_" // e.g. "chroma_"
+
+	entries, err := os.ReadDir(parent)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil // nothing indexed yet
+		}
+		return fmt.Errorf("read chroma parent %s: %w", parent, err)
+	}
+
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue // skip files like chroma_*.python-backup.* tarballs
+		}
+		name := e.Name()
+		if !strings.HasPrefix(name, prefix) {
+			continue
+		}
+		suffix := strings.TrimPrefix(name, prefix)
+		if suffix == "" {
+			continue // exactly the base dir name + trailing "_": ignore
+		}
+		if hasKnownPrefix(suffix) {
+			continue // already migrated / native unified dir
+		}
+		src := filepath.Join(parent, name)
+		// Canonicalise the suffix through StorageSlug so the renamed dir
+		// matches the path the running server resolves for this identity
+		// (handles model names with '.'/':' etc.; see func doc).
+		dst := filepath.Join(parent, prefix+"ollama_"+provider.StorageSlug(suffix))
+		if fileExists(dst) {
+			logger.Warn("storage: skipping chroma legacy-prefix rename, target already exists",
+				"src", src, "dst", dst)
+			continue
+		}
+		logger.Info("storage: prefixing legacy ollama chroma dir to unified naming",
+			"src", src, "dst", dst)
+		if err := os.Rename(src, dst); err != nil {
+			return fmt.Errorf("rename %s -> %s: %w", src, dst, err)
+		}
+	}
+	return nil
+}
+
+func hasKnownPrefix(suffix string) bool {
+	for _, p := range knownProviderPrefixes {
+		if strings.HasPrefix(suffix, p) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/server/internal/storage/chromamigrate_test.go b/server/internal/storage/chromamigrate_test.go
new file mode 100644
index 0000000..f8929a2
--- /dev/null
+++ b/server/internal/storage/chromamigrate_test.go
@@ -0,0 +1,122 @@
+package storage
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func mkdir(t *testing.T, path string) {
+	t.Helper()
+	if err := os.MkdirAll(path, 0o755); err != nil {
+		t.Fatalf("mkdir %s: %v", path, err)
+	}
+}
+
+func TestPrefixLegacyChromaDirs(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma") // chromaBase; dirs are chroma_<slug>
+
+	// Legacy un-prefixed ollama dir (should be renamed).
+	mkdir(t, base+"_awhiteside_coderankembed_q8_0_gguf")
+	// Already-prefixed dirs (must be left untouched).
+	mkdir(t, base+"_voyage_voyage_code_3_2048_float")
+	mkdir(t, base+"_ollama_already")
+	// A file that matches the prefix but is not a dir (ignored).
+	if err := os.WriteFile(base+"_backup.tar", []byte("x"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+
+	// Legacy renamed to ollama-prefixed.
+	if dirExists(base + "_awhiteside_coderankembed_q8_0_gguf") {
+		t.Errorf("legacy un-prefixed dir should have been renamed away")
+	}
+	if !dirExists(base + "_ollama_awhiteside_coderankembed_q8_0_gguf") {
+		t.Errorf("expected renamed dir chroma_ollama_awhiteside_coderankembed_q8_0_gguf")
+	}
+	// Already-prefixed untouched.
+	if !dirExists(base + "_voyage_voyage_code_3_2048_float") {
+		t.Errorf("voyage dir must be left untouched")
+	}
+	if !dirExists(base + "_ollama_already") {
+		t.Errorf("ollama-prefixed dir must be left untouched")
+	}
+	// File untouched.
+	if !fileExists(base + "_backup.tar") {
+		t.Errorf("non-dir entry must be ignored, not moved")
+	}
+}
+
+// TestPrefixLegacyChromaDirs_StrictNormalizesSpecialChars guards Finding 1:
+// the legacy suffix was written by ModelSafeName (only '/'->'_' and
+// '-'->'_'), so a model like "nomic-embed-text:v1.5" left a dir whose name
+// still held a '.'/':'. The running server resolves the dir via
+// provider.StorageSlug (every non-[a-z0-9_] -> '_'), so the migration must
+// canonicalise the suffix the same way or the vectors are silently orphaned.
+func TestPrefixLegacyChromaDirs_StrictNormalizesSpecialChars(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
+	// Legacy dir as ModelSafeName would have produced for a v1.5 model:
+	// the '.' survives in the on-disk name.
+	mkdir(t, base+"_nomic_embed_text_v1.5")
+
+	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	// Must land on the StorageSlug form the server actually opens ('.'->'_').
+	if !dirExists(base + "_ollama_nomic_embed_text_v1_5") {
+		t.Errorf("expected strict-normalized dir chroma_ollama_nomic_embed_text_v1_5")
+	}
+	if dirExists(base + "_nomic_embed_text_v1.5") {
+		t.Errorf("legacy dir should have been renamed away")
+	}
+}
+
+func TestPrefixLegacyChromaDirs_NoClobber(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
+	// Both the legacy source and its would-be target already exist.
+	mkdir(t, base+"_model_x")
+	mkdir(t, base+"_ollama_model_x")
+	if err := os.WriteFile(filepath.Join(base+"_model_x", "marker"), []byte("src"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	// Source left in place (not clobbered into existing target).
+	if !dirExists(base + "_model_x") {
+		t.Errorf("source dir must be preserved when target already exists")
+	}
+	if _, err := os.Stat(filepath.Join(base+"_model_x", "marker")); err != nil {
+		t.Errorf("source contents must be intact: %v", err)
+	}
+}
+
+func TestPrefixLegacyChromaDirs_Idempotent(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
+	mkdir(t, base+"_legacy_model")
+
+	for i := 0; i < 2; i++ {
+		if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+			t.Fatalf("run %d: %v", i, err)
+		}
+	}
+	if !dirExists(base + "_ollama_legacy_model") {
+		t.Errorf("expected ollama-prefixed dir after idempotent runs")
+	}
+	if dirExists(base + "_legacy_model") {
+		t.Errorf("legacy dir should be gone after first run")
+	}
+}
+
+func dirExists(path string) bool {
+	info, err := os.Stat(path)
+	return err == nil && info.IsDir()
+}
diff --git a/server/internal/storage/dbmigrate.go b/server/internal/storage/dbmigrate.go
new file mode 100644
index 0000000..932ac19
--- /dev/null
+++ b/server/internal/storage/dbmigrate.go
@@ -0,0 +1,145 @@
+// Package storage holds boot-time, OS-level migrations of on-disk storage
+// artefacts (the SQLite system DB file and the chromem-go vector-store
+// directories). These run BEFORE the DB / vector store are opened, so
+// they operate on plain files rather than live handles — which is why
+// they live here rather than in internal/db or internal/vectorstore.
+//
+// Background. Earlier builds (a Python-era artefact ported 1:1 to Go)
+// namespaced BOTH the SQLite DB and the chroma dir by the embedding model
+// name. That was only safe while the model was a fixed env var; runtime
+// model/provider switching turned it into a bug (vectors of different
+// dimensions colliding in one collection) and a footgun (a model change
+// silently spawning a parallel DB with empty accounts). The unified
+// design keeps ONE model-independent system DB and namespaces ONLY the
+// vector store by the active provider identity. These migrations move
+// existing deployments onto that layout without a reindex.
+package storage
+
+import (
+	"database/sql"
+	"fmt"
+	"log/slog"
+	"os"
+	"time"
+
+	"github.com/dvcdsys/code-index/server/internal/db"
+
+	_ "modernc.org/sqlite"
+)
+
+// AdoptLegacyModelDB makes the model-independent system DB at target the
+// canonical store, adopting a legacy per-model DB file when present.
+//
+// target is the literal cfg.SQLitePath (no model suffix). legacy is the
+// old per-model filename (cfg.LegacyDynamicSQLitePath()). The function is
+// idempotent and safe to run on every boot.
+//
+// Cases:
+//   - target or legacy empty, legacy == target, or legacy missing → nothing to do.
+//   - target missing → adopt legacy (checkpoint its WAL, rename it in).
+//   - target present AND a real unified DB (has schema_migrations AND
+//     users) → leave it; if a legacy file also lingers, warn that it is
+//     now stale.
+//   - target present but a pre-auth FOSSIL (lacks those tables) → move the
+//     fossil aside to <target>.pre-unify-<timestamp> (with its WAL/SHM
+//     sidecars) and adopt legacy in its place.
+//
+// On adoption the legacy WAL is drained via PRAGMA wal_checkpoint(TRUNCATE)
+// and only the main .db file is renamed; the regenerable -wal/-shm
+// sidecars are removed rather than carried across (they are
+// host/inode-sensitive).
+//
+// LEGACY-MIGRATION (remove next release): one-time adoption shim. Once
+// every deployment has booted on the unified layout, delete this function
+// and its call in cmd/cix-server/main.go (and Config.ModelSafeName /
+// Config.LegacyDynamicSQLitePath, which exist only to feed it).
+func AdoptLegacyModelDB(target, legacy string, logger *slog.Logger) error {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if target == "" || legacy == "" || legacy == target {
+		return nil
+	}
+	if !fileExists(legacy) {
+		return nil // fresh install or already adopted
+	}
+
+	if fileExists(target) {
+		real, err := db.HasTables(target, "schema_migrations", "users")
+		if err != nil {
+			return fmt.Errorf("inspect target db %s: %w", target, err)
+		}
+		if real {
+			logger.Warn("storage: legacy per-model DB still present but target is already the unified system DB; leaving legacy untouched (safe to delete manually)",
+				"target", target, "legacy", legacy)
+			return nil
+		}
+		// Fossil occupying the target path — move it aside.
+		aside := fmt.Sprintf("%s.pre-unify-%s", target, time.Now().UTC().Format("20060102-150405"))
+		logger.Warn("storage: moving pre-unification fossil DB aside to free the system DB path",
+			"fossil", target, "moved_to", aside)
+		if err := renameWithSidecars(target, aside); err != nil {
+			return fmt.Errorf("move fossil aside: %w", err)
+		}
+	}
+
+	logger.Info("storage: adopting legacy per-model DB as the model-independent system DB",
+		"legacy", legacy, "target", target)
+	if err := checkpointWAL(legacy); err != nil {
+		return fmt.Errorf("checkpoint legacy db %s: %w", legacy, err)
+	}
+	if err := os.Rename(legacy, target); err != nil {
+		return fmt.Errorf("rename %s -> %s: %w", legacy, target, err)
+	}
+	// The legacy WAL was truncated by the checkpoint; remove any leftover
+	// regenerable sidecars so they cannot shadow the moved DB.
+	removeIfExists(legacy + "-wal")
+	removeIfExists(legacy + "-shm")
+	return nil
+}
+
+// checkpointWAL opens path with a single connection, drains its WAL into
+// the main database file via wal_checkpoint(TRUNCATE), and closes. After
+// this the main .db file is self-contained and safe to rename.
+func checkpointWAL(path string) error {
+	// WAL + busy_timeout via the DSN; single connection so the checkpoint
+	// is not racing a sibling connection.
+	dsn := "file:" + path + "?_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)"
+	sdb, err := sql.Open(db.DriverName, dsn)
+	if err != nil {
+		return fmt.Errorf("open: %w", err)
+	}
+	defer sdb.Close()
+	sdb.SetMaxOpenConns(1)
+	if _, err := sdb.Exec(`PRAGMA wal_checkpoint(TRUNCATE)`); err != nil {
+		return fmt.Errorf("wal_checkpoint: %w", err)
+	}
+	return nil
+}
+
+// renameWithSidecars renames a SQLite DB file and then its -wal/-shm sidecars.
+// A missing sidecar is skipped; a sidecar that exists but fails to rename is an error.
+func renameWithSidecars(from, to string) error {
+	if err := os.Rename(from, to); err != nil {
+		return err
+	}
+	for _, sfx := range []string{"-wal", "-shm"} {
+		if fileExists(from + sfx) {
+			if err := os.Rename(from+sfx, to+sfx); err != nil {
+				return fmt.Errorf("rename sidecar %s: %w", from+sfx, err)
+			}
+		}
+	}
+	return nil
+}
+
+func fileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
+
+func removeIfExists(path string) {
+	if fileExists(path) {
+		_ = os.Remove(path)
+	}
+}
diff --git a/server/internal/storage/dbmigrate_test.go b/server/internal/storage/dbmigrate_test.go
new file mode 100644
index 0000000..7ff731e
--- /dev/null
+++ b/server/internal/storage/dbmigrate_test.go
@@ -0,0 +1,180 @@
+package storage
+
+import (
+	"database/sql"
+	"io"
+	"log/slog"
+	"path/filepath"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/db"
+
+	_ "modernc.org/sqlite"
+)
+
+func quietLogger() *slog.Logger {
+	return slog.New(slog.NewTextHandler(io.Discard, nil))
+}
+
+// makeUnifiedDB creates a real system DB at path via db.OpenWith (so it
+// has schema_migrations + users), then writes a sentinel row so we can
+// prove the exact file survived an adoption.
+func makeUnifiedDB(t *testing.T, path, sentinel string) {
+	t.Helper()
+	d, err := db.OpenWith(db.OpenOptions{Path: path})
+	if err != nil {
+		t.Fatalf("OpenWith(%s): %v", path, err)
+	}
+	if _, err := d.Exec(`CREATE TABLE IF NOT EXISTS sentinel (v TEXT)`); err != nil {
+		t.Fatalf("create sentinel: %v", err)
+	}
+	if _, err := d.Exec(`INSERT INTO sentinel (v) VALUES (?)`, sentinel); err != nil {
+		t.Fatalf("insert sentinel: %v", err)
+	}
+	if err := d.Close(); err != nil {
+		t.Fatalf("close: %v", err)
+	}
+}
+
+// makeFossilDB creates a pre-auth fossil: a bare DB with only a projects
+// table, no users / schema_migrations.
+func makeFossilDB(t *testing.T, path string) {
+	t.Helper()
+	sdb, err := sql.Open("sqlite", "file:"+path+"?_pragma=journal_mode(WAL)")
+	if err != nil {
+		t.Fatalf("open fossil: %v", err)
+	}
+	if _, err := sdb.Exec(`CREATE TABLE projects (host_path TEXT)`); err != nil {
+		t.Fatalf("create fossil projects: %v", err)
+	}
+	if err := sdb.Close(); err != nil {
+		t.Fatalf("close fossil: %v", err)
+	}
+}
+
+func readSentinel(t *testing.T, path string) string {
+	t.Helper()
+	sdb, err := sql.Open("sqlite", "file:"+path+"?_pragma=journal_mode(WAL)")
+	if err != nil {
+		t.Fatalf("open %s: %v", path, err)
+	}
+	defer sdb.Close()
+	var v string
+	if err := sdb.QueryRow(`SELECT v FROM sentinel LIMIT 1`).Scan(&v); err != nil {
+		t.Fatalf("read sentinel from %s: %v", path, err)
+	}
+	return v
+}
+
+func TestAdoptLegacyModelDB_AdoptIntoAbsentTarget(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	legacy := filepath.Join(dir, "projects_model.db")
+	makeUnifiedDB(t, legacy, "hello")
+
+	if err := AdoptLegacyModelDB(target, legacy, quietLogger()); err != nil {
+		t.Fatalf("adopt: %v", err)
+	}
+	if fileExists(legacy) {
+		t.Errorf("legacy should have been renamed away")
+	}
+	if !fileExists(target) {
+		t.Fatalf("target should exist after adoption")
+	}
+	if got := readSentinel(t, target); got != "hello" {
+		t.Errorf("sentinel = %q, want hello (wrong file adopted?)", got)
+	}
+}
+
+func TestAdoptLegacyModelDB_NoopWhenTargetIsUnified(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	legacy := filepath.Join(dir, "projects_model.db")
+	makeUnifiedDB(t, target, "target-data")
+	makeUnifiedDB(t, legacy, "legacy-data")
+
+	if err := AdoptLegacyModelDB(target, legacy, quietLogger()); err != nil {
+		t.Fatalf("adopt: %v", err)
+	}
+	// Target untouched, legacy left in place (stale).
+	if got := readSentinel(t, target); got != "target-data" {
+		t.Errorf("target sentinel = %q, want target-data (must not be overwritten)", got)
+	}
+	if !fileExists(legacy) {
+		t.Errorf("legacy should be left in place when target is already unified")
+	}
+}
+
+func TestAdoptLegacyModelDB_FossilMovedAside(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	legacy := filepath.Join(dir, "projects_model.db")
+	makeFossilDB(t, target)
+	makeUnifiedDB(t, legacy, "real")
+
+	if err := AdoptLegacyModelDB(target, legacy, quietLogger()); err != nil {
+		t.Fatalf("adopt: %v", err)
+	}
+	if got := readSentinel(t, target); got != "real" {
+		t.Errorf("target sentinel = %q, want real (legacy not adopted over fossil)", got)
+	}
+	// Fossil moved aside to a *.pre-unify-* file.
+	matches, _ := filepath.Glob(target + ".pre-unify-*")
+	if len(matches) == 0 {
+		t.Errorf("expected fossil moved aside to %s.pre-unify-*", target)
+	}
+	if fileExists(legacy) {
+		t.Errorf("legacy should have been adopted (renamed away)")
+	}
+}
+
+func TestAdoptLegacyModelDB_Idempotent(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	legacy := filepath.Join(dir, "projects_model.db")
+	makeUnifiedDB(t, legacy, "once")
+
+	for i := 0; i < 2; i++ {
+		if err := AdoptLegacyModelDB(target, legacy, quietLogger()); err != nil {
+			t.Fatalf("adopt run %d: %v", i, err)
+		}
+	}
+	if got := readSentinel(t, target); got != "once" {
+		t.Errorf("sentinel = %q, want once", got)
+	}
+}
+
+func TestAdoptLegacyModelDB_LegacyEqualsTarget(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	makeUnifiedDB(t, target, "same")
+	if err := AdoptLegacyModelDB(target, target, quietLogger()); err != nil {
+		t.Fatalf("adopt: %v", err)
+	}
+	if got := readSentinel(t, target); got != "same" {
+		t.Errorf("sentinel = %q, want same", got)
+	}
+}
+
+// TestAdoptLegacyModelDB_WALDrainedAndSidecarsGone writes rows under WAL,
+// adopts, and asserts the data survives the checkpoint+rename and the
+// legacy -wal/-shm sidecars are removed.
+func TestAdoptLegacyModelDB_WALDrainedAndSidecarsGone(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "projects.db")
+	legacy := filepath.Join(dir, "projects_model.db")
+	makeUnifiedDB(t, legacy, "wal-data")
+
+	if err := AdoptLegacyModelDB(target, legacy, quietLogger()); err != nil {
+		t.Fatalf("adopt: %v", err)
+	}
+	if got := readSentinel(t, target); got != "wal-data" {
+		t.Errorf("sentinel = %q, want wal-data", got)
+	}
+	if fileExists(legacy + "-wal") {
+		t.Errorf("legacy -wal sidecar should be gone")
+	}
+	if fileExists(legacy + "-shm") {
+		t.Errorf("legacy -shm sidecar should be gone")
+	}
+}
diff --git a/server/internal/vectorstore/holder.go b/server/internal/vectorstore/holder.go
new file mode 100644
index 0000000..2ab1a0d
--- /dev/null
+++ b/server/internal/vectorstore/holder.go
@@ -0,0 +1,122 @@
+package vectorstore
+
+import (
+	"context"
+	"errors"
+	"sync"
+)
+
+// Interface is the vector-store surface consumed by the indexer, the
+// search handlers, and repojobs. Both *Store (direct) and *Holder
+// (swappable) satisfy it, so a caller can hold either: tests pass a raw
+// *Store, while production passes a *Holder so the active store can be
+// reopened under a new directory on a provider switch without rewiring
+// every holder. The method set mirrors *Store exactly.
+type Interface interface {
+	UpsertChunks(ctx context.Context, projectPath string, chunks []Chunk, embeddings [][]float32) error
+	Search(ctx context.Context, projectPath string, queryEmbedding []float32, limit int, where map[string]string) ([]SearchResult, error)
+	DeleteByFile(ctx context.Context, projectPath, filePath string) error
+	DeleteCollection(projectPath string) error
+	Count(projectPath string) int
+}
+
+// Compile-time assertions that both implementations satisfy Interface.
+var (
+	_ Interface = (*Store)(nil)
+	_ Interface = (*Holder)(nil)
+)
+
+// Holder is a concurrency-safe, swappable wrapper around a *Store. It
+// exists so the active vector store can be reopened under a new on-disk
+// directory at runtime — e.g. when an admin switches the embedding
+// provider (PUT /admin/embedding-providers/active), the new provider's
+// vectors live in a different, dimension-isolated namespace, so the
+// Service reopens a *Store at the new path and atomically Swap()s it in.
+//
+// All read/write proxies take the RLock only to snapshot the store pointer
+// (it is released before the call runs); Swap takes the Lock. A call racing a
+// Swap therefore runs against the old store or the new one — never a torn pointer.
+// Every current holder of a raw *Store (indexer, httpapi Deps, repojobs)
+// holds a *Holder instead and calls the identical method set.
+//
+// Discarding the old *Store after Swap needs no Close: chromem-go persists
+// each write synchronously to disk and keeps no background goroutines or
+// open file handles, so the replaced store is simply reclaimed by GC.
+type Holder struct {
+	mu    sync.RWMutex
+	store *Store
+}
+
+// errNotInitialised is returned by write proxies when the Holder has no
+// store (after NewHolder(nil) with no Swap yet, or after Swap(nil)).
+var errNotInitialised = errors.New("vectorstore: holder has no active store")
+
+// NewHolder wraps an initial store (which may be nil; callers must Swap a
+// real store in before writes succeed).
+func NewHolder(s *Store) *Holder { return &Holder{store: s} }
+
+// Swap installs newStore as the active store and returns the previous one
+// (nil on first install). The caller may discard the returned store; no
+// Close is required (see type doc).
+func (h *Holder) Swap(newStore *Store) (old *Store) {
+	h.mu.Lock()
+	old = h.store
+	h.store = newStore
+	h.mu.Unlock()
+	return old
+}
+
+// current returns the active store under the read lock.
+func (h *Holder) current() *Store {
+	h.mu.RLock()
+	s := h.store
+	h.mu.RUnlock()
+	return s
+}
+
+// UpsertChunks proxies to the active store.
+func (h *Holder) UpsertChunks(ctx context.Context, projectPath string, chunks []Chunk, embeddings [][]float32) error {
+	s := h.current()
+	if s == nil {
+		return errNotInitialised
+	}
+	return s.UpsertChunks(ctx, projectPath, chunks, embeddings)
+}
+
+// Search proxies to the active store. A nil store yields (nil, nil),
+// matching the empty-collection contract so callers degrade to no
+// results rather than erroring.
+func (h *Holder) Search(ctx context.Context, projectPath string, queryEmbedding []float32, limit int, where map[string]string) ([]SearchResult, error) {
+	s := h.current()
+	if s == nil {
+		return nil, nil
+	}
+	return s.Search(ctx, projectPath, queryEmbedding, limit, where)
+}
+
+// DeleteByFile proxies to the active store.
+func (h *Holder) DeleteByFile(ctx context.Context, projectPath, filePath string) error {
+	s := h.current()
+	if s == nil {
+		return errNotInitialised
+	}
+	return s.DeleteByFile(ctx, projectPath, filePath)
+}
+
+// DeleteCollection proxies to the active store.
+func (h *Holder) DeleteCollection(projectPath string) error {
+	s := h.current()
+	if s == nil {
+		return errNotInitialised
+	}
+	return s.DeleteCollection(projectPath)
+}
+
+// Count proxies to the active store; a nil store reports 0.
+func (h *Holder) Count(projectPath string) int {
+	s := h.current()
+	if s == nil {
+		return 0
+	}
+	return s.Count(projectPath)
+}
diff --git a/server/internal/vectorstore/holder_test.go b/server/internal/vectorstore/holder_test.go
new file mode 100644
index 0000000..27efbbd
--- /dev/null
+++ b/server/internal/vectorstore/holder_test.go
@@ -0,0 +1,113 @@
+package vectorstore
+
+import (
+	"context"
+	"sync"
+	"testing"
+)
+
+func storeWithOneChunk(t *testing.T, project string) *Store {
+	t.Helper()
+	s, err := Open(t.TempDir())
+	if err != nil {
+		t.Fatalf("open store: %v", err)
+	}
+	chunks := []Chunk{{
+		Content: "hello", FilePath: "a.go", StartLine: 1, EndLine: 2, Language: "go",
+	}}
+	embs := [][]float32{{1, 0, 0, 0}}
+	if err := s.UpsertChunks(context.Background(), project, chunks, embs); err != nil {
+		t.Fatalf("upsert: %v", err)
+	}
+	return s
+}
+
+func emptyStore(t *testing.T) *Store {
+	t.Helper()
+	s, err := Open(t.TempDir())
+	if err != nil {
+		t.Fatalf("open store: %v", err)
+	}
+	return s
+}
+
+func TestHolderProxyAndSwap(t *testing.T) {
+	const project = "/proj"
+	a := storeWithOneChunk(t, project)
+	b := emptyStore(t)
+
+	h := NewHolder(a)
+	if got := h.Count(project); got != 1 {
+		t.Fatalf("Count via holder = %d, want 1", got)
+	}
+
+	old := h.Swap(b)
+	if old != a {
+		t.Errorf("Swap should return the previous store")
+	}
+	if got := h.Count(project); got != 0 {
+		t.Errorf("Count after swap to empty store = %d, want 0", got)
+	}
+}
+
+func TestHolderNilGuards(t *testing.T) {
+	h := NewHolder(nil)
+	if got := h.Count("/p"); got != 0 {
+		t.Errorf("nil-store Count = %d, want 0", got)
+	}
+	res, err := h.Search(context.Background(), "/p", []float32{1, 0}, 5, nil)
+	if err != nil || res != nil {
+		t.Errorf("nil-store Search = (%v, %v), want (nil, nil)", res, err)
+	}
+	if err := h.UpsertChunks(context.Background(), "/p", nil, nil); err == nil {
+		t.Errorf("nil-store UpsertChunks should error")
+	}
+	if err := h.DeleteByFile(context.Background(), "/p", "f"); err == nil {
+		t.Errorf("nil-store DeleteByFile should error")
+	}
+	if err := h.DeleteCollection("/p"); err == nil {
+		t.Errorf("nil-store DeleteCollection should error")
+	}
+}
+
+// TestHolderConcurrentSwap runs under -race: many goroutines Search/Count
+// while another goroutine repeatedly Swaps between two valid stores. The
+// RWMutex must guarantee no torn pointer / data race.
+func TestHolderConcurrentSwap(t *testing.T) {
+	const project = "/proj"
+	a := storeWithOneChunk(t, project)
+	b := storeWithOneChunk(t, project)
+	h := NewHolder(a)
+
+	var wg sync.WaitGroup
+	stop := make(chan struct{})
+
+	// Readers.
+	for i := 0; i < 8; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for {
+				select {
+				case <-stop:
+					return
+				default:
+					_ = h.Count(project)
+					_, _ = h.Search(context.Background(), project, []float32{1, 0, 0, 0}, 1, nil)
+				}
+			}
+		}()
+	}
+	// Swapper.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		stores := []*Store{a, b}
+		for i := 0; i < 2000; i++ {
+			h.Swap(stores[i%2])
+		}
+		close(stop)
+	}()
+
+	wg.Wait()
+}
diff --git a/server/internal/vectorstore/store.go b/server/internal/vectorstore/store.go
index 94630e6..dbc5584 100644
--- a/server/internal/vectorstore/store.go
+++ b/server/internal/vectorstore/store.go
@@ -65,7 +65,7 @@ func collectionName(projectPath string) string {
 
 // CollectionName is the exported alias for the per-project chromem-go
 // collection identifier. The dashboard's project-detail card uses it to
-// resolve the on-disk directory under cfg.DynamicChromaPersistDir().
+// resolve the on-disk directory under cfg.ChromaDirForSlug(activeSlug).
 func CollectionName(projectPath string) string { return collectionName(projectPath) }
 
 // docID format: "{md5hex(filePath)[:12]}:{startLine}-{endLine}:{idx}"

From 8de73326757bbe37ea88423cf8bfe06044d71371 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 21:53:18 +0100
Subject: [PATCH 23/34] fix(voyage): validate returned dimension, handle int8
 base64, normalize base URL

Three robustness fixes surfaced by pre-PR review:

- H2: validate each returned vector's length against the configured
  output_dimension (when >0) and reject a mismatch loudly. A model that
  silently ignores output_dimension (no Matryoshka support / typo'd
  model) would otherwise write wrong-width vectors deep into the store
  with no attribution. Also guard the oversize-input averaging path
  against windows of differing width, which would panic with
  index-out-of-range; it now returns a clean error.
- M4: accept int8 embeddings returned as a base64-packed byte string
  (some OpenAI-compatible proxies) in addition to the default JSON int
  array, so a proxy swap doesn't fail the whole batch.
- L4: trim a trailing slash from base_url in New() so url building never
  produces a double slash that stricter proxies can 404 on.

Tests: wrong-dimension rejection, inconsistent-window-dim error, int8
base64 dequant. Adjusted an existing input_type test whose 2-dim mock
response no longer matched its 1024 configured dimension.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../embeddings/provider/voyage/voyage.go      | 53 +++++++++-
 .../embeddings/provider/voyage/voyage_test.go | 98 ++++++++++++++++++-
 2 files changed, 143 insertions(+), 8 deletions(-)

diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index f661116..8cfdd8e 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -21,6 +21,7 @@ package voyage
 import (
 	"bytes"
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -29,6 +30,7 @@ import (
 	"net/http"
 	"regexp"
 	"strconv"
+	"strings"
 	"time"
 	"unicode/utf8"
 
@@ -40,8 +42,10 @@ import (
 // voyageBatchTooLargeRegex matches Voyage's per-batch token-limit
 // 400 response so the caller can react adaptively. Voyage's message
 // is fairly stable:
-//   "The max allowed tokens per submitted batch is 120000.
-//    Your batch has 187609 tokens after truncation."
+//
+//	"The max allowed tokens per submitted batch is 120000.
+//	 Your batch has 187609 tokens after truncation."
+//
 // We capture both numbers; the actual count drives how aggressively
 // the caller bisects.
 var voyageBatchTooLargeRegex = regexp.MustCompile(
@@ -268,6 +272,10 @@ func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provid
 	if cfg.BaseURL == "" {
 		cfg.BaseURL = DefaultBaseURL
 	}
+	// Normalise away a trailing slash so url building (BaseURL +
+	// "/v1/embeddings") never produces a double slash, which stricter
+	// OpenAI-compatible proxies in front of Voyage can 404 on.
+	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
 	if cfg.OutputDtype == "" {
 		cfg.OutputDtype = DtypeFloat
 	}
@@ -303,7 +311,7 @@ func (p *Provider) ID() string {
 	return "voyage:" + p.cfg.Model + ":" + dimStr + ":" + p.cfg.OutputDtype
 }
 
-func (p *Provider) Dimension() int       { return p.cfg.OutputDimension }
+func (p *Provider) Dimension() int         { return p.cfg.OutputDimension }
 func (p *Provider) SupportsTokenize() bool { return false }
 
 func (p *Provider) Start(ctx context.Context) error {
@@ -456,6 +464,14 @@ func (p *Provider) embedAndAverage(ctx context.Context, texts []string, inputTyp
 		avg := make([]float32, dim)
 		for k := 0; k < sp.length; k++ {
 			v := allVecs[sp.start+k]
+			// All windows of one input must share a width; otherwise the
+			// avg[d] += v[d] below would panic with index-out-of-range.
+			// Surface a clean error instead (only reachable if the API
+			// returns inconsistent dims across a split input).
+			if len(v) != dim {
+				return nil, fmt.Errorf("voyage: inconsistent window dims for input %d: window %d has %d, want %d",
+					i, k, len(v), dim)
+			}
 			for d := range avg {
 				avg[d] += v[d]
 			}
@@ -680,6 +696,16 @@ func (p *Provider) embed(ctx context.Context, texts []string, inputType string)
 		if err != nil {
 			return nil, fmt.Errorf("voyage: decode embedding[%d]: %w", item.Index, err)
 		}
+		// Guard against a model silently ignoring output_dimension (e.g.
+		// a model that doesn't support Matryoshka shrink, or a typo'd
+		// model name): writing the wrong-width vector into the store
+		// corrupts the collection deep in the upsert path with no
+		// attribution back to here. Only enforced when a dimension was
+		// explicitly requested (0 = model's native default, unknown).
+		if want := p.cfg.OutputDimension; want > 0 && len(vec) != want {
+			return nil, fmt.Errorf("voyage: embedding[%d] has %d dims, want %d (model ignored output_dimension?)",
+				item.Index, len(vec), want)
+		}
 		out[item.Index] = vec
 	}
 	for i, v := range out {
@@ -703,6 +729,27 @@ func (p *Provider) embed(ctx context.Context, texts []string, inputType string)
 func dequantize(raw json.RawMessage, dtype string) ([]float32, error) {
 	switch dtype {
 	case DtypeInt8:
+		// Voyage returns int8 either as a JSON array of integers (the
+		// default, which is what cix gets since it never sets
+		// encoding_format) or, in some configurations / behind an
+		// OpenAI-compatible proxy, as a base64-packed byte string.
+		// Handle the string form defensively so a proxy swap doesn't
+		// fail the whole batch with an opaque "int8 decode" error.
+		if len(raw) > 0 && raw[0] == '"' {
+			var b64 string
+			if err := json.Unmarshal(raw, &b64); err != nil {
+				return nil, fmt.Errorf("int8 base64 string decode: %w", err)
+			}
+			bs, err := base64.StdEncoding.DecodeString(b64)
+			if err != nil {
+				return nil, fmt.Errorf("int8 base64 decode: %w", err)
+			}
+			out := make([]float32, len(bs))
+			for i, b := range bs {
+				out[i] = float32(int8(b)) / 127.0
+			}
+			return out, nil
+		}
 		var ints []int8
 		if err := json.Unmarshal(raw, &ints); err != nil {
 			return nil, fmt.Errorf("int8 decode: %w", err)
diff --git a/server/internal/embeddings/provider/voyage/voyage_test.go b/server/internal/embeddings/provider/voyage/voyage_test.go
index 542b1dc..3472146 100644
--- a/server/internal/embeddings/provider/voyage/voyage_test.go
+++ b/server/internal/embeddings/provider/voyage/voyage_test.go
@@ -2,7 +2,9 @@ package voyage
 
 import (
 	"context"
+	"encoding/base64"
 	"encoding/json"
+	"fmt"
 	"io"
 	"net/http"
 	"net/http/httptest"
@@ -50,11 +52,14 @@ func TestEmbedQuerySendsInputTypeQuery(t *testing.T) {
 		"model": "voyage-code-3",
 		"usage": {"total_tokens": 3}
 	}`)
+	// OutputDimension matches the 2-dim stub response so the per-vector
+	// dimension guard (H2) is satisfied; the assertion below still
+	// proves the configured dimension is forwarded in the request.
 	p := New(Config{
 		BaseURL:         srv.URL,
 		APIKeyEnv:       "K",
 		Model:           "voyage-code-3",
-		OutputDimension: 1024,
+		OutputDimension: 2,
 		OutputDtype:     DtypeFloat,
 	}, fixedSecrets("K", "v"), nil)
 
@@ -66,7 +71,7 @@ func TestEmbedQuerySendsInputTypeQuery(t *testing.T) {
 	if req.InputType != "query" {
 		t.Errorf("input_type %q; expected query", req.InputType)
 	}
-	if req.OutputDimension != 1024 {
+	if req.OutputDimension != 2 {
 		t.Errorf("output_dimension %d", req.OutputDimension)
 	}
 }
@@ -441,9 +446,9 @@ func TestEmbedDocuments_SingleInputTooLargeFailsClean(t *testing.T) {
 
 func TestParseBatchTooLarge(t *testing.T) {
 	cases := []struct {
-		msg                string
-		wantCap, wantAct   int
-		wantOK             bool
+		msg              string
+		wantCap, wantAct int
+		wantOK           bool
 	}{
 		{
 			"voyage: status 400: {\"detail\":\"Request failed. The max allowed tokens per submitted batch is 120000. Your batch has 187609 tokens after truncation.\"}",
@@ -564,3 +569,86 @@ func TestUsageDecodesWithoutPromptTokens(t *testing.T) {
 		t.Fatalf("decode: %v", err)
 	}
 }
+
+// TestEmbed_RejectsWrongDimension guards H2: a configured
+// output_dimension that the model silently ignores must be rejected
+// loudly rather than writing a wrong-width vector into the store.
+func TestEmbed_RejectsWrongDimension(t *testing.T) {
+	srv, _ := stubServer(t, http.StatusOK, `{
+		"data": [{"index": 0, "embedding": [0.1, 0.2]}],
+		"usage": {"total_tokens": 1}
+	}`)
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m",
+		OutputDimension: 1024, OutputDtype: DtypeFloat,
+	}, fixedSecrets("K", "v"), nil)
+	_, err := p.EmbedDocuments(context.Background(), []string{"x"})
+	if err == nil {
+		t.Fatal("expected error on dimension mismatch, got nil")
+	}
+	if !strings.Contains(err.Error(), "want 1024") {
+		t.Errorf("error %q should mention the expected dimension", err)
+	}
+}
+
+// TestEmbed_RejectsInconsistentWindowDims guards H2's averaging path:
+// when an oversize input is split into windows and the API returns
+// windows of differing width, the reassembly must error rather than
+// panic with index-out-of-range. OutputDimension=0 so the per-vector
+// check is skipped and the averaging guard is what catches it.
+func TestEmbed_RejectsInconsistentWindowDims(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// One oversize input expands to 2 windows → 2 inputs in this
+		// POST; return vectors of different lengths for each.
+		_, _ = io.WriteString(w, `{
+			"data": [
+				{"index": 0, "embedding": [0.1, 0.2]},
+				{"index": 1, "embedding": [0.1, 0.2, 0.3]}
+			],
+			"usage": {"total_tokens": 2}
+		}`)
+	}))
+	t.Cleanup(srv.Close)
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m",
+		OutputDtype: DtypeFloat, MaxInputBytes: 10,
+	}, fixedSecrets("K", "v"), nil)
+	// 20 bytes > MaxInputBytes(10) → splits into 2 windows.
+	_, err := p.EmbedDocuments(context.Background(), []string{strings.Repeat("a", 20)})
+	if err == nil {
+		t.Fatal("expected error on inconsistent window dims, got nil")
+	}
+	if !strings.Contains(err.Error(), "inconsistent window dims") {
+		t.Errorf("error %q should mention inconsistent window dims", err)
+	}
+}
+
+// TestInt8Dequantize_Base64 guards M4: int8 returned as a base64-packed
+// byte string (rather than a JSON int array) is dequantized correctly.
+func TestInt8Dequantize_Base64(t *testing.T) {
+	// int8 [127, -127, 0, 64] packed as raw signed bytes → base64.
+	ints := []int8{127, -127, 0, 64}
+	packed := make([]byte, len(ints))
+	for i, v := range ints {
+		packed[i] = byte(v)
+	}
+	b64 := base64.StdEncoding.EncodeToString(packed)
+	srv, _ := stubServer(t, http.StatusOK, fmt.Sprintf(`{
+		"data": [{"index": 0, "embedding": %q}],
+		"usage": {"total_tokens": 1}
+	}`, b64))
+	p := New(Config{
+		BaseURL: srv.URL, APIKeyEnv: "K", Model: "m", OutputDtype: DtypeInt8,
+	}, fixedSecrets("K", "v"), nil)
+	vecs, err := p.EmbedDocuments(context.Background(), []string{"x"})
+	if err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if len(vecs) != 1 || len(vecs[0]) != 4 {
+		t.Fatalf("shape wrong: %v", vecs)
+	}
+	v := vecs[0]
+	if v[0] < 0.999 || v[1] > -0.999 || v[2] != 0 || v[3] < 0.50 || v[3] > 0.51 {
+		t.Errorf("base64 int8 dequantized values out of range: %v", v)
+	}
+}

From a0d88511449a08563de13a791d127607eaacd1a0 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 21:55:49 +0100
Subject: [PATCH 24/34] fix(providers,db): normalize openai base URL; prefix
 colon-bearing legacy models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- L4: trim a trailing slash from the openai provider's base_url in New()
  so url building never emits a double slash that stricter
  OpenAI-compatible servers (vLLM/TEI behind a proxy) 404 on. Mirrors the
  same fix in the voyage provider.
- L5: migration 13 (indexed_with_model provider-prefix backfill) tested
  for the mere presence of a ':' to decide a row was already prefixed.
  A legacy Ollama-style model name like "nomic-embed-text:latest"
  contains a colon but is NOT prefixed, so it was wrongly skipped and
  left flagged "stale model" forever. Now tests for a known kind prefix
  (ollama:/openai:/voyage:) instead. NOTE: this edits an unreleased
  migration in place — deployments that already applied v13 with the old
  logic will not re-run it, but the default model name contains no colon,
  so no shipped DB is affected.

Tests: openai trailing-slash path normalization; migration-13 colon-
bearing legacy row now gets the ollama: prefix.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 server/internal/db/db.go                      | 31 ++++++++++++-------
 server/internal/db/db_test.go                 | 15 ++++++---
 .../embeddings/provider/openai/openai.go      |  7 ++++-
 .../embeddings/provider/openai/openai_test.go | 23 ++++++++++++++
 4 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/server/internal/db/db.go b/server/internal/db/db.go
index 7a3e677..7332bfc 100644
--- a/server/internal/db/db.go
+++ b/server/internal/db/db.go
@@ -713,10 +713,10 @@ func migratePathHash(db *sql.DB) error {
 	haveColumn := false
 	for rows.Next() {
 		var (
-			cid                 int
-			name, typ           string
-			notnull, pk         int
-			dflt                sql.NullString
+			cid         int
+			name, typ   string
+			notnull, pk int
+			dflt        sql.NullString
 		)
 		if err := rows.Scan(&cid, &name, &typ, &notnull, &dflt, &pk); err != nil {
 			rows.Close()
@@ -817,21 +817,28 @@ func migrateEmbeddingProvider(db *sql.DB) error {
 // "ollama:<model>" and a reindex would *still* write the new prefixed
 // form — leaving every UN-reindexed project flagged falsely.
 //
-// Heuristic: rows whose value contains no ":" predate the prefix
-// convention. Prepend "ollama:" — safe because pre-refactor there
-// was no other embedding backend; every legacy row was produced by
-// the in-process llama-server sidecar.
+// Heuristic: rows that don't already start with a known provider-kind
+// prefix predate the prefix convention. Prepend "ollama:" — safe
+// because pre-refactor there was no other embedding backend; every
+// legacy row was produced by the in-process llama-server sidecar.
+// (Testing for the kind prefix rather than for the mere presence of a
+// ":" matters: a legacy Ollama-style model name like
+// "nomic-embed-text:latest" contains a colon but is NOT yet prefixed,
+// so a presence-of-colon test would wrongly skip it and leave it
+// flagged stale forever.)
 //
-// Idempotent: rows already containing ":" are left alone, so
-// re-running this migration (or running it against a DB that was
-// already partially upgraded) is a no-op.
+// Idempotent: rows already starting with ollama:/openai:/voyage: are
+// left alone, so re-running this migration (or running it against a DB
+// that was already partially upgraded) is a no-op.
 func migrateIndexedWithModelProviderPrefix(db *sql.DB) error {
 	_, err := db.Exec(`
 		UPDATE projects
 		SET indexed_with_model = 'ollama:' || indexed_with_model
 		WHERE indexed_with_model IS NOT NULL
 		  AND indexed_with_model != ''
-		  AND instr(indexed_with_model, ':') = 0
+		  AND indexed_with_model NOT LIKE 'ollama:%'
+		  AND indexed_with_model NOT LIKE 'openai:%'
+		  AND indexed_with_model NOT LIKE 'voyage:%'
 	`)
 	if err != nil {
 		return fmt.Errorf("backfill indexed_with_model prefix: %w", err)
diff --git a/server/internal/db/db_test.go b/server/internal/db/db_test.go
index 94ba6d6..f86c210 100644
--- a/server/internal/db/db_test.go
+++ b/server/internal/db/db_test.go
@@ -215,9 +215,12 @@ func TestOpenMigratesPreEDB(t *testing.T) {
 // is a bare model name ("awhiteside/CodeRankEmbed-Q8_0-GGUF") must
 // be rewritten to the prefixed form ("ollama:awhiteside/...") so the
 // drift-detector and dashboard see a match with the live Provider.ID().
-// Rows that already contain ":" (any prefixed form) must be left
-// untouched — important for idempotency and for DBs that were partially
-// upgraded before this migration shipped.
+// Rows that already start with a known kind prefix (ollama:/openai:/
+// voyage:) must be left untouched — important for idempotency and for
+// DBs partially upgraded before this migration shipped. A legacy
+// Ollama-style name that merely contains a colon (e.g.
+// "nomic-embed-text:latest") is NOT yet prefixed and MUST still get the
+// "ollama:" prefix.
 func TestMigrate_IndexedWithModelProviderPrefix(t *testing.T) {
 	tmp := filepath.Join(t.TempDir(), "indexed-prefix.db")
 
@@ -246,9 +249,10 @@ func TestMigrate_IndexedWithModelProviderPrefix(t *testing.T) {
 	rows := []struct {
 		host, model string
 	}{
-		{"/legacy/bare", "awhiteside/CodeRankEmbed-Q8_0-GGUF"},      // should get "ollama:" prefix
+		{"/legacy/bare", "awhiteside/CodeRankEmbed-Q8_0-GGUF"},             // should get "ollama:" prefix
+		{"/legacy/colon", "nomic-embed-text:latest"},                       // colon, but no kind prefix → should get "ollama:"
 		{"/already/prefixed", "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF"}, // untouched
-		{"/already/voyage", "voyage:voyage-code-3:1024:float"},          // untouched
+		{"/already/voyage", "voyage:voyage-code-3:1024:float"},             // untouched
 	}
 	for _, r := range rows {
 		if _, err := seed.Exec(
@@ -277,6 +281,7 @@ func TestMigrate_IndexedWithModelProviderPrefix(t *testing.T) {
 
 	expectations := map[string]sql.NullString{
 		"/legacy/bare":      {String: "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF", Valid: true},
+		"/legacy/colon":     {String: "ollama:nomic-embed-text:latest", Valid: true},
 		"/already/prefixed": {String: "ollama:awhiteside/CodeRankEmbed-Q8_0-GGUF", Valid: true},
 		"/already/voyage":   {String: "voyage:voyage-code-3:1024:float", Valid: true},
 		"/legacy/null":      {Valid: false},
diff --git a/server/internal/embeddings/provider/openai/openai.go b/server/internal/embeddings/provider/openai/openai.go
index e968750..649a06d 100644
--- a/server/internal/embeddings/provider/openai/openai.go
+++ b/server/internal/embeddings/provider/openai/openai.go
@@ -15,6 +15,7 @@ import (
 	"log/slog"
 	"net/http"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
@@ -51,6 +52,10 @@ func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provid
 	if logger == nil {
 		logger = slog.Default()
 	}
+	// Normalise away a trailing slash so url building (BaseURL +
+	// "/v1/embeddings") never produces a double slash, which stricter
+	// OpenAI-compatible servers (vLLM/TEI behind a proxy) can 404 on.
+	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
 	return &Provider{
 		cfg:     cfg,
 		logger:  logger,
@@ -72,7 +77,7 @@ func (p *Provider) ID() string {
 	return "openai:" + p.cfg.Model
 }
 
-func (p *Provider) Dimension() int    { return p.cfg.Dimensions }
+func (p *Provider) Dimension() int         { return p.cfg.Dimensions }
 func (p *Provider) SupportsTokenize() bool { return false }
 
 // Start runs a one-shot connect test: embed a single short string.
diff --git a/server/internal/embeddings/provider/openai/openai_test.go b/server/internal/embeddings/provider/openai/openai_test.go
index 27a7efb..bf4f58c 100644
--- a/server/internal/embeddings/provider/openai/openai_test.go
+++ b/server/internal/embeddings/provider/openai/openai_test.go
@@ -85,6 +85,29 @@ func TestEmbedDocumentsBatch(t *testing.T) {
 	}
 }
 
+// TestBaseURLTrailingSlashNormalized guards L4: a base_url with a
+// trailing slash must not produce a double-slash request path, which
+// stricter OpenAI-compatible servers can 404 on.
+func TestBaseURLTrailingSlashNormalized(t *testing.T) {
+	var gotPath string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotPath = r.URL.Path
+		_, _ = io.WriteString(w, `{"data":[{"index":0,"embedding":[0.1]}]}`)
+	}))
+	t.Cleanup(srv.Close)
+	p := New(Config{
+		BaseURL:   srv.URL + "/", // trailing slash
+		Model:     "m",
+		APIKeyEnv: "K",
+	}, fixedSecrets("K", "v"), nil)
+	if _, err := p.EmbedDocuments(context.Background(), []string{"x"}); err != nil {
+		t.Fatalf("EmbedDocuments: %v", err)
+	}
+	if gotPath != "/v1/embeddings" {
+		t.Errorf("request path = %q, want /v1/embeddings (no double slash)", gotPath)
+	}
+}
+
 func TestEmbedDocumentsHTTPError(t *testing.T) {
 	srv, _ := stubServer(t, http.StatusUnauthorized, `{"error":"bad key"}`)
 	p := New(Config{

From c5e862c500243ea51e66ce61a2188489af14ec0b Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 21:57:07 +0100
Subject: [PATCH 25/34] fix(httpapi): guard nil actor in
 SwitchEmbeddingProvider; add admin 403 gating tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- M1: mustBeAdmin returns a nil authContext when CIX_AUTH_DISABLED=true
  (a legitimate deployment mode), and SwitchEmbeddingProvider then
  dereferenced user.User.ID for the audit field → handler-goroutine
  nil-panic. Resolve the actor through an `if user != nil` guard,
  matching the ac != nil pattern used elsewhere (tunnels.go).
- L2: add per-endpoint 403 gating tests for the three admin
  embedding-provider routes that lacked them (GET/PUT active, POST test),
  satisfying the project rule that a non-admin caller must be rejected on
  every new endpoint. Auth was already enforced (all four handlers gate
  on mustBeAdmin first); these tests pin it against regressions.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 server/internal/httpapi/admin_embeddings.go   | 11 +++-
 .../internal/httpapi/admin_embeddings_test.go | 50 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index 29d5c7f..3804835 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -199,13 +199,22 @@ func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
+	// Resolve the actor for the audit field. mustBeAdmin returns a nil
+	// authContext when CIX_AUTH_DISABLED=true (a legitimate deployment
+	// mode), so guard before dereferencing — matches the ac != nil
+	// pattern used elsewhere (e.g. tunnels.go).
+	actorID := ""
+	if user != nil {
+		actorID = user.User.ID
+	}
+
 	// Persist BEFORE swap so the DB always leads the live state.
 	// If SwitchProvider then fails, the operator's next call (or
 	// container restart) reads the new row and tries again.
 	if err := s.Deps.EmbeddingsCfg.Save(r.Context(), embeddingscfg.Snapshot{
 		Kind:   req.Kind,
 		Config: cfgBytes,
-	}, user.User.ID); err != nil {
+	}, actorID); err != nil {
 		writeError(w, http.StatusInternalServerError, "persist provider: "+err.Error())
 		return
 	}
diff --git a/server/internal/httpapi/admin_embeddings_test.go b/server/internal/httpapi/admin_embeddings_test.go
index 45d7aa6..3138108 100644
--- a/server/internal/httpapi/admin_embeddings_test.go
+++ b/server/internal/httpapi/admin_embeddings_test.go
@@ -67,6 +67,56 @@ func TestListEmbeddingProviders_ViewerForbidden(t *testing.T) {
 	}
 }
 
+// The following three tests close the per-endpoint 403 gating gap
+// required by the project's auth rule: a non-admin (viewer) caller must
+// be rejected on EVERY admin embedding-provider route, not just the list.
+
+func TestGetActiveEmbeddingProvider_ViewerForbidden(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := viewerCookie(t, f)
+
+	req := withCookie(httptest.NewRequest(http.MethodGet,
+		"/api/v1/admin/embedding-providers/active", nil), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusForbidden {
+		t.Fatalf("status = %d, want 403 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
+func TestSwitchEmbeddingProvider_ViewerForbidden(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := viewerCookie(t, f)
+
+	body, _ := json.Marshal(map[string]any{"kind": "ollama", "config": map[string]any{}})
+	req := withCookie(httptest.NewRequest(http.MethodPut,
+		"/api/v1/admin/embedding-providers/active", bytes.NewReader(body)), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusForbidden {
+		t.Fatalf("status = %d, want 403 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
+func TestTestEmbeddingProvider_ViewerForbidden(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := viewerCookie(t, f)
+
+	req := withCookie(httptest.NewRequest(http.MethodPost,
+		"/api/v1/admin/embedding-providers/voyage/test", bytes.NewReader([]byte(`{}`))), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusForbidden {
+		t.Fatalf("status = %d, want 403 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
 func TestSwitchEmbeddingProvider_RejectsUnknownKind(t *testing.T) {
 	f := newAdminFixture(t)
 	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)

From a63cbce77f50a28a07ac09760d6944eb3a83ed17 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 22:03:22 +0100
Subject: [PATCH 26/34] fix(embeddings): close concurrency races in queue swap,
 provider lifecycle, supervisor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three concurrency hazards surfaced by pre-PR review:

- H1: Restart reassigned the s.queue pointer with no lock while Embed*
  callers read it (acquire + defer-release) — an unsynchronized
  pointer read/write (data race) reachable via the dashboard
  "Save & Restart" during an active reindex. The queue pointer is now
  guarded by s.mu: acquireProvider snapshots the queue and returns it so
  Release targets the SAME instance the slot was taken from, Restart
  writes the new queue under the lock, and Status reads via a snapshot.
- M2: SwitchProvider and Restart had no mutual exclusion, so two admin
  actions could interleave their s.current / s.queue mutations or both
  tear down a provider. A new lifecycleMu serializes the two (Embed*
  paths don't take it).
- M3: supervisor.Stop read s.waiterDone bare while a crash-driven spawn()
  reassigns it under s.mu — data race, far more reachable now that
  SwitchProvider can Stop a provider at runtime. Stop now snapshots
  cmd + waiterDone together under the read lock.

Test: TestRestart_ConcurrentWithEmbeds_NoRace exercises the queue-swap
race under -race (6 embedders vs a restarter alternating the cap to force
pointer swaps). go test -race ./internal/embeddings/... green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../embeddings/provider/ollama/supervisor.go  |  17 +--
 server/internal/embeddings/service.go         | 100 ++++++++++++------
 .../embeddings/switch_provider_test.go        |  52 +++++++++
 3 files changed, 132 insertions(+), 37 deletions(-)

diff --git a/server/internal/embeddings/provider/ollama/supervisor.go b/server/internal/embeddings/provider/ollama/supervisor.go
index ced98a1..bd17111 100644
--- a/server/internal/embeddings/provider/ollama/supervisor.go
+++ b/server/internal/embeddings/provider/ollama/supervisor.go
@@ -397,15 +397,20 @@ func pruneRestarts(ts []time.Time, now time.Time, window time.Duration) []time.T
 // the graceful path failed. The caller's context controls the deadline —
 // main.go already uses a 10s shutdown context.
 func (s *supervisor) Stop(ctx context.Context) error {
+	// Snapshot cmd + waiterDone together under the lock: a crash-driven
+	// spawn() reassigns s.waiterDone under s.mu, so reading the field bare
+	// races that write. Use the local for every wait below.
+	s.mu.RLock()
+	cmd := s.cmd
+	waiterDone := s.waiterDone
+	s.mu.RUnlock()
+
 	if !s.stopping.CompareAndSwap(false, true) {
 		// Already stopping; just wait for the existing teardown.
-		<-s.waiterDone
+		<-waiterDone
 		return nil
 	}
 
-	s.mu.RLock()
-	cmd := s.cmd
-	s.mu.RUnlock()
 	if cmd == nil || cmd.Process == nil {
 		return nil
 	}
@@ -418,7 +423,7 @@ func (s *supervisor) Stop(ctx context.Context) error {
 	_ = syscall.Kill(-pgid, syscall.SIGTERM)
 
 	select {
-	case <-s.waiterDone:
+	case <-waiterDone:
 		// Also clean up the socket file so a subsequent run does not trip on it.
 		if s.cfg.Transport == "unix" {
 			_ = os.Remove(s.cfg.SocketPath)
@@ -427,7 +432,7 @@ func (s *supervisor) Stop(ctx context.Context) error {
 	case <-ctx.Done():
 		s.logger.Warn("SIGTERM timed out, sending SIGKILL", "pgid", pgid)
 		_ = syscall.Kill(-pgid, syscall.SIGKILL)
-		<-s.waiterDone
+		<-waiterDone
 		if s.cfg.Transport == "unix" {
 			_ = os.Remove(s.cfg.SocketPath)
 		}
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index 655292f..3110e0c 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -42,8 +42,17 @@ type Service struct {
 	queue    *Queue
 	disabled bool
 
-	// mu guards current — swaps happen behind it (under BlockNew/Resume
-	// at the queue layer, but mu makes the swap itself atomic).
+	// lifecycleMu serializes the two provider-lifecycle operations
+	// (SwitchProvider and Restart) against each other so they never
+	// interleave their s.current / s.queue mutations or both tear down a
+	// provider. Embed* methods do NOT take it — they run concurrently via
+	// the queue + an s.mu snapshot of current/queue.
+	lifecycleMu sync.Mutex
+
+	// mu guards current AND the queue pointer — both are swapped at
+	// runtime (current by SwitchProvider/Restart, queue by Restart when
+	// the concurrency cap changes), so every read must snapshot them
+	// under the read lock rather than touching the fields directly.
 	mu      sync.RWMutex
 	current provider.Provider
 
@@ -198,6 +207,10 @@ func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []by
 	if s == nil || s.disabled {
 		return ErrDisabled
 	}
+	// Serialize against Restart so the two lifecycle ops never interleave
+	// their s.current / s.queue mutations.
+	s.lifecycleMu.Lock()
+	defer s.lifecycleMu.Unlock()
 
 	newProv, err := provider.Build(ctx, kind, cfgBytes, envSecrets, s.logger)
 	if err != nil {
@@ -207,15 +220,16 @@ func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []by
 		return fmt.Errorf("start %s provider: %w", kind, err)
 	}
 
-	s.queue.BlockNew()
+	q := s.currentQueue()
+	q.BlockNew()
 	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
-	if derr := s.queue.WaitDrain(drainCtx); derr != nil {
+	if derr := q.WaitDrain(drainCtx); derr != nil {
 		s.logger.Warn("embeddings: drain timed out during switch; proceeding anyway",
-			"in_flight", s.queue.InFlight(), "err", derr,
+			"in_flight", q.InFlight(), "err", derr,
 		)
 	}
 	drainCancel()
-	s.queue.Resume()
+	q.Resume()
 
 	s.mu.Lock()
 	old := s.current
@@ -367,8 +381,8 @@ func (s *Service) Status() provider.Status {
 		return provider.Status{State: provider.StateFailed, LastError: "provider not initialised"}
 	}
 	st := cur.Status()
-	if s.queue != nil {
-		st.InFlight = s.queue.InFlight()
+	if q := s.currentQueue(); q != nil {
+		st.InFlight = q.InFlight()
 	}
 	return st
 }
@@ -423,21 +437,32 @@ func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 	if s == nil || s.disabled {
 		return ErrDisabled
 	}
-
-	s.queue.BlockNew()
-	defer s.queue.Resume()
+	// Serialize against SwitchProvider so the two lifecycle ops never
+	// interleave their s.current / s.queue mutations.
+	s.lifecycleMu.Lock()
+	defer s.lifecycleMu.Unlock()
+
+	// Snapshot the live queue, block + drain it. Resume targets this same
+	// instance: if we swap below it's discarded (the resume is harmless),
+	// otherwise it's still s.queue and the resume re-opens it.
+	oldQ := s.currentQueue()
+	oldQ.BlockNew()
+	defer oldQ.Resume()
 	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
-	if err := s.queue.WaitDrain(drainCtx); err != nil {
+	if err := oldQ.WaitDrain(drainCtx); err != nil {
 		drainCancel()
 		s.logger.Warn("embeddings: drain timed out, proceeding with restart anyway",
-			"in_flight", s.queue.InFlight(), "err", err,
+			"in_flight", oldQ.InFlight(), "err", err,
 		)
 	} else {
 		drainCancel()
 	}
 
-	if cfg.MaxEmbeddingConcurrency != cap(s.queue.slots) {
-		s.queue = NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second)
+	if cfg.MaxEmbeddingConcurrency != cap(oldQ.slots) {
+		newQ := NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second)
+		s.mu.Lock()
+		s.queue = newQ
+		s.mu.Unlock()
 	}
 
 	// Snapshot the live provider's kind under the read lock — we don't
@@ -518,12 +543,12 @@ func (s *Service) EmbedQuery(ctx context.Context, query string) ([]float32, erro
 	if s == nil || s.disabled {
 		return nil, ErrDisabled
 	}
-	cur, err := s.acquireProvider(ctx)
+	cur, q, err := s.acquireProvider(ctx)
 	if err != nil {
 		return nil, err
 	}
 	slotStart := time.Now()
-	defer s.queue.Release(slotStart)
+	defer q.Release(slotStart)
 	return cur.EmbedQuery(ctx, query)
 }
 
@@ -535,12 +560,12 @@ func (s *Service) EmbedTexts(ctx context.Context, texts []string) ([][]float32,
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	cur, err := s.acquireProvider(ctx)
+	cur, q, err := s.acquireProvider(ctx)
 	if err != nil {
 		return nil, err
 	}
 	slotStart := time.Now()
-	defer s.queue.Release(slotStart)
+	defer q.Release(slotStart)
 	return cur.EmbedDocuments(ctx, texts)
 }
 
@@ -555,25 +580,38 @@ func (s *Service) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]flo
 	if len(texts) == 0 {
 		return nil, nil
 	}
-	cur, err := s.acquireProvider(ctx)
+	cur, q, err := s.acquireProvider(ctx)
 	if err != nil {
 		return nil, err
 	}
 	slotStart := time.Now()
-	defer s.queue.Release(slotStart)
+	defer q.Release(slotStart)
 	if cur.SupportsTokenize() {
 		return cur.TokenizeAndEmbed(ctx, texts)
 	}
 	return cur.EmbedDocuments(ctx, texts)
 }
 
-// acquireProvider acquires a queue slot and returns the active
-// provider snapshot. Caller is responsible for queue.Release once the
-// call returns (deferred at call site so the slot is released even on
-// provider error).
-func (s *Service) acquireProvider(ctx context.Context) (provider.Provider, error) {
-	if err := s.queue.Acquire(ctx); err != nil {
-		return nil, err
+// currentQueue returns the active queue under the read lock. Restart
+// swaps the queue pointer when the concurrency cap changes, so callers
+// must snapshot it rather than reading s.queue directly (otherwise the
+// read races the swap).
+func (s *Service) currentQueue() *Queue {
+	s.mu.RLock()
+	q := s.queue
+	s.mu.RUnlock()
+	return q
+}
+
+// acquireProvider acquires a queue slot and returns the active provider
+// snapshot AND the queue the slot was taken from. The caller must
+// Release on the RETURNED queue (not s.queue, which Restart may have
+// swapped meanwhile) — deferred at the call site so the slot is released
+// even on provider error.
+func (s *Service) acquireProvider(ctx context.Context) (provider.Provider, *Queue, error) {
+	q := s.currentQueue()
+	if err := q.Acquire(ctx); err != nil {
+		return nil, nil, err
 	}
 	s.mu.RLock()
 	cur := s.current
@@ -581,8 +619,8 @@ func (s *Service) acquireProvider(ctx context.Context) (provider.Provider, error
 	if cur == nil {
 		// We hold the slot but have nothing to call — release it before
 		// returning the error so subsequent callers aren't starved.
-		s.queue.Release(time.Now())
-		return nil, ErrSupervisor
+		q.Release(time.Now())
+		return nil, nil, ErrSupervisor
 	}
-	return cur, nil
+	return cur, q, nil
 }
diff --git a/server/internal/embeddings/switch_provider_test.go b/server/internal/embeddings/switch_provider_test.go
index f3ccd78..e219350 100644
--- a/server/internal/embeddings/switch_provider_test.go
+++ b/server/internal/embeddings/switch_provider_test.go
@@ -7,8 +7,11 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"sync"
 	"testing"
+	"time"
 
+	"github.com/dvcdsys/code-index/server/internal/config"
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 	"github.com/dvcdsys/code-index/server/internal/vectorstore"
 )
@@ -136,3 +139,52 @@ func dirExists(path string) bool {
 	st, err := os.Stat(path)
 	return err == nil && st.IsDir()
 }
+
+// TestRestart_ConcurrentWithEmbeds_NoRace guards H1: Restart swaps the
+// s.queue pointer when the concurrency cap changes, while Embed* callers
+// read it to acquire/release slots. Run under -race with many embedders
+// hammering the queue while a restarter repeatedly swaps it. A remote
+// (non-ollama) fake provider keeps Restart on the queue-only path with no
+// sidecar to manage.
+func TestRestart_ConcurrentWithEmbeds_NoRace(t *testing.T) {
+	s := &Service{
+		logger:  quiet(),
+		queue:   NewQueue(2, time.Second),
+		current: fakeProv{id: "fake:m"},
+	}
+
+	var wg sync.WaitGroup
+	stop := make(chan struct{})
+
+	for i := 0; i < 6; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for {
+				select {
+				case <-stop:
+					return
+				default:
+					_, _ = s.EmbedTexts(context.Background(), []string{"x"})
+				}
+			}
+		}()
+	}
+
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for i := 0; i < 300; i++ {
+			// Cycle the cap through 2, 3, 4 so consecutive Restarts see a
+			// changed cap and swap the queue pointer (the racy write H1 fixes).
+			n := 2 + (i % 3) // 2, 3, 4
+			_ = s.Restart(context.Background(), &config.Config{
+				MaxEmbeddingConcurrency: n,
+				EmbeddingQueueTimeout:   1,
+			})
+		}
+		close(stop)
+	}()
+
+	wg.Wait()
+}

From ffc6735531e321d1164d432a81ff6cb8b33850b2 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Thu, 28 May 2026 22:04:45 +0100
Subject: [PATCH 27/34] style(ollama): gofmt new/moved provider files

gofmt the two ollama-package files whose field/map alignment drifted when
they were extracted/moved into provider/ollama on this branch (gguf.go
GGUFInputs struct, prefix.go QueryPrefixes map). Pure formatting, no
behavior change. Pre-existing gofmt drift in files this branch only
incidentally touches (e.g. vectorstore/store.go) is left alone to keep
the diff focused.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 server/internal/embeddings/provider/ollama/gguf.go   |  8 ++++----
 server/internal/embeddings/provider/ollama/prefix.go | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/server/internal/embeddings/provider/ollama/gguf.go b/server/internal/embeddings/provider/ollama/gguf.go
index 14e0cd8..4767c8c 100644
--- a/server/internal/embeddings/provider/ollama/gguf.go
+++ b/server/internal/embeddings/provider/ollama/gguf.go
@@ -15,10 +15,10 @@ import (
 // extracts them from *config.Config so the ollama package stays free
 // of the config dependency.
 type GGUFInputs struct {
-	GGUFPath          string // CIX_GGUF_PATH absolute override
-	Model             string // HF repo id ("owner/repo") or absolute path
-	CacheDir          string // base dir under which downloaded GGUFs live
-	BootstrapPath     string // CIX_BOOTSTRAP_GGUF_PATH one-shot import source
+	GGUFPath      string // CIX_GGUF_PATH absolute override
+	Model         string // HF repo id ("owner/repo") or absolute path
+	CacheDir      string // base dir under which downloaded GGUFs live
+	BootstrapPath string // CIX_BOOTSTRAP_GGUF_PATH one-shot import source
 }
 
 // ResolveGGUFPath walks the precedence chain:
diff --git a/server/internal/embeddings/provider/ollama/prefix.go b/server/internal/embeddings/provider/ollama/prefix.go
index 50a210d..4ee577c 100644
--- a/server/internal/embeddings/provider/ollama/prefix.go
+++ b/server/internal/embeddings/provider/ollama/prefix.go
@@ -9,11 +9,11 @@ import "strings"
 // Keep this map string-for-string identical to the Python dict. The parity gate
 // depends on the prefix being literally the same bytes sent to the model.
 var QueryPrefixes = map[string]string{
-	"nomic-ai/CodeRankEmbed":              "Represent this query for searching relevant code: ",
-	"nomic-ai/nomic-embed-text-v1.5":      "search_query: ",
-	"BAAI/bge-base-en-v1.5":               "Represent this sentence for searching relevant passages: ",
-	"BAAI/bge-large-en-v1.5":              "Represent this sentence for searching relevant passages: ",
-	"awhiteside/CodeRankEmbed-Q8_0-GGUF":  "Represent this query for searching relevant code: ",
+	"nomic-ai/CodeRankEmbed":             "Represent this query for searching relevant code: ",
+	"nomic-ai/nomic-embed-text-v1.5":     "search_query: ",
+	"BAAI/bge-base-en-v1.5":              "Represent this sentence for searching relevant passages: ",
+	"BAAI/bge-large-en-v1.5":             "Represent this sentence for searching relevant passages: ",
+	"awhiteside/CodeRankEmbed-Q8_0-GGUF": "Represent this query for searching relevant code: ",
 }
 
 // ResolveQueryPrefix returns the prefix string to prepend to queries for the

From 97fd9c4f9cbec3068959ddf9705467437a5d4c5f Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Fri, 29 May 2026 16:34:51 +0100
Subject: [PATCH 28/34] fix(projects): return stored path_hash in API
 responses, not a recompute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dashboard 404'd ("project not found: hash=…") on local projects whose
host_path and stored path_hash legitimately diverge — e.g. a project keyed
as sha1("local:{machine}:{path}") while host_path stays the bare filesystem
path. The list/detail responses set PathHash by RE-deriving sha1(host_path),
which for those rows yields a hash that GetByHash (which resolves against
the stored path_hash column) never matches → 404.

host_path can't simply be rewritten to the namespaced key form: it is the
vector-store collection key (md5(host_path)) and the chunks/symbols key, so
changing it would orphan the existing index and force a reindex.

Fix: carry the stored path_hash column through projects.Project and return
it verbatim in both project responses (server.go ProjectSummary + Project),
instead of recomputing from host_path. The stored column is the single
source of truth the lookup already uses, so link hash == lookup hash for
local, external, and freshly-created projects alike. No data migration, no
reindex.

Pre-existing issue (per-machine namespacing + an incomplete manual re-key),
independent of the embedding-provider work; bundled here per request.

Test: a row whose stored path_hash differs from sha1(host_path) — Get/List
surface the stored hash and GetByHash round-trips it back to the project.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 server/internal/httpapi/server.go         | 24 +++++-----
 server/internal/projects/projects.go      | 35 ++++++++++----
 server/internal/projects/projects_test.go | 56 ++++++++++++++++++++++-
 3 files changed, 94 insertions(+), 21 deletions(-)

diff --git a/server/internal/httpapi/server.go b/server/internal/httpapi/server.go
index b773579..8048611 100644
--- a/server/internal/httpapi/server.go
+++ b/server/internal/httpapi/server.go
@@ -103,16 +103,16 @@ func (s *Server) GetStatus(w http.ResponseWriter, r *http.Request) {
 		cancel()
 	}
 	resp := map[string]any{
-		"status":                              "ok",
-		"backend":                             s.Deps.Backend,
-		"server_version":                      s.Deps.ServerVersion,
-		"api_version":                         s.Deps.APIVersion,
-		"model_loaded":                        modelLoaded,
-		"embedding_model":                     model,
-		"embedding_provider":                  providerKind,
-		"embedding_provider_manages_process":  managesProcess,
-		"projects":                            projectCount,
-		"active_indexing_jobs":                activeJobs,
+		"status":                             "ok",
+		"backend":                            s.Deps.Backend,
+		"server_version":                     s.Deps.ServerVersion,
+		"api_version":                        s.Deps.APIVersion,
+		"model_loaded":                       modelLoaded,
+		"embedding_model":                    model,
+		"embedding_provider":                 providerKind,
+		"embedding_provider_manages_process": managesProcess,
+		"projects":                           projectCount,
+		"active_indexing_jobs":               activeJobs,
 	}
 	// Version-check fields — folded in only when the service is wired.
 	// `update_available` is always present (false when unknown) so the
@@ -615,7 +615,7 @@ func (s *Server) GetProjectSummary(w http.ResponseWriter, r *http.Request, path
 	}
 
 	writeJSON(w, http.StatusOK, openapi.ProjectSummary{
-		PathHash:       projects.HashPath(p.HostPath),
+		PathHash:       p.PathHash,
 		HostPath:       p.HostPath,
 		Status:         p.Status,
 		Languages:      langs,
@@ -1046,7 +1046,7 @@ func projectToOpenAPI(p *projects.Project) openapi.Project {
 		}
 	}
 	out := openapi.Project{
-		PathHash:      projects.HashPath(p.HostPath),
+		PathHash:      p.PathHash,
 		HostPath:      p.HostPath,
 		ContainerPath: p.ContainerPath,
 		Languages:     langs,
diff --git a/server/internal/projects/projects.go b/server/internal/projects/projects.go
index 3871804..47f1e7d 100644
--- a/server/internal/projects/projects.go
+++ b/server/internal/projects/projects.go
@@ -55,7 +55,15 @@ type Stats struct {
 
 // Project is the full project record returned from the database.
 type Project struct {
-	HostPath      string
+	HostPath string
+	// PathHash is the STORED path_hash column — the canonical URL identity
+	// the dashboard links to and GetByHash resolves against. It is returned
+	// verbatim rather than recomputed from HostPath: a project's host_path
+	// and its stored path_hash can legitimately diverge (e.g. a local
+	// project whose host_path is the bare filesystem path while path_hash
+	// is keyed as sha1("local:{machine}:{path}")), and recomputing from
+	// host_path would yield a hash that no lookup matches → 404.
+	PathHash      string
 	ContainerPath string
 	Languages     []string
 	Settings      Settings
@@ -242,7 +250,7 @@ func findOverlap(ctx context.Context, db *sql.DB, candidate string) (string, err
 // Get retrieves a project by its host_path. Returns ErrNotFound if absent.
 func Get(ctx context.Context, db *sql.DB, hostPath string) (*Project, error) {
 	row := db.QueryRowContext(ctx,
-		`SELECT host_path, container_path, languages, settings, stats, status, created_at, updated_at, last_indexed_at, indexed_with_model, owner_user_id, display_path, machine_id, machine_label
+		`SELECT host_path, container_path, languages, settings, stats, status, created_at, updated_at, last_indexed_at, indexed_with_model, owner_user_id, display_path, machine_id, machine_label, path_hash
 		 FROM projects WHERE host_path = ?`, hostPath,
 	)
 	return scanProject(hostPath, row)
@@ -270,7 +278,7 @@ func GetByHash(ctx context.Context, db *sql.DB, pathHash string) (*Project, erro
 // List returns all projects ordered by created_at descending.
 func List(ctx context.Context, db *sql.DB) ([]Project, error) {
 	rows, err := db.QueryContext(ctx,
-		`SELECT host_path, container_path, languages, settings, stats, status, created_at, updated_at, last_indexed_at, indexed_with_model, owner_user_id, display_path, machine_id, machine_label
+		`SELECT host_path, container_path, languages, settings, stats, status, created_at, updated_at, last_indexed_at, indexed_with_model, owner_user_id, display_path, machine_id, machine_label, path_hash
 		 FROM projects ORDER BY created_at DESC`,
 	)
 	if err != nil {
@@ -368,12 +376,13 @@ func scanProject(hostPath string, row *sql.Row) (*Project, error) {
 		displayPath             *string
 		machineID               *string
 		machineLabel            *string
+		pathHash                *string
 	)
 	err := row.Scan(
 		&hp, &containerPath,
 		&langsJSON, &settingsJSON, &statsJSON,
 		&status, &createdAt, &updatedAt, &lastIndexedAt, &indexedWithModel, &ownerUserID,
-		&displayPath, &machineID, &machineLabel,
+		&displayPath, &machineID, &machineLabel, &pathHash,
 	)
 	if errors.Is(err, sql.ErrNoRows) {
 		return nil, fmt.Errorf("%w: %s", ErrNotFound, hostPath)
@@ -381,7 +390,7 @@ func scanProject(hostPath string, row *sql.Row) (*Project, error) {
 	if err != nil {
 		return nil, fmt.Errorf("scan project row: %w", err)
 	}
-	return buildProject(hp, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel)
+	return buildProject(hp, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel, pathHash)
 }
 
 func scanProjectRow(rows *sql.Rows) (*Project, error) {
@@ -396,19 +405,20 @@ func scanProjectRow(rows *sql.Rows) (*Project, error) {
 		displayPath             *string
 		machineID               *string
 		machineLabel            *string
+		pathHash                *string
 	)
 	if err := rows.Scan(
 		&hostPath, &containerPath,
 		&langsJSON, &settingsJSON, &statsJSON,
 		&status, &createdAt, &updatedAt, &lastIndexedAt, &indexedWithModel, &ownerUserID,
-		&displayPath, &machineID, &machineLabel,
+		&displayPath, &machineID, &machineLabel, &pathHash,
 	); err != nil {
 		return nil, fmt.Errorf("scan project: %w", err)
 	}
-	return buildProject(hostPath, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel)
+	return buildProject(hostPath, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel, pathHash)
 }
 
-func buildProject(hostPath, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt string, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel *string) (*Project, error) {
+func buildProject(hostPath, containerPath, langsJSON, settingsJSON, statsJSON, status, createdAt, updatedAt string, lastIndexedAt, indexedWithModel, ownerUserID, displayPath, machineID, machineLabel, pathHash *string) (*Project, error) {
 	var langs []string
 	if err := json.Unmarshal([]byte(langsJSON), &langs); err != nil {
 		langs = nil
@@ -428,8 +438,17 @@ func buildProject(hostPath, containerPath, langsJSON, settingsJSON, statsJSON, s
 	if displayPath != nil && *displayPath != "" {
 		dp = *displayPath
 	}
+	// Fall back to the host-path hash only when the stored column is
+	// absent (pre-m7 rows backfill it on Open, so this is belt-and-braces).
+	ph := ""
+	if pathHash != nil && *pathHash != "" {
+		ph = *pathHash
+	} else {
+		ph = hashPath(hostPath)
+	}
 	return &Project{
 		HostPath:         hostPath,
+		PathHash:         ph,
 		ContainerPath:    containerPath,
 		Languages:        langs,
 		Settings:         settings,
diff --git a/server/internal/projects/projects_test.go b/server/internal/projects/projects_test.go
index 5d9a18e..296f8d0 100644
--- a/server/internal/projects/projects_test.go
+++ b/server/internal/projects/projects_test.go
@@ -47,6 +47,61 @@ func TestCreateAndGet(t *testing.T) {
 	}
 }
 
+// TestGet_ReturnsStoredPathHashNotRecomputed guards the dashboard 404
+// regression: a project whose host_path and stored path_hash legitimately
+// diverge — e.g. a local project keyed as sha1("local:{machine}:{path}")
+// while host_path stays the bare filesystem path — must surface the STORED
+// hash, because that is what GetByHash resolves against. Recomputing the
+// hash from host_path would hand the dashboard a link no lookup matches →
+// "project not found".
+func TestGet_ReturnsStoredPathHashNotRecomputed(t *testing.T) {
+	d := openTestDB(t)
+	ctx := context.Background()
+
+	const host = "/Users/me/proj"
+	const stored = "deadbeefcafe0001" // intentionally != hashPath(host)
+	if hashPath(host) == stored {
+		t.Fatal("precondition: stored hash must differ from the bare host-path hash")
+	}
+	now := "2026-01-01T00:00:00Z"
+	if _, err := d.ExecContext(ctx,
+		`INSERT INTO projects (host_path, container_path, languages, settings, stats, status, created_at, updated_at, path_hash, display_path, machine_id)
+		 VALUES (?, ?, '[]', '{}', '{}', 'indexed', ?, ?, ?, ?, ?)`,
+		host, host, now, now, stored, host, "machine-xyz",
+	); err != nil {
+		t.Fatalf("seed insert: %v", err)
+	}
+
+	got, err := Get(ctx, d, host)
+	if err != nil {
+		t.Fatalf("Get: %v", err)
+	}
+	if got.PathHash != stored {
+		t.Errorf("Get PathHash = %q, want stored %q (must not recompute from host_path)", got.PathHash, stored)
+	}
+
+	list, err := List(ctx, d)
+	if err != nil {
+		t.Fatalf("List: %v", err)
+	}
+	if len(list) != 1 || list[0].PathHash != stored {
+		t.Errorf("List PathHash = %+v, want [%q]", list, stored)
+	}
+
+	// The stored hash must resolve back to the project (the dashboard
+	// click path: link hash → GetByHash → detail).
+	byHash, err := GetByHash(ctx, d, stored)
+	if err != nil {
+		t.Fatalf("GetByHash(stored): %v", err)
+	}
+	if byHash.HostPath != host {
+		t.Errorf("GetByHash HostPath = %q, want %q", byHash.HostPath, host)
+	}
+	if byHash.PathHash != stored {
+		t.Errorf("GetByHash PathHash = %q, want %q", byHash.PathHash, stored)
+	}
+}
+
 // Create preserves the host_path verbatim — matching Python which does not
 // normalise. Stripping trailing slashes here would silently change the stored
 // value and break subsequent lookups that hash the caller's original path.
@@ -257,7 +312,6 @@ func TestHashPath_MatchesPython(t *testing.T) {
 	}
 }
 
-
 // TestCreate_MachineNamespacingAvoidsCollision verifies that the same
 // filesystem path indexed from two different machines becomes two distinct
 // projects (different identity key + hash), while the same machine+path

From d9742e85a560c52c6729b19be4d54eed2216a680 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Sun, 31 May 2026 23:45:50 +0100
Subject: [PATCH 29/34] feat(cli): schema-driven config layer + lazygit-style
 TUI editor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the hand-rolled Sscanf-switch + printf-table for ~/.cix/config.yaml
with a tag-driven schema (`key`/`desc`/`default`/`validate`/`env`/`sensitive`
struct tags) that drives Load, Save, `config show`, `config set`, and a new
full-screen editor — one source of truth, no more drift across three files.

Loader: koanf/v2 (file+yaml + confmap defaults from tags + rawbytes legacy-
key normalization). Validator: go-playground/validator/v10, runs on every
mutation. Setter: reflective SetByPath with type-aware parsing and rollback
on validation failure. Show/keys: reflect over the schema walker — sensitive
leaves never bind their value to a named variable (CodeQL-safe).

Env overrides: CIX_SERVER / CIX_API_URL / CIX_API_KEY override server
selection / URL / key with flag > env > file precedence. Local-only —
never persisted. Designed for CI runners passing secrets via env.

TUI: new lazygit-style `cix config edit` and `cix config init` built on
bubbletea + bubbles + lipgloss — two-panel layout, vim-friendly nav
(h/j/k/l), inline textinput edits, server CRUD (a/d/m/t), help overlay.
Replaces the earlier huh-based form (which felt like a one-shot wizard,
not an editor).

On-disk format: legacy `api:` block still migrates to `servers:`. Legacy
lowercase viper keys (debouncems, excludepatterns, cachettl, autowatch,
batchsize) still normalize. The dead `server.port` / `server.cache_ttl`
block is dropped on next save — old files load without error.

New: `cix config keys` lists every settable key with default / env /
description from the schema. `doc/CLI_CONFIG.md` documents the full key
table.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 README.md                                 |  47 ++-
 cli/README.md                             | 132 ++++++-
 cli/cmd/config.go                         | 318 ++++++++++++----
 cli/cmd/config_edit.go                    |  61 +++
 cli/cmd/config_keys.go                    |  83 +++++
 cli/cmd/config_keys_test.go               |  73 ++++
 cli/cmd/config_show_test.go               |  97 +++++
 cli/cmd/multiserver_test.go               | 297 +++++++++++++++
 cli/cmd/root.go                           |  64 +++-
 cli/go.mod                                |  43 ++-
 cli/go.sum                                |  86 ++++-
 cli/internal/config/config.go             | 314 ++++++++++++----
 cli/internal/config/config_test.go        | 284 +++++++++++---
 cli/internal/config/loader_koanf.go       | 151 ++++++++
 cli/internal/config/loader_koanf_test.go  | 161 ++++++++
 cli/internal/config/schema/schema.go      | 103 ++++++
 cli/internal/config/schema/schema_test.go | 117 ++++++
 cli/internal/config/set.go                | 129 +++++++
 cli/internal/config/set_test.go           | 148 ++++++++
 cli/internal/config/tui/keys.go           | 100 +++++
 cli/internal/config/tui/model.go          | 163 ++++++++
 cli/internal/config/tui/model_test.go     | 219 +++++++++++
 cli/internal/config/tui/sections.go       | 225 +++++++++++
 cli/internal/config/tui/styles.go         |  96 +++++
 cli/internal/config/tui/tui.go            |  57 +++
 cli/internal/config/tui/update.go         | 432 ++++++++++++++++++++++
 cli/internal/config/tui/view.go           | 283 ++++++++++++++
 cli/internal/config/validator.go          |  94 +++++
 cli/internal/config/validator_test.go     | 107 ++++++
 doc/CLI_CONFIG.md                         | 108 ++++++
 30 files changed, 4388 insertions(+), 204 deletions(-)
 create mode 100644 cli/cmd/config_edit.go
 create mode 100644 cli/cmd/config_keys.go
 create mode 100644 cli/cmd/config_keys_test.go
 create mode 100644 cli/cmd/config_show_test.go
 create mode 100644 cli/cmd/multiserver_test.go
 create mode 100644 cli/internal/config/loader_koanf.go
 create mode 100644 cli/internal/config/loader_koanf_test.go
 create mode 100644 cli/internal/config/schema/schema.go
 create mode 100644 cli/internal/config/schema/schema_test.go
 create mode 100644 cli/internal/config/set.go
 create mode 100644 cli/internal/config/set_test.go
 create mode 100644 cli/internal/config/tui/keys.go
 create mode 100644 cli/internal/config/tui/model.go
 create mode 100644 cli/internal/config/tui/model_test.go
 create mode 100644 cli/internal/config/tui/sections.go
 create mode 100644 cli/internal/config/tui/styles.go
 create mode 100644 cli/internal/config/tui/tui.go
 create mode 100644 cli/internal/config/tui/update.go
 create mode 100644 cli/internal/config/tui/view.go
 create mode 100644 cli/internal/config/validator.go
 create mode 100644 cli/internal/config/validator_test.go
 create mode 100644 doc/CLI_CONFIG.md

diff --git a/README.md b/README.md
index 0b04d50..4af5ba5 100644
--- a/README.md
+++ b/README.md
@@ -307,19 +307,48 @@ The watcher monitors the project with `fsnotify`, debounces events (5 s default)
 ### Configuration
 
 ```bash
-cix config show              # print current config
-cix config set <key> <val>   # set a value
+cix config init              # first-run wizard (TUI form)
+cix config edit              # interactive edit (TUI form)
+cix config show              # print current config (lists servers; * marks default)
+cix config keys              # list every settable key with default/env/description
+cix config set <key> <val>   # set one value
+cix config unset <key>       # remove a server / clear a key
 cix config path              # show config file location
 ```
 
-Config file: `~/.cix/config.yaml`
+Config file: `~/.cix/config.yaml`. The full key reference lives in
+[`doc/CLI_CONFIG.md`](doc/CLI_CONFIG.md) — `cix config keys` is the
+canonical runtime view.
 
-| Key | Default | Description |
-|-----|---------|-------------|
-| `api.url` | `http://localhost:21847` | API server URL |
-| `api.key` | — | Bearer token (`cix_*`) — required |
-| `watcher.debounce_ms` | `5000` | Delay before reindex triggers after a file change |
-| `indexing.batch_size` | `20` | Files per `/index/files` batch |
+#### Env overrides (CI)
+
+| Variable        | Overrides                                |
+|-----------------|------------------------------------------|
+| `CIX_SERVER`    | which alias resolves when `--server` is empty |
+| `CIX_API_URL`   | the resolved server's URL                |
+| `CIX_API_KEY`   | the resolved server's API key            |
+
+Precedence is **flag > env > file > default**. Env overrides apply only
+to the current process — they never write back to `~/.cix/config.yaml`.
+
+#### Multiple servers
+
+`cix` can be configured with several named servers and pick one per
+command with the global `--server <alias>` flag (without it, the
+`default_server` is used):
+
+```bash
+cix config set server.corporate.url https://cix.corp.internal
+cix config set server.corporate.key cix_...
+cix config set default_server corporate     # optional
+cix --server corporate search "rate limiter"
+cix config unset server.corporate           # remove it
+```
+
+The legacy `api.url` / `api.key` keys and the `--api-url` / `--api-key`
+flags still work — they read/override the default server — and old flat
+`api:` config files are migrated to the `servers:` layout automatically
+on first load.
 
 ---
 
diff --git a/cli/README.md b/cli/README.md
index 209cbd0..a9afd5a 100644
--- a/cli/README.md
+++ b/cli/README.md
@@ -27,12 +27,16 @@ cli/
 │   ├── reindex.go       — `cix reindex`
 │   ├── cancel.go        — `cix cancel`
 │   ├── watch.go         — `cix watch` (start/stop/status, daemon)
-│   ├── config.go        — `cix config show/set/path`
+│   ├── config.go        — `cix config show/set/unset/path` (+ multi-server keys)
+│   ├── config_keys.go   — `cix config keys` (schema-driven key listing)
+│   ├── config_edit.go   — `cix config edit` / `cix config init` (bubbletea-driven TUI)
 │   ├── workspace.go     — `cix workspace …` (cross-repo, name-first)
 │   └── version.go       — `cix version`
 ├── internal/
 │   ├── client/          — HTTP client to cix-server
 │   ├── config/          — YAML config (~/.cix/config.yaml)
+│   │   ├── schema/      — tag-driven walker over Config (single source of truth)
+│   │   └── tui/         — bubbletea-based editor for `cix config edit` / `init`
 │   ├── daemon/          — PID-file based watcher daemon
 │   ├── discovery/       — project-root detection for `cix init`
 │   ├── fileutil/        — binary/text + size helpers
@@ -82,6 +86,132 @@ Then any command picks up the saved URL + key from `~/.cix/config.yaml`.
 The server can be local Docker (`docker compose up -d` in the repo
 root) or a remote server. The CLI doesn't care.
 
+### Multiple servers
+
+The CLI can hold several **named servers** and pick one per command. The
+config stores a `servers:` list and a `default_server`; commands use the
+default unless `--server <alias>` is given.
+
+```bash
+# Add a second server and switch the default
+cix config set server.corporate.url https://cix.corp.internal
+cix config set server.corporate.key <bearer-token>
+cix config set default_server corporate
+
+# Target a specific server for one command (alias must exist in config)
+cix --server corporate search "rate limiter"
+
+# Inspect / remove
+cix config show                      # lists servers; * marks the default
+cix config unset server.corporate    # remove a server
+```
+
+The legacy `api.url` / `api.key` keys and the `--api-url` / `--api-key`
+flags still work — they operate on (or override) the **default** server,
+so single-server setups need no changes. Old `~/.cix/config.yaml` files
+that use the flat `api:` block are migrated to the `servers:` layout
+automatically on first load (the old single server becomes `default`).
+
+### Environment overrides (CI-friendly)
+
+For CI runners, containers, and one-off scripts you can override server
+selection via env vars instead of writing to `~/.cix/config.yaml`.
+Precedence is always **flag > env > file > built-in default** — env
+overrides never persist to disk.
+
+| Variable        | Overrides                                | Use case |
+|-----------------|------------------------------------------|----------|
+| `CIX_SERVER`    | which alias resolves when `--server` is empty | Switch active server in a shell session without touching the file |
+| `CIX_API_URL`   | the resolved server's `url`              | Point at a different cix-server instance per process |
+| `CIX_API_KEY`   | the resolved server's `key`              | Pass a secret from `secrets.CIX_API_KEY` in GitHub Actions |
+
+Example (GitHub Actions):
+
+```yaml
+env:
+  CIX_API_URL: https://cix.corp.internal
+  CIX_API_KEY: ${{ secrets.CIX_API_KEY }}
+steps:
+  - run: cix search "foo"
+```
+
+The 3-var surface is deliberately narrow — knobs like
+`watcher.debounce_ms` or `indexing.batch_size` live in the config file
+only, because they are persistent developer preferences, not per-process
+overrides.
+
+### Interactive setup (`cix config init` / `cix config edit`)
+
+`cix config init` is the first-run wizard for fresh machines: it opens
+a full-screen editor (`bubbletea`-driven TUI) that seeds the default server entry,
+asks for the API key, and walks through the watcher + indexing knobs.
+On submit it validates everything against the schema and writes
+`~/.cix/config.yaml`.
+
+`cix config edit` is the same form against an existing config — useful
+when you want to flip booleans (e.g. `watcher.enabled`) or tune timeouts
+without re-reading `cix config set --help`.
+
+```
+┌─ Servers ──────────────────────────────┐
+│ [default] URL  http://localhost:21847  │
+│ [default] API key  ●●●●●●●●            │
+│ Default server  ▼ default              │
+└────────────────────────────────────────┘
+┌─ File watcher ─────────────────────────┐
+│ Enable the watcher    [✓]              │
+│ Debounce (ms)         5000             │
+│ Sync interval (min)   5                │
+│ Exclude patterns      node_modules,…   │
+└────────────────────────────────────────┘
+┌─ Indexing ─────────────────────────────┐
+│ Batch size            20               │
+│ Streaming idle (s)    30               │
+└────────────────────────────────────────┘
+       [ Submit ]   ESC to cancel
+```
+
+Add/remove of server aliases is still done via
+`cix config set server.<name>.url …` / `cix config unset server.<name>`
+— the form edits URL/key of *existing* aliases.
+
+### Discovering keys (`cix config keys`)
+
+`cix config keys` prints every settable configuration key with its
+current value, default, env-var binding (if any), and a short
+description. This is the canonical reference — there is no hard-coded
+list anywhere else:
+
+```bash
+$ cix config keys
+KEY                                  VALUE                  DEFAULT  ENV         DESCRIPTION
+default_server                       default                —        CIX_SERVER  Alias of the server used when --server is omitted
+watcher.enabled                      true                   true     —           Run the file watcher
+watcher.debounce_ms                  5000                   5000     —           Debounce delay (ms)
+watcher.exclude                      [node_modules .git …]  …        —           Paths/globs to skip (REPLACE semantics on set)
+watcher.sync_interval_mins           5                      5        —           Periodic sync interval (minutes)
+indexing.batch_size                  20                     20       —           Indexing batch size
+indexing.streaming_idle_timeout_sec  30                     30       —           Streaming /index/files idle timeout (seconds); 0 disables
+```
+
+Slice keys (servers, projects) are not listed here — `cix config show`
+displays them in their dedicated formats.
+
+### List-valued keys (`watcher.exclude`)
+
+`watcher.exclude` is the only list-valued key that `cix config set`
+accepts. Input is **comma-separated**, and the semantics are
+**REPLACE, not append**:
+
+```bash
+$ cix config set watcher.exclude "node_modules,vendor,build"
+# overwrites the entire list; previous defaults are gone
+```
+
+There is no `cix config add` / `cix config append` — if you want to
+keep the existing defaults plus add an entry, repeat the full list.
+The interactive `cix config edit` form is usually nicer for this.
+
 ## Smoke test
 
 ```bash
diff --git a/cli/cmd/config.go b/cli/cmd/config.go
index 931f5ec..f3353f5 100644
--- a/cli/cmd/config.go
+++ b/cli/cmd/config.go
@@ -1,9 +1,15 @@
 package cmd
 
 import (
+	"errors"
 	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"strings"
 
 	"github.com/anthropics/code-index/cli/internal/config"
+	"github.com/anthropics/code-index/cli/internal/config/schema"
 	"github.com/spf13/cobra"
 )
 
@@ -25,21 +31,50 @@ var configSetCmd = &cobra.Command{
 	Short: "Set a configuration value",
 	Long: `Set a configuration value.
 
-Supported keys:
-  api.url       - API server URL
-  api.key       - API authentication key
-  watcher.debounce_ms - Debounce delay in milliseconds
-  watcher.sync_interval_mins - Periodic sync interval in minutes
+Run 'cix config keys' to list every settable key with its description,
+default, and env-var override. Beyond those schema keys, three patterns
+manage the multi-server layout:
+
+  server.<name>.url      URL of a named server (creates the entry if absent)
+  server.<name>.key      API key of a named server
+  default_server         which server is used when --server is omitted
+  api.url / api.key      legacy aliases — operate on the default server
+
+List-valued keys (e.g. watcher.exclude) use comma-separated input with
+REPLACE semantics: 'cix config set watcher.exclude "node_modules,vendor"'
+overwrites the entire list. There is no 'add'/'append' form.
 
 Examples:
-  cix config set api.key cix_abc123...
-  cix config set api.url http://localhost:21847
-  cix config set watcher.debounce_ms 3000
-  cix config set watcher.sync_interval_mins 5`,
+  cix config set server.corporate.url https://cix.corp.internal
+  cix config set server.corporate.key cix_abc123...
+  cix config set default_server corporate
+
+  cix config set api.url http://localhost:21847        # legacy alias
+  cix config set api.key cix_abc123...                 # legacy alias
+
+  cix config set watcher.enabled false                 # bool
+  cix config set watcher.debounce_ms 3000              # int
+  cix config set watcher.exclude "node_modules,.git"   # list (replace)`,
 	Args: cobra.ExactArgs(2),
 	RunE: runConfigSet,
 }
 
+var configUnsetCmd = &cobra.Command{
+	Use:   "unset <key>",
+	Short: "Remove a server or clear a server key",
+	Long: `Remove configuration entries.
+
+Supported keys:
+  server.<name>      - remove the named server entirely
+  server.<name>.key  - clear the named server's API key
+
+Examples:
+  cix config unset server.corporate
+  cix config unset server.corporate.key`,
+	Args: cobra.ExactArgs(1),
+	RunE: runConfigUnset,
+}
+
 var configPathCmd = &cobra.Command{
 	Use:   "path",
 	Short: "Show config file path",
@@ -52,6 +87,7 @@ func init() {
 	rootCmd.AddCommand(configCmd)
 	configCmd.AddCommand(configShowCmd)
 	configCmd.AddCommand(configSetCmd)
+	configCmd.AddCommand(configUnsetCmd)
 	configCmd.AddCommand(configPathCmd)
 }
 
@@ -60,40 +96,117 @@ func runConfigShow(cmd *cobra.Command, args []string) error {
 	if err != nil {
 		return fmt.Errorf("load config: %w", err)
 	}
+	return renderConfigShow(os.Stdout, cfg, config.GetConfigPath())
+}
 
-	// Render only "set" / "not set" — never any data derived from the key.
-	// CodeQL go/clear-text-logging flags partial display, masked output,
-	// length-only output (because len(secret) still originates from the
-	// secret field), and even local variables named `apiKey`/`*Secret`
-	// regardless of contents (sensitive-name heuristic). The variable is
-	// therefore named `keyStatus` to bypass the name match while still
-	// being readable in the output.
-	keyStatus := "(not set)"
-	if cfg.API.Key != "" {
-		keyStatus = "(set)"
-	}
+// renderConfigShow writes the human-readable config dump to w.
+//
+// Unexported, but reachable from the in-package tests (package cmd), so the
+// snapshot test in config_show_test.go can compare the output against an
+// expected string without shelling out to the CLI binary.
+//
+// The leaf list is driven by schema.Walk over the Config struct, so any
+// new tagged field appears here automatically — no printf drift.
+func renderConfigShow(w io.Writer, cfg *config.Config, cfgPath string) error {
+	// 1) Servers list — slice of structs, custom renderer.
+	renderServersBlock(w, cfg)
 
-	fmt.Printf("%-28s = %s\n", "api.url", cfg.API.URL)
-	fmt.Printf("%-28s = %s\n", "api.key", keyStatus)
-	fmt.Printf("%-28s = %v\n", "watcher.enabled", cfg.Watcher.Enabled)
-	fmt.Printf("%-28s = %d\n", "watcher.debounce_ms", cfg.Watcher.DebounceMS)
-	fmt.Printf("%-28s = %d\n", "watcher.sync_interval_mins", cfg.Watcher.SyncIntervalMins)
-	fmt.Printf("%-28s = %d\n", "indexing.batch_size", cfg.Indexing.BatchSize)
-	fmt.Printf("%-28s = %d\n", "server.port", cfg.Server.Port)
-	fmt.Printf("%-28s = %d\n", "server.cache_ttl", cfg.Server.CacheTTL)
+	// 2) Scalar leaves grouped by top-level prefix (watcher.* / server.* /
+	//    indexing.*) with a blank line between groups for readability.
+	var lastGroup string
+	first := true
+	err := schema.Walk(cfg, func(l schema.LeafField) {
+		// servers / projects are slice leaves rendered separately.
+		if l.Path == "servers" || l.Path == "projects" {
+			return
+		}
+		group := topGroup(l.Path)
+		if !first && group != lastGroup {
+			fmt.Fprintln(w)
+		}
+		first = false
+		lastGroup = group
+		renderScalarLeaf(w, l)
+	})
+	if err != nil {
+		return fmt.Errorf("walk schema: %w", err)
+	}
 
+	// 3) Projects (slice).
 	if len(cfg.Projects) > 0 {
-		fmt.Printf("\nprojects (%d):\n", len(cfg.Projects))
+		fmt.Fprintf(w, "\nprojects (%d):\n", len(cfg.Projects))
 		for _, p := range cfg.Projects {
-			fmt.Printf("  - %s (auto-watch: %v)\n", p.Path, p.AutoWatch)
+			fmt.Fprintf(w, "  - %s (auto-watch: %v)\n", p.Path, p.AutoWatch)
 		}
 	}
 
-	fmt.Printf("\nconfig file: %s\n", config.GetConfigPath())
-
+	fmt.Fprintf(w, "\nconfig file: %s\n", cfgPath)
 	return nil
 }
 
+func renderServersBlock(w io.Writer, cfg *config.Config) {
+	fmt.Fprintf(w, "servers (%d):\n", len(cfg.Servers))
+	for _, s := range cfg.Servers {
+		// Render only "set" / "not set" for the key — never any data derived
+		// from it. CodeQL go/clear-text-logging flags partial/masked/length
+		// output and even local variables named `*Key`/`*Secret` (sensitive-
+		// name heuristic), so the status string is named `keyStatus`.
+		keyStatus := "(not set)"
+		if s.Key != "" {
+			keyStatus = "(set)"
+		}
+		marker := "  "
+		if s.Name == cfg.DefaultServer {
+			marker = "* "
+		}
+		fmt.Fprintf(w, "%s%-16s url=%s key=%s\n", marker, s.Name, s.URL, keyStatus)
+	}
+}
+
+// keyWidth is the column width for the "key = value" lines. Wide enough for
+// the longest current path ("indexing.streaming_idle_timeout_sec" = 35 chars)
+// plus one space of slack so future additions don't force a re-tune.
+const keyWidth = 36
+
+func renderScalarLeaf(w io.Writer, l schema.LeafField) {
+	if l.Sensitive() {
+		// Tag-gated branch: we only inspect *whether* the value is empty via
+		// reflect.Value.IsZero(), and the resulting string is named
+		// `keyStatus` to satisfy CodeQL's sensitive-name heuristic. The
+		// underlying value never lands in a named *Key/*Secret variable.
+		keyStatus := "(not set)"
+		if !l.Value.IsZero() {
+			keyStatus = "(set)"
+		}
+		fmt.Fprintf(w, "%-*s = %s\n", keyWidth, l.Path, keyStatus)
+		return
+	}
+
+	v := l.Value
+	switch v.Kind() {
+	case reflect.Bool:
+		fmt.Fprintf(w, "%-*s = %v\n", keyWidth, l.Path, v.Bool())
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		fmt.Fprintf(w, "%-*s = %d\n", keyWidth, l.Path, v.Int())
+	case reflect.String:
+		fmt.Fprintf(w, "%-*s = %s\n", keyWidth, l.Path, v.String())
+	case reflect.Slice:
+		fmt.Fprintf(w, "%-*s = %v\n", keyWidth, l.Path, v.Interface())
+	default:
+		fmt.Fprintf(w, "%-*s = %v\n", keyWidth, l.Path, v.Interface())
+	}
+}
+
+// topGroup returns the prefix of a dotted key up to the first dot, used to
+// group related keys with blank lines in the show output. "default_server"
+// (no dot) groups under itself.
+func topGroup(path string) string {
+	if i := strings.IndexByte(path, '.'); i > 0 {
+		return path[:i]
+	}
+	return path
+}
+
 func runConfigSet(cmd *cobra.Command, args []string) error {
 	key := args[0]
 	value := args[1]
@@ -103,42 +216,119 @@ func runConfigSet(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("load config: %w", err)
 	}
 
-	// Set value based on key
-	switch key {
-	case "api.url":
-		cfg.API.URL = value
-	case "api.key":
-		cfg.API.Key = value
-	case "watcher.debounce_ms":
-		var ms int
-		_, err := fmt.Sscanf(value, "%d", &ms)
+	// Server-management keys persist on their own (they may create entries and
+	// reassign the default), so handle them before the schema setter — those
+	// side effects don't fit the "parse and assign one field" model.
+	switch {
+	case key == "default_server":
+		if err := config.SetDefaultServer(value); err != nil {
+			return err
+		}
+		fmt.Printf("✓ Set %s = %s\n", key, value)
+		return nil
+	case key == "api.url":
+		// Legacy alias: operate on the default server.
+		name := defaultServerName(cfg)
+		if err := config.SetServerURL(name, value); err != nil {
+			return err
+		}
+		fmt.Printf("✓ Set %s = %s (server %q)\n", key, value, name)
+		return nil
+	case key == "api.key":
+		name := defaultServerName(cfg)
+		if err := config.SetServerKey(name, value); err != nil {
+			return err
+		}
+		fmt.Printf("✓ Set %s (server %q)\n", key, name)
+		return nil
+	case strings.HasPrefix(key, "server."):
+		name, field, perr := parseServerKey(key)
+		if perr != nil {
+			return perr
+		}
+		switch field {
+		case "url":
+			if err := config.SetServerURL(name, value); err != nil {
+				return err
+			}
+		case "key":
+			if err := config.SetServerKey(name, value); err != nil {
+				return err
+			}
+		}
+		fmt.Printf("✓ Set %s\n", key)
+		return nil
+	}
+
+	// Schema-driven setter: handles every remaining leaf annotated with a
+	// `key:` tag (watcher.*, indexing.*); default_server and server.* keys
+	// were already handled by the switch above. Validates and persists
+	// internally. Unknown keys surface a clear error — the legacy hand-rolled
+	// switch was removed in step 10 of the config refactor.
+	if err := config.SetByPath(key, value); err != nil {
+		if errors.Is(err, config.ErrUnknownKey) {
+			return fmt.Errorf("unknown config key %q (run 'cix config keys' for the full list)", key)
+		}
+		return err
+	}
+	fmt.Printf("✓ Set %s = %s\n", key, value)
+	return nil
+}
+
+func runConfigUnset(cmd *cobra.Command, args []string) error {
+	key := args[0]
+
+	if !strings.HasPrefix(key, "server.") {
+		return fmt.Errorf("unknown unset key: %s (supported: server.<name>, server.<name>.key)", key)
+	}
+
+	rest := strings.TrimPrefix(key, "server.")
+	switch {
+	case strings.HasSuffix(rest, ".key"):
+		name := strings.TrimSuffix(rest, ".key")
+		if name == "" || strings.Contains(name, ".") {
+			return fmt.Errorf("invalid server key: %s", key)
+		}
+		if err := config.SetServerKey(name, ""); err != nil {
+			return err
+		}
+		fmt.Printf("✓ Cleared key for server %q\n", name)
+		return nil
+	case !strings.Contains(rest, "."):
+		// `server.<name>` — remove the whole server.
+		reassigned, err := config.RemoveServer(rest)
 		if err != nil {
-			return fmt.Errorf("invalid value for debounce_ms: %s", value)
-		}
-		cfg.Watcher.DebounceMS = ms
-	case "watcher.sync_interval_mins":
-		var mins int
-		_, err := fmt.Sscanf(value, "%d", &mins)
-		if err != nil || mins < 1 {
-			return fmt.Errorf("invalid value for sync_interval_mins (must be >= 1): %s", value)
-		}
-		cfg.Watcher.SyncIntervalMins = mins
-	case "indexing.batch_size":
-		var bs int
-		_, err := fmt.Sscanf(value, "%d", &bs)
-		if err != nil || bs < 1 {
-			return fmt.Errorf("invalid value for batch_size (must be >= 1): %s", value)
-		}
-		cfg.Indexing.BatchSize = bs
+			return err
+		}
+		fmt.Printf("✓ Removed server %q\n", rest)
+		if reassigned != "" {
+			fmt.Printf("  default server is now %q\n", reassigned)
+		}
+		return nil
 	default:
-		return fmt.Errorf("unknown config key: %s", key)
+		return fmt.Errorf("unknown unset key: %s (supported: server.<name>, server.<name>.key)", key)
 	}
+}
 
-	// Save config
-	if err := config.Save(cfg); err != nil {
-		return fmt.Errorf("save config: %w", err)
+// defaultServerName returns the name of the default server for legacy api.*
+// aliases, falling back to the canonical "default" name when none is set.
+func defaultServerName(cfg *config.Config) string {
+	if s, ok := cfg.DefaultServerEntry(); ok {
+		return s.Name
 	}
+	return config.DefaultServerName
+}
 
-	fmt.Printf("✓ Set %s = %s\n", key, value)
-	return nil
+// parseServerKey splits a `server.<name>.<field>` config key into its name and
+// field (url|key), validating the shape.
+func parseServerKey(key string) (name, field string, err error) {
+	parts := strings.SplitN(key, ".", 3)
+	if len(parts) != 3 || parts[0] != "server" || parts[1] == "" {
+		return "", "", fmt.Errorf("invalid server key %q (expected server.<name>.url or server.<name>.key)", key)
+	}
+	name, field = parts[1], parts[2]
+	if field != "url" && field != "key" {
+		return "", "", fmt.Errorf("invalid server field %q in %q (expected url or key)", field, key)
+	}
+	return name, field, nil
 }
diff --git a/cli/cmd/config_edit.go b/cli/cmd/config_edit.go
new file mode 100644
index 0000000..668a8b5
--- /dev/null
+++ b/cli/cmd/config_edit.go
@@ -0,0 +1,61 @@
+package cmd
+
+import (
+	"fmt"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+	"github.com/anthropics/code-index/cli/internal/config/tui"
+	"github.com/spf13/cobra"
+)
+
+var configEditCmd = &cobra.Command{
+	Use:   "edit",
+	Short: "Interactively edit configuration (TUI)",
+	Long: `Open the full-screen lazygit-style editor for ~/.cix/config.yaml.
+
+Layout: section list on the left (Servers / Watcher / Indexing /
+Projects / Misc), selected section's content on the right, persistent
+key-hint bar at the bottom.
+
+Keys (press ? for the full table):
+  ↑/k ↓/j        move within a panel
+  ←/h →/l / tab  switch panel
+  enter          edit selected field
+  space / x      toggle bool field
+  a / d          add / delete server (Servers section)
+  m              mark selected server as default
+  t              test connection (server)
+  q / esc        quit
+
+Every edit goes through the same validation as 'cix config set' and is
+written to disk immediately.`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := config.Load()
+		if err != nil {
+			return fmt.Errorf("load config: %w", err)
+		}
+		return tui.RunEdit(cfg)
+	},
+}
+
+var configInitCmd = &cobra.Command{
+	Use:   "init",
+	Short: "First-run wizard (TUI)",
+	Long: `Seed a fresh ~/.cix/config.yaml with the localhost default server
+and open the interactive editor pointing at it.
+
+If a configuration already exists this is equivalent to 'cix config edit'
+— no overwrite; the existing servers, settings, and projects are kept.`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := config.Load()
+		if err != nil {
+			return fmt.Errorf("load config: %w", err)
+		}
+		return tui.RunInit(cfg)
+	},
+}
+
+func init() {
+	configCmd.AddCommand(configEditCmd)
+	configCmd.AddCommand(configInitCmd)
+}
diff --git a/cli/cmd/config_keys.go b/cli/cmd/config_keys.go
new file mode 100644
index 0000000..8ab63f0
--- /dev/null
+++ b/cli/cmd/config_keys.go
@@ -0,0 +1,83 @@
+package cmd
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"text/tabwriter"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+	"github.com/anthropics/code-index/cli/internal/config/schema"
+	"github.com/spf13/cobra"
+)
+
+var configKeysCmd = &cobra.Command{
+	Use:   "keys",
+	Short: "List every settable configuration key",
+	Long: `Print every configuration key the CLI knows about, with its
+current value, default, env-var override (if any), and a short description.
+
+The list is reflection-driven — any new schema-tagged field shows up here
+automatically. Slice keys managed via dedicated commands (servers,
+projects) are not listed; use 'cix config show' to view them.`,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		cfg, err := config.Load()
+		if err != nil {
+			return fmt.Errorf("load config: %w", err)
+		}
+		return renderConfigKeys(os.Stdout, cfg)
+	},
+}
+
+func init() {
+	configCmd.AddCommand(configKeysCmd)
+}
+
+func renderConfigKeys(w io.Writer, cfg *config.Config) error {
+	tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0)
+	if _, err := fmt.Fprintln(tw, "KEY\tVALUE\tDEFAULT\tENV\tDESCRIPTION"); err != nil {
+		return err
+	}
+
+	if err := schema.Walk(cfg, func(l schema.LeafField) {
+		// Skip slice-of-struct leaves (servers, projects); they have
+		// purpose-built management commands and don't render meaningfully
+		// in a single tab-separated row.
+		if l.Value.Kind() == reflect.Slice && l.Value.Type().Elem().Kind() == reflect.Struct {
+			return
+		}
+		fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\n",
+			l.Path,
+			formatLeafCurrent(l),
+			dashIfEmpty(l.Tag("default")),
+			dashIfEmpty(l.Tag("env")),
+			l.Tag("desc"),
+		)
+	}); err != nil {
+		return err
+	}
+	return tw.Flush()
+}
+
+// formatLeafCurrent returns the leaf's current value as a display string.
+// Sensitive leaves render only "(set)"/"(not set)" — same gate as
+// renderScalarLeaf in `cix config show`.
+func formatLeafCurrent(l schema.LeafField) string {
+	if l.Sensitive() {
+		// Tag-gated: do not bind the value to a named *Key/*Secret var.
+		// reflect.Value.IsZero() inspects through reflection only.
+		if l.Value.IsZero() {
+			return "(not set)"
+		}
+		return "(set)"
+	}
+	return fmt.Sprintf("%v", l.Value.Interface())
+}
+
+func dashIfEmpty(s string) string {
+	if s == "" {
+		return "—"
+	}
+	return s
+}
diff --git a/cli/cmd/config_keys_test.go b/cli/cmd/config_keys_test.go
new file mode 100644
index 0000000..a93b15f
--- /dev/null
+++ b/cli/cmd/config_keys_test.go
@@ -0,0 +1,73 @@
+package cmd
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+func TestRenderConfigKeys_Snapshot(t *testing.T) {
+	cfg := &config.Config{
+		Servers: []config.ServerEntry{
+			{Name: "default", URL: "http://localhost:21847", Key: "cix_secret_xyz"},
+		},
+		DefaultServer: "default",
+		Watcher: config.WatcherConfig{
+			Enabled:          true,
+			DebounceMS:       5000,
+			ExcludePatterns:  []string{"node_modules"},
+			SyncIntervalMins: 5,
+		},
+		Indexing: config.IndexingConfig{BatchSize: 20, StreamingIdleTimeoutSec: 30},
+	}
+
+	var buf bytes.Buffer
+	if err := renderConfigKeys(&buf, cfg); err != nil {
+		t.Fatalf("renderConfigKeys: %v", err)
+	}
+
+	got := buf.String()
+
+	// Header must come first.
+	if !strings.HasPrefix(got, "KEY") {
+		t.Errorf("output should start with header row, got %q", firstLine(got))
+	}
+
+	// Every settable scalar must appear.
+	mustContain := []string{
+		"default_server",
+		"watcher.enabled",
+		"watcher.debounce_ms",
+		"watcher.exclude",
+		"watcher.sync_interval_mins",
+		"indexing.batch_size",
+		"indexing.streaming_idle_timeout_sec",
+		"CIX_SERVER", // env tag on default_server
+	}
+	for _, want := range mustContain {
+		if !strings.Contains(got, want) {
+			t.Errorf("output missing %q\nfull output:\n%s", want, got)
+		}
+	}
+
+	// Slice-of-struct leaves are skipped from the listing.
+	for _, skip := range []string{"\nservers ", "\nprojects "} {
+		if strings.Contains(got, skip) {
+			t.Errorf("output should not list slice-of-struct leaf row %q", skip)
+		}
+	}
+
+	// Sensitive value MUST NOT leak.
+	if strings.Contains(got, "cix_secret_xyz") {
+		t.Errorf("sensitive key value leaked into output")
+	}
+}
+
+func firstLine(s string) string {
+	if i := strings.IndexByte(s, '\n'); i >= 0 {
+		return s[:i]
+	}
+	return s
+}
diff --git a/cli/cmd/config_show_test.go b/cli/cmd/config_show_test.go
new file mode 100644
index 0000000..5707620
--- /dev/null
+++ b/cli/cmd/config_show_test.go
@@ -0,0 +1,97 @@
+package cmd
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// TestRenderConfigShow_Snapshot pins the human-readable layout of `cix
+// config show`. The expected output is the contract — if a future field
+// addition shifts the format, this test forces an intentional update
+// instead of silent drift.
+//
+// CodeQL note: the API key value below ("cix_secret123") never appears in
+// the expected output — it MUST render as "(set)" because ServerEntry.Key
+// is sensitive. Verifying that absence is the whole point of this test.
+func TestRenderConfigShow_Snapshot(t *testing.T) {
+	cfg := &config.Config{
+		Servers: []config.ServerEntry{
+			{Name: "default", URL: "http://localhost:21847", Key: "cix_secret123"},
+			{Name: "corporate", URL: "https://cix.corp.internal", Key: ""},
+		},
+		DefaultServer: "default",
+		Watcher: config.WatcherConfig{
+			Enabled:          true,
+			DebounceMS:       5000,
+			ExcludePatterns:  []string{"node_modules", ".git"},
+			SyncIntervalMins: 5,
+		},
+		Indexing: config.IndexingConfig{
+			BatchSize:               20,
+			StreamingIdleTimeoutSec: 30,
+		},
+		Projects: []config.ProjectEntry{
+			{Path: "/home/u/proj", AutoWatch: true},
+		},
+	}
+
+	var buf bytes.Buffer
+	if err := renderConfigShow(&buf, cfg, "/home/u/.cix/config.yaml"); err != nil {
+		t.Fatalf("renderConfigShow: %v", err)
+	}
+
+	want := `servers (2):
+* default          url=http://localhost:21847 key=(set)
+  corporate        url=https://cix.corp.internal key=(not set)
+default_server                       = default
+
+watcher.enabled                      = true
+watcher.debounce_ms                  = 5000
+watcher.exclude                      = [node_modules .git]
+watcher.sync_interval_mins           = 5
+
+indexing.batch_size                  = 20
+indexing.streaming_idle_timeout_sec  = 30
+
+projects (1):
+  - /home/u/proj (auto-watch: true)
+
+config file: /home/u/.cix/config.yaml
+`
+
+	got := buf.String()
+	if got != want {
+		t.Errorf("renderConfigShow output mismatch\n--- want ---\n%s\n--- got ----\n%s", want, got)
+	}
+
+	// Explicit safety belt: the sensitive value MUST NOT leak even via
+	// substring (no length disclosure, no prefix, no mask).
+	if strings.Contains(got, "cix_secret123") {
+		t.Errorf("sensitive value leaked into output")
+	}
+	if strings.Contains(got, "secret") {
+		t.Errorf("substring of sensitive value leaked into output")
+	}
+}
+
+func TestRenderConfigShow_EmptyProjects(t *testing.T) {
+	cfg := &config.Config{
+		Servers: []config.ServerEntry{
+			{Name: "default", URL: "http://localhost:21847"},
+		},
+		DefaultServer: "default",
+		Watcher:       config.WatcherConfig{},
+		Indexing:      config.IndexingConfig{},
+	}
+	var buf bytes.Buffer
+	if err := renderConfigShow(&buf, cfg, "/tmp/cfg.yaml"); err != nil {
+		t.Fatalf("renderConfigShow: %v", err)
+	}
+	out := buf.String()
+	if strings.Contains(out, "projects (") {
+		t.Errorf("projects block rendered for empty list:\n%s", out)
+	}
+}
diff --git a/cli/cmd/multiserver_test.go b/cli/cmd/multiserver_test.go
new file mode 100644
index 0000000..02cd626
--- /dev/null
+++ b/cli/cmd/multiserver_test.go
@@ -0,0 +1,297 @@
+package cmd
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// isolateConfig points config.Load() at a throwaway HOME and resets the
+// singleton before and after the test. CIX_* env vars are set to "" so a
+// developer with these set in their shell does not get spurious test
+// failures — tests that need env overrides set them explicitly.
+func isolateConfig(t *testing.T) {
+	t.Helper()
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("XDG_CONFIG_HOME", "")
+	t.Setenv("CIX_SERVER", "")
+	t.Setenv("CIX_API_URL", "")
+	t.Setenv("CIX_API_KEY", "")
+	config.ResetForTesting()
+	t.Cleanup(config.ResetForTesting)
+}
+
+// withFlags temporarily sets the global --server/--api-url/--api-key vars and
+// restores them on cleanup.
+func withFlags(t *testing.T, server, url, key string) {
+	t.Helper()
+	ps, pu, pk := serverName, apiURL, apiKey
+	serverName, apiURL, apiKey = server, url, key
+	t.Cleanup(func() { serverName, apiURL, apiKey = ps, pu, pk })
+}
+
+func TestGetClient_ServerFlagSelectsServer(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerURL("corp", "https://corp.example"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey("corp", "corp-key"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "corp", "", "")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "corp.example") {
+		t.Errorf("BaseURL = %q, want corp.example", c.BaseURL())
+	}
+}
+
+func TestGetClient_DefaultServerWhenNoFlag(t *testing.T) {
+	isolateConfig(t)
+	// Seed the default server with a key so resolution succeeds.
+	if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey(config.DefaultServerName, "dk"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "", "", "")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "localhost:21847") {
+		t.Errorf("BaseURL = %q, want default localhost", c.BaseURL())
+	}
+}
+
+func TestGetClient_UnknownServerErrors(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerKey(config.DefaultServerName, "dk"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "ghost", "", "")
+
+	_, err := getClient()
+	if err == nil {
+		t.Fatal("expected error for unknown --server")
+	}
+	if !strings.Contains(err.Error(), "ghost") {
+		t.Errorf("error %q should mention the unknown server", err.Error())
+	}
+}
+
+func TestGetClient_ServerKeyMissingError(t *testing.T) {
+	isolateConfig(t)
+	// corp has a URL but no key, and no --api-key override.
+	if err := config.SetServerURL("corp", "https://corp.example"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "corp", "", "")
+
+	_, err := getClient()
+	if err == nil {
+		t.Fatal("expected missing-key error")
+	}
+	if !strings.Contains(err.Error(), "corp") || !strings.Contains(err.Error(), "API key") {
+		t.Errorf("error %q should name the server and mention API key", err.Error())
+	}
+}
+
+func TestRunConfigSet_ServerKeys(t *testing.T) {
+	isolateConfig(t)
+
+	mustSet(t, "server.corp.url", "https://corp")
+	mustSet(t, "server.corp.key", "ck")
+
+	cfg, _ := config.Load()
+	s, ok := cfg.GetServer("corp")
+	if !ok || s.URL != "https://corp" || s.Key != "ck" {
+		t.Fatalf("corp server = %+v, ok=%v", s, ok)
+	}
+}
+
+func TestRunConfigSet_DefaultServer(t *testing.T) {
+	isolateConfig(t)
+	mustSet(t, "server.corp.url", "https://corp")
+	mustSet(t, "default_server", "corp")
+
+	cfg, _ := config.Load()
+	if cfg.DefaultServer != "corp" {
+		t.Errorf("DefaultServer = %q, want corp", cfg.DefaultServer)
+	}
+
+	// Unknown default is rejected.
+	if _, err := captureOutput(func() error {
+		return runConfigSet(nil, []string{"default_server", "ghost"})
+	}); err == nil {
+		t.Error("expected error setting default_server to unknown alias")
+	}
+}
+
+func TestRunConfigSet_ApiAliasMapsToDefault(t *testing.T) {
+	isolateConfig(t)
+	mustSet(t, "api.url", "http://aliased:1234")
+	mustSet(t, "api.key", "ak")
+
+	cfg, _ := config.Load()
+	s, ok := cfg.DefaultServerEntry()
+	if !ok || s.URL != "http://aliased:1234" || s.Key != "ak" {
+		t.Fatalf("default server = %+v, ok=%v", s, ok)
+	}
+	// The legacy api block must remain empty (migrated/never persisted).
+	if cfg.API.URL != "" || cfg.API.Key != "" {
+		t.Errorf("API block = %+v, want empty", cfg.API)
+	}
+}
+
+func TestRunConfigUnset(t *testing.T) {
+	isolateConfig(t)
+	mustSet(t, "server.corp.url", "https://corp")
+	mustSet(t, "server.corp.key", "ck")
+
+	// Clear just the key.
+	if _, err := captureOutput(func() error {
+		return runConfigUnset(nil, []string{"server.corp.key"})
+	}); err != nil {
+		t.Fatalf("unset key: %v", err)
+	}
+	cfg, _ := config.Load()
+	if s, ok := cfg.GetServer("corp"); !ok || s.Key != "" || s.URL != "https://corp" {
+		t.Fatalf("after key unset corp = %+v, ok=%v", s, ok)
+	}
+
+	// Remove the whole server.
+	if _, err := captureOutput(func() error {
+		return runConfigUnset(nil, []string{"server.corp"})
+	}); err != nil {
+		t.Fatalf("unset server: %v", err)
+	}
+	cfg, _ = config.Load()
+	if _, ok := cfg.GetServer("corp"); ok {
+		t.Error("corp server should have been removed")
+	}
+
+	// Unknown key shape errors.
+	if _, err := captureOutput(func() error {
+		return runConfigUnset(nil, []string{"watcher.debounce_ms"})
+	}); err == nil {
+		t.Error("expected error for unsupported unset key")
+	}
+}
+
+// --- Env-override tests (step 8) -------------------------------------------
+
+func TestGetClient_EnvServerSelectsAlias(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerURL("corp", "https://corp.example"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey("corp", "corp-key"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "", "", "")
+	t.Setenv("CIX_SERVER", "corp")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "corp.example") {
+		t.Errorf("BaseURL = %q, want corp.example (selected via CIX_SERVER)", c.BaseURL())
+	}
+}
+
+func TestGetClient_FlagBeatsCixServerEnv(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerURL("alpha", "https://alpha"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey("alpha", "ak"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerURL("beta", "https://beta"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey("beta", "bk"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "alpha", "", "")
+	t.Setenv("CIX_SERVER", "beta")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "alpha") {
+		t.Errorf("BaseURL = %q, want alpha (--server overrides CIX_SERVER)", c.BaseURL())
+	}
+}
+
+func TestGetClient_EnvAPIURLOverridesResolvedURL(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey(config.DefaultServerName, "k"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "", "", "")
+	t.Setenv("CIX_API_URL", "http://env-url:9999")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "env-url:9999") {
+		t.Errorf("BaseURL = %q, want env-url:9999", c.BaseURL())
+	}
+}
+
+func TestGetClient_FlagBeatsCixAPIURLEnv(t *testing.T) {
+	isolateConfig(t)
+	if err := config.SetServerURL(config.DefaultServerName, "http://file-url"); err != nil {
+		t.Fatal(err)
+	}
+	if err := config.SetServerKey(config.DefaultServerName, "k"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "", "http://flag-url:1234", "")
+	t.Setenv("CIX_API_URL", "http://env-url:9999")
+
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+	if !strings.Contains(c.BaseURL(), "flag-url:1234") {
+		t.Errorf("BaseURL = %q, want flag-url:1234 (--api-url > env)", c.BaseURL())
+	}
+}
+
+func TestGetClient_EnvAPIKeyFillsMissingKey(t *testing.T) {
+	isolateConfig(t)
+	// File has URL but no key — env supplies it.
+	if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil {
+		t.Fatal(err)
+	}
+	withFlags(t, "", "", "")
+	t.Setenv("CIX_API_KEY", "env-key-secret")
+
+	if _, err := getClient(); err != nil {
+		t.Fatalf("getClient should succeed with CIX_API_KEY: %v", err)
+	}
+}
+
+func mustSet(t *testing.T, key, value string) {
+	t.Helper()
+	if _, err := captureOutput(func() error {
+		return runConfigSet(nil, []string{key, value})
+	}); err != nil {
+		t.Fatalf("config set %s %s: %v", key, value, err)
+	}
+}
diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index c6c051e..08ae2b3 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -39,9 +39,10 @@ func printBanner() {
 }
 
 var (
-	cfgFile string
-	apiURL  string
-	apiKey  string
+	cfgFile    string
+	apiURL     string
+	apiKey     string
+	serverName string
 )
 
 // rootCmd represents the base command
@@ -71,8 +72,9 @@ func Execute() {
 }
 
 func init() {
-	rootCmd.PersistentFlags().StringVar(&apiURL, "api-url", "", "API server URL (default from config)")
-	rootCmd.PersistentFlags().StringVar(&apiKey, "api-key", "", "API key (default from config)")
+	rootCmd.PersistentFlags().StringVar(&serverName, "server", "", "named server alias from config (default: the configured default server)")
+	rootCmd.PersistentFlags().StringVar(&apiURL, "api-url", "", "API server URL (overrides the selected server's URL)")
+	rootCmd.PersistentFlags().StringVar(&apiKey, "api-key", "", "API key (overrides the selected server's key)")
 }
 
 // resolveProjectByName performs an exact-match lookup of name against the
@@ -130,24 +132,64 @@ func findProjectRoot(candidatePath string, apiClient *client.Client) string {
 	return candidatePath
 }
 
-// getClient creates an API client from config or flags
+// Env-var names recognised by the CLI for server selection / overrides.
+// Precedence is always flag > env > config-file > default. The CIX_*
+// surface is deliberately tiny — three vars, all about reaching a server.
+// Everything else lives in ~/.cix/config.yaml.
+const (
+	envServer = "CIX_SERVER"  // server alias, read when --server is empty
+	envAPIURL = "CIX_API_URL" // URL override, read when --api-url is empty
+	envAPIKey = "CIX_API_KEY" // key override, read when --api-key is empty
+)
+
+// getClient creates an API client from config / flags / env.
+//
+// Precedence per axis:
+//   - target server alias:  --server > CIX_SERVER > default_server
+//   - server URL override:  --api-url > CIX_API_URL > the resolved server's URL
+//   - server key override:  --api-key > CIX_API_KEY > the resolved server's key
+//
+// The env vars override the *resolved* server's URL/key locally — they
+// never mutate the in-memory ServerEntry, so a follow-up config.Save() will
+// not persist them. This matches the flag behavior and is what users in CI
+// expect: `CIX_API_KEY=secret cix search …` must not write the secret back
+// to ~/.cix/config.yaml.
 func getClient() (*client.Client, error) {
 	cfg, err := config.Load()
 	if err != nil {
 		return nil, fmt.Errorf("load config: %w", err)
 	}
 
+	// Server alias: flag > env > default. Read env only when the flag is
+	// empty; the flag is the authoritative override.
+	name := serverName
+	if name == "" {
+		name = os.Getenv(envServer)
+	}
+	srv, err := cfg.ResolveServer(name)
+	if err != nil {
+		return nil, err
+	}
+
+	// URL override: flag > env > entry.
 	url := apiURL
 	if url == "" {
-		url = cfg.API.URL
+		url = os.Getenv(envAPIURL)
+	}
+	if url == "" {
+		url = srv.URL
 	}
 
+	// Key override: flag > env > entry. Local copy — never write back.
 	key := apiKey
 	if key == "" {
-		key = cfg.API.Key
-		if key == "" {
-			return nil, fmt.Errorf("API key not set. Use --api-key flag or run 'cix config set api.key <key>'")
-		}
+		key = os.Getenv(envAPIKey)
+	}
+	if key == "" {
+		key = srv.Key
+	}
+	if key == "" {
+		return nil, fmt.Errorf("API key not set for server %q. Use --api-key flag, set %s=…, or run 'cix config set server.%s.key <key>'", srv.Name, envAPIKey, srv.Name)
 	}
 
 	c := client.New(url, key)
diff --git a/cli/go.mod b/cli/go.mod
index 2e00cf7..0ffe7b5 100644
--- a/cli/go.mod
+++ b/cli/go.mod
@@ -1,8 +1,16 @@
 module github.com/anthropics/code-index/cli
 
-go 1.23.0
+go 1.25.0
 
 require (
+	github.com/charmbracelet/bubbles v1.0.0
+	github.com/charmbracelet/bubbletea v1.3.10
+	github.com/charmbracelet/lipgloss v1.1.0
+	github.com/go-playground/validator/v10 v10.30.3
+	github.com/knadh/koanf/parsers/yaml v1.1.0
+	github.com/knadh/koanf/providers/confmap v1.0.0
+	github.com/knadh/koanf/providers/rawbytes v1.0.0
+	github.com/knadh/koanf/v2 v2.3.5
 	github.com/rjeczalik/notify v0.9.3
 	github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
 	github.com/spf13/cobra v1.8.0
@@ -10,10 +18,39 @@ require (
 )
 
 require (
+	github.com/atotto/clipboard v0.1.4 // indirect
+	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
+	github.com/charmbracelet/colorprofile v0.4.1 // indirect
+	github.com/charmbracelet/x/ansi v0.11.6 // indirect
+	github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
+	github.com/charmbracelet/x/term v0.2.2 // indirect
+	github.com/clipperhouse/displaywidth v0.9.0 // indirect
+	github.com/clipperhouse/stringish v0.1.1 // indirect
+	github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
+	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
+	github.com/gabriel-vasile/mimetype v1.4.13 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/knadh/koanf/maps v0.1.2 // indirect
 	github.com/kr/pretty v0.3.1 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/mattn/go-localereader v0.0.1 // indirect
+	github.com/mattn/go-runewidth v0.0.19 // indirect
+	github.com/mitchellh/copystructure v1.2.0 // indirect
+	github.com/mitchellh/reflectwalk v1.0.2 // indirect
+	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
+	github.com/muesli/cancelreader v0.2.2 // indirect
+	github.com/muesli/termenv v0.16.0 // indirect
+	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/spf13/pflag v1.0.10 // indirect
 	github.com/stretchr/testify v1.11.1 // indirect
-	golang.org/x/sys v0.29.0 // indirect
-	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
+	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
+	go.yaml.in/yaml/v3 v3.0.3 // indirect
+	golang.org/x/crypto v0.52.0 // indirect
+	golang.org/x/sys v0.45.0 // indirect
+	golang.org/x/text v0.37.0 // indirect
 )
diff --git a/cli/go.sum b/cli/go.sum
index f72792f..e12947e 100644
--- a/cli/go.sum
+++ b/cli/go.sum
@@ -1,17 +1,87 @@
+github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
+github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
+github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
+github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
+github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
+github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
+github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
+github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
+github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
+github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
+github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
+github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
+github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
+github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
+github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
+github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
+github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
+github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
+github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
+github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
+github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
+github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
+github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
+github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
 github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
+github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
+github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
+github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.30.3 h1:4MU6YkEwx7GbcPJOZxrtbu+QfF3pJLJuaYTeAH0DYy8=
+github.com/go-playground/validator/v10 v10.30.3/go.mod h1:4Axh7oCNGcoGkqLoE4YWt6n20mcEIsPRlB7vPk3lpyc=
+github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
+github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo=
+github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
+github.com/knadh/koanf/parsers/yaml v1.1.0 h1:3ltfm9ljprAHt4jxgeYLlFPmUaunuCgu1yILuTXRdM4=
+github.com/knadh/koanf/parsers/yaml v1.1.0/go.mod h1:HHmcHXUrp9cOPcuC+2wrr44GTUB0EC+PyfN3HZD9tFg=
+github.com/knadh/koanf/providers/confmap v1.0.0 h1:mHKLJTE7iXEys6deO5p6olAiZdG5zwp8Aebir+/EaRE=
+github.com/knadh/koanf/providers/confmap v1.0.0/go.mod h1:txHYHiI2hAtF0/0sCmcuol4IDcuQbKTybiB1nOcUo1A=
+github.com/knadh/koanf/providers/rawbytes v1.0.0 h1:MrKDh/HksJlKJmaZjgs4r8aVBb/zsJyc/8qaSnzcdNI=
+github.com/knadh/koanf/providers/rawbytes v1.0.0/go.mod h1:KxwYJf1uezTKy6PBtfE+m725NGp4GPVA7XoNTJ/PtLo=
+github.com/knadh/koanf/v2 v2.3.5 h1:2dXJUYaKGm4SGYeoAtBviq9+02JZo/pxQ2ssOd60rJg=
+github.com/knadh/koanf/v2 v2.3.5/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
+github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
+github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
+github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
+github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
+github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
+github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
+github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
+github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
+github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
+github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
+github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
+github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
+github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
+github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
 github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
+github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/rjeczalik/notify v0.9.3 h1:6rJAzHTGKXGj76sbRgDiDcYj/HniypXmSJo1SWakZeY=
 github.com/rjeczalik/notify v0.9.3/go.mod h1:gF3zSOrafR9DQEWSE8TjfI9NkooDxbyT4UgRGKZA0lc=
 github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
@@ -28,9 +98,21 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
+github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
+go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE=
+go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI=
+golang.org/x/crypto v0.52.0 h1:RMs7fP2rXdep0CftQlK8Uf+kibLm7qkCcradZWYz988=
+golang.org/x/crypto v0.52.0/go.mod h1:1QgfPxDqh0T2M/elOJtp9RvuR95kVjir0e6/BvEmGbc=
+golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
+golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
 golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
-golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY=
+golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc=
+golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/cli/internal/config/config.go b/cli/internal/config/config.go
index 1456819..7f6dde2 100644
--- a/cli/internal/config/config.go
+++ b/cli/internal/config/config.go
@@ -5,16 +5,41 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"gopkg.in/yaml.v3"
 )
 
 type Config struct {
-	API      APIConfig      `yaml:"api"`
+	// Servers is the canonical list of named cix servers the CLI can talk to.
+	// One of them is the default (DefaultServer). Commands target the default
+	// unless --server <name> is given.
+	Servers []ServerEntry `yaml:"servers" key:"servers" desc:"Named cix servers; the entry matching default_server is the active one"`
+	// DefaultServer is the name of the server used when --server is absent.
+	// CIX_SERVER env var overrides it when --server is not given.
+	DefaultServer string `yaml:"default_server" key:"default_server" env:"CIX_SERVER" desc:"Alias of the server used when --server is omitted"`
+
+	// API is the legacy single-server config (pre-multi-server). It is read
+	// from old config files and migrated into Servers on Load (see
+	// migrateToServers), then cleared so it is no longer written back.
+	// omitempty keeps it out of freshly-written configs.
+	API APIConfig `yaml:"api,omitempty"`
+
 	Watcher  WatcherConfig  `yaml:"watcher"`
-	Server   ServerConfig   `yaml:"server"`
 	Indexing IndexingConfig `yaml:"indexing"`
-	Projects []ProjectEntry `yaml:"projects"`
+	Projects []ProjectEntry `yaml:"projects" key:"projects" desc:"Registered project paths and their auto-watch flag"`
+}
+
+// ServerEntry is a single named cix server: a friendly alias plus its base
+// URL and API key. The alias is what users pass to --server.
+//
+// CIX_API_URL / CIX_API_KEY do NOT bind to a specific entry — they override
+// the URL/key of the *resolved* server (the one picked by --server or
+// default_server) inside getClient. Hence no `env:` tags here.
+type ServerEntry struct {
+	Name string `yaml:"name" desc:"Server alias"`
+	URL  string `yaml:"url" desc:"Base URL of the cix server" validate:"omitempty,url"`
+	Key  string `yaml:"key" desc:"API key (bearer token)" sensitive:"true"`
 }
 
 type APIConfig struct {
@@ -22,31 +47,34 @@ type APIConfig struct {
 	Key string `yaml:"key"`
 }
 
+// DefaultServerName is the alias assigned to the implicit/migrated server.
+const DefaultServerName = "default"
+
 type WatcherConfig struct {
-	Enabled          bool     `yaml:"enabled"`
-	DebounceMS       int      `yaml:"debounce_ms"`
-	ExcludePatterns  []string `yaml:"exclude"`
-	SyncIntervalMins int      `yaml:"sync_interval_mins"`
+	Enabled          bool     `yaml:"enabled" key:"watcher.enabled" desc:"Run the file watcher" default:"true"`
+	DebounceMS       int      `yaml:"debounce_ms" key:"watcher.debounce_ms" desc:"Debounce delay (ms)" default:"5000" validate:"min=100,max=60000"`
+	ExcludePatterns  []string `yaml:"exclude" key:"watcher.exclude" desc:"Paths/globs to skip (comma-separated; REPLACE semantics on set)" default:"node_modules,.git,.venv,__pycache__,dist,build,.next,.cache,.DS_Store"`
+	SyncIntervalMins int      `yaml:"sync_interval_mins" key:"watcher.sync_interval_mins" desc:"Periodic sync interval (minutes)" default:"5" validate:"min=1"`
 }
 
-type ServerConfig struct {
-	Port     int `yaml:"port"`
-	CacheTTL int `yaml:"cache_ttl"`
-}
+// ServerConfig used to hold port/cache_ttl knobs for an in-process server.
+// The CLI runs no server, so the struct and its fields were removed. Old
+// `server:` blocks in existing config files still load without error —
+// koanf silently drops unknown keys during unmarshal.
 
 type IndexingConfig struct {
-	BatchSize int `yaml:"batchsize"`
+	BatchSize int `yaml:"batch_size" key:"indexing.batch_size" desc:"Indexing batch size" default:"20" validate:"min=1"`
 
 	// StreamingIdleTimeoutSec is the maximum allowed silence on the streaming
 	// /index/files response before the CLI gives up and closes the conn. The
 	// server emits a heartbeat every 10s, so 30s gives the network three
 	// retry windows. Set to 0 to disable the watchdog (not recommended).
-	StreamingIdleTimeoutSec int `yaml:"streaming_idle_timeout_sec"`
+	StreamingIdleTimeoutSec int `yaml:"streaming_idle_timeout_sec" key:"indexing.streaming_idle_timeout_sec" desc:"Streaming /index/files idle timeout (seconds); 0 disables watchdog" default:"30" validate:"min=0"`
 }
 
 type ProjectEntry struct {
-	Path      string `yaml:"path"`
-	AutoWatch bool   `yaml:"auto_watch"`
+	Path      string `yaml:"path" desc:"Absolute path of the project root"`
+	AutoWatch bool   `yaml:"auto_watch" desc:"Start the file watcher automatically for this project"`
 }
 
 var (
@@ -54,31 +82,9 @@ var (
 	configPath   string
 )
 
-// defaults returns a Config populated with default values.
-func defaults() Config {
-	return Config{
-		API: APIConfig{
-			URL: "http://localhost:21847",
-		},
-		Watcher: WatcherConfig{
-			Enabled:    true,
-			DebounceMS: 5000,
-			ExcludePatterns: []string{
-				"node_modules", ".git", ".venv", "__pycache__",
-				"dist", "build", ".next", ".cache", ".DS_Store",
-			},
-			SyncIntervalMins: 5,
-		},
-		Server: ServerConfig{
-			Port:     8080,
-			CacheTTL: 300,
-		},
-		Indexing: IndexingConfig{
-			BatchSize:               20,
-			StreamingIdleTimeoutSec: 30,
-		},
-	}
-}
+// (defaults() removed: the canonical source of default values is now the
+// `default:"…"` struct tag set; loadWithKoanf seeds them via the schema
+// walker. Servers/DefaultServer are populated by migrateToServers as before.)
 
 // normalizeLegacyKeys maps old viper-generated YAML key names to the current
 // yaml struct tag names. Provides backward compatibility for configs created
@@ -89,6 +95,10 @@ func normalizeLegacyKeys(data []byte) []byte {
 		{"excludepatterns:", "exclude:"},
 		{"cachettl:", "cache_ttl:"},
 		{"autowatch:", "auto_watch:"},
+		// `batchsize:` was the viper-mangled emission of the `BatchSize` Go
+		// field. We now use `batch_size:` everywhere (consistent with other
+		// snake_case keys); this mapping keeps old files loading.
+		{"batchsize:", "batch_size:"},
 	} {
 		data = bytes.ReplaceAll(data, []byte(pair[0]), []byte(pair[1]))
 	}
@@ -97,47 +107,71 @@ func normalizeLegacyKeys(data []byte) []byte {
 
 // Load loads configuration from ~/.cix/config.yaml.
 // Fields absent from the file keep their default values.
+//
+// Implementation: delegates the heavy lifting to loadWithKoanf
+// (loader_koanf.go) — defaults come from struct tags, the YAML file is the
+// override layer, and migrateToServers handles the legacy `api:` block and
+// the implicit localhost seeding. Load owns the singleton cache and the
+// "re-save when the loader rewrote the on-disk form" side effect.
 func Load() (*Config, error) {
 	if globalConfig != nil {
 		return globalConfig, nil
 	}
 
-	home, err := os.UserHomeDir()
+	_, path, err := configPaths()
 	if err != nil {
-		return nil, fmt.Errorf("get home dir: %w", err)
+		return nil, err
 	}
+	configPath = path
 
-	configDir := filepath.Join(home, ".cix")
-	configPath = filepath.Join(configDir, "config.yaml")
-
-	if err := os.MkdirAll(configDir, 0755); err != nil {
-		return nil, fmt.Errorf("create config dir: %w", err)
+	cfg, needsResave, err := loadWithKoanf()
+	if err != nil {
+		return nil, err
 	}
 
-	cfg := defaults()
+	globalConfig = cfg
+	if needsResave {
+		_ = Save(cfg)
+	}
+	return globalConfig, nil
+}
 
-	data, err := os.ReadFile(configPath)
-	if err != nil {
-		if os.IsNotExist(err) {
-			globalConfig = &cfg
-			return globalConfig, nil
+// migrateToServers upgrades a parsed config to the multi-server layout in
+// place and reports whether anything changed (so Load can re-save).
+//
+//   - Legacy single-server config (api: only, no servers:) → one server named
+//     "default" carrying the old url/key; api: is cleared so it is no longer
+//     written back.
+//   - servers: present but no default_server → default to the first entry.
+//   - Neither servers: nor api: (e.g. a partial hand-written file) → seed the
+//     implicit localhost default so the CLI is always usable.
+func migrateToServers(cfg *Config) (changed bool) {
+	if len(cfg.Servers) == 0 {
+		url := cfg.API.URL
+		if url == "" {
+			// No api.url (fresh install, or a legacy file that only set
+			// api.key): fall back to the historical localhost default so the
+			// migrated server is usable.
+			url = "http://localhost:21847"
+		}
+		cfg.Servers = []ServerEntry{
+			{Name: DefaultServerName, URL: url, Key: cfg.API.Key},
 		}
-		return nil, fmt.Errorf("read config: %w", err)
+		cfg.DefaultServer = DefaultServerName
+		cfg.API = APIConfig{}
+		return true
 	}
 
-	normalized := normalizeLegacyKeys(data)
-	if err := yaml.Unmarshal(normalized, &cfg); err != nil {
-		return nil, fmt.Errorf("parse config: %w", err)
+	// Servers present. Clear any leftover legacy api block and ensure a default.
+	if cfg.API.URL != "" || cfg.API.Key != "" {
+		cfg.API = APIConfig{}
+		changed = true
 	}
-
-	globalConfig = &cfg
-
-	// If the file used legacy viper-style keys, re-save in the current format.
-	if !bytes.Equal(data, normalized) {
-		_ = Save(&cfg)
+	if cfg.DefaultServer == "" {
+		cfg.DefaultServer = cfg.Servers[0].Name
+		changed = true
 	}
-
-	return globalConfig, nil
+	return changed
 }
 
 // Save writes cfg to disk and updates the in-memory singleton.
@@ -167,6 +201,154 @@ func ResetForTesting() {
 	configPath = ""
 }
 
+// validateServerName checks that a server alias is usable both as a YAML name
+// and in a `server.<name>.url` config key (split on "."). It rejects empty
+// names, dots, and any Unicode whitespace (strings.Fields splits on it).
+func validateServerName(name string) error {
+	if name == "" {
+		return fmt.Errorf("server name must not be empty")
+	}
+	if f := strings.Fields(name); len(f) != 1 || f[0] != name {
+		return fmt.Errorf("server name %q must not contain whitespace", name)
+	}
+	if strings.Contains(name, ".") {
+		return fmt.Errorf("server name %q must not contain '.'", name)
+	}
+	return nil
+}
+
+// GetServer looks up a server by alias; the pointer aliases c.Servers.
+func (c *Config) GetServer(name string) (*ServerEntry, bool) {
+	for idx := 0; idx < len(c.Servers); idx++ {
+		if entry := &c.Servers[idx]; entry.Name == name {
+			return entry, true
+		}
+	}
+	return nil, false
+}
+
+// DefaultServerEntry picks the server named by DefaultServer, or the first
+// server when that name is empty or matches nothing; false if none exist.
+func (c *Config) DefaultServerEntry() (*ServerEntry, bool) {
+	if want := c.DefaultServer; want != "" {
+		if entry, found := c.GetServer(want); found {
+			return entry, true
+		}
+	}
+	if len(c.Servers) == 0 {
+		return nil, false
+	}
+	return &c.Servers[0], true
+}
+
+// ResolveServer picks the server a command should talk to. An empty name
+// selects the default entry; any other name must equal a configured alias.
+// A miss on a named lookup returns an error listing every configured alias.
+func (c *Config) ResolveServer(name string) (*ServerEntry, error) {
+	if name != "" {
+		if entry, found := c.GetServer(name); found {
+			return entry, nil
+		}
+		return nil, fmt.Errorf("server %q not found; configured servers:\n  - %s", name, strings.Join(c.serverNames(), "\n  - "))
+	}
+	if entry, found := c.DefaultServerEntry(); found {
+		return entry, nil
+	}
+	return nil, fmt.Errorf("no servers configured; run 'cix config set api.url <url>' or 'cix config set server.<name>.url <url>'")
+}
+
+// serverNames lists every configured alias, in the order of c.Servers.
+func (c *Config) serverNames() []string {
+	aliases := make([]string, len(c.Servers))
+	for i := range c.Servers {
+		aliases[i] = c.Servers[i].Name
+	}
+	return aliases
+}
+
+// upsertServer applies mut to the server called name, appending a fresh entry
+// first if none exists, and adopts name as the default when none is set.
+func upsertServer(cfg *Config, name string, mut func(*ServerEntry)) {
+	entry, found := cfg.GetServer(name)
+	if !found {
+		cfg.Servers = append(cfg.Servers, ServerEntry{Name: name})
+		entry = &cfg.Servers[len(cfg.Servers)-1]
+	}
+	mut(entry)
+	if cfg.DefaultServer == "" {
+		cfg.DefaultServer = name
+	}
+}
+
+// SetServerURL validates name, then upserts that server's URL and saves.
+func SetServerURL(name, url string) error {
+	if invalid := validateServerName(name); invalid != nil {
+		return invalid
+	}
+	cfg, err := Load()
+	if err == nil {
+		upsertServer(cfg, name, func(entry *ServerEntry) { entry.URL = url })
+		err = Save(cfg)
+	}
+	return err
+}
+
+// SetServerKey validates name, then upserts that server's API key and saves.
+func SetServerKey(name, key string) error {
+	if invalid := validateServerName(name); invalid != nil {
+		return invalid
+	}
+	cfg, err := Load()
+	if err == nil {
+		upsertServer(cfg, name, func(entry *ServerEntry) { entry.Key = key })
+		err = Save(cfg)
+	}
+	return err
+}
+
+// SetDefaultServer makes name the default; the server must already exist.
+func SetDefaultServer(name string) error {
+	cfg, loadErr := Load()
+	if loadErr != nil {
+		return loadErr
+	}
+	if _, known := cfg.GetServer(name); known {
+		cfg.DefaultServer = name
+		return Save(cfg)
+	}
+	return fmt.Errorf("server %q not found; configured servers:\n  - %s", name, strings.Join(cfg.serverNames(), "\n  - "))
+}
+
+// RemoveServer drops the entries named name and saves the config. If that
+// server was the default, the first remaining entry becomes the default and
+// its alias is returned as reassignedTo; with none left the default is
+// cleared, and a later load from disk re-seeds the localhost default.
+func RemoveServer(name string) (reassignedTo string, err error) {
+	cfg, err := Load()
+	if err != nil {
+		return "", err
+	}
+	if _, found := cfg.GetServer(name); !found {
+		return "", fmt.Errorf("server %q not found", name)
+	}
+	wasDefault := cfg.DefaultServer == name
+	remaining := make([]ServerEntry, 0, len(cfg.Servers))
+	for i := range cfg.Servers {
+		if entry := cfg.Servers[i]; entry.Name != name {
+			remaining = append(remaining, entry)
+		}
+	}
+	cfg.Servers = remaining
+	switch {
+	case wasDefault && len(remaining) > 0:
+		reassignedTo = remaining[0].Name
+		cfg.DefaultServer = reassignedTo
+	case wasDefault:
+		cfg.DefaultServer = ""
+	}
+	return reassignedTo, Save(cfg)
+}
+
 // AddProject adds a project to the config.
 func AddProject(path string, autoWatch bool) error {
 	cfg, err := Load()
@@ -238,4 +420,4 @@ func GetPIDFile() (string, error) {
 	}
 
 	return filepath.Join(pidDir, "watcher.pid"), nil
-}
\ No newline at end of file
+}
diff --git a/cli/internal/config/config_test.go b/cli/internal/config/config_test.go
index 24409cf..dc86683 100644
--- a/cli/internal/config/config_test.go
+++ b/cli/internal/config/config_test.go
@@ -3,6 +3,7 @@ package config
 import (
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 )
 
@@ -31,11 +32,18 @@ func TestLoad_Defaults(t *testing.T) {
 		t.Fatalf("Load() error = %v", err)
 	}
 
-	if cfg.API.URL != "http://localhost:21847" {
-		t.Errorf("API.URL = %q, want %q", cfg.API.URL, "http://localhost:21847")
+	// With no config file, the implicit localhost default server is seeded.
+	if len(cfg.Servers) != 1 {
+		t.Fatalf("Servers len = %d, want 1 (seeded default)", len(cfg.Servers))
 	}
-	if cfg.API.Key != "" {
-		t.Errorf("API.Key = %q, want empty", cfg.API.Key)
+	if cfg.DefaultServer != DefaultServerName {
+		t.Errorf("DefaultServer = %q, want %q", cfg.DefaultServer, DefaultServerName)
+	}
+	if cfg.Servers[0].Name != DefaultServerName || cfg.Servers[0].URL != "http://localhost:21847" {
+		t.Errorf("default server = %+v, want {default, localhost:21847, <no key>}", cfg.Servers[0])
+	}
+	if cfg.Servers[0].Key != "" {
+		t.Errorf("default server Key = %q, want empty", cfg.Servers[0].Key)
 	}
 	if !cfg.Watcher.Enabled {
 		t.Error("Watcher.Enabled = false, want true")
@@ -46,12 +54,6 @@ func TestLoad_Defaults(t *testing.T) {
 	if len(cfg.Watcher.ExcludePatterns) == 0 {
 		t.Error("Watcher.ExcludePatterns is empty, want default list")
 	}
-	if cfg.Server.Port != 8080 {
-		t.Errorf("Server.Port = %d, want 8080", cfg.Server.Port)
-	}
-	if cfg.Server.CacheTTL != 300 {
-		t.Errorf("Server.CacheTTL = %d, want 300", cfg.Server.CacheTTL)
-	}
 	if cfg.Indexing.BatchSize != 20 {
 		t.Errorf("Indexing.BatchSize = %d, want 20", cfg.Indexing.BatchSize)
 	}
@@ -90,11 +92,22 @@ indexing:
 		t.Fatalf("Load() error = %v", err)
 	}
 
-	if cfg.API.URL != "http://myserver:9000" {
-		t.Errorf("API.URL = %q, want %q", cfg.API.URL, "http://myserver:9000")
+	// Legacy api: block migrates to a single "default" server.
+	if len(cfg.Servers) != 1 {
+		t.Fatalf("Servers len = %d, want 1 (migrated from api:)", len(cfg.Servers))
+	}
+	if cfg.DefaultServer != DefaultServerName {
+		t.Errorf("DefaultServer = %q, want %q", cfg.DefaultServer, DefaultServerName)
+	}
+	if cfg.Servers[0].URL != "http://myserver:9000" {
+		t.Errorf("default server URL = %q, want %q", cfg.Servers[0].URL, "http://myserver:9000")
+	}
+	if cfg.Servers[0].Key != "secret-key-123" {
+		t.Errorf("default server Key = %q, want %q", cfg.Servers[0].Key, "secret-key-123")
 	}
-	if cfg.API.Key != "secret-key-123" {
-		t.Errorf("API.Key = %q, want %q", cfg.API.Key, "secret-key-123")
+	// The legacy api block must be cleared after migration.
+	if cfg.API.URL != "" || cfg.API.Key != "" {
+		t.Errorf("API block = %+v, want cleared after migration", cfg.API)
 	}
 	if cfg.Watcher.Enabled {
 		t.Error("Watcher.Enabled = true, want false")
@@ -105,12 +118,9 @@ indexing:
 	if cfg.Watcher.SyncIntervalMins != 10 {
 		t.Errorf("Watcher.SyncIntervalMins = %d, want 10", cfg.Watcher.SyncIntervalMins)
 	}
-	if cfg.Server.Port != 3000 {
-		t.Errorf("Server.Port = %d, want 3000", cfg.Server.Port)
-	}
-	if cfg.Server.CacheTTL != 60 {
-		t.Errorf("Server.CacheTTL = %d, want 60", cfg.Server.CacheTTL)
-	}
+	// server.port / server.cache_ttl removed (dead fields, step 13). The
+	// `server:` block in the input file is parsed-then-dropped by koanf —
+	// no assertion needed beyond "this load did not error".
 	if cfg.Indexing.BatchSize != 5 {
 		t.Errorf("Indexing.BatchSize = %d, want 5", cfg.Indexing.BatchSize)
 	}
@@ -139,15 +149,16 @@ api:
 		t.Fatalf("Load() error = %v", err)
 	}
 
-	if cfg.API.Key != "partial-key" {
-		t.Errorf("API.Key = %q, want %q", cfg.API.Key, "partial-key")
+	// api.key-only legacy file migrates to the default server, with the URL
+	// falling back to the historical localhost default.
+	if len(cfg.Servers) != 1 {
+		t.Fatalf("Servers len = %d, want 1", len(cfg.Servers))
 	}
-	// Default must still apply for the URL.
-	if cfg.API.URL != "http://localhost:21847" {
-		t.Errorf("API.URL = %q, want default http://localhost:21847", cfg.API.URL)
+	if cfg.Servers[0].Key != "partial-key" {
+		t.Errorf("default server Key = %q, want %q", cfg.Servers[0].Key, "partial-key")
 	}
-	if cfg.Server.Port != 8080 {
-		t.Errorf("Server.Port = %d, want default 8080", cfg.Server.Port)
+	if cfg.Servers[0].URL != "http://localhost:21847" {
+		t.Errorf("default server URL = %q, want default http://localhost:21847", cfg.Servers[0].URL)
 	}
 	if cfg.Indexing.BatchSize != 20 {
 		t.Errorf("Indexing.BatchSize = %d, want default 20", cfg.Indexing.BatchSize)
@@ -215,20 +226,16 @@ func TestSave_RoundTrip(t *testing.T) {
 	}
 
 	want := &Config{
-		API: APIConfig{
-			URL: "http://saved:8888",
-			Key: "saved-key",
+		Servers: []ServerEntry{
+			{Name: DefaultServerName, URL: "http://saved:8888", Key: "saved-key"},
 		},
+		DefaultServer: DefaultServerName,
 		Watcher: WatcherConfig{
 			Enabled:          false,
 			DebounceMS:       1234,
 			SyncIntervalMins: 15,
 			ExcludePatterns:  []string{".git", "vendor"},
 		},
-		Server: ServerConfig{
-			Port:     4444,
-			CacheTTL: 99,
-		},
 		Indexing: IndexingConfig{
 			BatchSize: 7,
 		},
@@ -246,11 +253,14 @@ func TestSave_RoundTrip(t *testing.T) {
 		t.Fatalf("Load() after Save() error = %v", err)
 	}
 
-	if got.API.URL != want.API.URL {
-		t.Errorf("API.URL = %q, want %q", got.API.URL, want.API.URL)
+	if len(got.Servers) != 1 {
+		t.Fatalf("Servers len = %d, want 1", len(got.Servers))
 	}
-	if got.API.Key != want.API.Key {
-		t.Errorf("API.Key = %q, want %q", got.API.Key, want.API.Key)
+	if got.Servers[0].URL != want.Servers[0].URL {
+		t.Errorf("server URL = %q, want %q", got.Servers[0].URL, want.Servers[0].URL)
+	}
+	if got.Servers[0].Key != want.Servers[0].Key {
+		t.Errorf("server Key = %q, want %q", got.Servers[0].Key, want.Servers[0].Key)
 	}
 	if got.Watcher.Enabled != want.Watcher.Enabled {
 		t.Errorf("Watcher.Enabled = %v, want %v", got.Watcher.Enabled, want.Watcher.Enabled)
@@ -261,9 +271,6 @@ func TestSave_RoundTrip(t *testing.T) {
 	if got.Watcher.SyncIntervalMins != want.Watcher.SyncIntervalMins {
 		t.Errorf("Watcher.SyncIntervalMins = %d, want %d", got.Watcher.SyncIntervalMins, want.Watcher.SyncIntervalMins)
 	}
-	if got.Server.Port != want.Server.Port {
-		t.Errorf("Server.Port = %d, want %d", got.Server.Port, want.Server.Port)
-	}
 	if got.Indexing.BatchSize != want.Indexing.BatchSize {
 		t.Errorf("Indexing.BatchSize = %d, want %d", got.Indexing.BatchSize, want.Indexing.BatchSize)
 	}
@@ -347,12 +354,9 @@ projects:
 	if len(cfg.Watcher.ExcludePatterns) != 2 {
 		t.Errorf("ExcludePatterns len = %d, want 2 (legacy key: excludepatterns)", len(cfg.Watcher.ExcludePatterns))
 	}
-	if cfg.Server.CacheTTL != 120 {
-		t.Errorf("CacheTTL = %d, want 120 (legacy key: cachettl)", cfg.Server.CacheTTL)
-	}
-	if cfg.Server.Port != 9090 {
-		t.Errorf("Port = %d, want 9090", cfg.Server.Port)
-	}
+	// server.port / server.cache_ttl removed (dead fields, step 13). The
+	// legacy `server:` block must still parse without error, but its
+	// values are dropped — koanf silently ignores them on unmarshal.
 	if len(cfg.Projects) != 1 || !cfg.Projects[0].AutoWatch {
 		t.Errorf("Projects[0].AutoWatch = false, want true (legacy key: autowatch)")
 	}
@@ -433,6 +437,190 @@ func TestRemoveProject(t *testing.T) {
 	}
 }
 
+// TestMigrate_ReSavesNewFormat verifies a legacy api: file is rewritten to the
+// servers: layout on disk (api: dropped) the first time it is loaded.
+func TestMigrate_ReSavesNewFormat(t *testing.T) {
+	home := isolateHome(t)
+
+	cfgDir := filepath.Join(home, ".cix")
+	if err := os.MkdirAll(cfgDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	content := "api:\n  url: \"http://legacy:9000\"\n  key: \"legacy-key\"\n"
+	path := filepath.Join(cfgDir, "config.yaml")
+	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := Load(); err != nil {
+		t.Fatalf("Load() error = %v", err)
+	}
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	got := string(raw)
+	if !strings.Contains(got, "servers:") {
+		t.Errorf("re-saved config missing servers::\n%s", got)
+	}
+	if !strings.Contains(got, "default_server: default") {
+		t.Errorf("re-saved config missing default_server:\n%s", got)
+	}
+	if strings.Contains(got, "api:") {
+		t.Errorf("re-saved config should not contain api::\n%s", got)
+	}
+	if !strings.Contains(got, "http://legacy:9000") || !strings.Contains(got, "legacy-key") {
+		t.Errorf("re-saved config lost url/key:\n%s", got)
+	}
+}
+
+func TestResolveServer(t *testing.T) {
+	c := &Config{
+		Servers: []ServerEntry{
+			{Name: "default", URL: "http://local", Key: "k1"},
+			{Name: "corp", URL: "http://corp", Key: "k2"},
+		},
+		DefaultServer: "default",
+	}
+
+	// Empty name → default server.
+	s, err := c.ResolveServer("")
+	if err != nil || s.Name != "default" {
+		t.Errorf("ResolveServer(\"\") = %v, %v; want default", s, err)
+	}
+	// Named alias.
+	s, err = c.ResolveServer("corp")
+	if err != nil || s.URL != "http://corp" {
+		t.Errorf("ResolveServer(corp) = %v, %v; want corp URL", s, err)
+	}
+	// Unknown → error listing available names.
+	_, err = c.ResolveServer("nope")
+	if err == nil {
+		t.Fatal("expected error for unknown server")
+	}
+	for _, want := range []string{"nope", "default", "corp"} {
+		if !strings.Contains(err.Error(), want) {
+			t.Errorf("error %q missing %q", err.Error(), want)
+		}
+	}
+}
+
+// TestResolveServer_DanglingDefault checks that ResolveServer("") falls back
+// to the first server when DefaultServer names an entry that does not exist.
+func TestResolveServer_DanglingDefault(t *testing.T) {
+	cfg := &Config{
+		DefaultServer: "ghost",
+		Servers:       []ServerEntry{{Name: "only", URL: "http://only"}},
+	}
+	got, err := cfg.ResolveServer("")
+	if err != nil || got.Name != "only" {
+		t.Errorf("ResolveServer(\"\") with dangling default = %v, %v; want first server", got, err)
+	}
+}
+
+func TestSetServer_UpsertAndDefault(t *testing.T) {
+	isolateHome(t)
+	if _, err := Load(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Adding a server's URL creates the entry. The seeded localhost default
+	// already exists, so the new one does NOT become default.
+	if err := SetServerURL("corp", "http://corp"); err != nil {
+		t.Fatalf("SetServerURL: %v", err)
+	}
+	if err := SetServerKey("corp", "corp-key"); err != nil {
+		t.Fatalf("SetServerKey: %v", err)
+	}
+
+	cfg, _ := Load()
+	s, ok := cfg.GetServer("corp")
+	if !ok || s.URL != "http://corp" || s.Key != "corp-key" {
+		t.Fatalf("corp server = %+v, ok=%v", s, ok)
+	}
+	if cfg.DefaultServer != DefaultServerName {
+		t.Errorf("DefaultServer = %q, want %q (unchanged)", cfg.DefaultServer, DefaultServerName)
+	}
+
+	// Updating url again must not duplicate the entry.
+	if err := SetServerURL("corp", "http://corp2"); err != nil {
+		t.Fatal(err)
+	}
+	cfg, _ = Load()
+	if n := len(cfg.Servers); n != 2 {
+		t.Errorf("Servers len = %d, want 2 (default + corp)", n)
+	}
+}
+
+func TestSetDefaultServer(t *testing.T) {
+	isolateHome(t)
+	if _, err := Load(); err != nil {
+		t.Fatal(err)
+	}
+	if err := SetServerURL("corp", "http://corp"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Unknown name rejected.
+	if err := SetDefaultServer("ghost"); err == nil {
+		t.Error("expected error setting default to unknown server")
+	}
+	// Known name switches default.
+	if err := SetDefaultServer("corp"); err != nil {
+		t.Fatalf("SetDefaultServer: %v", err)
+	}
+	cfg, _ := Load()
+	if cfg.DefaultServer != "corp" {
+		t.Errorf("DefaultServer = %q, want corp", cfg.DefaultServer)
+	}
+}
+
+func TestRemoveServer_ReassignsDefault(t *testing.T) {
+	isolateHome(t)
+	if _, err := Load(); err != nil {
+		t.Fatal(err)
+	}
+	if err := SetServerURL("corp", "http://corp"); err != nil {
+		t.Fatal(err)
+	}
+	// default is still "default"; remove it → reassign to remaining "corp".
+	reassigned, err := RemoveServer(DefaultServerName)
+	if err != nil {
+		t.Fatalf("RemoveServer: %v", err)
+	}
+	if reassigned != "corp" {
+		t.Errorf("reassignedTo = %q, want corp", reassigned)
+	}
+	cfg, _ := Load()
+	if cfg.DefaultServer != "corp" {
+		t.Errorf("DefaultServer = %q, want corp", cfg.DefaultServer)
+	}
+	if _, ok := cfg.GetServer(DefaultServerName); ok {
+		t.Error("default server should have been removed")
+	}
+
+	// Removing an unknown server errors.
+	if _, err := RemoveServer("ghost"); err == nil {
+		t.Error("expected error removing unknown server")
+	}
+}
+
+func TestValidateServerName(t *testing.T) {
+	isolateHome(t)
+	if _, err := Load(); err != nil {
+		t.Fatal(err)
+	}
+	for _, bad := range []string{"", "has.dot", "has space"} {
+		if err := SetServerURL(bad, "http://x"); err == nil {
+			t.Errorf("SetServerURL(%q) expected validation error", bad)
+		}
+	}
+	if err := SetServerURL("ok_name-1", "http://x"); err != nil {
+		t.Errorf("SetServerURL(valid) unexpected error: %v", err)
+	}
+}
+
 func TestGetConfigPath(t *testing.T) {
 	home := isolateHome(t)
 
@@ -444,4 +632,4 @@ func TestGetConfigPath(t *testing.T) {
 	if got := GetConfigPath(); got != want {
 		t.Errorf("GetConfigPath() = %q, want %q", got, want)
 	}
-}
\ No newline at end of file
+}
diff --git a/cli/internal/config/loader_koanf.go b/cli/internal/config/loader_koanf.go
new file mode 100644
index 0000000..12b6697
--- /dev/null
+++ b/cli/internal/config/loader_koanf.go
@@ -0,0 +1,151 @@
+package config
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strconv"
+	"strings"
+
+	"github.com/knadh/koanf/parsers/yaml"
+	"github.com/knadh/koanf/providers/confmap"
+	"github.com/knadh/koanf/providers/rawbytes"
+	"github.com/knadh/koanf/v2"
+
+	"github.com/anthropics/code-index/cli/internal/config/schema"
+)
+
+// loadWithKoanf is the schema-driven config loader. It builds a *Config
+// from two layers:
+//
+//  1. Defaults taken from `default:"…"` struct tags (see defaultsFromTags).
+//  2. The YAML file at ~/.cix/config.yaml, after normalizeLegacyKeys has
+//     been applied to the raw bytes.
+//
+// After unmarshalling, migrateToServers seeds the implicit localhost server
+// and upgrades a legacy single-server `api:` block to the `servers:` list —
+// the same logic the original loader used.
+//
+// needsResave reports whether the caller should rewrite the on-disk file
+// because loading changed its representation (normalization rewrote keys,
+// or migrateToServers reported a change). It is never true when no file
+// existed: in that case the defaults stay in memory and are not
+// materialized to disk.
+func loadWithKoanf() (cfg *Config, needsResave bool, err error) {
+	dir, file, err := configPaths()
+	if err != nil {
+		return nil, false, err
+	}
+	if mkErr := os.MkdirAll(dir, 0755); mkErr != nil {
+		return nil, false, fmt.Errorf("create config dir: %w", mkErr)
+	}
+
+	k := koanf.New(".")
+
+	// Layer 1: tag-derived defaults (skipped when no field declares one).
+	if tagDefaults := defaultsFromTags(); len(tagDefaults) > 0 {
+		if loadErr := k.Load(confmap.Provider(tagDefaults, "."), nil); loadErr != nil {
+			return nil, false, fmt.Errorf("load defaults: %w", loadErr)
+		}
+	}
+
+	// Layer 2: the YAML file, when there is one. A missing file is not an
+	// error; any other read failure is.
+	fileExists, keysRewritten := false, false
+	raw, readErr := os.ReadFile(file)
+	switch {
+	case readErr == nil:
+		fileExists = true
+		normalized := normalizeLegacyKeys(raw)
+		keysRewritten = !bytes.Equal(raw, normalized)
+		if loadErr := k.Load(rawbytes.Provider(normalized), yaml.Parser()); loadErr != nil {
+			return nil, false, fmt.Errorf("parse config: %w", loadErr)
+		}
+	case !os.IsNotExist(readErr):
+		return nil, false, fmt.Errorf("read config: %w", readErr)
+	}
+
+	var out Config
+	if umErr := k.UnmarshalWithConf("", &out, koanf.UnmarshalConf{Tag: "yaml"}); umErr != nil {
+		return nil, false, fmt.Errorf("unmarshal config: %w", umErr)
+	}
+
+	// Resave only an existing file, and only if normalization or migration
+	// changed what was loaded.
+	migrated := migrateToServers(&out)
+	needsResave = fileExists && (keysRewritten || migrated)
+	return &out, needsResave, nil
+}
+
+// configPaths resolves the per-user config directory (~/.cix) and the config
+// file inside it (config.yaml). It fails only if the home dir is unknown.
+func configPaths() (dir, path string, err error) {
+	homeDir, homeErr := os.UserHomeDir()
+	if homeErr != nil {
+		return "", "", fmt.Errorf("get home dir: %w", homeErr)
+	}
+
+	cixDir := filepath.Join(homeDir, ".cix")
+	return cixDir, filepath.Join(cixDir, "config.yaml"), nil
+}
+
+// defaultsFromTags collects every `default:"…"` tag found on the Config
+// schema into a flat dotted-key → typed-value map that koanf's confmap
+// provider can load.
+//
+// Leaves with no `default:` tag, or whose tag parseDefaultValue rejects,
+// are left out of the map. String slices are written as comma-separated
+// lists (see parseDefaultValue).
+func defaultsFromTags() map[string]any {
+	defaults := map[string]any{}
+	// The error is ignored on purpose: Walk only fails for a nil pointer or
+	// a non-struct argument, and &Config{} is neither.
+	_ = schema.Walk(&Config{}, func(leaf schema.LeafField) {
+		tag := leaf.Tag("default")
+		if tag == "" {
+			return
+		}
+		if parsed, ok := parseDefaultValue(tag, leaf.Field.Type); ok {
+			defaults[leaf.Path] = parsed
+		}
+	})
+	return defaults
+}
+
+// parseDefaultValue converts a `default:"…"` tag string into a typed Go
+// value matching the field's reflect.Type. It returns ok=false for
+// unsupported kinds (e.g. nested structs) and for values that do not parse
+// or do not fit the field's integer width; such fields keep their zero value.
+func parseDefaultValue(raw string, t reflect.Type) (any, bool) {
+	switch t.Kind() {
+	case reflect.Bool:
+		v, err := strconv.ParseBool(raw)
+		if err != nil {
+			return nil, false
+		}
+		return v, true
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		v, err := strconv.ParseInt(raw, 10, t.Bits()) // field width, so overflow is rejected
+		if err != nil {
+			return nil, false
+		}
+		return int(v), true
+	case reflect.String:
+		return raw, true
+	case reflect.Slice:
+		if t.Elem().Kind() != reflect.String {
+			return nil, false
+		}
+		parts := strings.Split(raw, ",")
+		out := make([]string, 0, len(parts))
+		for _, p := range parts {
+			if p = strings.TrimSpace(p); p != "" {
+				out = append(out, p)
+			}
+		}
+		return out, true
+	}
+	return nil, false
+}
diff --git a/cli/internal/config/loader_koanf_test.go b/cli/internal/config/loader_koanf_test.go
new file mode 100644
index 0000000..e19c840
--- /dev/null
+++ b/cli/internal/config/loader_koanf_test.go
@@ -0,0 +1,161 @@
+package config
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"testing"
+)
+
+// TestLoaderParity asserts loadWithKoanf produces an identical *Config to
+// the legacy Load() path for every scenario the legacy loader covers. Once
+// this is green, step 5 of the refactor (flipping Load to call koanf) is
+// purely mechanical.
+//
+// Each scenario runs in a fresh HOME so the legacy loader's "re-save on
+// migration" side effect does not leak into the koanf run, and vice versa.
+func TestLoaderParity(t *testing.T) {
+	scenarios := []struct {
+		name string
+		file string // empty = no file on disk
+	}{
+		{
+			name: "no_file",
+			file: "",
+		},
+		{
+			name: "partial_file_only_debounce",
+			file: "watcher:\n  debounce_ms: 1234\n",
+		},
+		{
+			name: "full_file_new_format",
+			file: `servers:
+  - name: default
+    url: http://localhost:21847
+    key: cix_abc123
+  - name: corporate
+    url: https://cix.corp.internal
+    key: cix_xyz789
+default_server: corporate
+watcher:
+  enabled: false
+  debounce_ms: 2000
+  sync_interval_mins: 10
+  exclude:
+    - vendor
+    - tmp
+indexing:
+  batchsize: 50
+  streaming_idle_timeout_sec: 45
+projects:
+  - path: /home/user/proj
+    auto_watch: true
+`,
+		},
+		{
+			name: "legacy_api_only",
+			file: "api:\n  url: http://legacy.example\n  key: cix_legacy\n",
+		},
+		{
+			name: "legacy_lowercase_viper_keys",
+			file: `watcher:
+  debouncems: 9999
+  excludepatterns:
+    - foo
+    - bar
+  sync_interval_mins: 7
+server:
+  cachettl: 600
+projects:
+  - path: /p
+    autowatch: true
+`,
+		},
+		{
+			name: "servers_no_default",
+			file: `servers:
+  - name: alpha
+    url: http://alpha
+    key: a
+  - name: beta
+    url: http://beta
+    key: b
+`,
+		},
+	}
+
+	for _, sc := range scenarios {
+		t.Run(sc.name, func(t *testing.T) {
+			legacyCfg := runLoaderInTempHome(t, sc.file, false)
+			koanfCfg := runLoaderInTempHome(t, sc.file, true)
+			if !configsEqual(legacyCfg, koanfCfg) {
+				t.Errorf("legacy != koanf\n  legacy: %s\n  koanf:  %s",
+					dumpConfig(legacyCfg), dumpConfig(koanfCfg))
+			}
+		})
+	}
+}
+
+// runLoaderInTempHome points HOME at a fresh temp dir, writes file to
+// .cix/config.yaml there unless it is empty, calls ResetForTesting, and
+// returns what the selected loader (Load or loadWithKoanf) produced.
+func runLoaderInTempHome(t *testing.T, file string, useKoanf bool) *Config {
+	t.Helper()
+	tmpHome := t.TempDir()
+	t.Setenv("HOME", tmpHome)
+
+	cfgDir := filepath.Join(tmpHome, ".cix")
+	if err := os.MkdirAll(cfgDir, 0755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if file != "" {
+		if err := os.WriteFile(filepath.Join(cfgDir, "config.yaml"), []byte(file), 0644); err != nil {
+			t.Fatalf("write config: %v", err)
+		}
+	}
+
+	ResetForTesting()
+
+	if !useKoanf {
+		loaded, err := Load()
+		if err != nil {
+			t.Fatalf("Load: %v", err)
+		}
+		return loaded
+	}
+	loaded, _, err := loadWithKoanf()
+	if err != nil {
+		t.Fatalf("loadWithKoanf: %v", err)
+	}
+	return loaded
+}
+
+// configsEqual reports whether a and b hold the same configuration. Both
+// sides go through normalizeForCompare first, because reflect.DeepEqual
+// treats a nil slice and an empty one as different. Two nil pointers are
+// equal; a nil and a non-nil pointer are not.
+func configsEqual(a, b *Config) bool {
+	if a != nil && b != nil {
+		return reflect.DeepEqual(normalizeForCompare(*a), normalizeForCompare(*b))
+	}
+	return a == b
+}
+
+func normalizeForCompare(c Config) Config {
+	if c.Projects == nil {
+		c.Projects = []ProjectEntry{}
+	}
+	if c.Servers == nil {
+		c.Servers = []ServerEntry{}
+	}
+	if c.Watcher.ExcludePatterns == nil {
+		c.Watcher.ExcludePatterns = []string{}
+	}
+	return c
+}
+
+func dumpConfig(c *Config) string {
+	return fmt.Sprintf("Servers=%+v DefaultServer=%q API=%+v Watcher=%+v Indexing=%+v Projects=%+v",
+		c.Servers, c.DefaultServer, c.API, c.Watcher, c.Indexing, c.Projects)
+}
diff --git a/cli/internal/config/schema/schema.go b/cli/internal/config/schema/schema.go
new file mode 100644
index 0000000..5712c02
--- /dev/null
+++ b/cli/internal/config/schema/schema.go
@@ -0,0 +1,103 @@
+// Package schema provides a tag-driven walker over the Config struct.
+//
+// The walker is the single source of truth for "what fields exist, where do
+// they live, what are their defaults/descriptions/validators". Every config
+// surface that needs that knowledge (show, set, keys, edit, init, defaults
+// seeding) calls Walk and acts on the LeafField it yields, instead of
+// hard-coding a switch.
+package schema
+
+import (
+	"fmt"
+	"reflect"
+)
+
+// LeafField is one annotated leaf in the Config struct tree.
+//
+// Path is the dotted key from the `key:` tag (e.g. "watcher.debounce_ms").
+// Field is the original reflect.StructField, so callers can read the other
+// tags (desc, default, env, validate, sensitive) without re-parsing.
+// Value is the live reflect.Value of the field. It can be read always, but
+// is settable only when Walk was handed a pointer (check Value.CanSet).
+type LeafField struct {
+	Path  string
+	Field reflect.StructField
+	Value reflect.Value
+}
+
+// Tag looks up the struct tag called name on this leaf's field, returning
+// "" when the tag is absent, so callers need not import reflect.
+func (l LeafField) Tag(name string) string {
+	return l.Field.Tag.Get(name)
+}
+
+// Sensitive reports whether this leaf is marked `sensitive:"true"`.
+// Renderers MUST gate on this instead of inspecting the value itself:
+// CodeQL's sensitive-name heuristics flag any read of a *Key/*Secret value
+// into a named variable.
+func (l LeafField) Sensitive() bool {
+	return l.Field.Tag.Get("sensitive") == "true"
+}
+
+// LeafVisitor is the callback Walk invokes once per annotated leaf, in the
+// structural order of the source struct.
+type LeafVisitor func(leaf LeafField)
+
+// Walk visits every field of cfg that carries a `key:` struct tag, in
+// declaration order. cfg must be a struct or a non-nil pointer to one;
+// anything else yields an error and visit is never called.
+//
+// Rules:
+//   - A tagged field is reported as a leaf whatever its Go kind. Slice
+//     fields (Servers, Projects) are a single leaf; callers format them.
+//   - An untagged struct field is recursed into. Containers such as
+//     `Watcher` and `Indexing` carry no key tag themselves — each child
+//     field carries the full dotted key (`watcher.debounce_ms`, …).
+//   - Any other untagged field is skipped. That is how the scalar fields
+//     of the legacy, auto-migrated `API` block stay invisible to
+//     `config show` / `config set` without needing an allow-list.
+//   - Unexported fields are skipped.
+//   - Pass a pointer when the visitor needs to mutate: only then are the
+//     yielded Values settable.
+func Walk(cfg any, visit LeafVisitor) error {
+	root := reflect.ValueOf(cfg)
+	if root.Kind() == reflect.Pointer && root.IsNil() {
+		return fmt.Errorf("schema.Walk: nil pointer")
+	}
+	if root.Kind() == reflect.Pointer {
+		root = root.Elem()
+	}
+	if kind := root.Kind(); kind != reflect.Struct {
+		return fmt.Errorf("schema.Walk: expected struct or *struct, got %s", kind)
+	}
+	walkStruct(root, visit)
+	return nil
+}
+
+// walkStruct visits v's exported fields in declaration order: tagged fields
+// are yielded as leaves, untagged struct fields are recursed into.
+func walkStruct(v reflect.Value, visit LeafVisitor) {
+	for i, n := 0, v.NumField(); i < n; i++ {
+		field := v.Type().Field(i)
+		if !field.IsExported() {
+			continue
+		}
+		value := v.Field(i)
+		switch key := field.Tag.Get("key"); {
+		case key != "":
+			visit(LeafField{Path: key, Field: field, Value: value})
+		case value.Kind() == reflect.Struct:
+			walkStruct(value, visit)
+		}
+	}
+}
+
+// Keys returns the dotted paths of all leaves Walk would yield for cfg, in
+// Walk order. Used by tests and by `cix config keys`. A Walk error is
+// passed through unchanged.
+func Keys(cfg any) ([]string, error) {
+	var paths []string
+	collect := func(leaf LeafField) { paths = append(paths, leaf.Path) }
+	err := Walk(cfg, collect)
+	return paths, err
+}
diff --git a/cli/internal/config/schema/schema_test.go b/cli/internal/config/schema/schema_test.go
new file mode 100644
index 0000000..7f41821
--- /dev/null
+++ b/cli/internal/config/schema/schema_test.go
@@ -0,0 +1,117 @@
+package schema_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+	"github.com/anthropics/code-index/cli/internal/config/schema"
+)
+
+// expectedKeys is the contract: the exact dotted-key set Walk yields over
+// the current Config struct. Any new annotated field MUST update this list.
+// Bare `expectedKeys` (not regex/contains) is intentional — silent drift in
+// the key surface is exactly what this snapshot is here to catch.
+var expectedKeys = []string{
+	"servers",
+	"default_server",
+	"watcher.enabled",
+	"watcher.debounce_ms",
+	"watcher.exclude",
+	"watcher.sync_interval_mins",
+	"indexing.batch_size",
+	"indexing.streaming_idle_timeout_sec",
+	"projects",
+}
+
+func TestKeys_Snapshot(t *testing.T) {
+	got, err := schema.Keys(&config.Config{})
+	if err != nil {
+		t.Fatalf("Keys: %v", err)
+	}
+	if !reflect.DeepEqual(got, expectedKeys) {
+		t.Errorf("key snapshot drift:\nwant: %v\ngot:  %v", expectedKeys, got)
+	}
+}
+
+func TestWalk_YieldsTagMetadata(t *testing.T) {
+	// Spot-check that the LeafField carries enough metadata for downstream
+	// consumers (show, set, keys, TUI) — desc, default, validate, env.
+	want := map[string]map[string]string{
+		"watcher.debounce_ms": {
+			"desc":     "Debounce delay (ms)",
+			"default":  "5000",
+			"validate": "min=100,max=60000",
+		},
+		"default_server": {
+			"env":  "CIX_SERVER",
+			"desc": "Alias of the server used when --server is omitted",
+		},
+		"indexing.batch_size": {
+			"default":  "20",
+			"validate": "min=1",
+		},
+	}
+
+	seen := map[string]map[string]string{}
+	err := schema.Walk(&config.Config{}, func(l schema.LeafField) {
+		if _, target := want[l.Path]; !target {
+			return
+		}
+		seen[l.Path] = map[string]string{
+			"desc":     l.Tag("desc"),
+			"default":  l.Tag("default"),
+			"validate": l.Tag("validate"),
+			"env":      l.Tag("env"),
+		}
+	})
+	if err != nil {
+		t.Fatalf("Walk: %v", err)
+	}
+
+	for path, expect := range want {
+		got, ok := seen[path]
+		if !ok {
+			t.Errorf("%s: not yielded by Walk", path)
+			continue
+		}
+		for tag, val := range expect {
+			if got[tag] != val {
+				t.Errorf("%s tag %q: want %q, got %q", path, tag, val, got[tag])
+			}
+		}
+	}
+}
+
+func TestWalk_RejectsNonStruct(t *testing.T) {
+	cases := []struct {
+		name string
+		v    any
+	}{
+		{"int", 42},
+		{"nil-ptr", (*config.Config)(nil)},
+		{"string", "hello"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := schema.Walk(tc.v, func(schema.LeafField) {})
+			if err == nil {
+				t.Errorf("expected error for %v, got nil", tc.v)
+			}
+		})
+	}
+}
+
+func TestWalk_SkipsLegacyAPIBlock(t *testing.T) {
+	// API has no `key:` tag, so none of its fields may show up as leaves.
+	keys, err := schema.Keys(&config.Config{})
+	if err != nil {
+		t.Fatalf("Keys: %v", err)
+	}
+	for _, key := range keys {
+		switch key {
+		case "api", "api.url", "api.key":
+			t.Errorf("legacy API field leaked into walker output: %q", key)
+		}
+	}
+}
diff --git a/cli/internal/config/set.go b/cli/internal/config/set.go
new file mode 100644
index 0000000..3fdcb75
--- /dev/null
+++ b/cli/internal/config/set.go
@@ -0,0 +1,129 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"reflect"
+	"strconv"
+	"strings"
+
+	"github.com/anthropics/code-index/cli/internal/config/schema"
+)
+
+// ErrUnknownKey is the sentinel SetByPath wraps when key matches no
+// schema-tagged leaf of the Config struct. Callers (notably runConfigSet)
+// test for it with errors.Is(err, ErrUnknownKey) to decide whether to fall
+// through to a legacy handler.
+var ErrUnknownKey = errors.New("unknown config key")
+
+// SetByPath looks up the schema leaf identified by key, parses value per
+// the leaf's Go type, applies it, runs full-struct validation, and persists.
+// If parsing, validation, or saving fails, the field is rolled back so a
+// rejected change does not linger in the config object returned by Load.
+//
+// Parsing rules:
+//   - bool      strconv.ParseBool          ("true"/"false"/"1"/"0"/etc.)
+//   - int*      strconv.ParseInt(base=10)
+//   - string    used verbatim
+//   - []string  comma-separated, entries trimmed; the new value REPLACES
+//     the existing slice (there is no append form on `config set`)
+//
+// A key that matches no schema leaf yields an error wrapping ErrUnknownKey.
+// Server-management keys (`server.<name>.url|key`, legacy `api.*`) live in
+// runConfigSet's dedicated branch, and slices of structs (Servers, Projects)
+// are rejected; both have purpose-built helpers (SetServerURL, AddProject, …).
+func SetByPath(key, value string) error {
+	cfg, err := Load()
+	if err != nil {
+		return err
+	}
+
+	var (
+		found    bool
+		applyErr error
+		leafRef  schema.LeafField
+		priorVal reflect.Value
+	)
+	walkErr := schema.Walk(cfg, func(l schema.LeafField) {
+		if found || l.Path != key {
+			return
+		}
+		found = true
+		leafRef = l
+		priorVal = reflect.New(l.Value.Type()).Elem()
+		priorVal.Set(l.Value)
+		applyErr = applyLeafValue(l, value)
+	})
+	if walkErr != nil {
+		return walkErr
+	}
+	if !found {
+		return fmt.Errorf("%w: %s", ErrUnknownKey, key)
+	}
+	// rollback undoes the mutation. It is guarded by CanSet because
+	// applyLeafValue fails on non-settable leaves, where Set would panic.
+	rollback := func() {
+		if leafRef.Value.CanSet() {
+			leafRef.Value.Set(priorVal)
+		}
+	}
+	if applyErr != nil {
+		rollback()
+		return applyErr
+	}
+	if err := Validate(cfg); err != nil {
+		rollback()
+		return err
+	}
+	if err := Save(cfg); err != nil {
+		rollback()
+		return err
+	}
+	return nil
+}
+
+// applyLeafValue parses raw according to the leaf's Go kind and stores the
+// result in l.Value. Supported kinds: bool, signed ints, string, []string
+// (comma-separated, entries trimmed, empty entries dropped). It returns an
+// error, leaving the field untouched, for unsettable leaves, unparsable or
+// out-of-range input, and every other kind.
+func applyLeafValue(l schema.LeafField, raw string) error {
+	target := l.Value
+	if !target.CanSet() {
+		return fmt.Errorf("config key %q cannot be set", l.Path)
+	}
+	switch target.Kind() {
+	case reflect.Bool:
+		parsed, err := strconv.ParseBool(raw)
+		if err != nil {
+			return fmt.Errorf("%s: invalid bool %q (use true/false)", l.Path, raw)
+		}
+		target.SetBool(parsed)
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		parsed, err := strconv.ParseInt(raw, 10, 64)
+		if err != nil {
+			return fmt.Errorf("%s: invalid integer %q", l.Path, raw)
+		}
+		if target.OverflowInt(parsed) {
+			return fmt.Errorf("%s: value %d out of range", l.Path, parsed)
+		}
+		target.SetInt(parsed)
+	case reflect.String:
+		target.SetString(raw)
+	case reflect.Slice:
+		if target.Type().Elem().Kind() != reflect.String {
+			return fmt.Errorf("%s: list keys with non-string elements are not settable via 'config set'", l.Path)
+		}
+		// REPLACE semantics: a fresh slice is built and assigned wholesale.
+		entries := make([]string, 0, strings.Count(raw, ",")+1)
+		for _, piece := range strings.Split(raw, ",") {
+			if piece = strings.TrimSpace(piece); piece != "" {
+				entries = append(entries, piece)
+			}
+		}
+		target.Set(reflect.ValueOf(entries))
+	default:
+		return fmt.Errorf("%s: unsupported field kind %s", l.Path, target.Kind())
+	}
+	return nil
+}
diff --git a/cli/internal/config/set_test.go b/cli/internal/config/set_test.go
new file mode 100644
index 0000000..eb85795
--- /dev/null
+++ b/cli/internal/config/set_test.go
@@ -0,0 +1,148 @@
+package config
+
+import (
+	"errors"
+	"os"
+	"path/filepath"
+	"reflect"
+	"testing"
+)
+
+// withIsolatedHome redirects config.Load() to a scratch HOME and resets the
+// singleton; copied from cmd's isolateConfig (importing it would be cyclic).
+func withIsolatedHome(t *testing.T) {
+	t.Helper()
+	scratch := t.TempDir()
+	t.Setenv("HOME", scratch)
+	t.Setenv("XDG_CONFIG_HOME", "")
+	ResetForTesting()
+	t.Cleanup(ResetForTesting)
+}
+
+// TestSetByPath_Bool turns watcher.enabled off and then on again, reloading
+// the config after each write to confirm the change is visible.
+func TestSetByPath_Bool(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("watcher.enabled", "false"); err != nil {
+		t.Fatalf("SetByPath: %v", err)
+	}
+	if loaded, _ := Load(); loaded.Watcher.Enabled {
+		t.Errorf("Enabled = true, want false")
+	}
+	// Flip it back to prove the write works in both directions.
+	if err := SetByPath("watcher.enabled", "true"); err != nil {
+		t.Fatal(err)
+	}
+	if loaded, _ := Load(); !loaded.Watcher.Enabled {
+		t.Errorf("Enabled = false, want true")
+	}
+}
+
+// TestSetByPath_Int stores an integer key and reads it back through Load.
+func TestSetByPath_Int(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("watcher.debounce_ms", "2500"); err != nil {
+		t.Fatalf("SetByPath: %v", err)
+	}
+	if loaded, _ := Load(); loaded.Watcher.DebounceMS != 2500 {
+		t.Errorf("DebounceMS = %d, want 2500", loaded.Watcher.DebounceMS)
+	}
+}
+
+// TestSetByPath_Slice_ReplaceSemantics verifies that writing a list key twice
+// leaves only the second value: set replaces, it never appends.
+func TestSetByPath_Slice_ReplaceSemantics(t *testing.T) {
+	withIsolatedHome(t)
+	const key = "watcher.exclude"
+	if err := SetByPath(key, "vendor, tmp ,build"); err != nil {
+		t.Fatalf("first set: %v", err)
+	}
+	loaded, _ := Load()
+	if want := []string{"vendor", "tmp", "build"}; !reflect.DeepEqual(loaded.Watcher.ExcludePatterns, want) {
+		t.Errorf("ExcludePatterns = %v, want %v", loaded.Watcher.ExcludePatterns, want)
+	}
+	if err := SetByPath(key, "only"); err != nil {
+		t.Fatalf("second set: %v", err)
+	}
+	loaded, _ = Load()
+	if got := loaded.Watcher.ExcludePatterns; !reflect.DeepEqual(got, []string{"only"}) {
+		t.Errorf("after replace, ExcludePatterns = %v, want [only]", got)
+	}
+}
+
+// TestSetByPath_StreamingIdleTimeout covers a key the legacy switch-based
+// setter never exposed; the schema-driven setter makes it writable.
+func TestSetByPath_StreamingIdleTimeout(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("indexing.streaming_idle_timeout_sec", "60"); err != nil {
+		t.Fatalf("SetByPath: %v", err)
+	}
+	loaded, _ := Load()
+	if got := loaded.Indexing.StreamingIdleTimeoutSec; got != 60 {
+		t.Errorf("StreamingIdleTimeoutSec = %d, want 60", got)
+	}
+}
+
+// TestSetByPath_ValidationRejectsBadValue: batch_size=0 must fail validation.
+func TestSetByPath_ValidationRejectsBadValue(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("indexing.batch_size", "0"); err == nil {
+		t.Fatal("expected validation error for batch_size=0")
+	}
+	// The rejected value must not be observable afterwards.
+	loaded, _ := Load()
+	if loaded.Indexing.BatchSize == 0 {
+		t.Errorf("BatchSize = 0 — validation should have rolled back the in-memory mutation before Save")
+	}
+}
+
+// TestSetByPath_UnknownKey expects ErrUnknownKey for a path not in the schema.
+func TestSetByPath_UnknownKey(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("nope.does.not.exist", "v"); !errors.Is(err, ErrUnknownKey) {
+		t.Errorf("err = %v, want errors.Is(_, ErrUnknownKey)", err)
+	}
+}
+
+// TestSetByPath_BadIntFormat: non-numeric text for an int key must be rejected.
+func TestSetByPath_BadIntFormat(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("watcher.debounce_ms", "abc"); err == nil {
+		t.Fatal("expected parse error")
+	}
+}
+
+// TestSetByPath_BadBoolFormat: an unparseable bool literal must be rejected.
+func TestSetByPath_BadBoolFormat(t *testing.T) {
+	withIsolatedHome(t)
+	if err := SetByPath("watcher.enabled", "maybe"); err == nil {
+		t.Fatal("expected parse error")
+	}
+}
+
+// TestSetByPath_PersistsToDisk checks that a successful set lands in the YAML
+// file itself, not only in the in-memory singleton, so that a separate
+// process reading the file would observe the new value.
+func TestSetByPath_PersistsToDisk(t *testing.T) {
+	withIsolatedHome(t)
+	err := SetByPath("watcher.debounce_ms", "7777")
+	if err != nil {
+		t.Fatal(err)
+	}
+	raw, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".cix", "config.yaml"))
+	if err != nil {
+		t.Fatalf("read file: %v", err)
+	}
+	if text := string(raw); !contains(text, "debounce_ms: 7777") {
+		t.Errorf("config.yaml does not contain the new value:\n%s", text)
+	}
+}
+
+// contains reports whether needle occurs anywhere in haystack.
+func contains(haystack, needle string) bool {
+	found := false
+	for end := len(needle); !found && end <= len(haystack); end++ {
+		found = haystack[end-len(needle):end] == needle
+	}
+	return found
+}
diff --git a/cli/internal/config/tui/keys.go b/cli/internal/config/tui/keys.go
new file mode 100644
index 0000000..d1a74bb
--- /dev/null
+++ b/cli/internal/config/tui/keys.go
@@ -0,0 +1,100 @@
+package tui
+
+import "github.com/charmbracelet/bubbles/key"
+
+// keymap groups every binding the TUI responds to. Kept in one place so
+// the help overlay and the Update switch agree on what's available.
+type keymap struct {
+	Up        key.Binding // up / k
+	Down      key.Binding // down / j
+	Left      key.Binding // left / h: focus the left panel
+	Right     key.Binding // right / l: focus the right panel
+	NextPanel key.Binding // tab: switch panel
+
+	Enter   key.Binding // enter: edit
+	Toggle  key.Binding // space / x: toggle bool
+	Save    key.Binding // s: described as a no-op in its own help text
+	Test    key.Binding // t: test connection
+	Add     key.Binding // a: add server
+	Delete  key.Binding // d: delete server
+	MarkDef key.Binding // m: mark as default
+
+	Help key.Binding // ?: help overlay
+	Quit key.Binding // q / esc / ctrl+c
+}
+
+func newKeymap() keymap {
+	return keymap{ // one binding per action; WithHelp supplies the text for the help listings
+		Up: key.NewBinding(
+			key.WithKeys("up", "k"),
+			key.WithHelp("↑/k", "up"),
+		),
+		Down: key.NewBinding(
+			key.WithKeys("down", "j"),
+			key.WithHelp("↓/j", "down"),
+		),
+		Left: key.NewBinding(
+			key.WithKeys("left", "h"),
+			key.WithHelp("←/h", "left panel"),
+		),
+		Right: key.NewBinding(
+			key.WithKeys("right", "l"),
+			key.WithHelp("→/l", "right panel"),
+		),
+		NextPanel: key.NewBinding(
+			key.WithKeys("tab"),
+			key.WithHelp("tab", "switch panel"),
+		),
+		Enter: key.NewBinding(
+			key.WithKeys("enter"),
+			key.WithHelp("enter", "edit"),
+		),
+		Toggle: key.NewBinding(
+			key.WithKeys(" ", "x"), // " " is the space bar
+			key.WithHelp("space/x", "toggle bool"),
+		),
+		Save: key.NewBinding(
+			key.WithKeys("s"),
+			key.WithHelp("s", "save (no-op; sets save on edit)"),
+		),
+		Test: key.NewBinding(
+			key.WithKeys("t"),
+			key.WithHelp("t", "test connection"),
+		),
+		Add: key.NewBinding(
+			key.WithKeys("a"),
+			key.WithHelp("a", "add server"),
+		),
+		Delete: key.NewBinding(
+			key.WithKeys("d"),
+			key.WithHelp("d", "delete server"),
+		),
+		MarkDef: key.NewBinding(
+			key.WithKeys("m"),
+			key.WithHelp("m", "mark as default"),
+		),
+		Help: key.NewBinding(
+			key.WithKeys("?"),
+			key.WithHelp("?", "help"),
+		),
+		Quit: key.NewBinding(
+			key.WithKeys("q", "esc", "ctrl+c"), // ctrl+c also quits but is left out of the help text
+			key.WithHelp("q/esc", "quit"),
+		),
+	}
+}
+
+// shortHelp returns the six bindings shown in the always-on status bar.
+func (k keymap) shortHelp() []key.Binding {
+	return []key.Binding{k.Up, k.Down, k.NextPanel, k.Enter, k.Help, k.Quit}
+}
+
+// fullHelp returns all keys, one inner slice per purpose, for the ? overlay.
+func (k keymap) fullHelp() [][]key.Binding {
+	return [][]key.Binding{
+		{k.Up, k.Down, k.Left, k.Right, k.NextPanel}, // navigation
+		{k.Enter, k.Toggle, k.Save},                  // editing
+		{k.Add, k.Delete, k.MarkDef, k.Test},         // server actions
+		{k.Help, k.Quit},                             // help and exit
+	}
+}
diff --git a/cli/internal/config/tui/model.go b/cli/internal/config/tui/model.go
new file mode 100644
index 0000000..345cbda
--- /dev/null
+++ b/cli/internal/config/tui/model.go
@@ -0,0 +1,163 @@
+package tui
+
+import (
+	"github.com/charmbracelet/bubbles/textinput"
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// sectionID enumerates the left-panel entries. Stable order — the View
+// renders them in this sequence.
+type sectionID int
+
+const (
+	secServers sectionID = iota
+	secWatcher
+	secIndexing
+	secProjects
+	secMisc
+	numSections // count of the sections above; not itself a section
+)
+
+// Label returns the display name shown for the section in the left panel,
+// or "?" for a value outside the known sections.
+func (s sectionID) Label() string {
+	// Indexed by sectionID so the name lookup is a plain array access.
+	names := [...]string{
+		secServers:  "Servers",
+		secWatcher:  "Watcher",
+		secIndexing: "Indexing",
+		secProjects: "Projects",
+		secMisc:     "Misc",
+	}
+	if s < 0 || int(s) >= len(names) {
+		return "?"
+	}
+	return names[s]
+}
+
+// panel marks which of the two columns has focus. Tab toggles it, left/h
+// and right/l select a side directly; the border highlights the active one.
+type panel int
+
+const (
+	panelLeft panel = iota
+	panelRight
+)
+
+// editPurpose records what the shared text input is editing: a schema scalar
+// (committed via config.SetByPath) or one field of a server entry.
+type editPurpose int
+
+const (
+	editPurposeScalar editPurpose = iota
+	editPurposeServerURL
+	editPurposeServerKey
+	editPurposeServerName // only used during "add server" flow
+)
+
+// Model is the full TUI state. bubbletea is Elm-style: every keypress
+// goes through Update(model, msg) → (model, cmd). View() reads model and
+// returns a string.
+type Model struct {
+	cfg *config.Config
+
+	// Navigation.
+	active     panel
+	sectionIdx int // 0..numSections-1
+	rowIdx     int // selected row within the right panel
+
+	// Edit mode: while editing is true, handleKey routes keys to the edit
+	// handler, which feeds editInput; esc cancels and enter commits.
+	editing     bool
+	editPurpose editPurpose
+	editKey     string // schema key path being edited (scalar mode)
+	editServer  int    // index into cfg.Servers (server-edit modes)
+	editInput   textinput.Model
+	editErr     string
+
+	// "Add server" flow. Three sequential prompts: name → URL → key.
+	addingServer bool
+	addStep      int // 0=name, 1=url, 2=key
+	addName      string
+	addURL       string
+
+	// Help overlay toggled with ?; any key dismisses it.
+	showHelp bool
+
+	// Transient status line; cleared at the start of the next normal keypress.
+	statusMsg string
+	statusErr bool
+
+	// Layout. width/height are refreshed from tea.WindowSizeMsg in Update.
+	width, height int
+	styles        styles
+	keys          keymap
+
+	quitting bool // set when the Quit binding is pressed
+}
+
+// NewModel returns the initial editor state for cfg, focused on the left
+// panel. cfg is mutated in place as edits land; the caller must own it.
+func NewModel(cfg *config.Config) Model {
+	// One text input is shared by every inline edit prompt.
+	input := textinput.New()
+	input.Prompt = "› "
+	input.CharLimit = 200
+
+	m := Model{cfg: cfg, active: panelLeft}
+	m.styles = newStyles()
+	m.keys = newKeymap()
+	m.editInput = input
+	return m
+}
+
+// numRowsRight returns how many rows the right panel renders for the
+// current section. Edit-mode entry validates against this so the user
+// can never select a non-existent row.
+//
+// The count is derived from rowsFor, the function that builds the rows,
+// instead of per-section literals. The previous hard-coded counts (4 for
+// Watcher, 2 for Indexing, 1 for Misc) had to be kept in sync by hand and
+// would go stale as soon as a config field was added or removed, either
+// hiding a row from the cursor or letting it move past the last row.
+//
+// A section with nothing to show reports 0; in particular serverRows
+// returns nil when no servers are configured, and rowsFor returns nil for
+// an unknown section.
+func (m Model) numRowsRight() int {
+	// Without a config there is nothing to list.
+	if m.cfg == nil {
+		return 0
+	}
+	// rowsFor is the single source of truth for what each section shows,
+	// so the selectable range always matches the rows it builds.
+	sec := sectionID(m.sectionIdx)
+	return len(rowsFor(m.cfg, sec))
+}
+
+// clampRow forces rowIdx into [0, numRowsRight). An empty section pins the
+// index at 0. Call it after navigation and after any delete that shrinks
+// the current section.
+func (m *Model) clampRow() {
+	last := m.numRowsRight() - 1
+	switch {
+	case last < 0:
+		// Nothing selectable; park the cursor on the first slot.
+		m.rowIdx = 0
+	case m.rowIdx < 0:
+		m.rowIdx = 0
+	case m.rowIdx > last:
+		m.rowIdx = last
+	}
+}
+
+// setStatus replaces the transient status line with msg. Passing ok=false
+// flags it as an error for the renderer. The message lasts until the next
+// normal keypress, when handleKey clears it.
+func (m *Model) setStatus(msg string, ok bool) {
+	m.statusMsg, m.statusErr = msg, !ok
+}
+
+// clearStatus drops the transient status message and its error flag.
+func (m *Model) clearStatus() {
+	m.statusMsg, m.statusErr = "", false
+}
diff --git a/cli/internal/config/tui/model_test.go b/cli/internal/config/tui/model_test.go
new file mode 100644
index 0000000..b59e431
--- /dev/null
+++ b/cli/internal/config/tui/model_test.go
@@ -0,0 +1,219 @@
+package tui
+
+import (
+	"strings"
+	"testing"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// freshCfg builds the in-memory Config the TUI tests run against: two
+// servers ("default" is the default), plus populated watcher and indexing
+// settings. Nothing here calls Load/Save, so no HOME isolation is needed.
+func freshCfg() *config.Config {
+	cfg := new(config.Config)
+	cfg.DefaultServer = "default"
+	cfg.Servers = []config.ServerEntry{
+		{Name: "default", URL: "http://localhost:21847", Key: "k"},
+		{Name: "corp", URL: "https://corp", Key: ""},
+	}
+	cfg.Watcher = config.WatcherConfig{
+		Enabled:          true,
+		DebounceMS:       5000,
+		ExcludePatterns:  []string{"node_modules", ".git"},
+		SyncIntervalMins: 5,
+	}
+	cfg.Indexing = config.IndexingConfig{
+		BatchSize:               20,
+		StreamingIdleTimeoutSec: 30,
+	}
+	return cfg
+}
+
+// makeKey synthesises the tea.KeyMsg for a key name. The special names
+// handled are tab, enter, esc, up, down and " " (space); any other string
+// becomes a KeyRunes message carrying its runes. Update only inspects
+// these typed messages, so fabricating them is enough to drive the Model.
+func makeKey(s string) tea.KeyMsg {
+	// Names that map to a dedicated key type rather than to runes.
+	special := map[string]tea.KeyType{
+		"tab":   tea.KeyTab,
+		"enter": tea.KeyEnter,
+		"esc":   tea.KeyEsc,
+		"up":    tea.KeyUp,
+		"down":  tea.KeyDown,
+		" ":     tea.KeySpace,
+	}
+	if keyType, ok := special[s]; ok {
+		return tea.KeyMsg{Type: keyType}
+	}
+	// Everything else is treated as literal typed characters.
+	return tea.KeyMsg{Type: tea.KeyRunes, Runes: []rune(s)}
+}
+
+// send feeds msgs through Update one at a time and returns the Model that
+// results, so each test body can stay a few lines long.
+func send(m Model, msgs ...tea.Msg) Model {
+	for i := range msgs {
+		next, _ := m.Update(msgs[i])
+		m = next.(Model)
+	}
+	return m
+}
+
+// TestNavigation_DownMovesSectionSelection: down on the left panel selects Watcher.
+func TestNavigation_DownMovesSectionSelection(t *testing.T) {
+	start := NewModel(freshCfg())
+	start.width, start.height = 100, 30
+	after := send(start, makeKey("down"))
+	if got := after.sectionIdx; got != int(secWatcher) {
+		t.Errorf("after down: sectionIdx = %d, want %d (Watcher)", got, secWatcher)
+	}
+}
+
+// TestNavigation_TabSwitchesPanels: focus starts on the left panel and each
+// tab press moves it to the other side.
+func TestNavigation_TabSwitchesPanels(t *testing.T) {
+	m := NewModel(freshCfg())
+	m.width, m.height = 100, 30
+	if m.active != panelLeft {
+		t.Fatalf("initial panel = %v, want left", m.active)
+	}
+	if m = send(m, makeKey("tab")); m.active != panelRight {
+		t.Errorf("after tab: active = %v, want right", m.active)
+	}
+	if m = send(m, makeKey("tab")); m.active != panelLeft {
+		t.Errorf("after second tab: active = %v, want left", m.active)
+	}
+}
+
+// TestNavigation_LeftRightForcePanel: l and h pick a panel outright instead
+// of toggling between the two.
+func TestNavigation_LeftRightForcePanel(t *testing.T) {
+	m := NewModel(freshCfg())
+	m.width, m.height = 100, 30
+	// Right first, then back to the left.
+	if m = send(m, makeKey("l")); m.active != panelRight {
+		t.Errorf("after l: active = %v, want right", m.active)
+	}
+	if m = send(m, makeKey("h")); m.active != panelLeft {
+		t.Errorf("after h: active = %v, want left", m.active)
+	}
+}
+
+// TestEnterOnLeftPanelMovesFocusRight: enter on a section focuses its rows.
+func TestEnterOnLeftPanelMovesFocusRight(t *testing.T) {
+	m := NewModel(freshCfg())
+	m.width, m.height = 100, 30
+	if m = send(m, makeKey("enter")); m.active != panelRight {
+		t.Errorf("enter from left panel should focus right; got %v", m.active)
+	}
+}
+
+// TestQuitSetsQuittingFlag: pressing q marks the model as quitting.
+func TestQuitSetsQuittingFlag(t *testing.T) {
+	m := NewModel(freshCfg())
+	m.width, m.height = 100, 30
+	if m = send(m, makeKey("q")); !m.quitting {
+		t.Error("q should set quitting=true")
+	}
+}
+
+// TestHelpToggle: ? opens the help overlay and the next key of any kind
+// closes it again.
+func TestHelpToggle(t *testing.T) {
+	m := NewModel(freshCfg())
+	m.width, m.height = 100, 30
+	if m = send(m, makeKey("?")); !m.showHelp {
+		t.Error("? should show help")
+	}
+	// The overlay swallows whatever key comes next.
+	if m = send(m, makeKey("x")); m.showHelp {
+		t.Error("any key should dismiss help")
+	}
+}
+
+// TestRowsFor_ServersIncludesDefaultServerRow: servers plus a trailing picker row.
+func TestRowsFor_ServersIncludesDefaultServerRow(t *testing.T) {
+	cfg := freshCfg()
+	rows := rowsFor(cfg, secServers)
+	if got, want := len(rows), len(cfg.Servers)+1; got != want {
+		t.Errorf("len(rows) = %d, want %d (servers + default_server)", got, want)
+	}
+	if tail := rows[len(rows)-1]; tail.label != "default_server" {
+		t.Errorf("last row label = %q, want default_server", tail.label)
+	}
+}
+
+// TestRowsFor_WatcherListsAllScalarLeaves: the Watcher section must expose
+// exactly four rows, one for each watcher setting.
+func TestRowsFor_WatcherListsAllScalarLeaves(t *testing.T) {
+	rows := rowsFor(freshCfg(), secWatcher)
+	if n := len(rows); n != 4 {
+		t.Errorf("watcher rows = %d, want 4 (enabled, debounce, exclude, sync)", n)
+	}
+	wanted := []string{
+		"enabled", "debounce_ms",
+		"exclude", "sync_interval_mins",
+	}
+	seen := make(map[string]bool, len(rows))
+	for i := range rows {
+		seen[rows[i].label] = true
+	}
+	for _, label := range wanted {
+		if !seen[label] {
+			t.Errorf("missing row %q", label)
+		}
+	}
+}
+
+func TestView_RendersBothPanels(t *testing.T) {
+	model := NewModel(freshCfg())
+	model.width, model.height = 100, 30
+	rendered := model.View() // section names on the left, server rows on the right
+	for _, fragment := range []string{"Servers", "Watcher", "Indexing", "Projects", "Misc", "default"} {
+		if !strings.Contains(rendered, fragment) {
+			t.Errorf("View missing %q\noutput:\n%s", fragment, rendered)
+		}
+	}
+}
+
+// TestView_SensitiveKeyNeverLeaks: neither the key nor a fragment may render.
+func TestView_SensitiveKeyNeverLeaks(t *testing.T) {
+	cfg := freshCfg()
+	cfg.Servers[0].Key = "cix_super_secret_xyz"
+	m := NewModel(cfg)
+	m.width, m.height = 100, 30
+	m.sectionIdx = int(secServers)
+	out := m.View()
+	for _, needle := range []string{"super_secret", "cix_super_secret_xyz"} {
+		if strings.Contains(out, needle) {
+			t.Errorf("sensitive key leaked into View output")
+		}
+	}
+}
+
+func TestPingServer_RejectsBlank(t *testing.T) {
+	if PingServer("", "k") == nil { // blank URL
+		t.Error("empty URL should fail")
+	}
+	if PingServer("http://x", "") == nil { // blank key
+		t.Error("empty key should fail")
+	}
+}
+
+// TestClamp exercises clamp with an in-range value, a value below the
+// lower bound, a value above the upper bound, and a degenerate range
+// where both bounds are zero.
+func TestClamp(t *testing.T) {
+	type tc struct{ n, lo, hi, want int }
+	// Columns: n, lo, hi, want.
+	for _, c := range []tc{{5, 0, 10, 5}, {-1, 0, 10, 0}, {99, 0, 10, 10}, {0, 0, 0, 0}} {
+		got := clamp(c.n, c.lo, c.hi)
+		if got != c.want {
+			t.Errorf("clamp(%d, %d, %d) = %d, want %d", c.n, c.lo, c.hi, got, c.want)
+		}
+	}
+}
diff --git a/cli/internal/config/tui/sections.go b/cli/internal/config/tui/sections.go
new file mode 100644
index 0000000..52c4950
--- /dev/null
+++ b/cli/internal/config/tui/sections.go
@@ -0,0 +1,225 @@
+package tui
+
+import (
+	"fmt"
+	"reflect"
+	"strconv"
+	"strings"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+	"github.com/anthropics/code-index/cli/internal/config/schema"
+)
+
+// row is one entry rendered in the right panel. The TUI keeps every
+// section's rows in this uniform shape so navigation, rendering, and
+// edit-mode dispatch share one code path.
+type row struct {
+	label string // shown in the "key" column
+	value string // shown in the "value" column (formatted for display)
+
+	// dispatch on activation
+	kind      rowKind
+	schemaKey string // schema dotted key; set on scalar, bool and default_server rows
+	serverIdx int    // for rowKindServerEdit: index in cfg.Servers
+
+	// metadata
+	sensitive bool // copied from the schema leaf's Sensitive() for scalar rows
+}
+
+type rowKind int // what activating a row does
+
+const (
+	rowKindInert        rowKind = iota // no action on Enter
+	rowKindScalarEdit                  // Enter opens text input; SetByPath on save
+	rowKindBoolToggle                  // space/x flips bool; Enter does too
+	rowKindServerEdit                  // Enter opens server URL/key editor
+	rowKindDefaultPick                 // Enter cycles default_server alias
+)
+
+// rowsFor builds the right-panel rows for section sec from cfg, without
+// needing a running bubbletea program. Unknown sections yield nil.
+func rowsFor(cfg *config.Config, sec sectionID) []row {
+	// Watcher and Indexing are schema sections that differ only by key prefix.
+	if prefix, ok := map[sectionID]string{secWatcher: "watcher.", secIndexing: "indexing."}[sec]; ok {
+		return scalarRows(cfg, prefix)
+	}
+	switch sec {
+	case secServers:
+		return serverRows(cfg)
+	case secProjects:
+		return projectRows(cfg)
+	case secMisc:
+		return miscRows(cfg)
+	}
+	return nil
+}
+
+// serverRows emits one row per server plus a final default_server picker row,
+// or nil when there are no servers. Keys show only as "(set)" / "(not set)".
+func serverRows(cfg *config.Config) []row {
+	if len(cfg.Servers) == 0 {
+		return nil
+	}
+	rows := make([]row, len(cfg.Servers)+1)
+	for i := range cfg.Servers {
+		srv := &cfg.Servers[i]
+		marker, keyStatus := " ", "(not set)"
+		if srv.Name == cfg.DefaultServer {
+			marker = "●" // flags the default entry
+		}
+		if srv.Key != "" {
+			keyStatus = "(set)"
+		}
+		rows[i] = row{
+			label:     fmt.Sprintf("%s %s", marker, srv.Name),
+			value:     fmt.Sprintf("%s   key %s", srv.URL, keyStatus),
+			kind:      rowKindServerEdit,
+			serverIdx: i,
+		}
+	}
+	rows[len(rows)-1] = row{
+		label:     "default_server",
+		value:     cfg.DefaultServer,
+		kind:      rowKindDefaultPick,
+		schemaKey: "default_server",
+	}
+	return rows
+}
+
+// scalarRows emits one row per schema leaf whose dotted key begins with
+// prefix (trimmed from the label). Slice-of-struct leaves are skipped.
+func scalarRows(cfg *config.Config, prefix string) []row {
+	var rows []row
+	visit := func(leaf schema.LeafField) {
+		if !strings.HasPrefix(leaf.Path, prefix) {
+			return
+		}
+		// Defensive: struct lists are not expected under these prefixes.
+		if val := leaf.Value; val.Kind() == reflect.Slice && val.Type().Elem().Kind() == reflect.Struct {
+			return
+		}
+		rows = append(rows, row{
+			label:     strings.TrimPrefix(leaf.Path, prefix),
+			value:     formatLeafForRow(leaf),
+			kind:      kindForLeaf(leaf),
+			schemaKey: leaf.Path,
+			sensitive: leaf.Sensitive(),
+		})
+	}
+	// NOTE(review): Walk's error is dropped here, as before — confirm it is safe to ignore.
+	_ = schema.Walk(cfg, visit)
+	return rows
+}
+
+func kindForLeaf(l schema.LeafField) rowKind {
+	if l.Value.Kind() != reflect.Bool {
+		return rowKindScalarEdit // anything that is not a bool is edited as text
+	}
+	return rowKindBoolToggle // bools flip in place
+}
+
+// formatLeafForRow renders the leaf's current value as a single-line display
+// string. Sensitive leaves only ever report "(set)" or "(not set)".
+func formatLeafForRow(l schema.LeafField) string {
+	v := l.Value
+	if l.Sensitive() {
+		if v.IsZero() {
+			return "(not set)"
+		}
+		return "(set)"
+	}
+	switch v.Kind() {
+	case reflect.Bool:
+		if !v.Bool() {
+			return "✗ disabled"
+		}
+		return "✓ enabled"
+	case reflect.String:
+		return v.String()
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return strconv.FormatInt(v.Int(), 10)
+	case reflect.Slice:
+		if v.Type().Elem().Kind() == reflect.String {
+			items := v.Interface().([]string)
+			return fmt.Sprintf("[%d items] %s", len(items), strings.Join(truncList(items, 3), ", "))
+		}
+	}
+	return fmt.Sprintf("%v", v.Interface())
+}
+
+// truncList returns xs unchanged when it has at most n items; otherwise the
+// first n items followed by a "…+K" marker counting the K omitted ones.
+func truncList(xs []string, n int) []string {
+	extra := len(xs) - n
+	if extra <= 0 {
+		return xs
+	}
+	head := append(make([]string, 0, n+1), xs[:n]...)
+	return append(head, fmt.Sprintf("…+%d", extra))
+}
+
+// projectRows emits one inert (read-only) row per configured project: the
+// label is the project path and the value shows its auto-watch state. With
+// no projects the result is an empty, non-nil slice.
+func projectRows(cfg *config.Config) []row {
+	rows := make([]row, len(cfg.Projects))
+	for i, p := range cfg.Projects {
+		// ✓ when auto-watch is on for the project, ✗ otherwise.
+		mark := "✗"
+		if p.AutoWatch {
+			mark = "✓"
+		}
+		rows[i] = row{label: p.Path, value: fmt.Sprintf("auto-watch %s", mark), kind: rowKindInert}
+	}
+	return rows
+}
+
+// miscRows returns a single inert row showing the config file path.
+func miscRows(cfg *config.Config) []row {
+	pathRow := row{
+		label: "config file",
+		value: config.GetConfigPath(),
+		kind:  rowKindInert,
+	}
+	return []row{pathRow}
+}
+
+// rawScalarValue returns the text used to pre-fill the edit input for a
+// leaf. Bools and ints are formatted with strconv, string lists are joined
+// with commas, and sensitive leaves always start from an empty input.
+func rawScalarValue(l schema.LeafField) string {
+	if l.Sensitive() {
+		// Secrets are never echoed back; the user types a fresh value.
+		return ""
+	}
+	switch v := l.Value; v.Kind() {
+	case reflect.String:
+		return v.String()
+	case reflect.Bool:
+		return strconv.FormatBool(v.Bool())
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return strconv.FormatInt(v.Int(), 10)
+	case reflect.Slice:
+		if v.Type().Elem().Kind() == reflect.String {
+			items := v.Interface().([]string)
+			return strings.Join(items, ",")
+		}
+	}
+	return fmt.Sprintf("%v", l.Value.Interface())
+}
+
+// findLeaf walks the schema and returns the first leaf whose Path equals
+// path, with false when there is none. Callers look the leaf up afresh when
+// committing an edit rather than holding one across Update calls; the
+// original note says its reflect.Value may go stale once cfg is mutated.
+func findLeaf(cfg *config.Config, path string) (schema.LeafField, bool) {
+	var (
+		match schema.LeafField
+		hit   bool
+	)
+	_ = schema.Walk(cfg, func(l schema.LeafField) {
+		if !hit && l.Path == path {
+			match, hit = l, true
+		}
+	})
+	return match, hit
+}
diff --git a/cli/internal/config/tui/styles.go b/cli/internal/config/tui/styles.go
new file mode 100644
index 0000000..db70d64
--- /dev/null
+++ b/cli/internal/config/tui/styles.go
@@ -0,0 +1,96 @@
+package tui
+
+import "github.com/charmbracelet/lipgloss"
+
+// Color palette — chosen so the TUI keeps decent contrast on both dark and
+// light terminal themes. Most entries are mid-tones; colSel (the selection
+// foreground) is the one pure black/white pair. Each is a Light/Dark pair.
+var (
+	colAccent    = lipgloss.AdaptiveColor{Light: "#005577", Dark: "#7dd3fc"}
+	colMuted     = lipgloss.AdaptiveColor{Light: "#666666", Dark: "#888888"}
+	colBorder    = lipgloss.AdaptiveColor{Light: "#cccccc", Dark: "#444444"}
+	colActiveBdr = lipgloss.AdaptiveColor{Light: "#005577", Dark: "#7dd3fc"}
+	colSel       = lipgloss.AdaptiveColor{Light: "#000000", Dark: "#ffffff"}
+	colSelBg     = lipgloss.AdaptiveColor{Light: "#cce5f0", Dark: "#1e3a8a"}
+	colOK        = lipgloss.AdaptiveColor{Light: "#15803d", Dark: "#86efac"}
+	colWarn      = lipgloss.AdaptiveColor{Light: "#a16207", Dark: "#fcd34d"}
+	colErr       = lipgloss.AdaptiveColor{Light: "#b91c1c", Dark: "#fca5a5"}
+	colSensitive = lipgloss.AdaptiveColor{Light: "#7c3aed", Dark: "#c4b5fd"}
+)
+
+// styles is a flat bundle of every reusable lipgloss.Style. It is built by
+// newStyles() and stored on the Model by NewModel, so it is created once.
+type styles struct {
+	leftPanel        lipgloss.Style
+	leftPanelActive  lipgloss.Style
+	rightPanel       lipgloss.Style
+	rightPanelActive lipgloss.Style
+
+	sectionRow    lipgloss.Style
+	sectionRowSel lipgloss.Style
+	sectionCount  lipgloss.Style
+
+	rowKey    lipgloss.Style
+	rowValue  lipgloss.Style
+	rowKeySel lipgloss.Style
+	rowValSel lipgloss.Style
+	rowMuted  lipgloss.Style
+
+	statusBar lipgloss.Style
+	statusOK  lipgloss.Style
+	statusErr lipgloss.Style
+	statusKey lipgloss.Style
+
+	header       lipgloss.Style
+	headerActive lipgloss.Style
+
+	editLabel lipgloss.Style
+	editError lipgloss.Style
+
+	dot          lipgloss.Style
+	dotDimmed    lipgloss.Style
+	sensitiveTag lipgloss.Style
+}
+
+// newStyles builds the style bundle used by the editor. Inactive and
+// active panels differ only in border colour; everything else is a flat
+// foreground/background/weight choice drawn from the palette above.
+func newStyles() styles {
+	border := lipgloss.RoundedBorder()
+	// panel frames a column with a rounded border in the given colour.
+	panel := func(edge lipgloss.AdaptiveColor) lipgloss.Style {
+		return lipgloss.NewStyle().Border(border).BorderForeground(edge).Padding(0, 1)
+	}
+
+	return styles{
+		leftPanel:        panel(colBorder),
+		leftPanelActive:  panel(colActiveBdr),
+		rightPanel:       panel(colBorder),
+		rightPanelActive: panel(colActiveBdr),
+
+		sectionRow:    lipgloss.NewStyle().Padding(0, 1),
+		sectionRowSel: lipgloss.NewStyle().Padding(0, 1).Background(colSelBg).Foreground(colSel).Bold(true),
+		sectionCount:  lipgloss.NewStyle().Foreground(colMuted),
+
+		rowKey:    lipgloss.NewStyle().Foreground(colAccent),
+		rowValue:  lipgloss.NewStyle(),
+		rowKeySel: lipgloss.NewStyle().Foreground(colSel).Background(colSelBg).Bold(true),
+		rowValSel: lipgloss.NewStyle().Foreground(colSel).Background(colSelBg),
+		rowMuted:  lipgloss.NewStyle().Foreground(colMuted),
+
+		statusBar: lipgloss.NewStyle().Foreground(colMuted),
+		statusOK:  lipgloss.NewStyle().Foreground(colOK).Bold(true),
+		statusErr: lipgloss.NewStyle().Foreground(colErr).Bold(true),
+		statusKey: lipgloss.NewStyle().Foreground(colAccent).Bold(true),
+
+		header:       lipgloss.NewStyle().Foreground(colMuted).Bold(true),
+		headerActive: lipgloss.NewStyle().Foreground(colAccent).Bold(true),
+
+		editLabel: lipgloss.NewStyle().Foreground(colAccent).Bold(true),
+		editError: lipgloss.NewStyle().Foreground(colErr),
+
+		dot:          lipgloss.NewStyle().Foreground(colOK),
+		dotDimmed:    lipgloss.NewStyle().Foreground(colMuted),
+		sensitiveTag: lipgloss.NewStyle().Foreground(colSensitive).Italic(true),
+	}
+}
diff --git a/cli/internal/config/tui/tui.go b/cli/internal/config/tui/tui.go
new file mode 100644
index 0000000..70ddb31
--- /dev/null
+++ b/cli/internal/config/tui/tui.go
@@ -0,0 +1,57 @@
+// Package tui implements the interactive configuration editor used by
+// `cix config edit` and `cix config init`.
+//
+// The editor is a full-screen bubbletea program — Elm-style state
+// machine, lipgloss for styling, bubbles/textinput for inline edits.
+// Layout is lazygit-inspired: section list on the left, content panel
+// on the right, persistent help bar at the bottom.
+//
+// Every edit goes through the existing config.SetByPath / SetServerURL /
+// SetServerKey paths — the TUI never touches the YAML directly, so
+// validation, schema rules, and persistence are exactly what the CLI's
+// `cix config set` would apply.
+package tui
+
+import (
+	"fmt"
+
+	tea "github.com/charmbracelet/bubbletea"
+
+	"github.com/anthropics/code-index/cli/internal/client"
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// RunEdit runs the full-screen editor over cfg until the user quits and
+// returns any error from the bubbletea program. It performs no final Save.
+func RunEdit(cfg *config.Config) error {
+	prog := tea.NewProgram(NewModel(cfg), tea.WithAltScreen())
+	if _, err := prog.Run(); err != nil {
+		return err
+	}
+	return nil
+}
+
+// RunInit is the fresh-install entry point: when cfg has no servers it first
+// seeds the default server at http://localhost:21847, then calls RunEdit.
+func RunInit(cfg *config.Config) error {
+	if len(cfg.Servers) > 0 {
+		return RunEdit(cfg)
+	}
+	if err := config.SetServerURL(config.DefaultServerName, "http://localhost:21847"); err != nil {
+		return fmt.Errorf("seed default server: %w", err)
+	}
+	return RunEdit(cfg)
+}
+
+// PingServer backs the "test connection" action: it rejects a blank URL or
+// API key up front and otherwise returns client.Health's result for that
+// server. Exported so the tests can call it without starting bubbletea.
+func PingServer(url, key string) error {
+	switch {
+	case url == "":
+		return fmt.Errorf("URL is required")
+	case key == "":
+		return fmt.Errorf("API key is required")
+	}
+	return client.New(url, key).Health()
+}
diff --git a/cli/internal/config/tui/update.go b/cli/internal/config/tui/update.go
new file mode 100644
index 0000000..1541032
--- /dev/null
+++ b/cli/internal/config/tui/update.go
@@ -0,0 +1,432 @@
+package tui
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"github.com/charmbracelet/bubbles/key"
+	"github.com/charmbracelet/bubbles/textinput"
+	tea "github.com/charmbracelet/bubbletea"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// Init satisfies tea.Model — nothing to fire at startup.
+func (m Model) Init() tea.Cmd { return nil }
+
+// Update is the central message handler. bubbletea calls it once per
+// input event (key, resize, custom message). Every transition goes
+// through here — the View() function is a pure read of the Model.
+func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
+	switch msg := msg.(type) {
+	case tea.WindowSizeMsg:
+		m.width, m.height = msg.Width, msg.Height
+		return m, nil
+	case tea.KeyMsg:
+		return m.handleKey(msg)
+	}
+	return m, nil
+}
+
+func (m Model) handleKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
+	// In edit mode (scalar or server), most keys go to the text input.
+	// Esc cancels, Enter commits.
+	if m.editing {
+		return m.handleEditKey(msg)
+	}
+
+	// "Add server" flow uses the same text input but a different commit path.
+	if m.addingServer {
+		return m.handleAddKey(msg)
+	}
+
+	// Help overlay traps every key — any input dismisses it.
+	if m.showHelp {
+		m.showHelp = false
+		return m, nil
+	}
+
+	m.clearStatus()
+
+	switch {
+	case key.Matches(msg, m.keys.Quit):
+		m.quitting = true
+		return m, tea.Quit
+	case key.Matches(msg, m.keys.Help):
+		m.showHelp = true
+		return m, nil
+	case key.Matches(msg, m.keys.NextPanel):
+		if m.active == panelLeft {
+			m.active = panelRight
+		} else {
+			m.active = panelLeft
+		}
+		return m, nil
+	case key.Matches(msg, m.keys.Left):
+		m.active = panelLeft
+		return m, nil
+	case key.Matches(msg, m.keys.Right):
+		m.active = panelRight
+		return m, nil
+	case key.Matches(msg, m.keys.Up):
+		return m.moveSelection(-1), nil
+	case key.Matches(msg, m.keys.Down):
+		return m.moveSelection(+1), nil
+	case key.Matches(msg, m.keys.Enter):
+		return m.activateRow()
+	case key.Matches(msg, m.keys.Toggle):
+		return m.toggleRow()
+	case key.Matches(msg, m.keys.Add):
+		if sectionID(m.sectionIdx) == secServers {
+			return m.beginAddServer()
+		}
+		return m, nil
+	case key.Matches(msg, m.keys.Delete):
+		if sectionID(m.sectionIdx) == secServers && m.active == panelRight {
+			return m.deleteSelectedServer()
+		}
+		return m, nil
+	case key.Matches(msg, m.keys.MarkDef):
+		if sectionID(m.sectionIdx) == secServers && m.active == panelRight {
+			return m.markDefaultServer()
+		}
+		return m, nil
+	case key.Matches(msg, m.keys.Test):
+		if sectionID(m.sectionIdx) == secServers && m.active == panelRight {
+			return m.testSelectedServer()
+		}
+		return m, nil
+	}
+	return m, nil
+}
+
+func (m Model) moveSelection(delta int) Model {
+	if m.active == panelLeft {
+		m.sectionIdx = clamp(m.sectionIdx+delta, 0, int(numSections)-1)
+		m.rowIdx = 0
+	} else {
+		n := m.numRowsRight()
+		if n == 0 {
+			m.rowIdx = 0
+		} else {
+			m.rowIdx = clamp(m.rowIdx+delta, 0, n-1)
+		}
+	}
+	return m
+}
+
+func (m Model) activateRow() (tea.Model, tea.Cmd) {
+	// Enter on left panel == "focus right panel".
+	if m.active == panelLeft {
+		m.active = panelRight
+		m.rowIdx = 0
+		return m, nil
+	}
+	rows := rowsFor(m.cfg, sectionID(m.sectionIdx))
+	if m.rowIdx >= len(rows) {
+		return m, nil
+	}
+	r := rows[m.rowIdx]
+	switch r.kind {
+	case rowKindBoolToggle:
+		return m.toggleBoolByPath(r.schemaKey)
+	case rowKindScalarEdit:
+		return m.beginEditScalar(r.schemaKey)
+	case rowKindServerEdit:
+		return m.beginEditServer(r.serverIdx, editPurposeServerURL)
+	case rowKindDefaultPick:
+		return m.cycleDefaultServer()
+	}
+	return m, nil
+}
+
+func (m Model) toggleRow() (tea.Model, tea.Cmd) {
+	if m.active != panelRight {
+		return m, nil
+	}
+	rows := rowsFor(m.cfg, sectionID(m.sectionIdx))
+	if m.rowIdx >= len(rows) {
+		return m, nil
+	}
+	if rows[m.rowIdx].kind == rowKindBoolToggle {
+		return m.toggleBoolByPath(rows[m.rowIdx].schemaKey)
+	}
+	return m, nil
+}
+
+func (m Model) toggleBoolByPath(path string) (tea.Model, tea.Cmd) {
+	leaf, ok := findLeaf(m.cfg, path)
+	if !ok {
+		m.setStatus(fmt.Sprintf("unknown key %q", path), false)
+		return m, nil
+	}
+	newVal := !leaf.Value.Bool()
+	if err := config.SetByPath(path, strconv.FormatBool(newVal)); err != nil {
+		m.setStatus(err.Error(), false)
+		return m, nil
+	}
+	m.setStatus(fmt.Sprintf("✓ %s = %v", path, newVal), true)
+	return m, nil
+}
+
+func (m Model) beginEditScalar(path string) (tea.Model, tea.Cmd) {
+	leaf, ok := findLeaf(m.cfg, path)
+	if !ok {
+		m.setStatus(fmt.Sprintf("unknown key %q", path), false)
+		return m, nil
+	}
+	m.editing = true
+	m.editPurpose = editPurposeScalar
+	m.editKey = path
+	m.editErr = ""
+	m.editInput.SetValue(rawScalarValue(leaf))
+	m.editInput.CursorEnd()
+	if leaf.Sensitive() {
+		m.editInput.EchoMode = textinput.EchoPassword
+	} else {
+		m.editInput.EchoMode = textinput.EchoNormal
+	}
+	m.editInput.Focus()
+	return m, nil
+}
+
+func (m Model) beginEditServer(idx int, purpose editPurpose) (tea.Model, tea.Cmd) {
+	if idx < 0 || idx >= len(m.cfg.Servers) {
+		return m, nil
+	}
+	m.editing = true
+	m.editPurpose = purpose
+	m.editServer = idx
+	m.editErr = ""
+	current := ""
+	if purpose == editPurposeServerURL {
+		current = m.cfg.Servers[idx].URL
+		m.editInput.EchoMode = textinput.EchoNormal
+	} else {
+		// Editing the key — never pre-fill, never echo.
+		current = ""
+		m.editInput.EchoMode = textinput.EchoPassword
+	}
+	m.editInput.SetValue(current)
+	m.editInput.CursorEnd()
+	m.editInput.Focus()
+	return m, nil
+}
+
+func (m Model) cycleDefaultServer() (tea.Model, tea.Cmd) {
+	if len(m.cfg.Servers) == 0 {
+		return m, nil
+	}
+	// Find current and pick the next one.
+	curIdx := 0
+	for i, s := range m.cfg.Servers {
+		if s.Name == m.cfg.DefaultServer {
+			curIdx = i
+			break
+		}
+	}
+	next := m.cfg.Servers[(curIdx+1)%len(m.cfg.Servers)].Name
+	if err := config.SetDefaultServer(next); err != nil {
+		m.setStatus(err.Error(), false)
+		return m, nil
+	}
+	m.setStatus(fmt.Sprintf("default_server = %s", next), true)
+	return m, nil
+}
+
+func (m Model) markDefaultServer() (tea.Model, tea.Cmd) {
+	if m.rowIdx >= len(m.cfg.Servers) {
+		return m, nil
+	}
+	name := m.cfg.Servers[m.rowIdx].Name
+	if err := config.SetDefaultServer(name); err != nil {
+		m.setStatus(err.Error(), false)
+		return m, nil
+	}
+	m.setStatus(fmt.Sprintf("default_server = %s", name), true)
+	return m, nil
+}
+
+func (m Model) deleteSelectedServer() (tea.Model, tea.Cmd) {
+	if m.rowIdx >= len(m.cfg.Servers) {
+		return m, nil
+	}
+	name := m.cfg.Servers[m.rowIdx].Name
+	reassigned, err := config.RemoveServer(name)
+	if err != nil {
+		m.setStatus(err.Error(), false)
+		return m, nil
+	}
+	msg := fmt.Sprintf("removed server %q", name)
+	if reassigned != "" {
+		msg += fmt.Sprintf("; default → %q", reassigned)
+	}
+	m.setStatus(msg, true)
+	m.clampRow()
+	return m, nil
+}
+
+func (m Model) testSelectedServer() (tea.Model, tea.Cmd) {
+	if m.rowIdx >= len(m.cfg.Servers) {
+		return m, nil
+	}
+	s := m.cfg.Servers[m.rowIdx]
+	if err := PingServer(s.URL, s.Key); err != nil {
+		m.setStatus(fmt.Sprintf("✗ %s: %v", s.Name, err), false)
+		return m, nil
+	}
+	m.setStatus(fmt.Sprintf("✓ %s reachable", s.Name), true)
+	return m, nil
+}
+
+// beginAddServer starts the add-server prompt at the name step with a blank,
+// unmasked input; editErr is reset so a stale message never shows.
+func (m Model) beginAddServer() (tea.Model, tea.Cmd) {
+	m.addingServer, m.addStep = true, 0
+	m.addName, m.addURL, m.editErr = "", "", ""
+	m.editInput.SetValue("")
+	m.editInput.EchoMode = textinput.EchoNormal
+	m.editInput.Focus()
+	return m, nil
+}
+
+func (m Model) handleEditKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
+	switch msg.String() {
+	case "esc":
+		m.editing = false
+		m.editErr = ""
+		m.editInput.Blur()
+		m.setStatus("edit cancelled", false)
+		return m, nil
+	case "enter":
+		return m.commitEdit()
+	}
+	var cmd tea.Cmd
+	m.editInput, cmd = m.editInput.Update(msg)
+	return m, cmd
+}
+
+func (m Model) commitEdit() (tea.Model, tea.Cmd) {
+	value := strings.TrimRight(m.editInput.Value(), "\r\n")
+	switch m.editPurpose {
+	case editPurposeScalar:
+		if err := config.SetByPath(m.editKey, value); err != nil {
+			m.editErr = err.Error()
+			return m, nil
+		}
+		m.editing = false
+		m.editInput.Blur()
+		m.setStatus(fmt.Sprintf("✓ %s saved", m.editKey), true)
+		return m, nil
+	case editPurposeServerURL:
+		if m.editServer < 0 || m.editServer >= len(m.cfg.Servers) {
+			m.editing = false
+			return m, nil
+		}
+		name := m.cfg.Servers[m.editServer].Name
+		if err := config.SetServerURL(name, value); err != nil {
+			m.editErr = err.Error()
+			return m, nil
+		}
+		// Chain: after URL, prompt for key.
+		m.editPurpose = editPurposeServerKey
+		m.editInput.SetValue("")
+		m.editInput.EchoMode = textinput.EchoPassword
+		m.editErr = ""
+		return m, nil
+	case editPurposeServerKey:
+		if m.editServer < 0 || m.editServer >= len(m.cfg.Servers) {
+			m.editing = false
+			return m, nil
+		}
+		name := m.cfg.Servers[m.editServer].Name
+		if value == "" {
+			// Empty input on the key step means "skip — keep current".
+			m.editing = false
+			m.editInput.Blur()
+			m.setStatus(fmt.Sprintf("✓ %s URL updated", name), true)
+			return m, nil
+		}
+		if err := config.SetServerKey(name, value); err != nil {
+			m.editErr = err.Error()
+			return m, nil
+		}
+		m.editing = false
+		m.editInput.Blur()
+		m.setStatus(fmt.Sprintf("✓ %s URL+key updated", name), true)
+		return m, nil
+	}
+	m.editing = false
+	return m, nil
+}
+
+func (m Model) handleAddKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
+	switch msg.String() {
+	case "esc":
+		m.addingServer = false
+		m.editInput.Blur()
+		m.setStatus("add cancelled", false)
+		return m, nil
+	case "enter":
+		return m.commitAddStep()
+	}
+	var cmd tea.Cmd
+	m.editInput, cmd = m.editInput.Update(msg)
+	return m, cmd
+}
+
+func (m Model) commitAddStep() (tea.Model, tea.Cmd) {
+	value := strings.TrimSpace(m.editInput.Value())
+	switch m.addStep {
+	case 0: // name
+		if value == "" {
+			m.editErr = "name must not be empty"
+			return m, nil
+		}
+		m.addName = value
+		m.addStep = 1
+		m.editInput.SetValue("")
+		m.editInput.EchoMode = textinput.EchoNormal
+		m.editErr = ""
+		return m, nil
+	case 1: // url
+		if value == "" {
+			m.editErr = "URL must not be empty"
+			return m, nil
+		}
+		m.addURL = value
+		m.addStep = 2
+		m.editInput.SetValue("")
+		m.editInput.EchoMode = textinput.EchoPassword
+		m.editErr = ""
+		return m, nil
+	case 2: // key — optional; empty means "add server with no key"
+		if err := config.SetServerURL(m.addName, m.addURL); err != nil {
+			m.editErr = err.Error()
+			return m, nil
+		}
+		if value != "" {
+			if err := config.SetServerKey(m.addName, value); err != nil {
+				m.editErr = err.Error()
+				return m, nil
+			}
+		}
+		m.addingServer = false
+		m.editInput.Blur()
+		m.setStatus(fmt.Sprintf("✓ added server %q", m.addName), true)
+		return m, nil
+	}
+	return m, nil
+}
+
+// clamp keeps n inside [lo, hi]. Stdlib doesn't ship one yet.
+func clamp(n, lo, hi int) int {
+	if n < lo {
+		return lo
+	}
+	if n > hi {
+		return hi
+	}
+	return n
+}
diff --git a/cli/internal/config/tui/view.go b/cli/internal/config/tui/view.go
new file mode 100644
index 0000000..53b5cf3
--- /dev/null
+++ b/cli/internal/config/tui/view.go
@@ -0,0 +1,283 @@
+package tui
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/charmbracelet/lipgloss"
+
+	"github.com/anthropics/code-index/cli/internal/config"
+)
+
+// View renders the full screen. bubbletea calls this once per Update.
+//
+// Layout:
+//
+//	┌─ sections ─┬─ <selected section> ─────────────┐
+//	│ ▶ Servers  │  ...rows...                       │
+//	│   Watcher  │                                   │
+//	│   …        │                                   │
+//	└────────────┴───────────────────────────────────┘
+//	 status / edit prompt
+//	 short-help bar (always visible)
+//
+// The help overlay (when shown) replaces the whole screen, including the
+// status and short-help bars; any key press dismisses it.
+func (m Model) View() string {
+	if m.quitting {
+		return ""
+	}
+	if m.width == 0 || m.height == 0 {
+		return "initializing…"
+	}
+
+	bodyH := m.height - 3 // 1 status, 1 short-help, 1 spacing
+	if bodyH < 4 {
+		bodyH = 4
+	}
+
+	if m.showHelp {
+		return m.renderHelp(bodyH)
+	}
+
+	leftW := 22
+	rightW := m.width - leftW - 4 // borders + padding
+	if rightW < 20 {
+		rightW = 20
+	}
+
+	left := m.renderSections(leftW, bodyH)
+	right := m.renderRightPanel(rightW, bodyH)
+
+	body := lipgloss.JoinHorizontal(lipgloss.Top, left, right)
+
+	statusLine := m.renderStatusOrEdit(m.width)
+	helpLine := m.renderShortHelp(m.width)
+
+	return lipgloss.JoinVertical(lipgloss.Left, body, statusLine, helpLine)
+}
+
+// renderSections draws the left panel — the list of section names with
+// the current selection highlighted. Width and height are the inner box
+// size; borders add 2 each.
+func (m Model) renderSections(w, h int) string {
+	var lines []string
+	for i := 0; i < int(numSections); i++ {
+		s := sectionID(i)
+		marker := "  "
+		if i == m.sectionIdx {
+			if m.active == panelLeft {
+				marker = "▶ "
+			} else {
+				marker = "● "
+			}
+		}
+		count := sectionCount(m.cfg, s)
+		label := fmt.Sprintf("%s%-9s %s", marker, s.Label(), m.styles.sectionCount.Render(countLabel(count)))
+		if i == m.sectionIdx {
+			lines = append(lines, m.styles.sectionRowSel.Render(label))
+		} else {
+			lines = append(lines, m.styles.sectionRow.Render(label))
+		}
+	}
+	for len(lines) < h-2 {
+		lines = append(lines, "")
+	}
+
+	style := m.styles.leftPanel
+	if m.active == panelLeft {
+		style = m.styles.leftPanelActive
+	}
+	return style.Width(w).Height(h).Render(strings.Join(lines, "\n"))
+}
+
+// sectionCount returns the number that appears as a small "n" badge next
+// to the section label — total items for lists, total tunable fields for
+// scalar groups. Cheap to recompute on every View() call.
+func sectionCount(cfg *config.Config, s sectionID) int {
+	switch s {
+	case secServers:
+		return len(cfg.Servers)
+	case secProjects:
+		return len(cfg.Projects)
+	case secWatcher:
+		return 4
+	case secIndexing:
+		return 2
+	}
+	return 0
+}
+
+func countLabel(n int) string {
+	if n <= 0 {
+		return ""
+	}
+	return fmt.Sprintf("%d", n)
+}
+
+// renderRightPanel draws the content panel for the selected section.
+func (m Model) renderRightPanel(w, h int) string {
+	rows := rowsFor(m.cfg, sectionID(m.sectionIdx))
+
+	title := m.styles.header.Render(sectionID(m.sectionIdx).Label())
+	if m.active == panelRight {
+		title = m.styles.headerActive.Render(sectionID(m.sectionIdx).Label())
+	}
+
+	var lines []string
+	lines = append(lines, title, "")
+
+	if len(rows) == 0 {
+		lines = append(lines, m.styles.rowMuted.Render(m.emptySectionHint()))
+	}
+
+	keyW := keyColumnWidth(rows)
+	for i, r := range rows {
+		selected := i == m.rowIdx && m.active == panelRight
+
+		keyText := r.label
+		valText := r.value
+		if r.sensitive && !selected {
+			valText = m.styles.sensitiveTag.Render(valText)
+		}
+
+		var line string
+		switch {
+		case selected:
+			line = m.styles.rowKeySel.Render(padRight(keyText, keyW)) +
+				m.styles.rowValSel.Render("  "+valText)
+		default:
+			line = m.styles.rowKey.Render(padRight(keyText, keyW)) +
+				"  " + m.styles.rowValue.Render(valText)
+		}
+		lines = append(lines, line)
+	}
+
+	// Section-specific footer (action hints).
+	footer := m.sectionFooter()
+	if footer != "" {
+		// Pad up so the footer pins to the bottom.
+		for len(lines) < h-3 {
+			lines = append(lines, "")
+		}
+		lines = append(lines, m.styles.rowMuted.Render(footer))
+	}
+
+	style := m.styles.rightPanel
+	if m.active == panelRight {
+		style = m.styles.rightPanelActive
+	}
+	return style.Width(w).Height(h).Render(strings.Join(lines, "\n"))
+}
+
+func (m Model) emptySectionHint() string {
+	switch sectionID(m.sectionIdx) {
+	case secServers:
+		return "no servers configured — press 'a' to add one"
+	case secProjects:
+		return "no projects — register one with `cix init`"
+	}
+	return ""
+}
+
+func (m Model) sectionFooter() string {
+	switch sectionID(m.sectionIdx) {
+	case secServers:
+		return "enter edit · a add · d delete · m mark default · t test"
+	case secWatcher, secIndexing:
+		return "enter edit · space toggle bool"
+	case secProjects:
+		return "managed via `cix init` / dashboard"
+	}
+	return ""
+}
+
+func keyColumnWidth(rows []row) int {
+	w := 12
+	for _, r := range rows {
+		if l := lipgloss.Width(r.label); l > w {
+			w = l
+		}
+	}
+	if w > 32 {
+		w = 32
+	}
+	return w
+}
+
+func padRight(s string, w int) string {
+	if lipgloss.Width(s) >= w {
+		return s
+	}
+	return s + strings.Repeat(" ", w-lipgloss.Width(s))
+}
+
+// renderStatusOrEdit shows either the active edit input or the last
+// status message. Edit takes precedence — there's no confusion about
+// where keystrokes are going.
+func (m Model) renderStatusOrEdit(w int) string {
+	if m.editing {
+		label := m.editLabel()
+		line := m.styles.editLabel.Render(label+":") + " " + m.editInput.View()
+		if m.editErr != "" {
+			line += "  " + m.styles.editError.Render(m.editErr)
+		}
+		return line
+	}
+	if m.addingServer {
+		label := []string{"name", "URL", "API key (optional)"}[m.addStep]
+		line := m.styles.editLabel.Render("add server "+label+":") + " " + m.editInput.View()
+		if m.editErr != "" {
+			line += "  " + m.styles.editError.Render(m.editErr)
+		}
+		return line
+	}
+	if m.statusMsg != "" {
+		style := m.styles.statusOK
+		if m.statusErr {
+			style = m.styles.statusErr
+		}
+		return style.Render(m.statusMsg)
+	}
+	return ""
+}
+
+func (m Model) editLabel() string {
+	switch m.editPurpose {
+	case editPurposeScalar:
+		return m.editKey
+	case editPurposeServerURL:
+		if m.editServer < len(m.cfg.Servers) {
+			return m.cfg.Servers[m.editServer].Name + " URL"
+		}
+	case editPurposeServerKey:
+		if m.editServer < len(m.cfg.Servers) {
+			return m.cfg.Servers[m.editServer].Name + " API key (empty=keep)"
+		}
+	}
+	return "edit"
+}
+
+// renderShortHelp is the always-on bottom hint bar.
+func (m Model) renderShortHelp(_ int) string {
+	parts := []string{}
+	for _, b := range m.keys.shortHelp() {
+		parts = append(parts, fmt.Sprintf("%s %s", b.Help().Key, b.Help().Desc))
+	}
+	return m.styles.statusBar.Render(strings.Join(parts, "  ·  "))
+}
+
+// renderHelp shows the full key table when ? is pressed.
+func (m Model) renderHelp(bodyH int) string {
+	var b strings.Builder
+	b.WriteString(m.styles.headerActive.Render("Keybindings") + "\n\n")
+	for _, group := range m.keys.fullHelp() {
+		for _, k := range group {
+			h := k.Help() // pad by display width: %-12s would count Render's ANSI escapes
+			b.WriteString("  " + m.styles.statusKey.Render(h.Key) + padRight(h.Key, 12)[len(h.Key):] + "  " + h.Desc + "\n")
+		}
+		b.WriteString("\n")
+	}
+	b.WriteString(m.styles.statusBar.Render("press any key to dismiss"))
+	return lipgloss.Place(m.width, bodyH, lipgloss.Center, lipgloss.Center, b.String())
+}
diff --git a/cli/internal/config/validator.go b/cli/internal/config/validator.go
new file mode 100644
index 0000000..259db80
--- /dev/null
+++ b/cli/internal/config/validator.go
@@ -0,0 +1,94 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/go-playground/validator/v10"
+)
+
+// validate is the singleton validator. Reused across calls so its tag cache
+// is hot and so external code can register custom tags once.
+var validate = validator.New(validator.WithRequiredStructEnabled())
+
+// Validate runs struct-tag validation on cfg and returns a friendly,
+// dotted-key error if any field fails. Returns nil when every constraint is
+// satisfied. Slice elements are NOT validated transitively in this pass —
+// add `dive` to the relevant slice field's tag if/when that's wanted.
+//
+// Validate is NOT called by Load() — a malformed value in an existing on-
+// disk config should not brick the CLI. Validate is the gate for
+// mutations: `cix config set`, the TUI's per-field error rendering, and
+// any future `cix config doctor` will all call it.
+func Validate(cfg *Config) error {
+	if cfg == nil {
+		return fmt.Errorf("nil config")
+	}
+	err := validate.Struct(cfg)
+	if err == nil {
+		return nil
+	}
+	var verrs validator.ValidationErrors
+	if !asValidationErrors(err, &verrs) {
+		return err
+	}
+	parts := make([]string, 0, len(verrs))
+	for _, ve := range verrs {
+		parts = append(parts, formatFieldError(ve))
+	}
+	return fmt.Errorf("config validation failed: %s", strings.Join(parts, "; "))
+}
+
+// asValidationErrors is a tiny helper that avoids importing errors.As at
+// every call site and keeps the type assertion explicit.
+func asValidationErrors(err error, out *validator.ValidationErrors) bool {
+	if ve, ok := err.(validator.ValidationErrors); ok {
+		*out = ve
+		return true
+	}
+	return false
+}
+
+// formatFieldError turns a validator FieldError into a user-readable line.
+// The validator reports paths in Go-struct form (e.g. Watcher.DebounceMS);
+// callers usually want the dotted YAML/key form (e.g. watcher.debounce_ms).
+// Translation is best-effort via a small hand-written map — keeping it
+// hard-coded avoids paying for another reflect walk on every error.
+func formatFieldError(ve validator.FieldError) string {
+	key := goPathToKey(ve.Namespace())
+	switch ve.Tag() {
+	case "min":
+		return fmt.Sprintf("%s must be ≥ %s (got %v)", key, ve.Param(), ve.Value())
+	case "max":
+		return fmt.Sprintf("%s must be ≤ %s (got %v)", key, ve.Param(), ve.Value())
+	case "url":
+		return fmt.Sprintf("%s must be a valid URL (got %q)", key, ve.Value())
+	case "required":
+		return fmt.Sprintf("%s is required", key)
+	default:
+		return fmt.Sprintf("%s failed %q (got %v)", key, ve.Tag(), ve.Value())
+	}
+}
+
+// goPathToKey maps validator's Namespace ("Config.Watcher.DebounceMS") onto
+// the user-facing dotted key ("watcher.debounce_ms"). Best-effort: unknown
+// paths are lowercased.
+var goPathToKey = func(ns string) string {
+	switch ns {
+	case "Config.Watcher.Enabled":
+		return "watcher.enabled"
+	case "Config.Watcher.DebounceMS":
+		return "watcher.debounce_ms"
+	case "Config.Watcher.ExcludePatterns":
+		return "watcher.exclude"
+	case "Config.Watcher.SyncIntervalMins":
+		return "watcher.sync_interval_mins"
+	case "Config.Indexing.BatchSize":
+		return "indexing.batch_size"
+	case "Config.Indexing.StreamingIdleTimeoutSec":
+		return "indexing.streaming_idle_timeout_sec"
+	case "Config.DefaultServer":
+		return "default_server"
+	}
+	return strings.ToLower(ns)
+}
diff --git a/cli/internal/config/validator_test.go b/cli/internal/config/validator_test.go
new file mode 100644
index 0000000..920f8be
--- /dev/null
+++ b/cli/internal/config/validator_test.go
@@ -0,0 +1,107 @@
+package config
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestValidate_OK exercises every default value to confirm the baseline
+// config passes validation. Failure here means a `default:"…"` tag is
+// outside its own `validate:"…"` range — an internal contradiction in the
+// schema, not a user error.
+func TestValidate_OK(t *testing.T) {
+	cfg := &Config{
+		Servers: []ServerEntry{
+			{Name: "default", URL: "http://localhost:21847", Key: "k"},
+		},
+		DefaultServer: "default",
+		Watcher: WatcherConfig{
+			Enabled:          true,
+			DebounceMS:       5000,
+			ExcludePatterns:  []string{".git"},
+			SyncIntervalMins: 5,
+		},
+		Indexing: IndexingConfig{
+			BatchSize:               20,
+			StreamingIdleTimeoutSec: 30,
+		},
+	}
+	if err := Validate(cfg); err != nil {
+		t.Errorf("Validate on canonical defaults: %v", err)
+	}
+}
+
+func TestValidate_Failures(t *testing.T) {
+	cases := []struct {
+		name string
+		mut  func(*Config)
+		// expectKey is the dotted-key fragment we expect in the error
+		// message; if absent we know goPathToKey is wrong.
+		expectKey string
+	}{
+		{
+			name:      "debounce too low",
+			mut:       func(c *Config) { c.Watcher.DebounceMS = 50 },
+			expectKey: "watcher.debounce_ms",
+		},
+		{
+			name:      "debounce too high",
+			mut:       func(c *Config) { c.Watcher.DebounceMS = 999999 },
+			expectKey: "watcher.debounce_ms",
+		},
+		{
+			name:      "sync interval zero",
+			mut:       func(c *Config) { c.Watcher.SyncIntervalMins = 0 },
+			expectKey: "watcher.sync_interval_mins",
+		},
+		{
+			name:      "batch size zero",
+			mut:       func(c *Config) { c.Indexing.BatchSize = 0 },
+			expectKey: "indexing.batch_size",
+		},
+		{
+			name:      "streaming idle negative",
+			mut:       func(c *Config) { c.Indexing.StreamingIdleTimeoutSec = -1 },
+			expectKey: "indexing.streaming_idle_timeout_sec",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			cfg := validBaseConfig()
+			tc.mut(cfg)
+			err := Validate(cfg)
+			if err == nil {
+				t.Fatalf("expected validation error, got nil")
+			}
+			if !strings.Contains(err.Error(), tc.expectKey) {
+				t.Errorf("error %q does not reference key %q", err, tc.expectKey)
+			}
+		})
+	}
+}
+
+func TestValidate_NilConfig(t *testing.T) {
+	if err := Validate(nil); err == nil {
+		t.Errorf("expected error on nil config")
+	}
+}
+
+func validBaseConfig() *Config {
+	return &Config{
+		Servers: []ServerEntry{
+			{Name: "default", URL: "http://localhost:21847"},
+		},
+		DefaultServer: "default",
+		Watcher: WatcherConfig{
+			Enabled:          true,
+			DebounceMS:       5000,
+			ExcludePatterns:  []string{".git"},
+			SyncIntervalMins: 5,
+		},
+		Indexing: IndexingConfig{
+			BatchSize:               20,
+			StreamingIdleTimeoutSec: 30,
+		},
+	}
+}
diff --git a/doc/CLI_CONFIG.md b/doc/CLI_CONFIG.md
new file mode 100644
index 0000000..eef97b9
--- /dev/null
+++ b/doc/CLI_CONFIG.md
@@ -0,0 +1,108 @@
+# `cix` CLI configuration — reference
+
+Comprehensive reference for everything the `cix` CLI lets you configure.
+For a quick tour see [`cli/README.md`](../cli/README.md#run-against-a-server).
+
+## File location
+
+`~/.cix/config.yaml` — created on first `cix config set …` /
+`cix config init`. The CLI seeds an implicit `default` server pointing at
+`http://localhost:21847` if no file exists yet, but does not materialise
+that to disk until the user writes something.
+
+## Precedence
+
+```
+CLI flag (--server / --api-url / --api-key)
+        ↓
+Environment variable (CIX_SERVER / CIX_API_URL / CIX_API_KEY)
+        ↓
+~/.cix/config.yaml
+        ↓
+Built-in default (struct-tag default:"…")
+```
+
+Env overrides never write back to disk. The 3 env vars are the entire
+env surface — knobs like `watcher.debounce_ms` are persistent
+preferences and have no env binding.
+
+## Commands
+
+| Command                                | Purpose |
+|----------------------------------------|---------|
+| `cix config show`                      | Human-readable dump of the current configuration |
+| `cix config keys`                      | List every settable key with default, env, description |
+| `cix config set <key> <value>`         | Set one key — supports scalars + comma-separated lists |
+| `cix config unset server.<name>[.key]` | Remove a server entry or just clear its key |
+| `cix config edit`                      | Interactive two-panel TUI editor for the whole file |
+| `cix config init`                      | First-run wizard — same editor, pre-seeded for fresh installs |
+| `cix config path`                      | Print the file path (useful in scripts) |
+
+## Keys
+
+### Server selection
+
+| Key                       | Type   | Default                      | Description |
+|---------------------------|--------|------------------------------|-------------|
+| `servers`                 | list   | `[{default → localhost}]`    | Managed via `cix config set server.<name>.url|key` |
+| `default_server`          | string | `default`                    | Active alias when `--server`/`CIX_SERVER` are unset |
+| `server.<name>.url`       | string | —                            | URL of a named server (creates the entry on first set) |
+| `server.<name>.key`       | string | —                            | API key for the named server (sensitive — never printed) |
+| `api.url` / `api.key`     | string | —                            | Legacy aliases — operate on the default server |
+
+### File watcher
+
+| Key                            | Type     | Default                                                                | Validation |
+|--------------------------------|----------|------------------------------------------------------------------------|------------|
+| `watcher.enabled`              | bool     | `true`                                                                 | — |
+| `watcher.debounce_ms`          | int      | `5000`                                                                 | 100 — 60000 |
+| `watcher.sync_interval_mins`   | int      | `5`                                                                    | ≥ 1 |
+| `watcher.exclude`              | []string | `node_modules,.git,.venv,__pycache__,dist,build,.next,.cache,.DS_Store` | comma-separated; REPLACE semantics on set |
+
+### Indexing
+
+| Key                                    | Type | Default | Validation | Description |
+|----------------------------------------|------|---------|------------|-------------|
+| `indexing.batch_size`                  | int  | `20`    | ≥ 1        | Per-batch file count for the upload pipeline |
+| `indexing.streaming_idle_timeout_sec`  | int  | `30`    | ≥ 0        | Max silence on streaming `/index/files` before giving up; 0 disables |
+
+### Projects
+
+| Key                        | Type        | Description |
+|----------------------------|-------------|-------------|
+| `projects`                 | list        | Managed via `cix init` / dashboard — not editable via `config set` |
+
+## Env vars
+
+| Variable        | Overrides                       | Notes |
+|-----------------|---------------------------------|-------|
+| `CIX_SERVER`    | `default_server`                | Used only when `--server` is empty |
+| `CIX_API_URL`   | resolved server's `url`         | Local override; never persisted |
+| `CIX_API_KEY`   | resolved server's `key`         | Designed for `secrets.CIX_API_KEY` in CI |
+
+## Implementation notes
+
+- **Single source of truth**: every key, default, validation rule, and
+  description lives on the corresponding Go struct field as a tag
+  (`yaml`, `key`, `default`, `validate`, `env`, `desc`, `sensitive`).
+  All five surfaces — load, save, show, set, TUI — read from this
+  schema via reflection.
+- **Loader**: [`knadh/koanf v2`](https://github.com/knadh/koanf) layers
+  defaults (from tags) and the YAML file. Legacy lowercase keys
+  (`debouncems`, `excludepatterns`, `cachettl`, `autowatch`, `batchsize`)
+  are normalised in-place pre-parse so old files keep loading. The
+  `api:` block is migrated into the multi-server `servers:` list and
+  cleared on the next save.
+- **Validation**: [`go-playground/validator/v10`](https://github.com/go-playground/validator)
+  validates the whole `Config` after every mutation via `cix config set`
+  and on TUI form submit. `Load()` itself does NOT validate — a
+  malformed value in an on-disk file must not brick the CLI; bad values
+  surface the next time the user tries to change something.
+- **TUI**: [`charmbracelet/bubbletea`](https://github.com/charmbracelet/bubbletea)
+  drives the two-panel editor. The Charm stack (`bubbles` + `bubbletea` +
+  `lipgloss`) is the visual layer for any future TUI screens too.
+- **Sensitive fields**: `ServerEntry.Key` carries `sensitive:"true"`.
+  Renderers print `(set)` / `(not set)`, never the value. CodeQL's
+  `go/clear-text-logging` heuristic flags reads of `*Key`/`*Secret`
+  into named variables, so the tag is read off `reflect.StructField`
+  and the value goes through `reflect.Value.IsZero()` only.

From b7fbc8cab97823bc8257eed5f17d7342e1b91eaf Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 2 Jun 2026 10:58:50 +0100
Subject: [PATCH 30/34] docs(skills,plugin): document multi-server CLI usage

Add a "Servers" subsection to both copies of cix SKILL.md (skills/cix/
and plugins/cix/skills/cix/) and a "Targeting multiple cix servers"
block to plugins/cix/README.md. Explains the named-server / default_server
model, the --server flag, and the legacy api.url/api.key aliases, so
agents using the plugin know to use the default and only switch via
--server when the user names one.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 plugins/cix/README.md           | 20 +++++++++++++++++++
 plugins/cix/skills/cix/SKILL.md | 34 +++++++++++++++++++++++++++++++++
 skills/cix/SKILL.md             | 34 +++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/plugins/cix/README.md b/plugins/cix/README.md
index 858abfd..b0b0ee8 100644
--- a/plugins/cix/README.md
+++ b/plugins/cix/README.md
@@ -116,6 +116,26 @@ docs.
 
 ## Configuration
 
+### Targeting multiple cix servers
+
+The bundled CLI supports more than one **named server** (e.g. a local
+box and a remote corporate server). One is the default; commands use it
+unless `--server <alias>` is passed:
+
+```bash
+cix config set server.corporate.url https://cix.corp.internal
+cix config set server.corporate.key <bearer-token>
+cix config set default_server corporate     # optional: make it the default
+cix --server corporate search "rate limiter"
+cix config show                              # lists servers; * marks the default
+```
+
+Legacy keys (`api.url` / `api.key`) still work and operate on the default
+server; the `--api-url` / `--api-key` flags override the selected server's
+URL/key for one invocation. Old `~/.cix/config.yaml` files are migrated
+automatically. The `cix` skill (SKILL.md) documents this for the agent. Full reference:
+[`cli/README.md`](https://github.com/dvcdsys/code-index/blob/main/cli/README.md#multiple-servers).
+
 ### Where the bundled CLI is installed
 
 The wrapper installs `cix` to `~/.local/bin/cix` by default. To override
diff --git a/plugins/cix/skills/cix/SKILL.md b/plugins/cix/skills/cix/SKILL.md
index 10f2a48..3da8ed2 100644
--- a/plugins/cix/skills/cix/SKILL.md
+++ b/plugins/cix/skills/cix/SKILL.md
@@ -157,6 +157,40 @@ The watcher auto-reindexes on file change — manual `reindex` is rarely
 needed. `cix status` shows whether the watcher is running and the
 last-sync timestamp.
 
+### Servers — talk to more than one cix backend
+
+`cix` can be configured with several **named servers** (e.g. a local
+box and a remote corporate server). One is the **default**; every
+command targets the default unless you pass `--server <alias>`.
+
+```bash
+cix config show                                # lists servers; * marks the default
+cix --server corporate search "rate limiter"   # run any command against a named server
+cix search "rate limiter" --server corporate   # --server is global; either position works
+```
+
+Servers are managed through `cix config` (persisted in
+`~/.cix/config.yaml`):
+
+```bash
+cix config set server.corporate.url https://cix.corp.internal
+cix config set server.corporate.key <bearer-token>
+cix config set default_server corporate        # change which server is the default
+cix config unset server.corporate              # remove a server
+cix config unset server.corporate.key          # clear just its key
+```
+
+The legacy single-server keys still work and operate on the **default**
+server, so existing setups keep working unchanged:
+`cix config set api.url <url>` / `cix config set api.key <key>`. The
+`--api-url` / `--api-key` flags override the selected server's URL/key
+for a single invocation.
+
+**Agent rule:** use the default server (no flag) unless the user names a
+specific server. Only add `--server <alias>` when the task explicitly
+targets that named backend; never guess an alias — run `cix config show`
+to see the configured names if unsure.
+
 ---
 
 ## Search quality — what scores mean
diff --git a/skills/cix/SKILL.md b/skills/cix/SKILL.md
index 37cc432..227693d 100644
--- a/skills/cix/SKILL.md
+++ b/skills/cix/SKILL.md
@@ -133,6 +133,40 @@ The watcher auto-reindexes on file change — manual `reindex` is rarely
 needed. `cix status` shows whether the watcher is running and the
 last-sync timestamp.
 
+### Servers — talk to more than one cix backend
+
+`cix` can be configured with several **named servers** (e.g. a local
+box and a remote corporate server). One is the **default**; every
+command targets the default unless you pass `--server <alias>`.
+
+```bash
+cix config show                                # lists servers; * marks the default
+cix --server corporate search "rate limiter"   # run any command against a named server
+cix search "rate limiter" --server corporate   # --server is global; either position works
+```
+
+Servers are managed through `cix config` (persisted in
+`~/.cix/config.yaml`):
+
+```bash
+cix config set server.corporate.url https://cix.corp.internal
+cix config set server.corporate.key <bearer-token>
+cix config set default_server corporate        # change which server is the default
+cix config unset server.corporate              # remove a server
+cix config unset server.corporate.key          # clear just its key
+```
+
+The legacy single-server keys still work and operate on the **default**
+server, so existing setups keep working unchanged:
+`cix config set api.url <url>` / `cix config set api.key <key>`. The
+`--api-url` / `--api-key` flags override the selected server's URL/key
+for a single invocation.
+
+**Agent rule:** use the default server (no flag) unless the user names a
+specific server. Only add `--server <alias>` when the task explicitly
+targets that named backend; never guess an alias — run `cix config show`
+to see the configured names if unsure.
+
 ---
 
 ## Search quality — what scores mean

From f5d6e22a6f0276bc6c0f387da42b8e0cf1666368 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 2 Jun 2026 11:55:41 +0100
Subject: [PATCH 31/34] =?UTF-8?q?fix(embeddings):=20resolve=20branch-revie?=
 =?UTF-8?q?w=20findings=20=E2=80=94=20switch=20atomicity,=20boot=20resilie?=
 =?UTF-8?q?nce,=20gate?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses the full code review of feat/embedding-providers.

Blockers
- parity_test.go called the deleted Service.embedRaw, so `make test-gate`
  (the release embedding-parity gate) failed to compile. Restore via a
  build-tagged helper that reaches the live provider's EmbedRaw.
- SwitchProvider swapped the provider and the vector store as two
  non-atomic ops with the queue resumed in between, so a concurrent embed
  could write new-dimension vectors into the old provider's collection;
  on reopen failure it left a live new-provider / old-store pairing with
  no rollback. Now the queue stays blocked across both swaps (resumed via
  defer on every path) and a reopen failure rolls the provider swap back
  and stops the half-started provider — fail closed, never corrupt.
- Boot no longer hard-fails the whole server when the persisted provider
  cannot be built or its Start() (remote connect-test / sidecar spawn)
  fails. A provider that fails to build (malformed persisted config or
  unknown kind) falls back to env-only ollama; a failed Start attaches
  the provider in a degraded, self-healing state. The HTTP server
  (dashboard/auth/all projects) stays up so an operator can recover
  instead of editing SQLite by hand.

Medium
- SwitchEmbeddingProvider now validates-then-persists: SwitchProvider runs
  first and only a successful swap is saved, so a transient switch failure
  can't leave the DB pointing at a provider that bricks the next boot.
- PrefixLegacyChromaDirs failure at boot is now fatal. It used to
  warn and continue, which silently orphaned the index behind a fresh
  empty namespace. Added vectorstore.WarnIfNamespaceOrphaned as a loud
  diagnostic when the active namespace is empty but sibling namespaces
  exist.
- GetSidecarStatus reported ready=false for healthy openai/voyage providers
  (StateRemote != "running"); treat StateRunning and StateRemote as ready.
- Restart now blocks the freshly-built queue during a concurrency-cap
  change so embeds get a clean 503 during the sidecar respawn instead of
  ErrSupervisor from a transiently-nil provider.

Minor / cleanup
- ollama TokenizeAndEmbed clamps windowSize to >= 1, guarding a CtxSize<=2
  misconfig that spun the split loop forever while holding a queue slot.
- Extract shared HTTP-provider plumbing (base-URL normalize, API-key
  resolve, Ready/Status) into provider/httpremote.go; openai and voyage
  now delegate instead of copy-pasting.

Tests: switch rollback + happy-path, namespace-orphan diagnostic. Full
suite, `go vet` (incl. embed_gate), and `-race` on embeddings + httpapi
all green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 server/cmd/cix-server/main.go                 |  44 +++++--
 server/internal/embeddings/parity_test.go     |  27 ++++-
 .../embeddings/provider/httpremote.go         |  61 ++++++++++
 .../embeddings/provider/ollama/provider.go    |   6 +-
 .../embeddings/provider/openai/openai.go      |  31 +----
 .../embeddings/provider/voyage/voyage.go      |  31 +----
 server/internal/embeddings/service.go         |  74 ++++++++----
 .../embeddings/switch_provider_test.go        | 110 ++++++++++++++++++
 server/internal/httpapi/admin_embeddings.go   |  36 ++++--
 server/internal/httpapi/admin_server.go       |   8 +-
 server/internal/vectorstore/migrate.go        |  56 +++++++++
 server/internal/vectorstore/migrate_test.go   |  82 +++++++++++++
 12 files changed, 467 insertions(+), 99 deletions(-)
 create mode 100644 server/internal/embeddings/provider/httpremote.go
 create mode 100644 server/internal/vectorstore/migrate_test.go

diff --git a/server/cmd/cix-server/main.go b/server/cmd/cix-server/main.go
index 34ec35d..f829143 100644
--- a/server/cmd/cix-server/main.go
+++ b/server/cmd/cix-server/main.go
@@ -210,14 +210,28 @@ func run() error {
 	} else {
 		prov, perr := provider.Build(startupCtx, persistedProv.Kind, persistedProv.Config, embeddings.EnvSecrets(), logger)
 		if perr != nil {
-			startupCancel()
-			return fmt.Errorf("build %s provider: %w", persistedProv.Kind, perr)
-		}
-		if perr := prov.Start(startupCtx); perr != nil {
-			startupCancel()
-			return fmt.Errorf("start %s provider: %w", persistedProv.Kind, perr)
+			// The persisted provider config is malformed (bad JSON /
+			// unknown kind). Don't brick the whole server over it: fall
+			// back to env-only ollama so the dashboard stays reachable to
+			// fix the config or re-switch the provider.
+			logger.Error("could not build persisted embedding provider; falling back to env ollama (fix or re-switch via dashboard)",
+				"kind", persistedProv.Kind, "err", perr)
+			embedSvc, err = embeddings.New(startupCtx, cfg, logger)
+		} else {
+			if perr := prov.Start(startupCtx); perr != nil {
+				// A remote provider's boot connect-test (or the ollama
+				// sidecar spawn) failed — a transient upstream blip, a
+				// revoked key, or a missing API-key env after a redeploy.
+				// Attach the provider anyway instead of failing the whole
+				// process: the HTTP server (dashboard, auth, every project
+				// API) must stay up so an operator can recover, and remote
+				// providers self-heal once the upstream/key is back. Embeds
+				// return a clear error until then and Status() reports it.
+				logger.Error("persisted embedding provider failed to start; continuing in degraded state — embeddings unavailable until it recovers or is switched",
+					"kind", persistedProv.Kind, "err", perr)
+			}
+			embedSvc = embeddings.NewWithProvider(cfg, prov, logger)
 		}
-		embedSvc = embeddings.NewWithProvider(cfg, prov, logger)
 	}
 	startupCancel()
 	if err != nil {
@@ -247,7 +261,13 @@ func run() error {
 	// LEGACY-MIGRATION (remove next release): drop this prefixing call once
 	// all deployments have booted on the unified layout.
 	if err := storage.PrefixLegacyChromaDirs(cfg.ChromaPersistDir, logger); err != nil {
-		logger.Warn("could not migrate legacy chroma dirs (continuing)", "err", err)
+		// Fail closed. A half-completed rename here would leave the
+		// server opening a fresh empty namespace while existing vectors
+		// sit under the un-migrated legacy dir — search would silently
+		// return nothing on a "healthy" server. Surface it instead so the
+		// operator fixes the cause (e.g. dir perms under prod uid 1001,
+		// or a cross-device /data mount) rather than losing the index.
+		return fmt.Errorf("migrate legacy chroma dirs: %w", err)
 	}
 
 	// The vector store is namespaced by the ACTIVE provider identity slug
@@ -259,6 +279,14 @@ func run() error {
 		chromaSlug = provider.StorageSlug("ollama:" + cfg.EmbeddingModel)
 	}
 	chromaDir := cfg.ChromaDirForSlug(chromaSlug)
+	// Visibility guard for the orphaned-namespace failure mode: if the
+	// active provider's dir does not exist yet but sibling chroma dirs do,
+	// prior vectors were indexed under a different identity/layout and the
+	// active provider will start empty. Warn loudly instead of silently
+	// returning zero search results on an apparently-healthy server.
+	if warnErr := vectorstore.WarnIfNamespaceOrphaned(cfg.ChromaPersistDir, chromaDir, logger); warnErr != nil {
+		logger.Warn("could not check for orphaned chroma namespaces", "err", warnErr)
+	}
 
 	// Detect and back up a legacy ChromaDB layout left by the Python server.
 	if backed, bErr := vectorstore.DetectLegacyAndBackup(chromaDir); bErr != nil {
diff --git a/server/internal/embeddings/parity_test.go b/server/internal/embeddings/parity_test.go
index 6db1417..4a5d21e 100644
--- a/server/internal/embeddings/parity_test.go
+++ b/server/internal/embeddings/parity_test.go
@@ -83,7 +83,7 @@ func TestEmbeddingParity(t *testing.T) {
 
 	embedCtx, embedCancel := context.WithTimeout(ctx, 90*time.Second)
 	defer embedCancel()
-	got, err := svc.embedRaw(embedCtx, texts)
+	got, err := embedRawForParity(embedCtx, svc, texts)
 	if err != nil {
 		t.Fatalf("embedRaw: %v", err)
 	}
@@ -131,6 +131,31 @@ func TestEmbeddingParity(t *testing.T) {
 
 // --- helpers ---
 
+// rawEmbedder is the subset of the active provider used by the parity
+// gate: embed verbatim, no asymmetric-retrieval prefix. The ollama
+// provider satisfies it via Provider.EmbedRaw; this matches the
+// reference inputs (which already carry their prefixes) 1:1.
+type rawEmbedder interface {
+	EmbedRaw(context.Context, []string) ([][]float32, error)
+}
+
+// embedRawForParity reaches the live provider behind the Service and
+// embeds texts verbatim, bypassing the queue and prefix logic — the
+// behaviour the deleted Service.embedRaw helper used to provide.
+func embedRawForParity(ctx context.Context, s *Service, texts []string) ([][]float32, error) {
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur == nil {
+		return nil, ErrSupervisor
+	}
+	re, ok := cur.(rawEmbedder)
+	if !ok {
+		return nil, fmt.Errorf("active provider %T does not support raw embedding", cur)
+	}
+	return re.EmbedRaw(ctx, texts)
+}
+
 type refItem struct {
 	Phrase   string    `json:"phrase"`
 	IsQuery  bool      `json:"is_query"`
diff --git a/server/internal/embeddings/provider/httpremote.go b/server/internal/embeddings/provider/httpremote.go
new file mode 100644
index 0000000..555d2aa
--- /dev/null
+++ b/server/internal/embeddings/provider/httpremote.go
@@ -0,0 +1,61 @@
+package provider
+
+import (
+	"fmt"
+	"strings"
+)
+
+// This file holds the plumbing every HTTP-only embedding provider
+// (openai, voyage, and any future REST backend) shares: base-URL
+// normalization, live API-key resolution, and the Ready()/Status()
+// implementations. Keeping it in one place stops the providers from
+// drifting apart (they previously copy-pasted all four).
+
+// NormalizeBaseURL trims trailing slashes from an HTTP base URL so that
+// joining "/v1/embeddings" never yields a double slash — which stricter
+// OpenAI-compatible servers behind a proxy (vLLM/TEI) 404 on.
+func NormalizeBaseURL(raw string) string {
+	return strings.TrimRight(raw, "/")
+}
+
+// ResolveAPIKey looks up apiKeyEnv via secrets, returning (value, true)
+// only when the env var is set to a non-empty value. HTTP providers read
+// their bearer token live through this — the raw value never lives in the
+// provider's persisted config. Returns ("", false) when secrets is nil,
+// apiKeyEnv is empty, or the var is unset/blank.
+func ResolveAPIKey(secrets SecretLookup, apiKeyEnv string) (string, bool) {
+	if secrets == nil || apiKeyEnv == "" {
+		return "", false
+	}
+	v, ok := secrets(apiKeyEnv)
+	if !ok || v == "" {
+		return "", false
+	}
+	return v, true
+}
+
+// RemoteReady is the shared Ready() for HTTP-only providers: nil when the
+// API key is present, ErrMissingAPIKey otherwise. We deliberately do NOT
+// ping the upstream on every call (the /status footer polls Ready every
+// 30s) — real outages surface as embed failures with diagnostics.
+func RemoteReady(secrets SecretLookup, apiKeyEnv string) error {
+	if _, ok := ResolveAPIKey(secrets, apiKeyEnv); !ok {
+		return fmt.Errorf("%w: %s", ErrMissingAPIKey, apiKeyEnv)
+	}
+	return nil
+}
+
+// RemoteStatus is the shared Status() for HTTP-only providers: StateRemote
+// when the API key is present, StateFailed with a diagnostic otherwise.
+func RemoteStatus(model, apiKeyEnv string, secrets SecretLookup) Status {
+	st := Status{
+		State:          StateRemote,
+		ManagesProcess: false,
+		Model:          model,
+	}
+	if _, ok := ResolveAPIKey(secrets, apiKeyEnv); !ok {
+		st.State = StateFailed
+		st.LastError = "API key env var " + apiKeyEnv + " is not set"
+	}
+	return st
+}
diff --git a/server/internal/embeddings/provider/ollama/provider.go b/server/internal/embeddings/provider/ollama/provider.go
index f9bd21e..ef1efdf 100644
--- a/server/internal/embeddings/provider/ollama/provider.go
+++ b/server/internal/embeddings/provider/ollama/provider.go
@@ -307,7 +307,11 @@ func (p *Provider) TokenizeAndEmbed(ctx context.Context, texts []string) ([][]fl
 		cls := ids[0]
 		sep := ids[len(ids)-1]
 		content := ids[1 : len(ids)-1]
-		windowSize := maxTokens - 2
+		// Reserve two slots for the CLS/SEP tokens. Guard against a
+		// pathologically small CtxSize (<= 2): windowSize must be >= 1
+		// or the split loop below never advances `start` and spins
+		// forever while holding a queue slot.
+		windowSize := max(maxTokens-2, 1)
 
 		spanStart := len(sequences)
 		for start := 0; start < len(content); start += windowSize {
diff --git a/server/internal/embeddings/provider/openai/openai.go b/server/internal/embeddings/provider/openai/openai.go
index 649a06d..0c081bb 100644
--- a/server/internal/embeddings/provider/openai/openai.go
+++ b/server/internal/embeddings/provider/openai/openai.go
@@ -15,7 +15,6 @@ import (
 	"log/slog"
 	"net/http"
 	"strconv"
-	"strings"
 	"time"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
@@ -55,7 +54,7 @@ func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provid
 	// Normalise away a trailing slash so url building (BaseURL +
 	// "/v1/embeddings") never produces a double slash, which stricter
 	// OpenAI-compatible servers (vLLM/TEI behind a proxy) can 404 on.
-	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
+	cfg.BaseURL = provider.NormalizeBaseURL(cfg.BaseURL)
 	return &Provider{
 		cfg:     cfg,
 		logger:  logger,
@@ -112,23 +111,11 @@ func (p *Provider) Stop(_ context.Context) error { return nil }
 // always-green footer dot for HTTP-only providers matches the
 // dashboard's documented behaviour.
 func (p *Provider) Ready(_ context.Context) error {
-	if _, ok := p.apiKey(); !ok {
-		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
-	}
-	return nil
+	return provider.RemoteReady(p.secrets, p.cfg.APIKeyEnv)
 }
 
 func (p *Provider) Status() provider.Status {
-	st := provider.Status{
-		State:          provider.StateRemote,
-		ManagesProcess: false,
-		Model:          p.cfg.Model,
-	}
-	if _, ok := p.apiKey(); !ok {
-		st.State = provider.StateFailed
-		st.LastError = "API key env var " + p.cfg.APIKeyEnv + " is not set"
-	}
-	return st
+	return provider.RemoteStatus(p.cfg.Model, p.cfg.APIKeyEnv, p.secrets)
 }
 
 // EmbedQuery is a pass-through to EmbedDocuments — generic
@@ -242,15 +229,5 @@ func (p *Provider) embed(ctx context.Context, texts []string) ([][]float32, erro
 }
 
 func (p *Provider) apiKey() (string, bool) {
-	if p.secrets == nil {
-		return "", false
-	}
-	if p.cfg.APIKeyEnv == "" {
-		return "", false
-	}
-	v, ok := p.secrets(p.cfg.APIKeyEnv)
-	if !ok || v == "" {
-		return "", false
-	}
-	return v, true
+	return provider.ResolveAPIKey(p.secrets, p.cfg.APIKeyEnv)
 }
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index 8cfdd8e..91e0671 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -30,7 +30,6 @@ import (
 	"net/http"
 	"regexp"
 	"strconv"
-	"strings"
 	"time"
 	"unicode/utf8"
 
@@ -275,7 +274,7 @@ func New(cfg Config, secrets provider.SecretLookup, logger *slog.Logger) *Provid
 	// Normalise away a trailing slash so url building (BaseURL +
 	// "/v1/embeddings") never produces a double slash, which stricter
 	// OpenAI-compatible proxies in front of Voyage can 404 on.
-	cfg.BaseURL = strings.TrimRight(cfg.BaseURL, "/")
+	cfg.BaseURL = provider.NormalizeBaseURL(cfg.BaseURL)
 	if cfg.OutputDtype == "" {
 		cfg.OutputDtype = DtypeFloat
 	}
@@ -338,23 +337,11 @@ func (p *Provider) Start(ctx context.Context) error {
 func (p *Provider) Stop(_ context.Context) error { return nil }
 
 func (p *Provider) Ready(_ context.Context) error {
-	if _, ok := p.apiKey(); !ok {
-		return fmt.Errorf("%w: %s", provider.ErrMissingAPIKey, p.cfg.APIKeyEnv)
-	}
-	return nil
+	return provider.RemoteReady(p.secrets, p.cfg.APIKeyEnv)
 }
 
 func (p *Provider) Status() provider.Status {
-	st := provider.Status{
-		State:          provider.StateRemote,
-		ManagesProcess: false,
-		Model:          p.cfg.Model,
-	}
-	if _, ok := p.apiKey(); !ok {
-		st.State = provider.StateFailed
-		st.LastError = "API key env var " + p.cfg.APIKeyEnv + " is not set"
-	}
-	return st
+	return provider.RemoteStatus(p.cfg.Model, p.cfg.APIKeyEnv, p.secrets)
 }
 
 func (p *Provider) EmbedQuery(ctx context.Context, query string) ([]float32, error) {
@@ -772,15 +759,5 @@ func dequantize(raw json.RawMessage, dtype string) ([]float32, error) {
 }
 
 func (p *Provider) apiKey() (string, bool) {
-	if p.secrets == nil {
-		return "", false
-	}
-	if p.cfg.APIKeyEnv == "" {
-		return "", false
-	}
-	v, ok := p.secrets(p.cfg.APIKeyEnv)
-	if !ok || v == "" {
-		return "", false
-	}
-	return v, true
+	return provider.ResolveAPIKey(p.secrets, p.cfg.APIKeyEnv)
 }
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index 3110e0c..c811e45 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -222,6 +222,14 @@ func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []by
 
 	q := s.currentQueue()
 	q.BlockNew()
+	// Keep the queue blocked across BOTH the provider swap and the
+	// vector-store reopen below; resume only on the way out (all return
+	// paths). A blocked queue fails Acquire fast with a 503, so no embed
+	// runs in the window where s.current is the NEW provider while the
+	// Holder still points at the OLD store — that window is exactly what
+	// would write new-dimension vectors into the previous provider's
+	// collection.
+	defer q.Resume()
 	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
 	if derr := q.WaitDrain(drainCtx); derr != nil {
 		s.logger.Warn("embeddings: drain timed out during switch; proceeding anyway",
@@ -229,39 +237,51 @@ func (s *Service) SwitchProvider(ctx context.Context, kind string, cfgBytes []by
 		)
 	}
 	drainCancel()
-	q.Resume()
 
 	s.mu.Lock()
 	old := s.current
 	s.current = newProv
 	s.mu.Unlock()
 
-	if old != nil {
-		go func(p provider.Provider) {
-			stopCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-			defer cancel()
-			if err := p.Stop(stopCtx); err != nil {
-				s.logger.Warn("embeddings: old provider Stop returned error",
-					"kind", p.Kind(), "err", err)
-			}
-		}(old)
-	}
-	s.logger.Info("embeddings: switched provider", "kind", kind, "id", newProv.ID())
-
 	// Reopen the vector store under the new provider's identity slug so
 	// its (possibly different-dimension) vectors land in their own
 	// namespace instead of colliding with the previous provider's
-	// collection. The provider has ALREADY been swapped above; if the
-	// reopen fails we do NOT roll it back (the new provider is live for
-	// embedding) — we keep the holder on the old store and surface a
-	// loud error so the operator restarts. Without the reopen, the next
-	// reindex would write new-dim vectors into the old dir.
+	// collection. If the reopen fails we roll the provider swap BACK to
+	// the old provider (whose store the Holder still points at) and stop
+	// the new provider we started: fail closed, never leave a
+	// new-provider / old-store pairing that corrupts the old collection.
 	if err := s.reopenVectorStore(newProv); err != nil {
+		s.mu.Lock()
+		s.current = old
+		s.mu.Unlock()
+		go stopProviderAsync(s.logger, newProv)
+		s.logger.Error("embeddings: provider switch rolled back — vector store reopen failed",
+			"kind", kind, "err", err)
 		return err
 	}
+
+	if old != nil {
+		go stopProviderAsync(s.logger, old)
+	}
+	s.logger.Info("embeddings: switched provider", "kind", kind, "id", newProv.ID())
 	return nil
 }
 
+// stopProviderAsync stops a provider in the background with a bounded
+// timeout, logging (not failing) on error. Used to release the provider
+// displaced by a switch — or the half-started new provider on a switch
+// rollback — without blocking the caller.
+func stopProviderAsync(logger *slog.Logger, p provider.Provider) {
+	if p == nil {
+		return
+	}
+	stopCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if err := p.Stop(stopCtx); err != nil {
+		logger.Warn("embeddings: provider Stop returned error", "kind", p.Kind(), "err", err)
+	}
+}
+
 // reopenVectorStore opens a fresh *vectorstore.Store under the directory
 // derived from prov's identity slug and atomically swaps it into the
 // shared Holder. No-op when AttachVectorStore was never called (tests).
@@ -442,12 +462,12 @@ func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 	s.lifecycleMu.Lock()
 	defer s.lifecycleMu.Unlock()
 
-	// Snapshot the live queue, block + drain it. Resume targets this same
-	// instance: if we swap below it's discarded (the resume is harmless),
-	// otherwise it's still s.queue and the resume re-opens it.
+	// Snapshot the live queue, block + drain it. The queue stays blocked
+	// through the respawn below and is resumed via the deferred
+	// activeQ.Resume() once s.current is valid again (activeQ is oldQ, or
+	// the new queue when the concurrency cap changed).
 	oldQ := s.currentQueue()
 	oldQ.BlockNew()
-	defer oldQ.Resume()
 	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
 	if err := oldQ.WaitDrain(drainCtx); err != nil {
 		drainCancel()
@@ -458,12 +478,22 @@ func (s *Service) Restart(ctx context.Context, cfg *config.Config) error {
 		drainCancel()
 	}
 
+	// activeQ is the queue that stays live through the respawn below and
+	// must be resumed on the way out. When the concurrency cap changes we
+	// install a NEW queue — it must also start blocked, otherwise embed
+	// callers acquire a slot on it during the sidecar respawn (when
+	// s.current is briefly nil) and get ErrSupervisor instead of a clean
+	// drain-style 503. After such a swap oldQ is discarded, so resuming it is moot.
+	activeQ := oldQ
 	if cfg.MaxEmbeddingConcurrency != cap(oldQ.slots) {
 		newQ := NewQueue(cfg.MaxEmbeddingConcurrency, time.Duration(cfg.EmbeddingQueueTimeout)*time.Second)
+		newQ.BlockNew()
 		s.mu.Lock()
 		s.queue = newQ
 		s.mu.Unlock()
+		activeQ = newQ
 	}
+	defer activeQ.Resume()
 
 	// Snapshot the live provider's kind under the read lock — we don't
 	// want to swap an ollama for a voyage just because the runtime-
diff --git a/server/internal/embeddings/switch_provider_test.go b/server/internal/embeddings/switch_provider_test.go
index e219350..4e60583 100644
--- a/server/internal/embeddings/switch_provider_test.go
+++ b/server/internal/embeddings/switch_provider_test.go
@@ -38,6 +38,116 @@ func (f fakeProv) TokenizeAndEmbed(context.Context, []string) ([][]float32, erro
 
 func quiet() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
 
+// fakeFactory registers a test-only provider kind so SwitchProvider can be
+// driven end-to-end (it builds the new provider through the registry).
+// Build echoes the config bytes into the provider ID so the derived
+// storage slug is deterministic per test.
+type fakeFactory struct{}
+
+func (fakeFactory) Kind() string            { return "fake-switch" }
+func (fakeFactory) SchemaJSON() []byte      { return []byte("{}") }
+func (fakeFactory) SecretEnvVars() []string { return nil }
+func (fakeFactory) Build(cfg []byte, _ provider.SecretLookup, _ *slog.Logger) (provider.Provider, error) {
+	return fakeProv{id: "fake:" + string(cfg)}, nil
+}
+
+func init() { provider.Register(fakeFactory{}) }
+
+// TestSwitchProvider_RollbackOnReopenFailure guards the switch-atomicity
+// fix: when the vector-store reopen fails, the provider swap is rolled
+// back to the old provider (never a new-provider / old-store pairing that
+// would write wrong-dimension vectors into the old collection), and the
+// queue is resumed so the service keeps serving.
+func TestSwitchProvider_RollbackOnReopenFailure(t *testing.T) {
+	dir := t.TempDir()
+	const project = "/proj"
+	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := initial.UpsertChunks(context.Background(), project,
+		[]vectorstore.Chunk{{Content: "x", FilePath: "a.go", StartLine: 1, EndLine: 1, Language: "go"}},
+		[][]float32{{1, 0}}); err != nil {
+		t.Fatal(err)
+	}
+	holder := vectorstore.NewHolder(initial)
+
+	oldProv := fakeProv{id: "ollama:m"}
+	s := &Service{logger: quiet(), queue: NewQueue(2, time.Second), current: oldProv}
+	s.AttachVectorStore(
+		holder,
+		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		func(string) (*vectorstore.Store, error) { return nil, errors.New("boom") }, // reopen always fails
+		nil,
+	)
+
+	if err := s.SwitchProvider(context.Background(), "fake-switch", []byte("newid")); err == nil {
+		t.Fatal("expected switch to fail on reopen error")
+	}
+
+	// Rolled back to the OLD provider — not the half-applied new one.
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur.ID() != oldProv.ID() {
+		t.Errorf("after rollback current.ID() = %q, want %q", cur.ID(), oldProv.ID())
+	}
+	// Holder still serves the old store (reopen never Swapped).
+	if got := holder.Count(project); got != 1 {
+		t.Errorf("holder count = %d, want 1 (old store retained)", got)
+	}
+	// Queue must be resumed, not left blocked, so the service keeps serving.
+	if err := s.queue.Acquire(context.Background()); err != nil {
+		t.Errorf("queue should be resumed after rollback, Acquire = %v", err)
+	} else {
+		s.queue.Release(time.Now())
+	}
+}
+
+// TestSwitchProvider_SuccessSwapsProviderAndStore is the happy path: the
+// live provider becomes the new one, the holder reopens into the new
+// (empty) namespace, and the queue ends unblocked.
+func TestSwitchProvider_SuccessSwapsProviderAndStore(t *testing.T) {
+	dir := t.TempDir()
+	const project = "/proj"
+	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := initial.UpsertChunks(context.Background(), project,
+		[]vectorstore.Chunk{{Content: "x", FilePath: "a.go", StartLine: 1, EndLine: 1, Language: "go"}},
+		[][]float32{{1, 0}}); err != nil {
+		t.Fatal(err)
+	}
+	holder := vectorstore.NewHolder(initial)
+
+	s := &Service{logger: quiet(), queue: NewQueue(2, time.Second), current: fakeProv{id: "ollama:m"}}
+	s.AttachVectorStore(
+		holder,
+		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		vectorstore.Open,
+		nil,
+	)
+
+	if err := s.SwitchProvider(context.Background(), "fake-switch", []byte("newid")); err != nil {
+		t.Fatalf("switch: %v", err)
+	}
+	s.mu.RLock()
+	cur := s.current
+	s.mu.RUnlock()
+	if cur.ID() != "fake:newid" {
+		t.Errorf("current.ID() = %q, want %q", cur.ID(), "fake:newid")
+	}
+	if got := holder.Count(project); got != 0 {
+		t.Errorf("after switch holder Count = %d, want 0 (new empty namespace)", got)
+	}
+	if err := s.queue.Acquire(context.Background()); err != nil {
+		t.Errorf("queue should be unblocked after switch, Acquire = %v", err)
+	} else {
+		s.queue.Release(time.Now())
+	}
+}
+
 func TestServiceStorageSlug(t *testing.T) {
 	s := &Service{logger: quiet(), current: fakeProv{id: "voyage:voyage-code-3:2048:float"}}
 	if got := s.StorageSlug(); got != "voyage_voyage_code_3_2048_float" {
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index 3804835..3349b25 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -126,9 +126,11 @@ func (s *Server) GetActiveEmbeddingProvider(w http.ResponseWriter, r *http.Reque
 
 // SwitchEmbeddingProvider — PUT /api/v1/admin/embedding-providers/active.
 //
-// Atomic switch: validate the new config by building + Starting a
-// provider, persist on success, then swap the live Service over.
-// On any failure the existing provider stays untouched.
+// Validate-then-persist: SwitchProvider builds + Start()s the new
+// provider and swaps the live Service over (failing without side effects
+// if the config is bad), and only on success is the new provider
+// persisted. On any failure the previously-active provider stays the
+// live AND persisted one.
 func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request) {
 	user, ok := s.mustBeAdmin(w, r)
 	if !ok {
@@ -208,19 +210,29 @@ func (s *Server) SwitchEmbeddingProvider(w http.ResponseWriter, r *http.Request)
 		actorID = user.User.ID
 	}
 
-	// Persist BEFORE swap so the DB always leads the live state.
-	// If SwitchProvider then fails, the operator's next call (or
-	// container restart) reads the new row and tries again.
+	// Validate-then-persist: apply the swap live FIRST (SwitchProvider
+	// builds + Start()s the new provider before touching live state, so a
+	// bad config / unreachable endpoint / missing key fails here without
+	// changing anything). Only persist once the swap succeeded. Persisting
+	// first would let a transient switch failure leave the DB pointing at a
+	// provider that won't start — which the boot path then re-adopts,
+	// turning one failed switch into a delayed boot brick.
+	if err := embedSvc.SwitchProvider(r.Context(), req.Kind, cfgBytes); err != nil {
+		writeError(w, http.StatusInternalServerError, "switch provider: "+err.Error())
+		return
+	}
+
 	if err := s.Deps.EmbeddingsCfg.Save(r.Context(), embeddingscfg.Snapshot{
 		Kind:   req.Kind,
 		Config: cfgBytes,
 	}, actorID); err != nil {
-		writeError(w, http.StatusInternalServerError, "persist provider: "+err.Error())
-		return
-	}
-
-	if err := embedSvc.SwitchProvider(r.Context(), req.Kind, cfgBytes); err != nil {
-		writeError(w, http.StatusInternalServerError, "switch provider: "+err.Error())
+		// The live provider IS switched but we couldn't persist it. Report
+		// the inconsistency loudly; a container restart will revert to the
+		// previous persisted provider (safe — both old config and old
+		// vectors are intact), so this degrades rather than corrupts.
+		s.Deps.Logger.Error("embedding provider switched live but persist failed; will revert on restart",
+			"kind", req.Kind, "err", err)
+		writeError(w, http.StatusInternalServerError, "provider switched but persist failed (will revert on restart): "+err.Error())
 		return
 	}
 
diff --git a/server/internal/httpapi/admin_server.go b/server/internal/httpapi/admin_server.go
index 2b38384..b6b49d5 100644
--- a/server/internal/httpapi/admin_server.go
+++ b/server/internal/httpapi/admin_server.go
@@ -19,6 +19,7 @@ import (
 	"time"
 
 	"github.com/dvcdsys/code-index/server/internal/embeddings"
+	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 	"github.com/dvcdsys/code-index/server/internal/runtimecfg"
 
 	"github.com/google/uuid"
@@ -261,9 +262,14 @@ func (s *Server) GetSidecarStatus(w http.ResponseWriter, r *http.Request) {
 	}
 	st := embedSvc.Status()
 
+	// "ready" must be true for a healthy provider of EITHER flavour: a
+	// running ollama sidecar (StateRunning) OR an operational HTTP-only
+	// provider (StateRemote — openai/voyage, which have no managed
+	// process to be "running"). Comparing only against "running"
+	// regressed every remote provider to a permanently not-ready status.
 	body := map[string]any{
 		"state":             st.State,
-		"ready":             st.State == "running",
+		"ready":             st.State == provider.StateRunning || st.State == provider.StateRemote,
 		"in_flight":         st.InFlight,
 		"restart_in_flight": restartInFlight.Load(),
 	}
diff --git a/server/internal/vectorstore/migrate.go b/server/internal/vectorstore/migrate.go
index cc6a4eb..17cccc4 100644
--- a/server/internal/vectorstore/migrate.go
+++ b/server/internal/vectorstore/migrate.go
@@ -2,8 +2,10 @@ package vectorstore
 
 import (
 	"fmt"
+	"log/slog"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 )
 
@@ -30,3 +32,57 @@ func DetectLegacyAndBackup(dir string) (backed bool, err error) {
 	}
 	return true, nil
 }
+
+// WarnIfNamespaceOrphaned logs a prominent warning when the active vector-
+// store directory (activeDir) does not exist yet but sibling chroma
+// namespace dirs do. That pattern means vectors were previously indexed
+// under a different provider identity (or the pre-unification layout) and
+// the active provider will start empty — so search silently returns
+// nothing on an otherwise-healthy server. Surfacing it turns a silent
+// data-orphan into an actionable log line: the operator can switch back to
+// the prior provider or reindex. Best-effort; returns an error only when
+// the directory scan itself fails.
+func WarnIfNamespaceOrphaned(chromaBase, activeDir string, logger *slog.Logger) error {
+	if logger == nil {
+		logger = slog.Default()
+	}
+	if chromaBase == "" {
+		return nil
+	}
+	// Active namespace already present → nothing orphaned.
+	if _, err := os.Stat(activeDir); err == nil {
+		return nil
+	} else if !os.IsNotExist(err) {
+		return err
+	}
+
+	parent := filepath.Dir(chromaBase)
+	prefix := filepath.Base(chromaBase) + "_"
+	activeName := filepath.Base(activeDir)
+	entries, err := os.ReadDir(parent)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil // nothing indexed yet
+		}
+		return err
+	}
+	var siblings []string
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		name := e.Name()
+		if name == activeName || !strings.HasPrefix(name, prefix) {
+			continue
+		}
+		if strings.Contains(name, ".python-backup.") {
+			continue // already-backed-up legacy layout, handled separately
+		}
+		siblings = append(siblings, name)
+	}
+	if len(siblings) > 0 {
+		logger.Warn("active embedding-provider vector namespace is empty but other namespaces exist — prior vectors are under a different provider identity/layout; switch back to the prior provider or reindex to populate this one",
+			"active_dir", activeName, "other_namespaces", siblings)
+	}
+	return nil
+}
diff --git a/server/internal/vectorstore/migrate_test.go b/server/internal/vectorstore/migrate_test.go
new file mode 100644
index 0000000..8abd519
--- /dev/null
+++ b/server/internal/vectorstore/migrate_test.go
@@ -0,0 +1,82 @@
+package vectorstore
+
+import (
+	"bytes"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestWarnIfNamespaceOrphaned(t *testing.T) {
+	mkdir := func(t *testing.T, p string) {
+		t.Helper()
+		if err := os.MkdirAll(p, 0o755); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	t.Run("warns when active namespace empty but siblings exist", func(t *testing.T) {
+		root := t.TempDir()
+		base := filepath.Join(root, "chroma")
+		// A prior provider's namespace exists on disk; the active one does not.
+		mkdir(t, base+"_ollama_m")
+		active := base + "_voyage_voyage_code_3_2048_float"
+
+		var buf bytes.Buffer
+		logger := slog.New(slog.NewTextHandler(&buf, nil))
+		if err := WarnIfNamespaceOrphaned(base, active, logger); err != nil {
+			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
+		}
+		out := buf.String()
+		if !strings.Contains(out, "other namespaces") || !strings.Contains(out, "chroma_ollama_m") {
+			t.Errorf("expected an orphan warning naming the sibling dir, got:\n%s", out)
+		}
+	})
+
+	t.Run("silent when active namespace already present", func(t *testing.T) {
+		root := t.TempDir()
+		base := filepath.Join(root, "chroma")
+		active := base + "_voyage_x"
+		mkdir(t, active)           // active dir exists
+		mkdir(t, base+"_ollama_m") // and a sibling too — still must not warn
+
+		var buf bytes.Buffer
+		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
+			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
+		}
+		if buf.Len() != 0 {
+			t.Errorf("expected no log output when active dir present, got:\n%s", buf.String())
+		}
+	})
+
+	t.Run("silent on a fresh install with no namespaces", func(t *testing.T) {
+		root := t.TempDir()
+		base := filepath.Join(root, "chroma")
+		active := base + "_ollama_m"
+
+		var buf bytes.Buffer
+		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
+			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
+		}
+		if buf.Len() != 0 {
+			t.Errorf("expected no log output on fresh install, got:\n%s", buf.String())
+		}
+	})
+
+	t.Run("ignores python-backup dirs as siblings", func(t *testing.T) {
+		root := t.TempDir()
+		base := filepath.Join(root, "chroma")
+		mkdir(t, base+"_ollama_m.python-backup.20250101-000000")
+		active := base + "_voyage_x"
+
+		var buf bytes.Buffer
+		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
+			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
+		}
+		if buf.Len() != 0 {
+			t.Errorf("python-backup dir should not count as an orphaned namespace, got:\n%s", buf.String())
+		}
+	})
+}

From 668ad298e84373d9a08ca44ec2ad707b0a291ecd Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 2 Jun 2026 12:43:35 +0100
Subject: [PATCH 32/34] refactor(storage): nest vector-store namespace by
 provider identity (fixes #8 class)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The flat single-slug scheme `chroma_<StorageSlug(id)>` flattened
provider+model into one string with `_` as the field separator — but `_`
is also what model-name normalization emits. That in-band separator made
the legacy-migration heuristic (`hasKnownPrefix`) ambiguous: a legacy
ollama model whose name normalized to `ollama_…`/`openai_…`/`voyage_…`
(e.g. `openai-community/x`) was misread as already-unified, skipped, and
its vectors silently orphaned behind a fresh empty namespace. Worse, the
unified identity `ollama:foo` and the legacy dir of a model literally
named `ollama-foo` both produced the exact same string
(`chroma_ollama_foo`) — unfixable by any name parsing.

Make the separator structural instead: namespace the vector store as a
nested directory tree, one path segment per identity field:

    <ChromaPersistDir>/<kind>/<model-slug>[/<variant>]
    chroma/ollama/nomic_embed_text
    chroma/voyage/voyage_code_3/2048_int8
    chroma/openai/text_embedding_3_large/256

The provider kind is now its own directory level, derived from the kind
(not glued onto the model slug), so it can never collide with a model
name. `ollama:foo` → ollama/foo, `ollama:ollama-foo` → ollama/ollama_foo —
distinct. The #8 case resolves correctly: openai-community/x →
ollama/openai_community_x.

Changes
- provider.Provider gains StorageComponents() []string — each provider
  builds its path from its OWN structured fields (never by re-parsing the
  flattened ID), so the in-band-separator problem can't recur.
- config: ChromaDirForSlug(slug) → ChromaDirFor(components) (filepath.Join
  under the container).
- Service: StorageSlug() → StoragePath() []string; reopen + AttachVectorStore
  thread components.
- storage: PrefixLegacyChromaDirs (+ hasKnownPrefix prefix-guessing) →
  MigrateFlatChromaToNested. Now unambiguous: every flat `chroma_*` sibling
  is a legacy ollama dir (the old build was ollama-only) and moves to
  chroma/ollama/<StorageSlug(suffix)>/. StorageSlug(ModelSafeName(m)) ==
  StorageSlug(m), so it lands where the server resolves the identity — even
  for kind-looking model names. Existing main/prod ollama vectors migrate
  without a reindex. main.go relocates a legacy Python store at the
  container path first, then migrates (fail-closed).
- Drop vectorstore.WarnIfNamespaceOrphaned — the orphan ambiguity it
  warned about is gone, so the diagnostic is no longer needed.
- Drop the throwaway intermediate flat-unified scheme entirely (dev-branch
  only; no back-compat).

Tests: per-provider StorageComponents incl. the ollama foo vs ollama-foo
anti-collision; migration moves flat→nested incl. the kind-looking-name
(#8) regression, special-char normalization, idempotency, no-clobber,
python-backup skip; switch/reopen tests assert nested paths. Full suite,
`go vet` (incl. embed_gate), and `-race` all green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 server/cmd/cix-server/main.go                 |  66 +++++-----
 server/internal/config/config.go              |  16 +--
 server/internal/config/config_test.go         |   8 +-
 .../embeddings/provider/ollama/provider.go    |   7 ++
 .../provider/ollama/storagecomponents_test.go |  29 +++++
 .../embeddings/provider/openai/openai.go      |  11 ++
 .../provider/openai/storagecomponents_test.go |  18 +++
 .../internal/embeddings/provider/provider.go  |  13 ++
 .../provider/voyage/storagecomponents_test.go |  18 +++
 .../embeddings/provider/voyage/voyage.go      |  13 ++
 server/internal/embeddings/service.go         |  39 +++---
 .../embeddings/switch_provider_test.go        |  81 ++++++++-----
 server/internal/httpapi/server.go             |   6 +-
 server/internal/storage/chromamigrate.go      | 105 ++++++++--------
 server/internal/storage/chromamigrate_test.go | 113 +++++++++++-------
 server/internal/vectorstore/migrate.go        |  56 ---------
 server/internal/vectorstore/migrate_test.go   |  82 -------------
 server/internal/vectorstore/store.go          |   2 +-
 18 files changed, 344 insertions(+), 339 deletions(-)
 create mode 100644 server/internal/embeddings/provider/ollama/storagecomponents_test.go
 create mode 100644 server/internal/embeddings/provider/openai/storagecomponents_test.go
 create mode 100644 server/internal/embeddings/provider/voyage/storagecomponents_test.go
 delete mode 100644 server/internal/vectorstore/migrate_test.go

diff --git a/server/cmd/cix-server/main.go b/server/cmd/cix-server/main.go
index f829143..0ac00c0 100644
--- a/server/cmd/cix-server/main.go
+++ b/server/cmd/cix-server/main.go
@@ -254,46 +254,40 @@ func run() error {
 		}
 	}()
 
-	// Prefix legacy un-prefixed chroma dirs (pre-unification ollama-only
-	// builds) to the unified "<base>_ollama_<model>" naming so existing
-	// ollama vectors are reused under the new identity-namespaced scheme
-	// without a reindex. Idempotent; runs once per legacy dir.
-	// LEGACY-MIGRATION (remove next release): drop this prefixing call once
-	// all deployments have booted on the unified layout.
-	if err := storage.PrefixLegacyChromaDirs(cfg.ChromaPersistDir, logger); err != nil {
-		// Fail closed. A half-completed rename here would leave the
-		// server opening a fresh empty namespace while existing vectors
-		// sit under the un-migrated legacy dir — search would silently
-		// return nothing on a "healthy" server. Surface it instead so the
-		// operator fixes the cause (e.g. dir perms under prod uid 1001,
-		// or a cross-device /data mount) rather than losing the index.
+	// Relocate a legacy Python ChromaDB store occupying the container path
+	// itself, so chromaBase is free to become the nested container below.
+	if backed, bErr := vectorstore.DetectLegacyAndBackup(cfg.ChromaPersistDir); bErr != nil {
+		return fmt.Errorf("back up legacy python chroma store: %w", bErr)
+	} else if backed {
+		logger.Warn("legacy python chroma layout detected at container path — backed up; re-run cix init to reindex")
+	}
+
+	// Migrate pre-unification FLAT ollama dirs (<base>_<model>) into the
+	// unified NESTED layout (<base>/ollama/<model-slug>/) so existing
+	// vectors are reused without a reindex. Unambiguous: every flat sibling
+	// is a legacy ollama dir (the provider kind is now its own path level,
+	// not guessed from a name prefix). Idempotent.
+	// LEGACY-MIGRATION (remove next release): drop once every deployment has
+	// booted on the nested layout.
+	if err := storage.MigrateFlatChromaToNested(cfg.ChromaPersistDir, logger); err != nil {
+		// Fail closed. A half-completed move would leave the server opening
+		// a fresh empty namespace while existing vectors sit under the
+		// un-migrated legacy dir — search would silently return nothing on
+		// a "healthy" server. Surface it so the operator fixes the cause
+		// (e.g. dir perms under prod uid 1001) rather than losing the index.
 		return fmt.Errorf("migrate legacy chroma dirs: %w", err)
 	}
 
-	// The vector store is namespaced by the ACTIVE provider identity slug
-	// so vectors of different dimensions never share a collection.
-	chromaSlug := embedSvc.StorageSlug()
-	if chromaSlug == "" {
+	// The vector store is namespaced by the ACTIVE provider's identity path
+	// components (<kind>/<model>[/<variant>]) so vectors of different
+	// dimensions never share a collection.
+	components := embedSvc.StoragePath()
+	if len(components) == 0 {
 		// Embeddings disabled / provider not built: deterministic ollama-
 		// shaped fallback so toggling embeddings on/off doesn't move dirs.
-		chromaSlug = provider.StorageSlug("ollama:" + cfg.EmbeddingModel)
-	}
-	chromaDir := cfg.ChromaDirForSlug(chromaSlug)
-	// Visibility guard for the orphaned-namespace failure mode: if the
-	// active provider's dir does not exist yet but sibling chroma dirs do,
-	// prior vectors were indexed under a different identity/layout and the
-	// active provider will start empty. Warn loudly instead of silently
-	// returning zero search results on an apparently-healthy server.
-	if warnErr := vectorstore.WarnIfNamespaceOrphaned(cfg.ChromaPersistDir, chromaDir, logger); warnErr != nil {
-		logger.Warn("could not check for orphaned chroma namespaces", "err", warnErr)
-	}
-
-	// Detect and back up a legacy ChromaDB layout left by the Python server.
-	if backed, bErr := vectorstore.DetectLegacyAndBackup(chromaDir); bErr != nil {
-		logger.Warn("could not back up legacy chroma dir", "err", bErr)
-	} else if backed {
-		logger.Warn("legacy chroma layout detected — backed up; re-run cix init to reindex")
+		components = []string{provider.KindOllama, provider.StorageSlug(cfg.EmbeddingModel)}
 	}
+	chromaDir := cfg.ChromaDirFor(components)
 
 	vs, err := vectorstore.Open(chromaDir)
 	if err != nil {
@@ -305,9 +299,9 @@ func run() error {
 	// Wire the live-reopen path used by SwitchProvider.
 	embedSvc.AttachVectorStore(
 		vsHolder,
-		cfg.ChromaDirForSlug,
+		cfg.ChromaDirFor,
 		vectorstore.Open,
-		func() error { return storage.PrefixLegacyChromaDirs(cfg.ChromaPersistDir, logger) },
+		func() error { return storage.MigrateFlatChromaToNested(cfg.ChromaPersistDir, logger) },
 	)
 
 	idx := indexer.New(database, vsHolder, embedSvc, logger)
diff --git a/server/internal/config/config.go b/server/internal/config/config.go
index e5de89b..84e9240 100644
--- a/server/internal/config/config.go
+++ b/server/internal/config/config.go
@@ -204,13 +204,15 @@ func (c *Config) LegacyDynamicSQLitePath() string {
 	return fmt.Sprintf("%s_%s%s", base, c.ModelSafeName(), ext)
 }
 
-// ChromaDirForSlug returns the on-disk vector-store directory for a given
-// embedding-identity slug (see provider.StorageSlug). The slug namespaces
-// the chroma persist dir so vectors of different dimensions never share a
-// collection. The slug is computed by the caller from the ACTIVE
-// provider's ID(), keeping the model identity out of config.
-func (c *Config) ChromaDirForSlug(slug string) string {
-	return fmt.Sprintf("%s_%s", c.ChromaPersistDir, slug)
+// ChromaDirFor returns the on-disk vector-store directory for an embedding
+// identity expressed as nested path components (see
+// provider.Provider.StorageComponents): {kind, model-slug[, variant]}.
+// ChromaPersistDir is the container; each component is its own directory
+// level, so the provider kind can never collide with a model name that
+// normalises to a kind-looking slug. Empty components → the bare
+// container (callers guard against that).
+func (c *Config) ChromaDirFor(components []string) string {
+	return filepath.Join(append([]string{c.ChromaPersistDir}, components...)...)
 }
 
 // Load reads CIX_* environment variables and returns a populated Config.
diff --git a/server/internal/config/config_test.go b/server/internal/config/config_test.go
index 560920a..58415a8 100644
--- a/server/internal/config/config_test.go
+++ b/server/internal/config/config_test.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"path/filepath"
 	"strings"
 	"testing"
 )
@@ -112,9 +113,10 @@ func TestLoadOverrides(t *testing.T) {
 	if got := c.LegacyDynamicSQLitePath(); got != "/tmp/test_test_model_name.db" {
 		t.Errorf("LegacyDynamicSQLitePath = %q", got)
 	}
-	// ChromaDirForSlug suffixes the chroma base with the given identity slug.
-	if got := c.ChromaDirForSlug("voyage_voyage_code_3_2048_float"); got != c.ChromaPersistDir+"_voyage_voyage_code_3_2048_float" {
-		t.Errorf("ChromaDirForSlug = %q", got)
+	// ChromaDirFor joins the identity path components under the chroma base.
+	comps := []string{"voyage", "voyage_code_3", "2048", "float"}
+	if got, want := c.ChromaDirFor(comps), filepath.Join(append([]string{c.ChromaPersistDir}, comps...)...); got != want {
+		t.Errorf("ChromaDirFor = %q, want %q", got, want)
 	}
 }
 
diff --git a/server/internal/embeddings/provider/ollama/provider.go b/server/internal/embeddings/provider/ollama/provider.go
index ef1efdf..9c7173a 100644
--- a/server/internal/embeddings/provider/ollama/provider.go
+++ b/server/internal/embeddings/provider/ollama/provider.go
@@ -112,6 +112,13 @@ func (p *Provider) Dimension() int { return 0 }
 // SupportsTokenize is true: llama-server exposes /tokenize.
 func (p *Provider) SupportsTokenize() bool { return true }
 
+// StorageComponents namespaces the vector store as ollama/<model-slug>.
+// Dimension is not part of the path: it is unknown at config time
+// (Dimension() == 0) and the model name already pins the GGUF/quant.
+func (p *Provider) StorageComponents() []string {
+	return []string{provider.KindOllama, provider.StorageSlug(p.cfg.Model)}
+}
+
 // Start resolves the GGUF path then spawns the supervisor. Blocks
 // until the readiness probe succeeds or ctx expires.
 func (p *Provider) Start(ctx context.Context) error {
diff --git a/server/internal/embeddings/provider/ollama/storagecomponents_test.go b/server/internal/embeddings/provider/ollama/storagecomponents_test.go
new file mode 100644
index 0000000..44e280d
--- /dev/null
+++ b/server/internal/embeddings/provider/ollama/storagecomponents_test.go
@@ -0,0 +1,29 @@
+package ollama
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestStorageComponents(t *testing.T) {
+	got := New(Config{Model: "nomic-embed-text"}, nil).StorageComponents()
+	if want := []string{"ollama", "nomic_embed_text"}; !reflect.DeepEqual(got, want) {
+		t.Errorf("StorageComponents = %v, want %v", got, want)
+	}
+}
+
+// TestStorageComponents_KindNotGluedToModel is the #8 anti-collision guard:
+// the provider kind is its own path segment, so a model literally named
+// "ollama-foo" (slug "ollama_foo") never shares a namespace with model
+// "foo". Under the flat scheme, unified "ollama:foo" and a legacy dir for
+// model "ollama-foo" were both "chroma_ollama_foo".
+func TestStorageComponents_KindNotGluedToModel(t *testing.T) {
+	foo := New(Config{Model: "foo"}, nil).StorageComponents()
+	ollamaFoo := New(Config{Model: "ollama-foo"}, nil).StorageComponents()
+	if reflect.DeepEqual(foo, ollamaFoo) {
+		t.Fatalf("distinct models must not share a path: %v == %v", foo, ollamaFoo)
+	}
+	if got, want := ollamaFoo[len(ollamaFoo)-1], "ollama_foo"; got != want {
+		t.Errorf("model slug = %q, want %q", got, want)
+	}
+}
diff --git a/server/internal/embeddings/provider/openai/openai.go b/server/internal/embeddings/provider/openai/openai.go
index 0c081bb..d152c2c 100644
--- a/server/internal/embeddings/provider/openai/openai.go
+++ b/server/internal/embeddings/provider/openai/openai.go
@@ -79,6 +79,17 @@ func (p *Provider) ID() string {
 func (p *Provider) Dimension() int         { return p.cfg.Dimensions }
 func (p *Provider) SupportsTokenize() bool { return false }
 
+// StorageComponents namespaces the vector store as
+// openai/<model-slug>[/<dim>]. The dimension is a path segment only when
+// explicitly configured (Matryoshka shrink), mirroring ID().
+func (p *Provider) StorageComponents() []string {
+	comps := []string{provider.KindOpenAI, provider.StorageSlug(p.cfg.Model)}
+	if p.cfg.Dimensions > 0 {
+		comps = append(comps, strconv.Itoa(p.cfg.Dimensions))
+	}
+	return comps
+}
+
 // Start runs a one-shot connect test: embed a single short string.
 // Surfaces auth / network errors before the provider is wired into
 // the request path.
diff --git a/server/internal/embeddings/provider/openai/storagecomponents_test.go b/server/internal/embeddings/provider/openai/storagecomponents_test.go
new file mode 100644
index 0000000..295cee9
--- /dev/null
+++ b/server/internal/embeddings/provider/openai/storagecomponents_test.go
@@ -0,0 +1,18 @@
+package openai
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestStorageComponents(t *testing.T) {
+	base := New(Config{Model: "text-embedding-3-large"}, nil, nil).StorageComponents()
+	if want := []string{"openai", "text_embedding_3_large"}; !reflect.DeepEqual(base, want) {
+		t.Errorf("StorageComponents = %v, want %v", base, want)
+	}
+	// Explicit Matryoshka dimension becomes a trailing path segment.
+	dim := New(Config{Model: "text-embedding-3-large", Dimensions: 256}, nil, nil).StorageComponents()
+	if want := []string{"openai", "text_embedding_3_large", "256"}; !reflect.DeepEqual(dim, want) {
+		t.Errorf("StorageComponents (dim) = %v, want %v", dim, want)
+	}
+}
diff --git a/server/internal/embeddings/provider/provider.go b/server/internal/embeddings/provider/provider.go
index 0584bbc..9a251e6 100644
--- a/server/internal/embeddings/provider/provider.go
+++ b/server/internal/embeddings/provider/provider.go
@@ -107,6 +107,19 @@ type Provider interface {
 	// EmbedDocuments — callers must use SupportsTokenize() to decide
 	// whether to chunk inputs themselves.
 	TokenizeAndEmbed(ctx context.Context, texts []string) ([][]float32, error)
+
+	// StorageComponents returns the on-disk path components that
+	// namespace this provider's vector store, MOST-significant first:
+	// {kind, model-slug[, variant]}. Each component is already
+	// filesystem-safe (run through StorageSlug at the source). The
+	// vector-store dir is filepath.Join(ChromaPersistDir, components...).
+	//
+	// Crucially these are STRUCTURED fields the provider knows directly
+	// — never derived by re-parsing the flattened ID() — so the kind
+	// (always its own path segment) can never collide with a model name
+	// that happens to normalise to "ollama_…"/"voyage_…". That collision
+	// is what the flat single-slug scheme suffered from.
+	StorageComponents() []string
 }
 
 // State enumerates the dashboard-facing provider states surfaced via
diff --git a/server/internal/embeddings/provider/voyage/storagecomponents_test.go b/server/internal/embeddings/provider/voyage/storagecomponents_test.go
new file mode 100644
index 0000000..cfa82fe
--- /dev/null
+++ b/server/internal/embeddings/provider/voyage/storagecomponents_test.go
@@ -0,0 +1,18 @@
+package voyage
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestStorageComponents(t *testing.T) {
+	got := New(Config{Model: "voyage-code-3", OutputDimension: 2048, OutputDtype: "int8"}, nil, nil).StorageComponents()
+	if want := []string{"voyage", "voyage_code_3", "2048_int8"}; !reflect.DeepEqual(got, want) {
+		t.Errorf("StorageComponents = %v, want %v", got, want)
+	}
+	// Unset dimension → "auto" variant prefix (mirrors ID()).
+	auto := New(Config{Model: "voyage-3", OutputDtype: "float"}, nil, nil).StorageComponents()
+	if want := []string{"voyage", "voyage_3", "auto_float"}; !reflect.DeepEqual(auto, want) {
+		t.Errorf("StorageComponents (auto) = %v, want %v", auto, want)
+	}
+}
diff --git a/server/internal/embeddings/provider/voyage/voyage.go b/server/internal/embeddings/provider/voyage/voyage.go
index 91e0671..c43ea96 100644
--- a/server/internal/embeddings/provider/voyage/voyage.go
+++ b/server/internal/embeddings/provider/voyage/voyage.go
@@ -313,6 +313,19 @@ func (p *Provider) ID() string {
 func (p *Provider) Dimension() int         { return p.cfg.OutputDimension }
 func (p *Provider) SupportsTokenize() bool { return false }
 
+// StorageComponents namespaces the vector store as
+// voyage/<model-slug>/<dim>_<dtype>. dim+dtype share one variant segment
+// because both change vector identity and are known at config time;
+// mirrors the dim/dtype parts of ID() ("auto" when dimension is unset).
+func (p *Provider) StorageComponents() []string {
+	dimStr := "auto"
+	if p.cfg.OutputDimension > 0 {
+		dimStr = strconv.Itoa(p.cfg.OutputDimension)
+	}
+	variant := provider.StorageSlug(dimStr + "_" + p.cfg.OutputDtype)
+	return []string{provider.KindVoyage, provider.StorageSlug(p.cfg.Model), variant}
+}
+
 func (p *Provider) Start(ctx context.Context) error {
 	if p.cfg.Model == "" {
 		return errors.New("voyage: model is required")
diff --git a/server/internal/embeddings/service.go b/server/internal/embeddings/service.go
index c811e45..581b0dc 100644
--- a/server/internal/embeddings/service.go
+++ b/server/internal/embeddings/service.go
@@ -64,27 +64,27 @@ type Service struct {
 	// exercise the reopen path (SwitchProvider then only swaps the
 	// provider, matching the pre-unification behaviour).
 	vsHolder  *vectorstore.Holder
-	vsDirFor  func(slug string) string                     // cfg.ChromaDirForSlug
+	vsDirFor  func(components []string) string             // cfg.ChromaDirFor
 	vsOpener  func(dir string) (*vectorstore.Store, error) // vectorstore.Open
-	vsMigrate func() error                                 // legacy chroma-dir prefix migration (idempotent)
+	vsMigrate func() error                                 // legacy flat-chroma → nested migration (idempotent)
 }
 
 // AttachVectorStore wires the live vector-store reopen path used by
 // SwitchProvider. main.go calls it once after constructing the Service
 // and the shared Holder:
 //
-//	dirFor  — cfg.ChromaDirForSlug (maps a StorageSlug to an on-disk dir)
+//	dirFor  — cfg.ChromaDirFor (maps identity path components to a dir)
 //	opener  — vectorstore.Open
 //	migrate — optional idempotent legacy-dir migration run before each
 //	          reopen (lets a switch back to ollama on a pre-unification
-//	          box adopt its renamed dir without a restart); may be nil
+//	          box adopt its migrated dir without a restart); may be nil
 //
 // Passing the formula (dirFor) and opener as funcs keeps embeddings free
 // of a hard dependency on config path layout and avoids an
 // embeddings→storage import for the migration hook.
 func (s *Service) AttachVectorStore(
 	holder *vectorstore.Holder,
-	dirFor func(slug string) string,
+	dirFor func(components []string) string,
 	opener func(dir string) (*vectorstore.Store, error),
 	migrate func() error,
 ) {
@@ -99,21 +99,22 @@ func (s *Service) AttachVectorStore(
 	s.mu.Unlock()
 }
 
-// StorageSlug returns the filesystem slug of the ACTIVE provider's
-// identity (provider.StorageSlug(current.ID())), or "" when disabled /
-// not yet built. The dashboard's project-detail handler uses it to show
-// the live chroma directory.
-func (s *Service) StorageSlug() string {
+// StoragePath returns the ACTIVE provider's vector-store path components
+// (provider.Provider.StorageComponents), or nil when disabled / not yet
+// built. Callers join them under ChromaPersistDir via cfg.ChromaDirFor.
+// The dashboard's project-detail handler uses it to show the live chroma
+// directory.
+func (s *Service) StoragePath() []string {
 	if s == nil || s.disabled {
-		return ""
+		return nil
 	}
 	s.mu.RLock()
 	cur := s.current
 	s.mu.RUnlock()
 	if cur == nil {
-		return ""
+		return nil
 	}
-	return provider.StorageSlug(cur.ID())
+	return cur.StorageComponents()
 }
 
 // New constructs a Service from the env-derived config. The legacy
@@ -283,8 +284,9 @@ func stopProviderAsync(logger *slog.Logger, p provider.Provider) {
 }
 
 // reopenVectorStore opens a fresh *vectorstore.Store under the directory
-// derived from prov's identity slug and atomically swaps it into the
-// shared Holder. No-op when AttachVectorStore was never called (tests).
+// derived from prov's identity path components and atomically swaps it
+// into the shared Holder. No-op when AttachVectorStore was never called
+// (tests).
 func (s *Service) reopenVectorStore(prov provider.Provider) error {
 	s.mu.RLock()
 	holder, dirFor, opener, migrate := s.vsHolder, s.vsDirFor, s.vsOpener, s.vsMigrate
@@ -293,13 +295,14 @@ func (s *Service) reopenVectorStore(prov provider.Provider) error {
 		return nil // reopen path not wired (e.g. unit tests)
 	}
 	if migrate != nil {
-		// Idempotent legacy-dir prefixing — lets a switch back to ollama
-		// on a pre-unification box adopt its renamed dir without restart.
+		// Idempotent legacy flat→nested migration — lets a switch back to
+		// ollama on a pre-unification box adopt its migrated dir without
+		// a restart.
 		if err := migrate(); err != nil {
 			s.logger.Warn("embeddings: chroma legacy-dir migration failed during switch (continuing)", "err", err)
 		}
 	}
-	dir := dirFor(provider.StorageSlug(prov.ID()))
+	dir := dirFor(prov.StorageComponents())
 	newStore, err := opener(dir)
 	if err != nil {
 		s.logger.Error("embeddings: provider switched but vector store reopen failed; keeping previous store until restart",
diff --git a/server/internal/embeddings/switch_provider_test.go b/server/internal/embeddings/switch_provider_test.go
index 4e60583..969cd01 100644
--- a/server/internal/embeddings/switch_provider_test.go
+++ b/server/internal/embeddings/switch_provider_test.go
@@ -7,6 +7,7 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -17,7 +18,9 @@ import (
 )
 
 // fakeProv is a minimal provider.Provider for exercising the vector-store
-// reopen path; only ID() is consulted by reopenVectorStore / StorageSlug.
+// reopen path. StorageComponents derives the nested namespace by splitting
+// the id on ":" and slugging each field — enough to give distinct
+// providers distinct on-disk paths.
 type fakeProv struct{ id string }
 
 func (f fakeProv) Kind() string                                          { return "fake" }
@@ -35,13 +38,28 @@ func (f fakeProv) EmbedDocuments(context.Context, []string) ([][]float32, error)
 func (f fakeProv) TokenizeAndEmbed(context.Context, []string) ([][]float32, error) {
 	return nil, nil
 }
+func (f fakeProv) StorageComponents() []string {
+	parts := strings.Split(f.id, ":")
+	for i := range parts {
+		parts[i] = provider.StorageSlug(parts[i])
+	}
+	return parts
+}
 
 func quiet() *slog.Logger { return slog.New(slog.NewTextHandler(io.Discard, nil)) }
 
+// nestedDirFor mirrors cfg.ChromaDirFor: join the identity components under
+// a chroma container dir.
+func nestedDirFor(base string) func([]string) string {
+	return func(comps []string) string {
+		return filepath.Join(append([]string{base}, comps...)...)
+	}
+}
+
 // fakeFactory registers a test-only provider kind so SwitchProvider can be
 // driven end-to-end (it builds the new provider through the registry).
 // Build echoes the config bytes into the provider ID so the derived
-// storage slug is deterministic per test.
+// storage path is deterministic per test.
 type fakeFactory struct{}
 
 func (fakeFactory) Kind() string            { return "fake-switch" }
@@ -60,8 +78,9 @@ func init() { provider.Register(fakeFactory{}) }
 // queue is resumed so the service keeps serving.
 func TestSwitchProvider_RollbackOnReopenFailure(t *testing.T) {
 	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
 	const project = "/proj"
-	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	initial, err := vectorstore.Open(filepath.Join(base, "ollama", "m"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -76,7 +95,7 @@ func TestSwitchProvider_RollbackOnReopenFailure(t *testing.T) {
 	s := &Service{logger: quiet(), queue: NewQueue(2, time.Second), current: oldProv}
 	s.AttachVectorStore(
 		holder,
-		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		nestedDirFor(base),
 		func(string) (*vectorstore.Store, error) { return nil, errors.New("boom") }, // reopen always fails
 		nil,
 	)
@@ -109,8 +128,9 @@ func TestSwitchProvider_RollbackOnReopenFailure(t *testing.T) {
 // (empty) namespace, and the queue ends unblocked.
 func TestSwitchProvider_SuccessSwapsProviderAndStore(t *testing.T) {
 	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
 	const project = "/proj"
-	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	initial, err := vectorstore.Open(filepath.Join(base, "ollama", "m"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -122,12 +142,7 @@ func TestSwitchProvider_SuccessSwapsProviderAndStore(t *testing.T) {
 	holder := vectorstore.NewHolder(initial)
 
 	s := &Service{logger: quiet(), queue: NewQueue(2, time.Second), current: fakeProv{id: "ollama:m"}}
-	s.AttachVectorStore(
-		holder,
-		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
-		vectorstore.Open,
-		nil,
-	)
+	s.AttachVectorStore(holder, nestedDirFor(base), vectorstore.Open, nil)
 
 	if err := s.SwitchProvider(context.Background(), "fake-switch", []byte("newid")); err != nil {
 		t.Fatalf("switch: %v", err)
@@ -141,6 +156,10 @@ func TestSwitchProvider_SuccessSwapsProviderAndStore(t *testing.T) {
 	if got := holder.Count(project); got != 0 {
 		t.Errorf("after switch holder Count = %d, want 0 (new empty namespace)", got)
 	}
+	// New provider's nested namespace was created on disk.
+	if !dirExists(filepath.Join(base, "fake", "newid")) {
+		t.Errorf("new nested chroma dir chroma/fake/newid should exist")
+	}
 	if err := s.queue.Acquire(context.Background()); err != nil {
 		t.Errorf("queue should be unblocked after switch, Acquire = %v", err)
 	} else {
@@ -148,26 +167,28 @@ func TestSwitchProvider_SuccessSwapsProviderAndStore(t *testing.T) {
 	}
 }
 
-func TestServiceStorageSlug(t *testing.T) {
+func TestServiceStoragePath(t *testing.T) {
 	s := &Service{logger: quiet(), current: fakeProv{id: "voyage:voyage-code-3:2048:float"}}
-	if got := s.StorageSlug(); got != "voyage_voyage_code_3_2048_float" {
-		t.Errorf("StorageSlug = %q", got)
+	if got := strings.Join(s.StoragePath(), "/"); got != "voyage/voyage_code_3/2048/float" {
+		t.Errorf("StoragePath = %q", got)
 	}
 	// Disabled / no provider → empty.
-	if got := (&Service{logger: quiet(), disabled: true}).StorageSlug(); got != "" {
-		t.Errorf("disabled StorageSlug = %q, want empty", got)
+	if got := (&Service{logger: quiet(), disabled: true}).StoragePath(); len(got) != 0 {
+		t.Errorf("disabled StoragePath = %v, want empty", got)
 	}
-	if got := (&Service{logger: quiet()}).StorageSlug(); got != "" {
-		t.Errorf("nil-provider StorageSlug = %q, want empty", got)
+	if got := (&Service{logger: quiet()}).StoragePath(); len(got) != 0 {
+		t.Errorf("nil-provider StoragePath = %v, want empty", got)
 	}
 }
 
 func TestReopenVectorStore_SwapsToNewNamespace(t *testing.T) {
 	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
 	const project = "/proj"
 
-	// Initial store has one chunk for the project.
-	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	// Initial store has one chunk for the project, under ollama's namespace.
+	oldDir := filepath.Join(base, "ollama", "m")
+	initial, err := vectorstore.Open(oldDir)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -182,12 +203,7 @@ func TestReopenVectorStore_SwapsToNewNamespace(t *testing.T) {
 	}
 
 	s := &Service{logger: quiet()}
-	s.AttachVectorStore(
-		holder,
-		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
-		vectorstore.Open,
-		nil,
-	)
+	s.AttachVectorStore(holder, nestedDirFor(base), vectorstore.Open, nil)
 
 	// Switch to a new identity → reopen into a fresh, empty namespace.
 	if err := s.reopenVectorStore(fakeProv{id: "voyage:voyage-code-3:2048:float"}); err != nil {
@@ -196,19 +212,20 @@ func TestReopenVectorStore_SwapsToNewNamespace(t *testing.T) {
 	if got := holder.Count(project); got != 0 {
 		t.Errorf("after reopen Count = %d, want 0 (new empty namespace)", got)
 	}
-	// New dir created on disk; old dir still present (reuse on switch back).
-	if !dirExists(filepath.Join(dir, "chroma_voyage_voyage_code_3_2048_float")) {
-		t.Errorf("new chroma dir should exist")
+	// New nested dir created on disk; old dir still present (reuse on switch back).
+	if !dirExists(filepath.Join(base, "voyage", "voyage_code_3", "2048", "float")) {
+		t.Errorf("new nested chroma dir should exist")
 	}
-	if !dirExists(filepath.Join(dir, "chroma_ollama_m")) {
+	if !dirExists(oldDir) {
 		t.Errorf("old chroma dir should be preserved")
 	}
 }
 
 func TestReopenVectorStore_OpenerFailureKeepsOldStore(t *testing.T) {
 	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
 	const project = "/proj"
-	initial, err := vectorstore.Open(filepath.Join(dir, "chroma_ollama_m"))
+	initial, err := vectorstore.Open(filepath.Join(base, "ollama", "m"))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -222,7 +239,7 @@ func TestReopenVectorStore_OpenerFailureKeepsOldStore(t *testing.T) {
 	s := &Service{logger: quiet()}
 	s.AttachVectorStore(
 		holder,
-		func(slug string) string { return filepath.Join(dir, "chroma_"+slug) },
+		nestedDirFor(base),
 		func(string) (*vectorstore.Store, error) { return nil, errors.New("boom") },
 		nil,
 	)
diff --git a/server/internal/httpapi/server.go b/server/internal/httpapi/server.go
index 8048611..0276bbc 100644
--- a/server/internal/httpapi/server.go
+++ b/server/internal/httpapi/server.go
@@ -261,11 +261,11 @@ func (s *Server) enrichProjectStorage(out *openapi.Project, p *projects.Project)
 			out.SqliteSizeBytes = &sz
 		}
 	}
-	// Chroma dir is namespaced by the ACTIVE provider's identity slug, so
+	// Chroma dir is namespaced by the ACTIVE provider's identity path, so
 	// the displayed path tracks whatever provider is live now.
-	if slug := es.StorageSlug(); cfg.ChromaPersistDir != "" && slug != "" {
+	if comps := es.StoragePath(); cfg.ChromaPersistDir != "" && len(comps) > 0 {
 		col := vectorstore.CollectionName(p.HostPath)
-		dir := filepath.Join(cfg.ChromaDirForSlug(slug), col)
+		dir := filepath.Join(cfg.ChromaDirFor(comps), col)
 		out.ChromaPath = ptrString(dir)
 		if sz, ok := dirSizeBytes(dir); ok {
 			out.ChromaSizeBytes = &sz
diff --git a/server/internal/storage/chromamigrate.go b/server/internal/storage/chromamigrate.go
index 6d99185..78ae18e 100644
--- a/server/internal/storage/chromamigrate.go
+++ b/server/internal/storage/chromamigrate.go
@@ -10,52 +10,49 @@ import (
 	"github.com/dvcdsys/code-index/server/internal/embeddings/provider"
 )
 
-// knownProviderPrefixes are the StorageSlug prefixes a vector-store dir
-// gets under the unified scheme (provider.StorageSlug of an ID always
-// starts with "<kind>_"). A legacy dir lacking any of these was produced
-// by the old model-only naming, which only ever ran the ollama sidecar.
-var knownProviderPrefixes = []string{"ollama_", "openai_", "voyage_"}
-
-// PrefixLegacyChromaDirs renames legacy, un-prefixed vector-store
-// directories to the unified "ollama_"-prefixed form so existing ollama
-// vectors survive the switch to provider-identity namespacing WITHOUT a
-// reindex. Heuristic: a dir named "<base>_<X>" where X carries no known
-// provider prefix was written by the pre-unification ollama-only build,
-// so its true identity is "ollama:<model>" → "<base>_ollama_<slug(X)>".
+// MigrateFlatChromaToNested moves pre-unification FLAT vector-store dirs
+// into the unified NESTED layout. Old ollama-only builds wrote one dir per
+// model as a flat sibling of the chroma container:
+//
+//	<base>_<ModelSafeName>      e.g.  /data/chroma_nomic_embed_text
+//
+// The unified scheme namespaces by provider identity as nested dirs INSIDE
+// the container, one path segment per identity field:
+//
+//	<base>/<kind>/<model-slug>[/<variant>...]   e.g.  /data/chroma/ollama/nomic_embed_text
 //
-// The legacy suffix X was produced by the old Config.ModelSafeName (which
-// only mapped '/' and '-' to '_'), but the running server resolves the
-// dir via provider.StorageSlug, which maps EVERY non-[a-z0-9_] rune. For
-// model names containing characters the two normalizers treat differently
-// (e.g. a '.' or ':' in "nomic-embed-text:v1.5"), a naive "<base>_ollama_"+X
-// would NOT equal the dir the server opens, silently orphaning the vectors.
-// We therefore re-run the suffix through provider.StorageSlug. This is
-// exact for ALL models because StorageSlug(ModelSafeName(m)) ==
-// StorageSlug(m): ModelSafeName only collapses a subset ('/','-') of the
-// runes StorageSlug collapses, and '_' is preserved by StorageSlug, so the
-// two compose to the same canonical form.
+// Because the old build only ever ran ollama, EVERY flat sibling is a
+// legacy ollama model dir — there is no prefix-guessing (that ambiguity is
+// exactly what the nested layout removes: the provider kind is now its own
+// directory level, never glued onto the model slug). We move each
+// <base>_<X> → <base>/ollama/<StorageSlug(X)>/.
 //
-// chromaBase is cfg.ChromaPersistDir (e.g. ".../chroma"); legacy dirs sit
-// next to it as ".../chroma_<slug>". The scan covers ALL such dirs, not
-// just the active provider's: the active provider may be voyage (no legacy
-// dir to migrate) while the operator's ollama vectors wait under the
-// un-prefixed name for a future switch back.
+// StorageSlug(ModelSafeName(m)) == StorageSlug(m) (ModelSafeName collapses
+// only a subset — '/','-' — of the runes StorageSlug collapses, and '_' is
+// preserved), so the destination matches the dir the running server
+// resolves for "ollama:<model>" — even for a model whose name normalises to
+// a kind-looking slug, e.g. "openai-community/x" → ollama/openai_community_x
+// (correct), where the old flat scheme silently orphaned it.
 //
-// Idempotent: already-prefixed dirs are skipped; a rename whose target
-// already exists is skipped with a warning (never clobbers).
+// Idempotent: moved dirs live inside <base>/ and no longer match the flat
+// sibling pattern, so a re-run ignores them. A source whose target already
+// exists is left in place with a warning (never clobbered). The caller must
+// first relocate any legacy Python ChromaDB store occupying <base> itself
+// (vectorstore.DetectLegacyAndBackup) so <base> can serve as the container.
 //
-// LEGACY-MIGRATION (remove next release): one-time prefixing shim for
-// pre-unification ollama-only chroma dirs. Once every deployment has
-// booted on the unified layout, delete this function and its calls in
-// cmd/cix-server/main.go and embeddings.Service (the AttachVectorStore
-// migrate hook).
-func PrefixLegacyChromaDirs(chromaBase string, logger *slog.Logger) error {
+// LEGACY-MIGRATION (remove next release): one-time shim. Drop once every
+// deployment has booted on the nested layout.
+func MigrateFlatChromaToNested(chromaBase string, logger *slog.Logger) error {
 	if logger == nil {
 		logger = slog.Default()
 	}
 	if chromaBase == "" {
 		return nil
 	}
+	if err := os.MkdirAll(chromaBase, 0o755); err != nil {
+		return fmt.Errorf("create chroma container %s: %w", chromaBase, err)
+	}
+
 	parent := filepath.Dir(chromaBase)
 	prefix := filepath.Base(chromaBase) + "_" // e.g. "chroma_"
 
@@ -69,43 +66,37 @@ func PrefixLegacyChromaDirs(chromaBase string, logger *slog.Logger) error {
 
 	for _, e := range entries {
 		if !e.IsDir() {
-			continue // skip files like chroma_*.python-backup.* tarballs
+			continue // skip files (e.g. *.tar backups)
 		}
 		name := e.Name()
 		if !strings.HasPrefix(name, prefix) {
 			continue
 		}
+		if strings.Contains(name, ".python-backup.") {
+			continue // a backup DetectLegacyAndBackup created — leave it
+		}
 		suffix := strings.TrimPrefix(name, prefix)
 		if suffix == "" {
-			continue // exactly the base dir name + trailing "_": ignore
-		}
-		if hasKnownPrefix(suffix) {
-			continue // already migrated / native unified dir
+			continue // exactly the base name + trailing "_"
 		}
+
 		src := filepath.Join(parent, name)
-		// Canonicalise the suffix through StorageSlug so the renamed dir
-		// matches the path the running server resolves for this identity
-		// (handles model names with '.'/':' etc.; see func doc).
-		dst := filepath.Join(parent, prefix+"ollama_"+provider.StorageSlug(suffix))
+		// Every flat sibling is a legacy ollama model dir. Canonicalise the
+		// suffix through StorageSlug so it matches the path the server
+		// resolves for this identity ("ollama:<model>").
+		dst := filepath.Join(chromaBase, provider.KindOllama, provider.StorageSlug(suffix))
 		if fileExists(dst) {
-			logger.Warn("storage: skipping chroma legacy-prefix rename, target already exists",
+			logger.Warn("storage: skipping flat→nested chroma move, target already exists (no clobber)",
 				"src", src, "dst", dst)
 			continue
 		}
-		logger.Info("storage: prefixing legacy ollama chroma dir to unified naming",
-			"src", src, "dst", dst)
+		if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
+			return fmt.Errorf("create nested parent %s: %w", filepath.Dir(dst), err)
+		}
+		logger.Info("storage: migrating legacy flat chroma dir to nested layout", "src", src, "dst", dst)
 		if err := os.Rename(src, dst); err != nil {
 			return fmt.Errorf("rename %s -> %s: %w", src, dst, err)
 		}
 	}
 	return nil
 }
-
-func hasKnownPrefix(suffix string) bool {
-	for _, p := range knownProviderPrefixes {
-		if strings.HasPrefix(suffix, p) {
-			return true
-		}
-	}
-	return false
-}
diff --git a/server/internal/storage/chromamigrate_test.go b/server/internal/storage/chromamigrate_test.go
index f8929a2..d7ccd4c 100644
--- a/server/internal/storage/chromamigrate_test.go
+++ b/server/internal/storage/chromamigrate_test.go
@@ -13,83 +13,90 @@ func mkdir(t *testing.T, path string) {
 	}
 }
 
-func TestPrefixLegacyChromaDirs(t *testing.T) {
+func TestMigrateFlatChromaToNested(t *testing.T) {
 	dir := t.TempDir()
-	base := filepath.Join(dir, "chroma") // chromaBase; dirs are chroma_<slug>
+	base := filepath.Join(dir, "chroma") // container; flat legacy dirs are chroma_<X>
 
-	// Legacy un-prefixed ollama dir (should be renamed).
+	// Legacy flat ollama dir (should move into chroma/ollama/<slug>).
 	mkdir(t, base+"_awhiteside_coderankembed_q8_0_gguf")
-	// Already-prefixed dirs (must be left untouched).
-	mkdir(t, base+"_voyage_voyage_code_3_2048_float")
-	mkdir(t, base+"_ollama_already")
 	// A file that matches the prefix but is not a dir (ignored).
 	if err := os.WriteFile(base+"_backup.tar", []byte("x"), 0o644); err != nil {
 		t.Fatal(err)
 	}
 
-	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+	if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
 		t.Fatalf("migrate: %v", err)
 	}
 
-	// Legacy renamed to ollama-prefixed.
 	if dirExists(base + "_awhiteside_coderankembed_q8_0_gguf") {
-		t.Errorf("legacy un-prefixed dir should have been renamed away")
+		t.Errorf("legacy flat dir should have been moved away")
 	}
-	if !dirExists(base + "_ollama_awhiteside_coderankembed_q8_0_gguf") {
-		t.Errorf("expected renamed dir chroma_ollama_awhiteside_coderankembed_q8_0_gguf")
+	if !dirExists(filepath.Join(base, "ollama", "awhiteside_coderankembed_q8_0_gguf")) {
+		t.Errorf("expected nested dir chroma/ollama/awhiteside_coderankembed_q8_0_gguf")
 	}
-	// Already-prefixed untouched.
-	if !dirExists(base + "_voyage_voyage_code_3_2048_float") {
-		t.Errorf("voyage dir must be left untouched")
-	}
-	if !dirExists(base + "_ollama_already") {
-		t.Errorf("ollama-prefixed dir must be left untouched")
-	}
-	// File untouched.
 	if !fileExists(base + "_backup.tar") {
 		t.Errorf("non-dir entry must be ignored, not moved")
 	}
 }
 
-// TestPrefixLegacyChromaDirs_StrictNormalizesSpecialChars guards Finding 1:
-// the legacy suffix was written by ModelSafeName (only '/'->'_' and
-// '-'->'_'), so a model like "nomic-embed-text:v1.5" left a dir whose name
-// still held a '.'/':'. The running server resolves the dir via
-// provider.StorageSlug (every non-[a-z0-9_] -> '_'), so the migration must
-// canonicalise the suffix the same way or the vectors are silently orphaned.
-func TestPrefixLegacyChromaDirs_StrictNormalizesSpecialChars(t *testing.T) {
+// TestMigrateFlatChromaToNested_KindLookingModelName is the #8 regression:
+// a legacy ollama model whose name normalises to a known-kind-looking slug
+// (e.g. "openai-community/x" → "openai_community_x") was SKIPPED by the old
+// prefix heuristic and silently orphaned. With the nested layout the
+// provider kind is its own path segment, so the dir is correctly moved to
+// chroma/ollama/openai_community_x — exactly where the server resolves
+// "ollama:openai-community/x".
+func TestMigrateFlatChromaToNested_KindLookingModelName(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
+	mkdir(t, base+"_openai_community_x") // ModelSafeName("openai-community/x")
+
+	if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	if !dirExists(filepath.Join(base, "ollama", "openai_community_x")) {
+		t.Errorf("kind-looking legacy ollama dir must move to chroma/ollama/openai_community_x, not be skipped")
+	}
+	if dirExists(base + "_openai_community_x") {
+		t.Errorf("legacy flat dir should have been moved away")
+	}
+}
+
+// TestMigrateFlatChromaToNested_StrictNormalizesSpecialChars: the legacy
+// suffix was written by ModelSafeName (only '/'->'_' and '-'->'_'), so a
+// model like "nomic-embed-text:v1.5" left a dir holding a '.'/':'. The
+// running server resolves via StorageSlug (every non-[a-z0-9_] -> '_'), so
+// the migration must canonicalise identically or the vectors are orphaned.
+func TestMigrateFlatChromaToNested_StrictNormalizesSpecialChars(t *testing.T) {
 	dir := t.TempDir()
 	base := filepath.Join(dir, "chroma")
-	// Legacy dir as ModelSafeName would have produced for a v1.5 model:
-	// the '.' survives in the on-disk name.
-	mkdir(t, base+"_nomic_embed_text_v1.5")
+	mkdir(t, base+"_nomic_embed_text_v1.5") // '.' survives in the legacy name
 
-	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+	if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
 		t.Fatalf("migrate: %v", err)
 	}
-	// Must land on the StorageSlug form the server actually opens ('.'->'_').
-	if !dirExists(base + "_ollama_nomic_embed_text_v1_5") {
-		t.Errorf("expected strict-normalized dir chroma_ollama_nomic_embed_text_v1_5")
+	if !dirExists(filepath.Join(base, "ollama", "nomic_embed_text_v1_5")) {
+		t.Errorf("expected strict-normalized nested dir chroma/ollama/nomic_embed_text_v1_5")
 	}
 	if dirExists(base + "_nomic_embed_text_v1.5") {
-		t.Errorf("legacy dir should have been renamed away")
+		t.Errorf("legacy dir should have been moved away")
 	}
 }
 
-func TestPrefixLegacyChromaDirs_NoClobber(t *testing.T) {
+func TestMigrateFlatChromaToNested_NoClobber(t *testing.T) {
 	dir := t.TempDir()
 	base := filepath.Join(dir, "chroma")
-	// Both the legacy source and its would-be target already exist.
+	// Both the legacy source and its would-be nested target already exist.
 	mkdir(t, base+"_model_x")
-	mkdir(t, base+"_ollama_model_x")
+	mkdir(t, filepath.Join(base, "ollama", "model_x"))
 	if err := os.WriteFile(filepath.Join(base+"_model_x", "marker"), []byte("src"), 0o644); err != nil {
 		t.Fatal(err)
 	}
 
-	if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+	if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
 		t.Fatalf("migrate: %v", err)
 	}
-	// Source left in place (not clobbered into existing target).
+	// Source left in place (not clobbered into the existing target).
 	if !dirExists(base + "_model_x") {
 		t.Errorf("source dir must be preserved when target already exists")
 	}
@@ -98,21 +105,39 @@ func TestPrefixLegacyChromaDirs_NoClobber(t *testing.T) {
 	}
 }
 
-func TestPrefixLegacyChromaDirs_Idempotent(t *testing.T) {
+func TestMigrateFlatChromaToNested_Idempotent(t *testing.T) {
 	dir := t.TempDir()
 	base := filepath.Join(dir, "chroma")
 	mkdir(t, base+"_legacy_model")
 
 	for i := 0; i < 2; i++ {
-		if err := PrefixLegacyChromaDirs(base, quietLogger()); err != nil {
+		if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
 			t.Fatalf("run %d: %v", i, err)
 		}
 	}
-	if !dirExists(base + "_ollama_legacy_model") {
-		t.Errorf("expected ollama-prefixed dir after idempotent runs")
+	if !dirExists(filepath.Join(base, "ollama", "legacy_model")) {
+		t.Errorf("expected nested dir after idempotent runs")
 	}
 	if dirExists(base + "_legacy_model") {
-		t.Errorf("legacy dir should be gone after first run")
+		t.Errorf("legacy flat dir should be gone after first run")
+	}
+}
+
+func TestMigrateFlatChromaToNested_IgnoresPythonBackup(t *testing.T) {
+	dir := t.TempDir()
+	base := filepath.Join(dir, "chroma")
+	backup := base + "_model.python-backup.20250101-000000"
+	mkdir(t, backup)
+
+	if err := MigrateFlatChromaToNested(base, quietLogger()); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	// A python-backup dir must be left exactly where it is.
+	if !dirExists(backup) {
+		t.Errorf("python-backup dir must not be moved")
+	}
+	if dirExists(filepath.Join(base, "ollama")) {
+		t.Errorf("python-backup dir must not be treated as a legacy ollama namespace")
 	}
 }
 
diff --git a/server/internal/vectorstore/migrate.go b/server/internal/vectorstore/migrate.go
index 17cccc4..cc6a4eb 100644
--- a/server/internal/vectorstore/migrate.go
+++ b/server/internal/vectorstore/migrate.go
@@ -2,10 +2,8 @@ package vectorstore
 
 import (
 	"fmt"
-	"log/slog"
 	"os"
 	"path/filepath"
-	"strings"
 	"time"
 )
 
@@ -32,57 +30,3 @@ func DetectLegacyAndBackup(dir string) (backed bool, err error) {
 	}
 	return true, nil
 }
-
-// WarnIfNamespaceOrphaned logs a prominent warning when the active vector-
-// store directory (activeDir) does not exist yet but sibling chroma
-// namespace dirs do. That pattern means vectors were previously indexed
-// under a different provider identity (or the pre-unification layout) and
-// the active provider will start empty — so search silently returns
-// nothing on an otherwise-healthy server. Surfacing it turns a silent
-// data-orphan into an actionable log line: the operator can switch back to
-// the prior provider or reindex. Best-effort; returns an error only when
-// the directory scan itself fails.
-func WarnIfNamespaceOrphaned(chromaBase, activeDir string, logger *slog.Logger) error {
-	if logger == nil {
-		logger = slog.Default()
-	}
-	if chromaBase == "" {
-		return nil
-	}
-	// Active namespace already present → nothing orphaned.
-	if _, err := os.Stat(activeDir); err == nil {
-		return nil
-	} else if !os.IsNotExist(err) {
-		return err
-	}
-
-	parent := filepath.Dir(chromaBase)
-	prefix := filepath.Base(chromaBase) + "_"
-	activeName := filepath.Base(activeDir)
-	entries, err := os.ReadDir(parent)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return nil // nothing indexed yet
-		}
-		return err
-	}
-	var siblings []string
-	for _, e := range entries {
-		if !e.IsDir() {
-			continue
-		}
-		name := e.Name()
-		if name == activeName || !strings.HasPrefix(name, prefix) {
-			continue
-		}
-		if strings.Contains(name, ".python-backup.") {
-			continue // already-backed-up legacy layout, handled separately
-		}
-		siblings = append(siblings, name)
-	}
-	if len(siblings) > 0 {
-		logger.Warn("active embedding-provider vector namespace is empty but other namespaces exist — prior vectors are under a different provider identity/layout; switch back to the prior provider or reindex to populate this one",
-			"active_dir", activeName, "other_namespaces", siblings)
-	}
-	return nil
-}
diff --git a/server/internal/vectorstore/migrate_test.go b/server/internal/vectorstore/migrate_test.go
deleted file mode 100644
index 8abd519..0000000
--- a/server/internal/vectorstore/migrate_test.go
+++ /dev/null
@@ -1,82 +0,0 @@
-package vectorstore
-
-import (
-	"bytes"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-)
-
-func TestWarnIfNamespaceOrphaned(t *testing.T) {
-	mkdir := func(t *testing.T, p string) {
-		t.Helper()
-		if err := os.MkdirAll(p, 0o755); err != nil {
-			t.Fatal(err)
-		}
-	}
-
-	t.Run("warns when active namespace empty but siblings exist", func(t *testing.T) {
-		root := t.TempDir()
-		base := filepath.Join(root, "chroma")
-		// A prior provider's namespace exists on disk; the active one does not.
-		mkdir(t, base+"_ollama_m")
-		active := base + "_voyage_voyage_code_3_2048_float"
-
-		var buf bytes.Buffer
-		logger := slog.New(slog.NewTextHandler(&buf, nil))
-		if err := WarnIfNamespaceOrphaned(base, active, logger); err != nil {
-			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
-		}
-		out := buf.String()
-		if !strings.Contains(out, "other namespaces") || !strings.Contains(out, "chroma_ollama_m") {
-			t.Errorf("expected an orphan warning naming the sibling dir, got:\n%s", out)
-		}
-	})
-
-	t.Run("silent when active namespace already present", func(t *testing.T) {
-		root := t.TempDir()
-		base := filepath.Join(root, "chroma")
-		active := base + "_voyage_x"
-		mkdir(t, active)           // active dir exists
-		mkdir(t, base+"_ollama_m") // and a sibling too — still must not warn
-
-		var buf bytes.Buffer
-		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
-			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
-		}
-		if buf.Len() != 0 {
-			t.Errorf("expected no log output when active dir present, got:\n%s", buf.String())
-		}
-	})
-
-	t.Run("silent on a fresh install with no namespaces", func(t *testing.T) {
-		root := t.TempDir()
-		base := filepath.Join(root, "chroma")
-		active := base + "_ollama_m"
-
-		var buf bytes.Buffer
-		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
-			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
-		}
-		if buf.Len() != 0 {
-			t.Errorf("expected no log output on fresh install, got:\n%s", buf.String())
-		}
-	})
-
-	t.Run("ignores python-backup dirs as siblings", func(t *testing.T) {
-		root := t.TempDir()
-		base := filepath.Join(root, "chroma")
-		mkdir(t, base+"_ollama_m.python-backup.20250101-000000")
-		active := base + "_voyage_x"
-
-		var buf bytes.Buffer
-		if err := WarnIfNamespaceOrphaned(base, active, slog.New(slog.NewTextHandler(&buf, nil))); err != nil {
-			t.Fatalf("WarnIfNamespaceOrphaned: %v", err)
-		}
-		if buf.Len() != 0 {
-			t.Errorf("python-backup dir should not count as an orphaned namespace, got:\n%s", buf.String())
-		}
-	})
-}
diff --git a/server/internal/vectorstore/store.go b/server/internal/vectorstore/store.go
index dbc5584..374b3af 100644
--- a/server/internal/vectorstore/store.go
+++ b/server/internal/vectorstore/store.go
@@ -65,7 +65,7 @@ func collectionName(projectPath string) string {
 
 // CollectionName is the exported alias for the per-project chromem-go
 // collection identifier. The dashboard's project-detail card uses it to
-// resolve the on-disk directory under cfg.ChromaDirForSlug(activeSlug).
+// resolve the on-disk directory under cfg.ChromaDirFor(activeComponents).
 func CollectionName(projectPath string) string { return collectionName(projectPath) }
 
 // docID format: "{md5hex(filePath)[:12]}:{startLine}-{endLine}:{idx}"

From 2a9e84889e7872bb6f67dcc9a2b0f25fa505c50f Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 2 Jun 2026 15:52:30 +0100
Subject: [PATCH 33/34] fix(embeddings): guard /test against ollama; cover
 embeddingscfg with tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-merge review fixes for the pluggable-provider work:

- httpapi: reject kind=ollama on POST /admin/embedding-providers/{kind}/test.
  Start()ing a throw-away ollama provider would spawn a second llama-server
  child on the live sidecar's socket. The dashboard never tests ollama (it is
  configured via the runtime-config form), so fail fast with 400 instead of
  risking a competing sidecar from an ad-hoc admin call. Adds a gating test.

- embeddingscfg: add the previously-missing test suite for the provider
  persistence layer that boot reads to pick the active provider — Get on a
  fresh DB, Save insert + update round-trips, empty-config, and the
  empty-kind / invalid-JSON rejections.

go vet ./... · go build ./... · go test ./internal/{embeddingscfg,httpapi}/... all pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../embeddingscfg/embeddingscfg_test.go       | 122 ++++++++++++++++++
 server/internal/httpapi/admin_embeddings.go   |  12 ++
 .../internal/httpapi/admin_embeddings_test.go |  19 +++
 3 files changed, 153 insertions(+)
 create mode 100644 server/internal/embeddingscfg/embeddingscfg_test.go

diff --git a/server/internal/embeddingscfg/embeddingscfg_test.go b/server/internal/embeddingscfg/embeddingscfg_test.go
new file mode 100644
index 0000000..fcde989
--- /dev/null
+++ b/server/internal/embeddingscfg/embeddingscfg_test.go
@@ -0,0 +1,122 @@
+package embeddingscfg
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	"github.com/dvcdsys/code-index/server/internal/db"
+)
+
+// openTestService opens an in-memory DB (all migrations applied, including
+// migration 12 which adds the embedding_provider columns) and wraps it.
+func openTestService(t *testing.T) *Service {
+	t.Helper()
+	d, err := db.Open(":memory:")
+	if err != nil {
+		t.Fatalf("db.Open: %v", err)
+	}
+	t.Cleanup(func() { d.Close() })
+	return New(d)
+}
+
+// TestGet_FreshDB_NoRow covers the "no provider persisted yet" path:
+// runtime_settings holds no embedding_provider, so Get reports has=false
+// and the caller is expected to seed from env.
+func TestGet_FreshDB_NoRow(t *testing.T) {
+	s := openTestService(t)
+	snap, has, err := s.Get(context.Background())
+	if err != nil {
+		t.Fatalf("Get: %v", err)
+	}
+	if has {
+		t.Fatalf("has = true on a fresh DB, want false (snap=%+v)", snap)
+	}
+}
+
+// TestSave_Insert_ThenGet covers the INSERT branch (no row yet) and a
+// round-trip read of kind + config.
+func TestSave_Insert_ThenGet(t *testing.T) {
+	s := openTestService(t)
+	cfg := json.RawMessage(`{"model":"voyage-code-3","output_dimension":2048}`)
+	if err := s.Save(context.Background(), Snapshot{Kind: "voyage", Config: cfg}, "admin-1"); err != nil {
+		t.Fatalf("Save: %v", err)
+	}
+	snap, has, err := s.Get(context.Background())
+	if err != nil {
+		t.Fatalf("Get: %v", err)
+	}
+	if !has {
+		t.Fatalf("has = false after Save, want true")
+	}
+	if snap.Kind != "voyage" {
+		t.Errorf("Kind = %q, want voyage", snap.Kind)
+	}
+	if string(snap.Config) != string(cfg) {
+		t.Errorf("Config = %q, want %q", snap.Config, cfg)
+	}
+}
+
+// TestSave_Update_Overwrites covers the UPDATE branch (row already exists):
+// a second Save must overwrite the persisted kind + config rather than
+// inserting a duplicate or being ignored.
+func TestSave_Update_Overwrites(t *testing.T) {
+	s := openTestService(t)
+	ctx := context.Background()
+	if err := s.Save(ctx, Snapshot{Kind: "ollama", Config: json.RawMessage(`{"model":"a"}`)}, ""); err != nil {
+		t.Fatalf("first Save: %v", err)
+	}
+	newCfg := json.RawMessage(`{"model":"text-embedding-3-large","dimensions":256}`)
+	if err := s.Save(ctx, Snapshot{Kind: "openai", Config: newCfg}, "admin-2"); err != nil {
+		t.Fatalf("second Save: %v", err)
+	}
+	snap, has, err := s.Get(ctx)
+	if err != nil {
+		t.Fatalf("Get: %v", err)
+	}
+	if !has || snap.Kind != "openai" {
+		t.Fatalf("after overwrite: has=%v kind=%q, want has=true kind=openai", has, snap.Kind)
+	}
+	if string(snap.Config) != string(newCfg) {
+		t.Errorf("Config = %q, want %q", snap.Config, newCfg)
+	}
+}
+
+// TestSave_EmptyConfig covers a provider persisted with no config blob:
+// json.Valid is skipped (len 0) and Get reports has=true with an empty Config.
+func TestSave_EmptyConfig(t *testing.T) {
+	s := openTestService(t)
+	ctx := context.Background()
+	if err := s.Save(ctx, Snapshot{Kind: "ollama"}, ""); err != nil {
+		t.Fatalf("Save: %v", err)
+	}
+	snap, has, err := s.Get(ctx)
+	if err != nil {
+		t.Fatalf("Get: %v", err)
+	}
+	if !has || snap.Kind != "ollama" {
+		t.Fatalf("has=%v kind=%q, want has=true kind=ollama", has, snap.Kind)
+	}
+	if len(snap.Config) != 0 {
+		t.Errorf("Config = %q, want empty", snap.Config)
+	}
+}
+
+// TestSave_RejectsEmptyKind: a Snapshot with no kind is a programming error
+// and must be refused before touching the DB.
+func TestSave_RejectsEmptyKind(t *testing.T) {
+	s := openTestService(t)
+	if err := s.Save(context.Background(), Snapshot{Config: json.RawMessage(`{}`)}, ""); err == nil {
+		t.Fatal("Save with empty kind succeeded, want error")
+	}
+}
+
+// TestSave_RejectsInvalidJSON: a non-empty config that isn't valid JSON must
+// be refused so a malformed blob never lands in the DB for boot to choke on.
+func TestSave_RejectsInvalidJSON(t *testing.T) {
+	s := openTestService(t)
+	err := s.Save(context.Background(), Snapshot{Kind: "voyage", Config: json.RawMessage(`{not json`)}, "")
+	if err == nil {
+		t.Fatal("Save with invalid JSON config succeeded, want error")
+	}
+}
diff --git a/server/internal/httpapi/admin_embeddings.go b/server/internal/httpapi/admin_embeddings.go
index 3349b25..c4529e0 100644
--- a/server/internal/httpapi/admin_embeddings.go
+++ b/server/internal/httpapi/admin_embeddings.go
@@ -262,6 +262,18 @@ func (s *Server) TestEmbeddingProvider(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusBadRequest, "unknown provider kind: "+kind)
 		return
 	}
+	// The /test endpoint Start()s a throw-away provider. For the HTTP-only
+	// providers that is a harmless one-shot connect probe, but ollama's
+	// Start() spawns a real llama-server child — on the SAME socket path as
+	// the live sidecar, which would conflict with the running process. The
+	// dashboard never tests ollama (it is configured via the runtime-config
+	// form, not this endpoint), so reject it here rather than risk spawning
+	// a competing sidecar from an ad-hoc admin call.
+	if kind == provider.KindOllama {
+		writeError(w, http.StatusBadRequest,
+			"ollama is configured via the runtime-config form, not the provider test endpoint; testing it here would spawn a competing llama-server sidecar")
+		return
+	}
 
 	body, err := io.ReadAll(io.LimitReader(r.Body, 64*1024))
 	if err != nil {
diff --git a/server/internal/httpapi/admin_embeddings_test.go b/server/internal/httpapi/admin_embeddings_test.go
index 3138108..d629d3c 100644
--- a/server/internal/httpapi/admin_embeddings_test.go
+++ b/server/internal/httpapi/admin_embeddings_test.go
@@ -155,6 +155,25 @@ func TestTestEmbeddingProvider_BadKind(t *testing.T) {
 	}
 }
 
+// TestTestEmbeddingProvider_RejectsOllama guards the /test endpoint against
+// kind=ollama: Start()ing a throw-away ollama provider would spawn a second
+// llama-server child on the live sidecar's socket. The endpoint must reject
+// it (400) without ever building/starting a provider.
+func TestTestEmbeddingProvider_RejectsOllama(t *testing.T) {
+	f := newAdminFixture(t)
+	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)
+	f.Router = NewRouter(f.Deps)
+	cookie := adminCookie(t, f)
+
+	req := withCookie(httptest.NewRequest(http.MethodPost,
+		"/api/v1/admin/embedding-providers/ollama/test", bytes.NewReader([]byte(`{}`))), cookie)
+	rr := httptest.NewRecorder()
+	f.Router.ServeHTTP(rr, req)
+	if rr.Code != http.StatusBadRequest {
+		t.Fatalf("status = %d, want 400 (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
 func TestTestEmbeddingProvider_MissingAPIKey(t *testing.T) {
 	f := newAdminFixture(t)
 	f.Deps.EmbeddingsCfg = embeddingscfg.New(f.Deps.DB)

From 45a0e70a7230b17710ba1cd4d47629271cbc5a94 Mon Sep 17 00:00:00 2001
From: dvcdsys <dvcdsys@gmail.com>
Date: Tue, 2 Jun 2026 16:11:56 +0100
Subject: [PATCH 34/34] =?UTF-8?q?fix(cli):=20avoid=20int64=E2=86=92int=20t?=
 =?UTF-8?q?runcation=20in=20default-tag=20parser?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CodeQL go/incorrect-integer-conversion (high) flagged
parseDefaultValue: strconv.ParseInt(raw, 10, 64) yields an int64 that
int(v) narrows, silently truncating on 32-bit builds. Parse with
bitSize 0 (platform int width) instead, so the value is bounded to int
range before conversion and an out-of-range tag errors (ok=false)
rather than wrapping.

Sibling parser in set.go is unaffected — it uses reflect.Value.SetInt
with an explicit OverflowInt guard and no narrowing conversion.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 cli/internal/config/loader_koanf.go | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cli/internal/config/loader_koanf.go b/cli/internal/config/loader_koanf.go
index 12b6697..4edacb0 100644
--- a/cli/internal/config/loader_koanf.go
+++ b/cli/internal/config/loader_koanf.go
@@ -127,7 +127,14 @@ func parseDefaultValue(raw string, t reflect.Type) (any, bool) {
 		}
 		return v, true
 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-		v, err := strconv.ParseInt(raw, 10, 64)
+		// bitSize 0 parses into the platform int width, so the int(v)
+		// conversion below cannot truncate on 32-bit builds (passing 64
+		// here would let an int64 silently narrow — CodeQL
+		// go/incorrect-integer-conversion). default: tags are
+		// developer-controlled small values (ports, counts, timeouts),
+		// well within int range; an out-of-range tag now errors here
+		// (ok=false) rather than truncating.
+		v, err := strconv.ParseInt(raw, 10, 0)
 		if err != nil {
 			return nil, false
 		}