diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
new file mode 100644
index 0000000..9b32205
--- /dev/null
+++ b/.claude-plugin/marketplace.json
@@ -0,0 +1,25 @@
+{
+ "$schema": "https://json.schemastore.org/claude-code-marketplace.json",
+ "name": "code-index",
+ "owner": {
+ "name": "dvcdsys",
+ "email": "dvcdsys@gmail.com"
+ },
+ "description": "Marketplace for cix — semantic code search and navigation tooling for Claude Code",
+ "plugins": [
+ {
+ "name": "cix",
+ "source": "./plugins/cix",
+ "description": "Semantic code search and navigation. Bundles the cix CLI, slash commands, behavioral hooks, and the experimental cix-workspace skill + cix-workspace-investigator sub-agent for cross-project research across cix workspaces.",
+ "author": {
+ "name": "dvcdsys"
+ },
+ "homepage": "https://github.com/dvcdsys/code-index",
+ "repository": "https://github.com/dvcdsys/code-index",
+ "license": "MIT",
+ "keywords": ["search", "code-search", "semantic", "navigation", "indexing", "embeddings", "workspace", "cross-project", "sub-agent"],
+ "category": "developer-tools",
+ "tags": ["search", "indexing", "ai", "embeddings", "workspace", "cross-project"]
+ }
+ ]
+}
diff --git a/.env.example b/.env.example
index ba37d24..464f2ac 100644
--- a/.env.example
+++ b/.env.example
@@ -17,6 +17,10 @@ CIX_PORT=21847
CIX_CHROMA_PERSIST_DIR=~/.cix/data/chroma
CIX_SQLITE_PATH=~/.cix/data/sqlite/projects.db
CIX_GGUF_CACHE_DIR=~/.cix/data/models
+# Base dir for cloned GitHub repos (each clone lives at <data-dir>/repos/<owner>/<repo>).
+# Defaults to <data-dir>/repos. Point at a dedicated volume —
+# cloned repos can be large. (Legacy alias: CIX_WORKSPACES_DATA_DIR.)
+# CIX_REPOS_DIR=/data/cix-repos
# ── Indexing ──────────────────────────────────────────────────────────────
CIX_EMBEDDING_MODEL=awhiteside/CodeRankEmbed-Q8_0-GGUF
diff --git a/.github/workflows/ci-cli.yml b/.github/workflows/ci-cli.yml
index b8a5c9b..5c92a55 100644
--- a/.github/workflows/ci-cli.yml
+++ b/.github/workflows/ci-cli.yml
@@ -2,12 +2,12 @@ name: "CI: CLI"
on:
push:
- branches: [main]
+ branches: [main, develop]
paths:
- "cli/**"
- ".github/workflows/ci-cli.yml"
pull_request:
- branches: [main]
+ branches: [main, develop]
paths:
- "cli/**"
- ".github/workflows/ci-cli.yml"
diff --git a/.github/workflows/ci-plugin.yml b/.github/workflows/ci-plugin.yml
new file mode 100644
index 0000000..20e08ff
--- /dev/null
+++ b/.github/workflows/ci-plugin.yml
@@ -0,0 +1,78 @@
+name: Plugin Tests
+
+# Trigger only when plugin files change — server/CLI/dashboard work
+# is unaffected and shouldn't run plugin tests. Branch list matches
+# ci-cli.yml / ci-server.yml so plugin work lands on equal footing
+# (the integration branch is `develop`, not feature branches).
+on:
+ push:
+ branches: [main, develop]
+ paths:
+ - 'plugins/cix/**'
+ - '.claude-plugin/**'
+ - '.github/workflows/ci-plugin.yml'
+ pull_request:
+ branches: [main, develop]
+ paths:
+ - 'plugins/cix/**'
+ - '.claude-plugin/**'
+ - '.github/workflows/ci-plugin.yml'
+
+# Minimum permissions required by the workflow (CodeQL workflow-permissions advisory).
+# Read-only on repo contents is enough — we don't push code, comments, or releases.
+permissions:
+ contents: read
+
+jobs:
+ test:
+ name: bats + shellcheck on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install bats, jq, shellcheck (Linux)
+ if: runner.os == 'Linux'
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y bats jq shellcheck
+
+ - name: Install bats, jq, shellcheck (macOS)
+ if: runner.os == 'macOS'
+ run: |
+ brew install bats-core jq shellcheck
+
+ - name: Verify bats version
+ run: bats --version
+
+ - name: Run bats test suites
+ run: bats --tap plugins/cix/tests/*.bats
+
+ - name: ShellCheck on hook scripts
+   run: |
+     # `--severity=warning` filters out style nags; `-x` follows
+     # sourced files (we don't source any in v0.1, but defensive).
+     shellcheck -x --severity=warning plugins/cix/scripts/*.sh
+
+ - name: Validate JSON manifests with jq
+ run: |
+ jq . .claude-plugin/marketplace.json
+ jq . plugins/cix/.claude-plugin/plugin.json
+ jq . plugins/cix/hooks/hooks.json
+
+ - name: Verify symlink integrity
+ run: |
+ # The bin/cix symlink MUST point at scripts/cix-wrapper.sh.
+ if [[ ! -L plugins/cix/bin/cix ]]; then
+ echo "::error::plugins/cix/bin/cix is not a symlink"
+ exit 1
+ fi
+ target=$(readlink plugins/cix/bin/cix)
+ if [[ "$target" != "../scripts/cix-wrapper.sh" ]]; then
+ echo "::error::bin/cix points to '$target' (expected '../scripts/cix-wrapper.sh')"
+ exit 1
+ fi
diff --git a/.github/workflows/ci-server.yml b/.github/workflows/ci-server.yml
index 10c1466..0e16f49 100644
--- a/.github/workflows/ci-server.yml
+++ b/.github/workflows/ci-server.yml
@@ -2,12 +2,12 @@ name: "CI: Server"
on:
push:
- branches: [main]
+ branches: [main, develop]
paths:
- "server/**"
- ".github/workflows/ci-server.yml"
pull_request:
- branches: [main]
+ branches: [main, develop]
paths:
- "server/**"
- ".github/workflows/ci-server.yml"
diff --git a/.github/workflows/llama-pin-check.yml b/.github/workflows/llama-pin-check.yml
new file mode 100644
index 0000000..187416e
--- /dev/null
+++ b/.github/workflows/llama-pin-check.yml
@@ -0,0 +1,65 @@
+name: "llama.cpp pin freshness"
+
+# The CUDA image pins ghcr.io/ggml-org/llama.cpp:server-cuda by digest for
+# reproducible builds (see server/Dockerfile.cuda). That pin would otherwise
+# rot silently. This job checks weekly whether upstream has shipped a newer
+# build and, if so, opens (or updates) a tracking issue so the bump doesn't
+# get forgotten. The same check also runs on every CUDA image build as a
+# build-log annotation — this workflow is the persistent, can't-miss reminder.
+on:
+ schedule:
+ - cron: "0 8 * * 1" # Mondays 08:00 UTC
+ workflow_dispatch:
+
+permissions:
+ contents: read
+ issues: write
+
+jobs:
+ check:
+ name: Check pinned llama.cpp digest
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Set up Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Compare pinned vs upstream digest
+ id: check
+ run: server/scripts/check-llama-pin.sh server/Dockerfile.cuda
+
+ - name: Open or update reminder issue
+ if: steps.check.outputs.stale == 'true'
+ env:
+ GH_TOKEN: ${{ github.token }}
+ PINNED: ${{ steps.check.outputs.pinned }}
+ CURRENT: ${{ steps.check.outputs.current }}
+ run: |
+ set -euo pipefail
+ title="chore(server/cuda): bump pinned llama.cpp digest"
+ body="$(cat < checksums.txt
+
+ - name: Delete previous develop release + tag
+ env:
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ # `|| true` because the release/tag may not exist on the first run.
+ gh release delete cli/develop \
+ --cleanup-tag --yes \
+ --repo "${{ github.repository }}" || true
+
+ - name: Create develop release
+ uses: softprops/action-gh-release@v2
+ with:
+ tag_name: cli/develop
+ target_commitish: develop
+ name: "CLI develop (${{ github.sha }})"
+ prerelease: true
+ make_latest: "false"
+ files: |
+ artifacts/*.tar.gz
+ artifacts/checksums.txt
+ body: |
+ **Floating develop release** — force-updated on every merge to
+ `develop` that touches `cli/**`. Not a stable release.
+
+ Built from commit ${{ github.sha }}.
+
+ ## Install
+
+ ```bash
+ curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/main/install-develop.sh | bash
+ ```
+
+ Re-run the same command later to pick up the next develop build.
+ For stable, use [`install.sh`](https://github.com/dvcdsys/code-index/blob/main/install.sh) instead.
diff --git a/.github/workflows/prerelease-server.yml b/.github/workflows/prerelease-server.yml
new file mode 100644
index 0000000..5281637
--- /dev/null
+++ b/.github/workflows/prerelease-server.yml
@@ -0,0 +1,59 @@
+name: "Pre-release: Server CUDA (develop)"
+
+# Triggered on push to `develop` (i.e. PR merges; direct pushes are
+# blocked by branch protection). Builds the CUDA-only image and pushes
+# it to Docker Hub as the floating tag `:develop-cu128`, so the prod
+# RTX 3090 box can stage a pre-release without waiting for a release
+# tag.
+#
+# CPU image is intentionally skipped here — it's only built on real
+# `server/v*` release tags.
+on:
+ push:
+ branches: [develop]
+ paths:
+ - "server/**"
+ - "doc/openapi.yaml"
+ - ".github/workflows/prerelease-server.yml"
+
+permissions:
+ contents: read
+
+jobs:
+ docker-cuda:
+ name: Build CUDA image (amd64, develop)
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Set up Buildx
+ uses: docker/setup-buildx-action@v3
+
+ # Reminder (never fails the build) when upstream llama.cpp has moved past
+ # the digest pinned in Dockerfile.cuda — keeps the deliberate pin honest.
+ - name: Warn if pinned llama.cpp is stale
+ run: server/scripts/check-llama-pin.sh server/Dockerfile.cuda
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ - name: Build and push
+ uses: docker/build-push-action@v6
+ with:
+ context: server
+ file: server/Dockerfile.cuda
+ platforms: linux/amd64
+ push: true
+ provenance: mode=max
+ sbom: true
+ build-args: VERSION=develop-${{ github.sha }}
+ # `openapi=doc` mounts the repo-root doc/ folder so the dashboard
+ # build stage can `COPY --from=openapi openapi.yaml` without us
+ # widening the primary build context (which is `server/`).
+ build-contexts: |
+ openapi=doc
+ tags: dvcdsys/code-index:develop-cu128
diff --git a/.github/workflows/release-server.yml b/.github/workflows/release-server.yml
index f0c7c2f..c53afcd 100644
--- a/.github/workflows/release-server.yml
+++ b/.github/workflows/release-server.yml
@@ -128,6 +128,11 @@ jobs:
- name: Set up Buildx
uses: docker/setup-buildx-action@v3
+ # Reminder (never fails the build) when upstream llama.cpp has moved past
+ # the digest pinned in Dockerfile.cuda — keeps the deliberate pin honest.
+ - name: Warn if pinned llama.cpp is stale
+ run: server/scripts/check-llama-pin.sh server/Dockerfile.cuda
+
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 499dcce..8c30dc6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,9 +12,60 @@ code-index/
├── cli/ # Go CLI (cix binary)
│ ├── cmd/ # cobra commands
│ └── internal/ # client, config, daemon, indexer, watcher
-└── skills/ # Claude Code skill definitions
+├── plugins/cix/ # Claude Code plugin (hooks, skills, slash commands, bats tests)
+├── skills/ # Canonical sources for cross-cutting skills
+│ # (mirrored into plugins/cix/skills/ via sync-skills.sh)
+└── doc/ # Tracked documentation
```
+The `docs/` directory (with an `s`) is gitignored — it is for local notes
+only. New tracked documentation goes under `doc/`.
+
+## Branches and pull requests
+
+The repo runs a two-branch model:
+
+| Branch | Purpose |
+|-----------|---------------------------------------------------------------|
+| `main` | Release branch. Tags (`server/vX.Y.Z`, `cli/vX.Y.Z`) cut from here. |
+| `develop` | Integration branch. All feature and fix work merges here first. |
+
+**Open every PR against `develop`.** Promotion from `develop` to `main`
+happens as part of the release workflow, not per-feature.
+
+CI is wired in three workflows — `ci-cli.yml`, `ci-server.yml`,
+`ci-plugin.yml` — all gated on the same branch set:
+
+- Push to `main` or `develop` runs CI.
+- Pull request targeting `main` or `develop` runs CI.
+- Push to any other branch does **not** trigger CI directly — CI fires
+ when you open the PR.
+
+There is no required branch-name convention. You can name your feature
+branch anything (`feat/foo`, `fix/bug-123`, `your-handle/sandbox`, …);
+CI runs from the PR, not from the branch name.
+
+Each workflow has a path filter, so CLI-only changes don't run server
+tests and vice versa. Filters live at the top of each workflow file.
+
+## Commit messages
+
+The repo uses Conventional Commits:
+
+```
+<type>(<scope>): <summary>
+
+<body>
+```
+
+Types in active use: `feat`, `fix`, `chore`, `docs`, `ci`, `build`,
+`refactor`. Scopes commonly seen: `cli`, `server`, `plugin`, `dashboard`,
+`db`, `workspaces`, `skill`. Pick the smallest accurate scope; if a
+change spans surfaces, the broader type without a scope is fine.
+
+The body should explain **why**, not **what** — the diff already shows
+what.
+
## Prerequisites
| Tool | Version | Purpose |
@@ -73,10 +124,47 @@ cd server && go test ./...
# Server parity gate (requires make bundle + a local GGUF)
cd server && make test-gate
-# CLI build check
+# CLI tests + build check
+cd cli && make test # go test -v ./...
cd cli && go build ./...
+
+# Plugin tests (bats + shellcheck + jq manifest validation)
+bats plugins/cix/tests/*.bats
+shellcheck --severity=warning plugins/cix/scripts/*.sh
+jq . plugins/cix/hooks/hooks.json
+```
+
+`bats` is the test runner the plugin uses (`brew install bats-core` on
+macOS, `apt-get install bats` on Linux). CI runs the same three checks
+plus a symlink-integrity assertion for `plugins/cix/bin/cix`.
+
+### End-to-end plugin reload
+
+To smoke-test plugin changes against a real Claude Code installation:
+
+```bash
+make plugin-reload-local # from repo root
```
+This removes the cix plugin, purges the plugin cache, re-installs the
+marketplace from the working tree, and re-installs the plugin. Restart
+your Claude Code session to pick up the new bundle.
+
+### Skill source sync
+
+The `cix-workspace` skill has a canonical source under `skills/` and a
+byte-identical mirror under `plugins/cix/skills/`. Both must stay in
+sync. After editing the source:
+
+```bash
+plugins/cix/scripts/sync-skills.sh # copy source → plugin
+plugins/cix/scripts/sync-skills.sh --check # CI-friendly drift check
+```
+
+Do not edit the plugin copy directly; the next sync overwrites it. The
+standalone `skills/cix/SKILL.md` is **not** synced — the plugin version
+carries extra frontmatter the standalone loader does not need.
+
## Making changes
### Server (Go)
@@ -107,11 +195,14 @@ See [README — Building and Publishing](README.md#building-and-publishing-to-do
## Pull requests
-- All changes to `main` must go through a pull request
-- At least **1 approval** required before merging
-- Keep PRs focused — one feature or fix per PR
-- For server changes: `go test ./...` must pass in `server/`
-- For CLI changes: `go vet ./...` must pass in `cli/`
+- Open every PR against `develop` (not `main` — see "Branches and pull
+ requests" above).
+- At least **1 approval** required before merging.
+- Keep PRs focused — one feature or fix per PR.
+- For server changes: `go test ./...` must pass in `server/`.
+- For CLI changes: `go test ./...` must pass in `cli/`.
+- For plugin changes: `bats plugins/cix/tests/*.bats` must pass (CI runs
+ this on Linux and macOS).
## Reporting issues
diff --git a/Makefile b/Makefile
index 54ad72b..4447241 100644
--- a/Makefile
+++ b/Makefile
@@ -2,3 +2,15 @@
help build test bundle test-gate docker-build-cuda docker-build-cuda-dev clean:
@$(MAKE) -C server $@
+
+# Reinstall the cix plugin into the local Claude Code from this working tree:
+# remove the plugin + marketplace, purge the cached bundle, then re-add both.
+# Paths are quoted so a checkout or home directory containing spaces works.
+.PHONY: plugin-reload-local
+plugin-reload-local:
+	@echo "==> Removing cix plugin and code-index marketplace"
+	@# Leading `-` tells make to ignore a failure of these two commands.
+	-claude plugin remove cix@code-index
+	-claude plugin marketplace remove code-index
+	@echo "==> Purging plugin cache"
+	rm -rf "$(HOME)/.claude/plugins/cache/code-index"
+	@echo "==> Reinstalling marketplace from $(CURDIR)"
+	claude plugin marketplace add "$(CURDIR)"
+	claude plugin install cix@code-index
+	@echo "==> Reloaded. Restart Claude Code session to pick up changes."
diff --git a/README.md b/README.md
index ea07357..e898842 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@
[](https://opensource.org/licenses/MIT)
[](https://hub.docker.com/r/dvcdsys/code-index)
-Search your codebase by meaning, not just text. Self-hosted, embeddings-based, works with any agent or terminal — and now with a full web dashboard.
+Search your codebase by meaning, not just text. Self-hosted, embeddings-based, works with any agent or terminal — with a full web dashboard and multi-repo workspace search.
```bash
cix search "authentication middleware"
@@ -43,10 +43,12 @@ Grep and fuzzy file search work fine for small projects. At scale they break dow
## What you get
-- **`cix-server`** — Go HTTP API with embedded llama.cpp sidecar for embeddings, SQLite for symbols + project metadata, chromem-go for vectors. Ships as a single distroless container.
-- **Web dashboard** at `/dashboard` — projects, semantic search, user + API-key management, runtime sidecar control, drift indicator. Embedded directly into the server binary.
-- **`cix` CLI** — drop-in `cix search`/`cix symbols`/`cix files` commands for terminal + agent use.
+- **`cix-server`** — Go HTTP API with embedded llama.cpp sidecar for embeddings, SQLite for symbols + project metadata, chromem-go for vectors, FTS5 BM25 mirror for keyword + hybrid ranking. Ships as a single distroless container.
+- **Web dashboard** at `/dashboard` — projects, semantic search, user + API-key management, runtime sidecar control, drift indicator, release-update banner, dashboard-driven reindex. Embedded directly in the server binary.
+- **`cix` CLI** — drop-in `cix search`/`cix symbols`/`cix files`/`cix workspace …` commands for terminal + agent use.
- **File watcher** — `cix watch` keeps the index fresh as you edit, no manual reindex.
+- **Workspaces** — group multiple repositories into a named workspace; cix clones them server-side via a stored GitHub PAT, indexes them with the same pipeline, and runs hybrid BM25 + dense search across the union. GitHub webhooks auto-reindex on `push`. See [`workspaces.md`](workspaces.md) and [`doc/WORKSPACES.md`](doc/WORKSPACES.md).
+- **Claude Code plugin** (v0.2.0+) — install once and `cix` becomes the agent's default reflex for code search. Bundles two skills (`cix`, `cix-workspace`) and a fan-out sub-agent. See [Agent integration](#agent-integration).
- **OpenAPI as source of truth** — Go server interface + TypeScript dashboard types are generated from `doc/openapi.yaml`. Swagger UI at `/docs`.
---
@@ -68,6 +70,7 @@ Grep and fuzzy file search work fine for small projects. At scale they break dow
├─────────────────────────────────────────────────────────────────┤
│ HTTP/REST + cookie sessions + Bearer API keys │
│ ├── auth, admin, api-keys, projects, indexing, search │
+│ ├── workspaces, github-tokens, webhooks │
│ ├── embedded React dashboard (//go:embed all:dist) │
│ └── embedded Swagger UI │
│ │
@@ -75,24 +78,23 @@ Grep and fuzzy file search work fine for small projects. At scale they break dow
│ ├── gotreesitter (AST chunking, 200+ languages) │
│ ├── llama-server sidecar (Unix socket → CodeRankEmbed Q8 GGUF) │
│ ├── chromem-go (cosine similarity vector store) │
+│ ├── SQLite FTS5 chunk mirror (BM25 — powers hybrid workspace) │
│ └── modernc.org/sqlite (projects, symbols, file hashes) │
└────────────┬─────────────────────────────────────┬──────────────┘
│ HTTP │ Unix socket
▼ ▼
cix CLI (Go) — search, ┌──────────────────────────┐
symbols, files, init, │ llama-server child proc │
- reindex, watch │ (llama.cpp embeddings) │
+ reindex, watch, workspace │ (llama.cpp embeddings) │
└──────────────────────────┘
```
-The server is a pure-Go static binary; CUDA-image variants add a `libcublas` runtime layer for GPU embeddings.
+The server is a pure-Go static binary; CUDA-image variants add a CUDA runtime layer for GPU embeddings. Workspace clones live in `/repos/`.
---
## Quick Start
-### 1. Start the server
-
Three deployment modes:
| Mode | Best for | GPU | Prerequisites |
@@ -101,33 +103,32 @@ Three deployment modes:
| **Docker (CUDA)** | NVIDIA GPU servers | CUDA 12.x | Docker + NVIDIA Container Toolkit |
| **Native (macOS)** | Apple Silicon w/ full Metal | Metal | Go 1.25+, Xcode CLT |
-#### Docker (CPU)
+### 1. Start the server
+
+**Docker (CPU):**
```bash
git clone https://github.com/dvcdsys/code-index && cd code-index
cp .env.example .env
# Edit .env — set CIX_API_KEY, CIX_BOOTSTRAP_ADMIN_EMAIL, CIX_BOOTSTRAP_ADMIN_PASSWORD
docker compose up -d
-```
-
-```bash
curl http://localhost:21847/health # → {"status":"ok"}
```
> [!IMPORTANT]
-> On a fresh database the server **refuses to start** unless both `CIX_BOOTSTRAP_ADMIN_EMAIL` and `CIX_BOOTSTRAP_ADMIN_PASSWORD` are set. The user is created with `must_change_password=true`, so the temporary password only works for the first login.
-
-#### Docker (CUDA — NVIDIA GPU)
+> On a fresh database the server **refuses to start** unless both `CIX_BOOTSTRAP_ADMIN_EMAIL` and `CIX_BOOTSTRAP_ADMIN_PASSWORD` are set. The user is created with `must_change_password=true`, so the temporary password only works for the first login. After first login you can drop the env vars from `.env`.
-See [GPU Acceleration (CUDA)](#gpu-acceleration-cuda) below.
+**Docker (CUDA — NVIDIA GPU):**
```bash
docker compose -f docker-compose.cuda.yml up -d
```
-#### Native macOS (Apple Silicon — Metal GPU)
+See [GPU Acceleration (CUDA)](#gpu-acceleration-cuda) for host requirements.
+
+**Native macOS (Apple Silicon — Metal GPU):**
-> **Why not Docker?** Docker Desktop on macOS runs containers inside a Linux VM — Metal GPU is **not accessible** from within a container. For full Metal acceleration you must run natively.
+Docker Desktop on macOS runs containers in a Linux VM with no Metal access — for full GPU acceleration on Apple Silicon, run natively.
```bash
xcode-select --install # if not installed
@@ -138,75 +139,26 @@ cp .env.example .env
cd server && make run
```
-Native env-var summary for Metal:
-
-| Variable | Recommended | Notes |
-|---|---|---|
-| `CIX_N_GPU_LAYERS` | `99` | Offload all layers to Metal; `0` = CPU only |
-| `CIX_LLAMA_BIN_DIR` | set by `make run` | Path to the `llama-server` bundle dir |
-| `CIX_EMBEDDINGS_ENABLED` | `true` | Default. Set `false` to skip the sidecar entirely |
-
-> [!TIP]
-> `make run` runs `make bundle` first (no-op if already built), so it's safe after any `git pull`.
-
-**Auto-start with launchd** (optional — run as a background service on login):
-
-```bash
-cat > ~/Library/LaunchAgents/com.cix.server.plist << 'EOF'
-
-
-
- Labelcom.cix.server
- ProgramArguments
- /ABSOLUTE/PATH/TO/server/dist/cix-darwin-arm64/cix-server
- EnvironmentVariables
-
- CIX_API_KEYYOUR_KEY
- CIX_BOOTSTRAP_ADMIN_EMAILadmin@example.com
- CIX_BOOTSTRAP_ADMIN_PASSWORDchange-me-on-first-login
- CIX_LLAMA_BIN_DIR/ABSOLUTE/PATH/TO/server/dist/cix-darwin-arm64/llama
- CIX_N_GPU_LAYERS99
- CIX_PORT21847
- CIX_SQLITE_PATH/Users/YOUR_USER/.cix/data/sqlite/projects.db
- CIX_CHROMA_PERSIST_DIR/Users/YOUR_USER/.cix/data/chroma
- CIX_GGUF_CACHE_DIR/Users/YOUR_USER/.cix/data/models
-
- RunAtLoad
- KeepAlive
- StandardOutPath/tmp/cix-server.log
- StandardErrorPath/tmp/cix-server.err
-
-EOF
-launchctl load ~/Library/LaunchAgents/com.cix.server.plist
-launchctl start com.cix.server
-```
+For a launchd auto-start setup and the full env-var checklist, see [`doc/SETUP_MACOS_NATIVE.md`](doc/SETUP_MACOS_NATIVE.md).
### 2. Log in to the dashboard
-Open http://localhost:21847/dashboard in your browser.
-
-1. Sign in with the email + password you set as `CIX_BOOTSTRAP_ADMIN_*` env vars.
-2. You'll be **forced to change the password** on first login. Pick a real one.
-3. Land on the home screen — see [Dashboard](#dashboard) for what's there.
+Open http://localhost:21847/dashboard and sign in with the bootstrap admin email + password. You'll be forced to change the password on first login. See [Dashboard](#dashboard) for what's on each page.
### 3. Install the CLI
-**Option A — one-line installer (macOS / Linux):**
+**One-line installer (macOS / Linux):**
```bash
curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/main/install.sh | bash
```
-**Option B — from source:**
+For a pre-release build from `develop`, use `install-develop.sh` instead — see [`doc/UPDATES.md`](doc/UPDATES.md#cli-install-channels). Not for production.
-```bash
-cd cli && make build && make install # → /usr/local/bin/cix
-```
-
-**Option C — without Make:**
+**From source:**
```bash
-cd cli && go build -o cix . && sudo mv cix /usr/local/bin/
+cd cli && make build && make install # → /usr/local/bin/cix
```
### 4. Configure the CLI
@@ -216,19 +168,15 @@ cix config set api.url http://localhost:21847
cix config set api.key $(grep CIX_API_KEY .env | cut -d= -f2)
```
-Or mint a fresh API key from the dashboard's **API Keys** page and paste that.
+Or mint a fresh API key from the dashboard's **API Keys** page.
-### 5. Index a project
+### 5. Index a project and search
```bash
cd /path/to/your/project
-cix init # registers + indexes + starts the file watcher
-cix status # wait for: Status: ✓ Indexed
-```
+cix init # registers + indexes + starts the file watcher
+cix status # wait for: Status: ✓ Indexed
-### 6. Search
-
-```bash
cix search "authentication middleware"
cix search "error handling" --in ./api
cix symbols "handleRequest" --kind function
@@ -246,11 +194,13 @@ The dashboard ships embedded in the server binary at `/dashboard`. No extra serv
| Page | Audience | What it does |
|------|----------|--------------|
-| **Home** | everyone | Live status strip (server version, current embedding model, sidecar Ready/Loading) + module shortcuts |
-| **Projects** | everyone | List indexed projects, view stats (file count, languages, symbols, vector count, sqlite/chroma sizes), copy reindex commands. Cards turn **red with "Stale model"** badge when the runtime embedding model differs from the model the project was indexed with — see [Drift indicator](#drift-indicator). |
+| **Home** | everyone | Live status strip (server version, current embedding model, sidecar Ready/Loading), update-available banner when a newer `server/v*` release is published on GitHub, module shortcuts. |
+| **Projects** | everyone | List indexed projects with stats (file count, languages, symbols, vector count, sqlite/chroma sizes), per-project **Reindex** button + live indexing indicator, copy reindex commands. Cards turn red with a **Stale model** badge when the runtime embedding model differs from the model the project was indexed with (see [Drift indicator](#drift-indicator)). |
+| **Workspaces** | everyone | Group multiple repositories into a named workspace and search them as one corpus. The in-dashboard add-repo flow streams clone + index progress live; pick the org/account first, then the repo. Status tracking: `pending` → `cloning` → `indexing` → `indexed` / `failed`. Hybrid BM25 + dense search across the whole group. See [`workspaces.md`](workspaces.md). |
| **Search** | everyone | Five modes: semantic, symbols, references, definitions, files. Same engine the CLI uses. |
| **API Keys** | everyone | Mint long-lived `cix_*` keys (256-bit entropy, GitHub-class), copy them once, revoke at any time. |
-| **Users** | admin | Invite teammates, set role (admin/viewer), reset password (forces change on next login), disable account. |
+| **GitHub Tokens** | everyone | Store personal access tokens used by workspaces. Tokens are AES-256-GCM encrypted at rest; the plaintext is returned once on creation and never again. Scopes are **derived from GitHub** at storage time (not user-declared), so the dashboard shows the PAT's true capabilities. |
+| **Users** | admin | Invite teammates, set role (admin / viewer), reset password (forces change on next login), disable account. |
| **Settings** | everyone | Theme, default editor, change own password. |
| **Server** | admin | Runtime config — embedding model, `n_ctx`, `n_gpu_layers`, `n_threads`, batch size, queue concurrency. **Save & Restart** drains in-flight embeddings, restarts the sidecar, polls until ready. Source pill on each field shows whether the live value comes from the DB override, env bootstrap, or the recommended fallback. |
@@ -263,22 +213,23 @@ Two paths share the same identity model:
### Drift indicator
-When you change the runtime embedding model (Server → Embedding model → Save & Restart), every project that was indexed with the previous model becomes stale — the vectors are no longer comparable to fresh queries. The dashboard surfaces this:
+When you change the runtime embedding model (Server → Embedding model → Save & Restart), every project indexed with the previous model becomes stale — vectors are no longer comparable to fresh queries. The dashboard surfaces this with red borders + `Stale model` badges on project cards, and a banner on the project detail page with a copy-to-clipboard `cix reindex --full <path>` command. After running the reindex, the drift signal clears automatically.
+
+### Disabled-embeddings mode
-- **Projects list:** stale projects render with `border-destructive` + a `Stale model` badge.
-- **Project detail page:** a banner "Indexed with ``; current runtime model is ``. Vectors may be incompatible." with a copy-to-clipboard `cix reindex /path` command.
+Set `CIX_EMBEDDINGS_ENABLED=false` to bring the server up without the llama-server sidecar — auth, dashboard, project metadata, and symbol / file searches all keep working; only semantic search and indexing are disabled. The Server page renders a warning banner and disables the relevant inputs.
-After running the reindex, the drift signal clears automatically.
+### Workspaces
-### Disabled-embeddings mode
+The **Workspaces** page groups repositories into one named workspace and searches them as a single corpus — useful for tasks that span microservices, infra-as-code, API specs, and the like. Unlike `cix init` (which indexes the project you're `cd`'d into), workspaces track repositories that the server itself clones (or local projects that you link in).
-Set `CIX_EMBEDDINGS_ENABLED=false` to bring the server up without the llama-server sidecar — auth, dashboard, project metadata, and symbol/file searches all keep working; only semantic search and indexing are disabled. The Server page renders a warning banner and disables the relevant inputs.
+Workspaces are gated by `CIX_WORKSPACES_ENABLED=true`. See [`workspaces.md`](workspaces.md) for the user-facing workflow and [`doc/WORKSPACES.md`](doc/WORKSPACES.md) for operator setup (encryption keys, Cloudflare tunnel, webhook modes, REST API). The hybrid-search algorithm lives in [`doc/SEARCH_ALGORITHM.md`](doc/SEARCH_ALGORITHM.md); the webhook lifecycle in [`doc/WEBHOOKS.md`](doc/WEBHOOKS.md).
---
## CLI Reference
-### Project Management
+### Project management
| Command | Description |
|---------|-------------|
@@ -314,7 +265,17 @@ cix references [--file ] [--limit ]
cix files [--limit ]
```
-### File Watcher
+### Workspaces (cross-repo)
+
+```bash
+cix workspace list # all workspaces
+cix workspace "<name>" show # detail
+cix workspace "<name>" search "<query>" [--limit <n>] # hybrid BM25 + dense
+```
+
+The CLI uses a name-first grammar (PR8 / `5db28fd`) so an agent doesn't need to juggle workspace ids. See [`workspaces.md`](workspaces.md) for the agent contract.
+
+### File watcher
```bash
cix watch [path] # start background daemon
@@ -323,7 +284,7 @@ cix watch stop # stop daemon
cix watch status # check if running
```
-The watcher monitors the project with `fsnotify`/`rjeczalik/notify`, debounces events (5 s default), and triggers incremental reindexing automatically. Logs: `~/.cix/logs/watcher.log`.
+The watcher monitors the project with `fsnotify`, debounces events (5 s default), and triggers incremental reindexing automatically. Logs: `~/.cix/logs/watcher.log`.
### Configuration
@@ -348,14 +309,20 @@ Config file: `~/.cix/config.yaml`
`cix` is designed to be called by AI agents (Claude, GPT, Cursor, custom agents) as a shell tool. Agents run `cix search` instead of Grep/Glob — getting ranked, relevant snippets rather than raw file dumps.
-### Claude Code
+### Claude Code (recommended: install the plugin)
-Install the bundled skill so Claude knows to use `cix` automatically:
+The **`cix` Claude Code plugin** (v0.2.0+) bundles the `cix` and `cix-workspace` skills, the `cix-workspace-investigator` sub-agent (parallel per-repo fan-out for cross-project research), CLI auto-install hooks, and a grep-nudge that suggests `cix search` when the agent reaches for Grep on an indexed project. Install from the marketplace:
-```bash
-cp -r skills/cix ~/.claude/skills/cix
+```
+/plugin marketplace add dvcdsys/code-index
+/plugin install cix@code-index
+/reload-plugins
```
+See [`plugins/cix/README.md`](plugins/cix/README.md) for the full hook list and configuration knobs.
+
+If you prefer manual install or aren't using the plugin system: `cp -r skills/cix ~/.claude/skills/cix`.
+
Then in any Claude Code session, invoke the skill **paired with the actual engineering task** — not a search query. The pattern is `/cix `:
```
@@ -370,9 +337,9 @@ Then in any Claude Code session, invoke the skill **paired with the actual engin
The slash command primes Claude with cix usage guidance; the task that follows is what Claude actually executes. Throughout the work, Claude reaches for `cix search` / `cix definitions` / `cix references` to navigate the codebase **as a tool inside the task**, not as the task itself. This is the right mental model: cix is the agent's IDE — `goto-def`, `find-refs`, "what calls this" — that lets it understand unfamiliar code before changing it.
-A bare `/cix` works (yields a generic "ready to search" reply), and a search-style prompt like `/cix find X` works (Claude does one search and stops). Neither captures the real value. Pairing the skill with a real task — fix, implement, investigate, refactor — is what makes the agent meaningfully more useful than grep + reading files.
+For multi-repo work, invoke the workspace skill explicitly: `/cix-workspace <task>`. It's manual-only by design — it doesn't auto-trigger on cross-cutting prompts. See [`workspaces.md`](workspaces.md#agent-integration) for the agent contract.
-To activate in every session without typing `/cix` (so cix becomes the default reflex for any code-search question), add to `~/.claude/CLAUDE.md`:
+To activate `cix` as the default reflex without typing `/cix` every time, add to `~/.claude/CLAUDE.md`:
```markdown
## Code search
@@ -384,6 +351,8 @@ Use `cix` for all code search instead of Grep/Glob:
Run `cix init` on first use in a project.
```
+(The plugin's session hooks do most of this automatically once installed.)
+
### Other agents
Same pattern — give the agent shell execution and describe the commands:
@@ -411,319 +380,102 @@ cix search "error handling in auth flow" --in ./api
---
-## How Indexing Works
+## How indexing works
-**Chunking** — tree-sitter parses code into semantic chunks (functions, classes, methods). Unsupported languages fall back to a sliding window (2000 chars, 256 char overlap).
-
-Supported languages: Python, TypeScript, JavaScript, Go, Rust, Java, C, C++, C#, Ruby, PHP, Swift, Kotlin, Scala, Bash, SQL, Markdown, HTML, CSS, and 30+ more.
+**Chunking** — tree-sitter parses code into semantic chunks (functions, classes, methods). Unsupported languages fall back to a sliding window (2000 chars, 256 char overlap). 30+ languages have AST extraction — see [`doc/LANGUAGES.md`](doc/LANGUAGES.md) for the full list.
**Embeddings** — each chunk is encoded with a GGUF build of CodeRankEmbed (default: [awhiteside/CodeRankEmbed-Q8_0-GGUF](https://huggingface.co/awhiteside/CodeRankEmbed-Q8_0-GGUF); 768d, 8192-token context, ~145 MB on disk) via the `llama-server` sidecar (llama.cpp). Queries get a `"Represent this query for searching relevant code: "` prefix for asymmetric retrieval.
**Path-aware preamble** — each chunk is embedded with its file path, language, and parent symbol prefixed. This makes "auth middleware" find `auth.go` even if the file content uses different vocabulary. Toggle with `CIX_EMBED_INCLUDE_PATH` (default `true`); changing it requires `cix reindex --full`.
+**FTS5 / BM25 mirror** — every chunk also lands in a SQLite FTS5 virtual table indexed by trigram over `(content, symbol_name, file_path)`. Single-project search stays pure-dense; the BM25 mirror powers hybrid workspace search (acronym precision + project-level relevance gating). See [`doc/SEARCH_ALGORITHM.md`](doc/SEARCH_ALGORITHM.md).
+
**Incremental reindex** — uses SHA-256 file hashes. Only new or changed files are re-embedded. Deleted files are removed from the index.
**Filtering** — respects `.gitignore` and `.cixignore`, skips common dirs (`node_modules`, `.git`, `.venv`, etc.), skips files >`CIX_MAX_FILE_SIZE` (512 KiB default) and empty files. Per-project configuration via `.cixconfig.yaml` (see below).
---
-## Tuning Search Quality
-
-### `--min-score` threshold
+## Tuning search quality
-`cix` defaults to `--min-score 0.4`. This is calibrated for **CodeRankEmbed-Q8_0** with the path-aware embedding format (`CIX_EMBED_INCLUDE_PATH=true`, default).
-
-A typical score landscape on a real codebase:
+`cix` defaults to `--min-score 0.4`, calibrated for **CodeRankEmbed-Q8_0** with the path-aware embedding format. Typical score landscape on a real codebase:
| Match strength | Score range | Action |
|---|---|---|
| Exact symbol or filename match | 0.65 – 0.80 | rare; very high confidence |
-| Strong path-aware concept match | 0.50 – 0.65 | typical "good" match for `cix search "cli watch daemon"` |
-| Weaker concept / partial path overlap | 0.40 – 0.50 | typical for ambiguous or multi-token queries |
+| Strong path-aware concept match | 0.50 – 0.65 | typical "good" match |
+| Weaker concept / partial path overlap | 0.40 – 0.50 | typical for ambiguous queries |
| Likely unrelated noise | < 0.40 | filtered out by default |
-**When to lower the threshold**:
-
-- The query returns `No results` but you know matching code exists — try `--min-score 0.25`
-- Your query is intentionally vague (exploring an unfamiliar codebase) — `--min-score 0.2`
-- Single-word identifier queries on rare names
-
-**When to raise the threshold**:
-
-- Agent context is filling up with weak matches — `--min-score 0.5`
-- You only want clear top hits — `--min-score 0.6`
-
-> [!NOTE]
-> CodeRankEmbed is **asymmetric**: queries get a `"Represent this query for searching relevant code: "` prefix, which puts query and passage vectors into separate regions of the embedding space. Cosine similarities are systematically lower than for symmetric models — a "strong" match here is 0.55, not 0.80. Don't compare these numbers to thresholds quoted for OpenAI / Voyage / generic sentence-transformers.
+**When to lower the threshold:** sparse queries returning no results — try `--min-score 0.25`. Exploring an unfamiliar codebase — `--min-score 0.2`. Queries for rare single-word identifiers may also need a lower threshold.
-> [!TIP]
-> If you switched embedding models or toggled `CIX_EMBED_INCLUDE_PATH`, run `cix reindex --full` and recalibrate. Old vectors and new vectors live in the same store but score differently.
+**When to raise it:** agent context filling up with weak matches — `--min-score 0.5` or `0.6`.
-### `--exclude` for noisy directories
+CodeRankEmbed is asymmetric: queries and passages live in different regions of the embedding space, so cosine similarities are systematically lower than for symmetric models. Don't compare these numbers to thresholds quoted for OpenAI / Voyage / generic sentence-transformers. Full details — including hybrid workspace scoring — in [`doc/SEARCH_ALGORITHM.md`](doc/SEARCH_ALGORITHM.md).
-Repos with vendored code, fixtures, or legacy migrations can pull unrelated paths into top results because path tokens contribute to scoring. Two options:
-
-```bash
-# One-off exclude for a single search
-cix search "main entry point" --exclude vendor --exclude bench/fixtures
-
-# Permanent exclude — add to .cixignore (skips indexing entirely)
-echo "vendor/" >> .cixignore
-echo "bench/fixtures/" >> .cixignore
-cix reindex --full
-```
-
-`.cixignore` is preferred for directories you never want in results — they don't take up index space. `--exclude` is a per-query escape hatch.
+For noisy directories (vendored code, fixtures, legacy migrations), `--exclude vendor --exclude bench/fixtures` works per-query, or add entries to `.cixignore` to skip them at indexing time.
---
-## Per-Project Configuration
+## Per-project configuration
### `.cixignore` — exclude files from indexing
-Works exactly like `.gitignore` (same syntax, same nesting rules). Place it in the project root or any subdirectory. Patterns from `.cixignore` are merged with `.gitignore` — you don't need to duplicate rules.
-
-Use `.cixignore` when you want to exclude files from the index that are **not** excluded by `.gitignore` (e.g., vendored code, generated files, large test fixtures).
+Works exactly like `.gitignore` (same syntax, same nesting rules). Patterns are merged with `.gitignore` — you don't need to duplicate rules. Use this for files you want excluded from the *index* that aren't already excluded from git (vendored code, generated files, large test fixtures):
```gitignore
# .cixignore
api/generated/
-generated/
+vendor/
*.pb.go
testdata/fixtures/
```
-Nested `.cixignore` files work like nested `.gitignore` — they apply to their directory and below, without affecting sibling directories. The file watcher automatically triggers a full reindex when `.cixignore` is created, modified, or deleted.
+Nested `.cixignore` files work like nested `.gitignore`. The file watcher automatically triggers a full reindex when `.cixignore` is created, modified, or deleted.
### `.cixconfig.yaml` — project-level settings
-Place this in the project root. Currently supports automatic git submodule exclusion:
+Place in the project root:
```yaml
ignore:
submodules: true # automatically exclude all git submodule paths
```
-When `ignore.submodules` is `true`, cix reads `.gitmodules` and excludes all submodule paths from indexing. No git binary required — the file is parsed directly. Useful for Foundry/Forge dependencies, vendored submodules, or any repo where submodules contain thousands of files you don't want indexed.
-
-The file watcher triggers a full reindex when `.cixconfig.yaml` changes.
+When `ignore.submodules` is `true`, cix reads `.gitmodules` and excludes all submodule paths from indexing. No git binary required — the file is parsed directly. Useful for Foundry/Forge dependencies, vendored submodules, or any repo where submodules contain thousands of files you don't want indexed. The watcher triggers a full reindex when this file changes.
---
-## Configuration Reference
-
-### Server environment variables
-
-Complete list. See `.env.example` for the operator-facing template.
-
-#### Auth + bootstrap
-
-| Variable | Default | Description |
-|---|---|---|
-| `CIX_API_KEY` | — | Header API key for direct CLI / CI traffic. On first boot it's imported as the bootstrap admin's `env-bootstrap` key. |
-| `CIX_BOOTSTRAP_ADMIN_EMAIL` | — | **Required on a fresh DB.** Seeds the first admin user. Ignored once the users table is non-empty. |
-| `CIX_BOOTSTRAP_ADMIN_PASSWORD` | — | **Required on a fresh DB.** The user is flagged `must_change_password=true`, so this only works for the first login. |
-| `CIX_AUTH_DISABLED` | `false` | **Dev only.** Skips auth on every endpoint — every request behaves as admin. Never set in production. |
+## Configuration
-#### Networking + storage
+The most common environment variables:
-| Variable | Default | Description |
+| Variable | Default | Purpose |
|---|---|---|
-| `CIX_PORT` | `21847` | Listen port (both Docker images bake this in). |
-| `CIX_SQLITE_PATH` | `/data/sqlite/projects.db` | SQLite path. Suffixed with the model-safe name on open. |
-| `CIX_CHROMA_PERSIST_DIR` | `/data/chroma` | Vector store directory. |
-| `CIX_GGUF_CACHE_DIR` | `/data/models` | Where downloaded GGUF files live. |
-
-#### Indexing
-
-| Variable | Default | Description |
-|---|---|---|
-| `CIX_EMBEDDING_MODEL` | `awhiteside/CodeRankEmbed-Q8_0-GGUF` | HuggingFace GGUF repo (or absolute path to a `.gguf`). |
-| `CIX_MAX_FILE_SIZE` | `524288` | Skip files larger than this (bytes). |
-| `CIX_EXCLUDED_DIRS` | `node_modules,.git,.venv,...` | Comma-separated dirs always skipped. |
-| `CIX_LANGUAGES` | all | Comma-separated allow-list of chunker languages. Empty = all baked-in. |
-| `CIX_EMBED_INCLUDE_PATH` | `true` | Path/language/symbol preamble before each chunk. Toggling requires `cix reindex --full`. |
-| `CIX_MAX_CHUNK_TOKENS` | `1500` | Max chunk size before falling back to sliding window. Must stay ≤ `CIX_LLAMA_CTX`. |
-
-#### llama-server sidecar
-
-| Variable | Default | Description |
-|---|---|---|
-| `CIX_EMBEDDINGS_ENABLED` | `true` | Set `false` to boot without the sidecar (read-only mode). |
-| `CIX_LLAMA_BIN_DIR` | `/app` (Docker) / `/llama` (native) | Directory containing `llama-server` + dylibs. |
-| `CIX_LLAMA_TRANSPORT` | `unix` | `unix` or `tcp`. Auto-falls-back to TCP if the socket path is too long. |
-| `CIX_LLAMA_SOCKET` | `${TMPDIR}/cix-llama-.sock` | Unix socket path. macOS `sun_path` cap = 104 bytes. |
-| `CIX_LLAMA_CTX` | `2048` | `--ctx-size` passed to llama-server. |
-| `CIX_N_GPU_LAYERS` | `-1` darwin / `0` else / `99` Docker CUDA | `99` offloads all layers; `0` forces CPU. |
-| `CIX_LLAMA_STARTUP_TIMEOUT` | `60` | Seconds to wait for the sidecar's readiness probe. |
-| `CIX_GGUF_PATH` | auto-resolve | Absolute path to a GGUF file. Empty → cache lookup → HF download. |
-| `CIX_BOOTSTRAP_GGUF_PATH` | — | Optional. If set, cix imports this `.gguf` into `CIX_GGUF_CACHE_DIR` once (atomic `.partial → rename`) and ignores the env on subsequent boots. Useful for skipping the first-boot HF download in air-gapped or rate-limited environments. |
-
-#### Tuning (also editable from `/dashboard/server`)
-
-| Variable | Default | Description |
-|---|---|---|
-| `CIX_LLAMA_THREADS` | `0` (auto = `runtime.NumCPU()/2`) | CPU threads passed to llama-server. |
-| `CIX_LLAMA_BATCH` | `0` (match `CIX_LLAMA_CTX`) | `-b` batch size. |
-| `CIX_MAX_EMBEDDING_CONCURRENCY` | `5` | Embedding queue parallelism. Drop to `1` if the GPU contends. |
-| `CIX_EMBEDDING_QUEUE_TIMEOUT` | `300` | Seconds before a queued embedding request is failed. |
-
-> [!TIP]
-> Anything in the **Tuning** group is overridable at runtime from the dashboard's **Server** page (admin only). The dashboard writes to a DB row and triggers a sidecar restart — the env-var values are the boot-time fallback.
-
-### Resource Usage
-
-| | Native (Apple Silicon) | Docker (CPU) | Docker (CUDA) |
-|--|---|---|---|
-| Image size | n/a | ~21 MB | ~1.0 GB |
-| Memory (idle) | ~1 GB | ~1 GB | ~1 GB (system) + ~0.7 GB VRAM |
-| Memory (indexing) | up to 2 GB | up to 2 GB | up to 2 GB system + ~0.7 GB VRAM |
-| GPU | Metal | none | NVIDIA CUDA 12.x |
-| Disk | `~/.cix/data/` (~50–200 MB/project) | same (mounted volume) | same |
-| Auto-restart | use `launchd` | yes | yes |
-
-### Switching embedding models
-
-The server ships with `awhiteside/CodeRankEmbed-Q8_0-GGUF` — a Q8-quantized build of CodeRankEmbed (137M params, 768d, ~145 MB on disk, ~0.5–0.7 GB idle VRAM/RAM). Inference runs via the `llama-server` sidecar, so **only GGUF repositories are supported**. Plain PyTorch / `sentence-transformers` repos won't work.
-
-You can switch in two places:
-
-- **Dashboard → Server → Embedding model.** Pick from the on-disk cache (the dropdown lists `CIX_GGUF_CACHE_DIR`/*.gguf), or paste a HuggingFace repo or absolute path. **Save & Restart** drains, restarts the sidecar, and turns existing project cards red ("Stale model") until you reindex.
-- **Env / `.env` file.** Set `CIX_EMBEDDING_MODEL=` and restart the container. The dashboard's runtime override (if any) wins; the env value becomes the bootstrap default.
-
-> [!NOTE]
-> ChromaDB and SQLite paths are suffixed by a sanitised form of the model name (e.g. `projects_awhiteside_coderankembed_q8_0_gguf.db`). This isolates vector spaces per model — switching back and forth keeps old indices intact and avoids dim-mismatch errors. Re-indexing under a model is **not free** (chunk count × embedding latency), but you don't lose state.
-
-> [!TIP]
-> **Apple Silicon:** Docker can't access Metal GPU — run natively. The bundled `llama-server` includes `libggml-metal.dylib`; set `CIX_N_GPU_LAYERS=99` for full Metal offload.
-> **Linux NVIDIA:** use the CUDA image (`docker-compose.cuda.yml`). Force CPU with `CIX_N_GPU_LAYERS=0`.
-
----
-
-## Server Management
-
-```bash
-docker compose up -d # start (CPU)
-docker compose -f docker-compose.cuda.yml up -d # start (CUDA)
-docker compose logs -f # tail logs
-docker compose down # stop
-docker compose down -v # stop AND wipe data + models (destructive)
-```
-
-Developer builds (from source):
-
-```bash
-cd server
-make build # compile cix-server binary
-make bundle # build + fetch llama-server (macOS Metal)
-make run # bundle + launch with .env (dev)
-make test # go test ./...
-make test-gate # parity gate vs reference embeddings (requires GGUF)
-make docker-build-cuda # build + push CUDA image (uses cix-builder)
-make docker-build-cuda-dev # build + push :cu128-dev tag (smoke testing)
-make scout-cuda # safe pre-push CVE scan workflow
-make promote-cuda SCOUT_TAG=scout-… # retag without rebuild
-```
-
----
+| `CIX_API_KEY` | — | Bearer token for CLI/agents. Required. |
+| `CIX_PORT` | `21847` | Listen port. |
+| `CIX_BOOTSTRAP_ADMIN_EMAIL` / `_PASSWORD` | — | Required on a fresh DB. |
+| `CIX_EMBEDDING_MODEL` | `awhiteside/CodeRankEmbed-Q8_0-GGUF` | GGUF repo or absolute path. |
+| `CIX_N_GPU_LAYERS` | `-1` macOS / `0` else / `99` Docker CUDA | `99` = full offload, `0` = CPU. |
+| `CIX_EMBEDDINGS_ENABLED` | `true` | `false` boots without the llama sidecar. |
+| `CIX_WORKSPACES_ENABLED` | `false` | Enable the workspaces feature. |
+| `CIX_VERSION_CHECK_ENABLED` | `true` | `false` disables the GitHub release-poll banner. |
-## Building and publishing
-
-CI handles releases — see [Releases](#releases). For local manual builds:
-
-```bash
-docker login
-make docker-build-cuda # builds + pushes server/Dockerfile.cuda → :cu128
-make docker-build-cuda-dev # → :cu128-dev (operator iteration)
-```
-
-Pre-built images on Docker Hub:
-
-| Tag | Architecture | Use case |
-|-----|-------------|----------|
-| `dvcdsys/code-index:latest` | linux/amd64 + linux/arm64 | CPU |
-| `dvcdsys/code-index:v0.5.1` | linux/amd64 + linux/arm64 | CPU, version-pinned |
-| `dvcdsys/code-index:cu128` | linux/amd64 | NVIDIA GPU (CUDA 12.8) |
-| `dvcdsys/code-index:v0.5.1-cu128` | linux/amd64 | NVIDIA, version-pinned |
-
-See `doc/DOCKER_TAGS.md` for the full tag lifecycle policy.
+The full env-var surface (auth, storage, sidecar tuning, secret-key resolution, workspace knobs, runtime overrides) lives in [`doc/CONFIG_REFERENCE.md`](doc/CONFIG_REFERENCE.md). Anything in the "Tuning" group is editable at runtime from **Dashboard → Server**.
---
## REST API
-All endpoints except `/health`, `/api/v1/auth/login`, `/api/v1/auth/bootstrap-status`, `/dashboard/*`, `/docs`, and `/openapi.json` require authentication.
-
-**Two auth methods accepted on every authenticated endpoint:**
-
-- `Authorization: Bearer cix_` — API key (CLI / agents / CI)
-- `Cookie: cix_session=` — browser session (set by `/auth/login`)
-
-### Probes + auth
-
-```
-GET /health liveness
-GET /api/v1/status live config snapshot
-
-GET /api/v1/auth/bootstrap-status anyone — needs_bootstrap?
-POST /api/v1/auth/login email + password → cookie
-POST /api/v1/auth/logout clears cookie + DB row
-GET /api/v1/auth/me current user
-POST /api/v1/auth/change-password forced or voluntary
-GET /api/v1/auth/sessions my active sessions
-DELETE /api/v1/auth/sessions/{id} revoke a session
-```
-
-### API keys + admin (admin role)
-
-```
-GET /api/v1/api-keys list keys (own; admin sees all)
-POST /api/v1/api-keys mint a new key
-DELETE /api/v1/api-keys/{id} revoke
-
-GET /api/v1/admin/users list users + stats
-POST /api/v1/admin/users create user
-PATCH /api/v1/admin/users/{id} update role / disable / reset password
-DELETE /api/v1/admin/users/{id} delete
+The HTTP surface covers auth + sessions, users + API keys (admin), projects + indexing + search, workspaces + GitHub tokens, runtime config, and webhook reception. All endpoints except `/health`, `/api/v1/auth/login`, `/api/v1/auth/bootstrap-status`, `/dashboard/*`, `/docs`, and `/openapi.json` require authentication (Bearer API key *or* session cookie).
-GET /api/v1/admin/runtime-config current snapshot + Source map
-PUT /api/v1/admin/runtime-config patch overrides (does NOT restart)
-POST /api/v1/admin/sidecar/restart drain + respawn llama-server
-GET /api/v1/admin/sidecar/status pid, uptime, model, ready
-GET /api/v1/admin/models list cached GGUF files in CIX_GGUF_CACHE_DIR
-```
-
-### Projects + indexing + search
-
-```
-GET /api/v1/projects list
-POST /api/v1/projects register
-GET /api/v1/projects/{path} detail (sizes, drift, params)
-PATCH /api/v1/projects/{path} admin — settings
-DELETE /api/v1/projects/{path} admin — drop project + index
-
-POST /api/v1/projects/{path}/index/begin open run + return stored hashes
-POST /api/v1/projects/{path}/index/files NDJSON streaming batch upload
-POST /api/v1/projects/{path}/index/finish close run
-POST /api/v1/projects/{path}/index/cancel any user — cancel active run
-GET /api/v1/projects/{path}/index/status progress
-
-POST /api/v1/projects/{path}/search semantic
-POST /api/v1/projects/{path}/search/symbols
-POST /api/v1/projects/{path}/search/definitions
-POST /api/v1/projects/{path}/search/references
-POST /api/v1/projects/{path}/search/files
-GET /api/v1/projects/{path}/summary
-```
-
-The full schema lives in `doc/openapi.yaml` and is browsable at `http://:21847/docs` (Swagger UI).
+The full schema with request/response shapes lives in [`doc/openapi.yaml`](doc/openapi.yaml) and is browsable at `http://<host>:21847/docs` (Swagger UI). The Go server interface and the TypeScript dashboard types are generated from that one file.
---
## Troubleshooting
-**Server refuses to start: `bootstrap auth: no users in database and the bootstrap admin env vars are not set`**
-→ Set both `CIX_BOOTSTRAP_ADMIN_EMAIL` and `CIX_BOOTSTRAP_ADMIN_PASSWORD` in your `.env`, restart. Once you log in and change the password, you can drop the env vars (the user lives in the DB).
+**Server refuses to start: `bootstrap auth: no users in database and the bootstrap admin env vars are not set`** → Set both `CIX_BOOTSTRAP_ADMIN_EMAIL` and `CIX_BOOTSTRAP_ADMIN_PASSWORD` in your `.env`, restart. Once you log in and change the password, you can drop the env vars (the user lives in the DB).
**`API key not set` from CLI**
```bash
@@ -738,10 +490,7 @@ docker compose up -d # start (CPU)
docker compose -f docker-compose.cuda.yml up -d # start (CUDA)
```
-**`project not found`**
-```bash
-cix init /path/to/project
-```
+**`project not found`** — run `cix init /path/to/project`.
**Watcher not triggering reindex**
```bash
@@ -752,55 +501,16 @@ cix watch stop && cix watch /path/to/project
**Search returns no results**
- Check the project is indexed: `cix status`
-- Lower the threshold: `cix search "query" --min-score 0.2` (default is `0.4`; see [Tuning Search Quality](#tuning-search-quality))
+- Lower the threshold: `cix search "query" --min-score 0.2` (default `0.4`)
- `cix list` to verify the project is registered
-**Dashboard shows "Stale model" on every project after upgrade**
-→ The runtime model was changed (or its version stamp shifted). Either reindex affected projects (`cix reindex --full` per project) or revert the model change in **Server → Embedding model**.
-
-**Forgot the admin password and there's no second admin**
-→ Edit `users` table directly in `CIX_SQLITE_PATH`: clear `disabled_at` and reset `password_hash` (bcrypt cost 12). Better long-term: keep at least two admin accounts so this never recurs. See `doc/SECURITY_DEPLOYMENT.md`.
-
----
-
-## Releases
-
-CLI and server ship on independent tag streams:
-
-| Component | Tag pattern | Workflow | Artifact |
-|---|---|---|---|
-| Server (`cix-server`) | `server/v*` (e.g. `server/v0.5.1`) | `release-server.yml` | Docker images on Docker Hub: `:latest`, `:`, `:cu128`, `:-cu128` |
-| CLI (`cix`) | `cli/v*` (e.g. `cli/v0.5.0`) | `release-cli.yml` | `cix-{darwin,linux}-{amd64,arm64}.tar.gz` on a GitHub Release |
-
-Bare `v*` tags are the historical pre-split CLI line — the installer still falls back to them when no `cli/v*` release exists, but no new bare-`v*` tags should be created.
+**Dashboard shows "Stale model" on every project after upgrade** → The runtime model was changed (or its version stamp shifted). Either reindex affected projects (`cix reindex --full` per project) or revert the model change in **Server → Embedding model**.
-### Cutting a CLI release
+**Dashboard banner says an update is available** → A newer `server/v*` release is on GitHub. Click through to the release notes; bump your Docker tag / native build at a convenient time. Disable the poll with `CIX_VERSION_CHECK_ENABLED=false` if you don't want it. See [`doc/UPDATES.md`](doc/UPDATES.md).
-```bash
-git tag cli/v0.6.0
-git push origin cli/v0.6.0
-```
-
-CI builds binaries for macOS + Linux (amd64 + arm64), uploads them to a release named `cli/v0.6.0`, and the installer auto-picks them up on the next run.
-
-### Cutting a server release
-
-```bash
-git tag server/v0.5.2
-git push origin server/v0.5.2
-```
-
-CI builds CPU multi-arch + CUDA amd64 images with provenance + SBOM attestations, pushes to Docker Hub with both pinned (`:0.5.2`, `:0.5.2-cu128`) and floating (`:latest`, `:cu128`) tags, and creates a GitHub Release. Pre-tag CVE scan: `cd server && make scout-cuda`.
-
-### Local cross-build (no release)
-
-```bash
-cd cli && make release VERSION=v0.6.0
-```
+**Workspace repo stuck in `cloning` or `indexing`** → Check **Workspaces → Jobs** in the dashboard or `GET /api/v1/jobs?status=running`. Common causes: PAT missing `repo` scope on a private repo, network not reaching github.com, sidecar not ready. See [`doc/WORKSPACES.md`](doc/WORKSPACES.md#troubleshooting).
-Produces archives in `cli/dist/` plus `checksums.txt`. Useful for testing the artifact format before pushing a tag.
-
-Supported targets: `darwin-arm64`, `darwin-amd64`, `linux-arm64`, `linux-amd64`.
+**Forgot the admin password and there's no second admin** → See [`doc/SECURITY_DEPLOYMENT.md`](doc/SECURITY_DEPLOYMENT.md). Better long-term: keep at least two admin accounts so this never recurs.
---
@@ -814,11 +524,11 @@ With the GGUF backend the footprint is near-constant: weights (~200–250 MB) pl
`CIX_MAX_CHUNK_TOKENS` still caps the length of each code chunk (1 token ≈ 4 chars) and must stay ≤ `CIX_LLAMA_CTX` (8192). `CIX_MAX_EMBEDDING_CONCURRENCY` defaults to `5` — the indexing queue ships chunks in parallel; the llama-server sidecar still serialises requests through one context, but pipelining host-side prep with device inference at this depth saturates the GPU without measurable latency cost. Drop to `1` only if you observe contention.
-See [`doc/vram-profiling.md`](doc/vram-profiling.md) for methodology and numbers.
+See [`doc/vram-profiling.md`](doc/vram-profiling.md) for methodology and numbers, and [`doc/benchmarks.md`](doc/benchmarks.md) for the quantization comparison that picked Q8_0 as the default.
-**Docker Hub:** [`dvcdsys/code-index:cu128`](https://hub.docker.com/r/dvcdsys/code-index/tags) (floating) and `:-cu128` (pinned). Image size: ~1.66 GB (3-stage build: `nvidia/cuda:12.8.1-base` + libcublas + llama-server + Go binary).
+### Image base
-See `doc/DOCKER_TAGS.md` for the full tag lifecycle.
+The CUDA image is built on `gcr.io/distroless/cc-debian13:nonroot` (Debian 13 trixie, glibc 2.41, gcc 14) with CUDA shared libraries copied from `nvidia/cuda:12.8.1-base-ubuntu24.04`. No shell, apt, dpkg, or Ubuntu OS layer in the final image. See [`doc/DOCKER_TAGS.md`](doc/DOCKER_TAGS.md) for the full lifecycle and CVE delta.
**Host requirements:**
@@ -835,21 +545,76 @@ docker compose -f docker-compose.cuda.yml up -d
---
+## Server management
+
+```bash
+docker compose up -d # start (CPU)
+docker compose -f docker-compose.cuda.yml up -d # start (CUDA)
+docker compose logs -f # tail logs
+docker compose down # stop
+docker compose down -v # stop AND wipe data + models (destructive)
+```
+
+Developer builds (from source):
+
+```bash
+cd server
+make build # compile cix-server binary
+make bundle # build + fetch llama-server (macOS Metal)
+make run # bundle + launch with .env (dev)
+make test # go test ./...
+```
+
+Pre-built images on Docker Hub:
+
+| Tag | Architecture | Use case |
+|-----|-------------|----------|
+| `dvcdsys/code-index:latest` | linux/amd64 + linux/arm64 | CPU |
+| `dvcdsys/code-index:cu128` | linux/amd64 | NVIDIA GPU (CUDA 12.8) |
+| `dvcdsys/code-index:<version>` / `<version>-cu128` | — | Version-pinned variants |
+| `dvcdsys/code-index:develop-cu128` | linux/amd64 | Pre-release CUDA — pairs with the develop CLI channel |
+
+For the full release procedure (tagging, CVE scans, Scout workflow, make targets), see [`doc/RELEASES.md`](doc/RELEASES.md). For tag lifecycle policy, [`doc/DOCKER_TAGS.md`](doc/DOCKER_TAGS.md).
+
+---
+
## Security
-The server is designed for a trusted-network or behind-a-reverse-proxy deployment. See **[`doc/SECURITY_DEPLOYMENT.md`](doc/SECURITY_DEPLOYMENT.md)** for:
+The server is designed for a trusted-network or behind-a-reverse-proxy deployment. See [`doc/SECURITY_DEPLOYMENT.md`](doc/SECURITY_DEPLOYMENT.md) for:
- Trusted-proxy posture for `X-Forwarded-For` (load-bearing for the per-IP login rate limiter)
- TLS / `Secure` cookie auto-detection
- Login brute-force resistance (5/(IP,email)/15min + 60/IP/min)
- Body-size caps (1 MiB default, 64 MiB on `/index/files`)
-- Bootstrap admin lifecycle
-- Password policy (server enforces only `len ≥ 8`)
+- Bootstrap admin lifecycle, password policy
- API key scoping (inherits owner's role)
- What the server explicitly does **not** do (CSRF tokens, CORS, multi-tenancy, self-service reset)
---
+## Documentation map
+
+| Doc | Purpose |
+|---|---|
+| [`workspaces.md`](workspaces.md) | User-facing workspace guide (when to use, agent trust rules, query patterns) |
+| [`doc/WORKSPACES.md`](doc/WORKSPACES.md) | Operator setup (encryption keys, Cloudflare tunnel, workers, REST API) |
+| [`doc/SEARCH_ALGORITHM.md`](doc/SEARCH_ALGORITHM.md) | How per-project + hybrid workspace search rank results |
+| [`doc/WEBHOOKS.md`](doc/WEBHOOKS.md) | GitHub webhook lifecycle, modes, HMAC validation |
+| [`doc/UPDATES.md`](doc/UPDATES.md) | Release-poll banner + stable vs develop install channels |
+| [`doc/CONFIG_REFERENCE.md`](doc/CONFIG_REFERENCE.md) | Complete env-var reference |
+| [`doc/RELEASES.md`](doc/RELEASES.md) | Cutting CLI + server releases, CVE scans, make targets |
+| [`doc/SETUP_MACOS_NATIVE.md`](doc/SETUP_MACOS_NATIVE.md) | Native macOS Metal setup + launchd plist |
+| [`doc/SECURITY_DEPLOYMENT.md`](doc/SECURITY_DEPLOYMENT.md) | Production hardening |
+| [`doc/DOCKER_TAGS.md`](doc/DOCKER_TAGS.md) | Docker Hub tag lifecycle |
+| [`doc/LANGUAGES.md`](doc/LANGUAGES.md) | Supported chunker languages |
+| [`doc/MIGRATION_FROM_PYTHON.md`](doc/MIGRATION_FROM_PYTHON.md) | Python → Go server migration notes |
+| [`doc/benchmarks.md`](doc/benchmarks.md) | Index of dated benchmark snapshots |
+| [`doc/openapi.yaml`](doc/openapi.yaml) | REST API source of truth |
+| [`CONTRIBUTING.md`](CONTRIBUTING.md) | Contributor workflow |
+| [`plugins/cix/README.md`](plugins/cix/README.md) | Claude Code plugin reference |
+
+---
+
## License
MIT
diff --git a/cli/README.md b/cli/README.md
index dd2dc91..209cbd0 100644
--- a/cli/README.md
+++ b/cli/README.md
@@ -1,38 +1,59 @@
-# cix - Claude Code Index CLI
+# `cix` — Code IndeX CLI
-Thin client for semantic code search. Watches files, triggers reindexing, provides console commands for agents to search code and navigate projects.
+A thin Go client for the `cix-server` semantic code index. Runs `init`,
+`search`, `symbols`, `def`, `refs`, `files`, `summary`, `status`, `watch`,
+`reindex`, `cancel`, `list`, `config`, `workspace`, and `version`
+commands against the HTTP API.
-## Architecture
+The full user-facing command catalogue lives in the top-level
+[`README.md`](../README.md#cli-reference). This file covers building
+the CLI from source, the internal layout, and how to add a new command.
-```
-cix (thin Go client) API Server (Docker or local)
-├── watch ─── fsnotify ─── debounce ──> POST /index (incremental)
-├── init ─── register + index + watch
-├── search ─── semantic code search ───> POST /search
-├── symbols ── symbol lookup ──────────> POST /search/symbols
-├── files ─── file path search ───────> POST /search/files
-├── summary ── project overview ───────> GET /summary
-├── status ─── indexing progress ──────> GET /index/status
-├── list ─── list projects ──────────> GET /projects
-├── reindex ── manual reindex ─────────> POST /index
-└── config ─── manage ~/.cix/config.yaml
+## Layout
+```
cli/
-├── cmd/ - Cobra commands (init, search, symbols, files, summary, watch, ...)
+├── cmd/ — cobra command implementations
+│ ├── root.go — root command + global flags
+│ ├── init.go — `cix init`
+│ ├── search.go — `cix search`
+│ ├── symbols.go — `cix symbols`
+│ ├── definitions.go — `cix def` (+ goto alias)
+│ ├── references.go — `cix refs`
+│ ├── files.go — `cix files`
+│ ├── summary.go — `cix summary`
+│ ├── status.go — `cix status`
+│ ├── list.go — `cix list`
+│ ├── reindex.go — `cix reindex`
+│ ├── cancel.go — `cix cancel`
+│ ├── watch.go — `cix watch` (start/stop/status, daemon)
+│ ├── config.go — `cix config show/set/path`
+│ ├── workspace.go — `cix workspace …` (cross-repo, name-first)
+│ └── version.go — `cix version`
├── internal/
-│ ├── client/ - HTTP client to FastAPI server
-│ ├── config/ - YAML config (~/.cix/config.yaml)
-│ ├── watcher/ - fsnotify file watcher with debounce
-│ └── daemon/ - Background process management (PID file, start/stop)
-└── main.go
+│ ├── client/ — HTTP client to cix-server
+│ ├── config/ — YAML config (~/.cix/config.yaml)
+│ ├── daemon/ — PID-file based watcher daemon
+│ ├── discovery/ — project-root detection for `cix init`
+│ ├── fileutil/ — binary/text + size helpers
+│ ├── indexer/ — file-walk + NDJSON upload pipeline
+│ ├── projectconfig/ — .cixignore / .cixconfig.yaml parsing
+│ └── watcher/ — fsnotify-based incremental reindex watcher
+├── main.go
+├── Makefile
+└── README.md — this file
```
-## Installation
+Module path: `github.com/dvcdsys/code-index/cli`.
+
+## Build
+
+Prerequisites: Go 1.25+.
```bash
cd cli
-make build # builds to ./build/cix
-make install # copies to /usr/local/bin/cix
+make build # → cli/build/cix
+make install # → /usr/local/bin/cix (uses sudo if needed)
```
Or without make:
@@ -43,220 +64,102 @@ go build -o cix .
sudo mv cix /usr/local/bin/
```
-## Quick Start
+For cross-builds and release tarballs, see [`doc/RELEASES.md`](../doc/RELEASES.md#cutting-a-cli-release).
-```bash
-# 1. Start the API server (pick one)
-make server-docker # Docker mode
-make server-local # Local mode (requires Python 3.11+)
-
-# 2. Configure cix (API key is in .env)
-cix config set api.key $(grep API_KEY ../.env | cut -d= -f2)
+## Run against a server
-# 3. Initialize a project (registers + indexes + starts file watcher)
-cd /path/to/your/code
-cix init
-
-# 4. Wait for indexing
-cix status
-# Status: ✓ Indexed
-# Files: 1250 | Chunks: 5432 | Symbols: 892
-
-# 5. Search
-cix search "authentication middleware"
-cix symbols handleRequest --kind function
-cix files "config"
-cix summary
-```
-
-## Commands
-
-### Project Lifecycle
+The CLI talks HTTP — there is no embedded server logic in this
+directory. Configure once:
```bash
-cix init [path] # Register project, index, start file watcher
-cix init --watch=false [path] # Register + index without watcher
-cix list # List all indexed projects
-cix status [-p path] # Show project indexing status
-cix summary [-p path] # Project overview (languages, dirs, symbols)
-cix reindex [--full] [-p path] # Trigger manual reindex
+cix config set api.url http://localhost:21847
+cix config set api.key <your-api-key>
+cix config show
```
-### Search (for agents)
+Then any command picks up the saved URL + key from `~/.cix/config.yaml`.
-```bash
-# Semantic code search (natural language)
-cix search [flags]
- --in # Search within file or directory (repeatable)
- --limit, -l # Max results (default: 10)
- --lang # Filter by language (repeatable)
- --min-score <0.0-1.0> # Minimum relevance score (default: 0.1)
- --project, -p # Project path (default: cwd)
-
-# Examples
-cix search "authentication middleware"
-cix search "error handling" --in ./api
-cix search "config" --in README.md
-cix search "routes" --in ./api --in ./mcp_server
-cix search "database" --lang python --limit 20
-
-# Symbol search (by name, fast)
-cix symbols [flags]
- --kind # function, class, method, type (repeatable)
- --limit, -l # Max results (default: 20)
- --project, -p
-
-# File path search
-cix files [flags]
- --limit, -l # Max results (default: 20)
- --project, -p
-```
-
-### File Watching
-
-```bash
-cix watch [path] # Start as background daemon (default)
-cix watch --foreground [path] # Run in terminal (Ctrl+C to stop)
-cix watch stop # Stop daemon
-cix watch status # Check if daemon is running
-```
-
-The watcher uses `fsnotify` to monitor the project directory for changes. When files are modified, it debounces events (default 5s) and triggers incremental reindexing via the API.
-
-Excluded from watching: `node_modules`, `.git`, `.venv`, `__pycache__`, `dist`, `build`, `.next`, `.cache`, binary files, images, archives.
+The server can be local Docker (`docker compose up -d` in the repo
+root) or a remote server. The CLI doesn't care.
-### Configuration
+## Smoke test
```bash
-cix config show # Show current config
-cix config set # Set value
-cix config path # Show config file path
-
-# Keys:
-# api.url - API server URL (default: http://localhost:21847)
-# api.key - API authentication key
-# watcher.debounce_ms - Debounce delay in ms (default: 5000)
-```
-
-## Config File
-
-`~/.cix/config.yaml`:
-
-```yaml
-api:
- url: http://localhost:21847
- key: cix_your_key_here
-
-watcher:
- enabled: true
- debounce_ms: 5000
- exclude:
- - node_modules
- - .git
- - .venv
-
-projects:
- - path: /Users/me/project1
- auto_watch: true
+# Server reachable?
+cix status
+# (without a project context, status prints the configured URL + key state)
+
+# Index a fresh project + search it
+cd /path/to/some/repo
+cix init --watch=false
+cix status # wait for: Status: ✓ Indexed
+cix search "main entry point"
+cix symbols "Handler" --kind function
+cix files "config"
+cix summary
```
-## Testing Indexing Manually
+Watcher smoke (in a separate terminal):
```bash
-# 1. Start the server
-make server-docker # or make server-local
-
-# 2. Check health
-curl http://localhost:21847/health
-# {"status":"ok"}
-
-# 3. Init and index a project
-cix init /path/to/your/project
-
-# 4. Watch indexing progress
-cix status -p /path/to/your/project
-# repeat until Status: ✓ Indexed
-
-# 5. Test semantic search
-cix search "error handling" -p /path/to/your/project
-cix search "database connection" --lang go -p /path/to/your/project
-
-# 6. Test symbol search
-cix symbols main -p /path/to/your/project
-cix symbols "Handler" --kind function -p /path/to/your/project
-
-# 7. Test file search
-cix files "config" -p /path/to/your/project
-
-# 8. Test watcher (in a separate terminal)
-cix watch /path/to/your/project
-# now edit a file in the project — watcher should trigger reindex
-
-# 9. Test via raw API (without cix)
-source .env
-API=http://localhost:21847
-AUTH="Authorization: Bearer $API_KEY"
-PROJECT="/path/to/your/project"
-ENCODED=$(python3 -c "import urllib.parse; print(urllib.parse.quote('$PROJECT', safe=''))")
-
-# Create project
-curl -X POST "$API/api/v1/projects" -H "$AUTH" -H "Content-Type: application/json" \
- -d "{\"host_path\": \"$PROJECT\"}"
-
-# Trigger indexing
-curl -X POST "$API/api/v1/projects/$ENCODED/index" -H "$AUTH" -H "Content-Type: application/json" \
- -d '{"full": false}'
-
-# Check progress
-curl "$API/api/v1/projects/$ENCODED/index/status" -H "$AUTH"
-
-# Semantic search
-curl -X POST "$API/api/v1/projects/$ENCODED/search" -H "$AUTH" -H "Content-Type: application/json" \
- -d '{"query": "authentication", "limit": 5}'
-
-# Symbol search
-curl -X POST "$API/api/v1/projects/$ENCODED/search/symbols" -H "$AUTH" -H "Content-Type: application/json" \
- -d '{"query": "main", "limit": 10}'
+cix watch /path/to/some/repo # starts the background daemon
+cix watch status
+# edit a file in the project — watcher should log a reindex
+cix watch stop
```
-## Troubleshooting
+## Adding a new command
-### "API key not set"
+Each command is a `cobra.Command` constructed in a `NewCommand()`
+factory and registered from `root.go`. Conventions:
-```bash
-cix config set api.key $(grep API_KEY /path/to/code-index/.env | cut -d= -f2)
-```
+1. Place the command in `cmd/<name>.go`.
+2. The factory takes no global state — it reads config and builds an
+ `*client.Client` inside the command's `RunE`. This keeps unit tests
+ table-driven and free of init-order surprises.
+3. Network calls go through `internal/client`. Add a method there if
+ the existing surface doesn't cover your endpoint; don't reach for
+ `net/http` from inside `cmd/`.
+4. Errors propagate through `RunE`'s return — cobra prints the message
+ and sets a non-zero exit code. Don't `os.Exit` from a command.
+5. Output goes to `cmd.OutOrStdout()` / `cmd.ErrOrStderr()`, not the
+ process-wide `os.Stdout` — this is what makes tests work.
-### "connection refused"
+Tests sit beside the file (`<name>_test.go`); they assemble the
+command, set `SetArgs`, and capture output via `bytes.Buffer`. See
+`cmd/root_test.go` for the established pattern.
-Server is not running. Start it:
+## Tests
```bash
-cd /path/to/code-index
-docker compose up -d # Docker
-# or
-./setup-local.sh # Local
+cd cli
+go test ./...
+# or, for verbose / single-package:
+go test -v ./cmd/...
+go test -run TestSearch ./cmd/...
```
-### "project not found"
+CI runs the suite on every PR (`.github/workflows/ci-cli.yml`).
-```bash
-cix init /path/to/project
-```
+## Releasing
-### Watcher not triggering
+See [`doc/RELEASES.md`](../doc/RELEASES.md#cutting-a-cli-release).
+The short version: bump `cli/cmd/version.go`, push a `cli/v<version>`
+tag, CI builds the four-platform tarball set and uploads to GitHub
+Releases. The install scripts pick it up on next run.
-```bash
-# Check if daemon is running
-cix watch status
+For pre-release builds tracking `develop`, see
+[`doc/UPDATES.md`](../doc/UPDATES.md#cli-install-channels).
-# Check logs
-cat ~/.cix/logs/watcher.log
+## Troubleshooting
-# Restart
-cix watch stop
-cix watch start /path/to/project
-```
+| Symptom | Fix |
+|---|---|
+| `API key not set` | `cix config set api.key <your-api-key>` — mint one from the dashboard's API Keys page if you don't have one. |
+| `connection refused` | The server isn't running, or `api.url` is wrong. `curl $(cix config show \| grep url)/health` should return `{"status":"ok"}`. |
+| `project not found` | Run `cix init` in the project root first. |
+| Watcher not reindexing | `cix watch status`; check `~/.cix/logs/watcher.log`; restart with `cix watch stop && cix watch <path>`. |
+| Search returns nothing | Lower the floor: `cix search "query" --min-score 0.25` (default is 0.4). See [`doc/SEARCH_ALGORITHM.md`](../doc/SEARCH_ALGORITHM.md). |
## License
diff --git a/cli/cmd/definitions.go b/cli/cmd/definitions.go
index f1a43e7..e7a2fdc 100644
--- a/cli/cmd/definitions.go
+++ b/cli/cmd/definitions.go
@@ -13,6 +13,7 @@ var (
defFile string
defLimit int
defProject string
+ defName string
)
var definitionsCmd = &cobra.Command{
@@ -24,7 +25,8 @@ var definitionsCmd = &cobra.Command{
Examples:
cix definitions HandleRequest
cix def AuthMiddleware --kind function
- cix goto UserService --file ./internal/service.go`,
+ cix goto UserService --file ./internal/service.go
+ cix def HandleRequest --name github.com/MythicalGames/pf3-backend@main`,
Args: cobra.ExactArgs(1),
RunE: runDefinitions,
}
@@ -35,23 +37,37 @@ func init() {
definitionsCmd.Flags().StringVar(&defFile, "file", "", "Narrow to a specific file")
definitionsCmd.Flags().IntVarP(&defLimit, "limit", "l", 10, "Maximum results")
definitionsCmd.Flags().StringVarP(&defProject, "project", "p", "", "Project path (default: current directory)")
+ definitionsCmd.Flags().StringVarP(&defName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ definitionsCmd.MarkFlagsMutuallyExclusive("project", "name")
}
func runDefinitions(cmd *cobra.Command, args []string) error {
symbol := args[0]
- projectPath := defProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
+ apiClient, err := getClient()
+ if err != nil {
+ return err
}
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
+ var absPath string
+ if defName != "" {
+ absPath, err = resolveProjectByName(defName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := defProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
}
// Resolve file path to absolute
@@ -63,11 +79,6 @@ func runDefinitions(cmd *cobra.Command, args []string) error {
}
}
- apiClient, err := getClient()
- if err != nil {
- return err
- }
-
results, err := apiClient.SearchDefinitions(absPath, symbol, defKind, filePath, defLimit)
if err != nil {
return fmt.Errorf("search failed: %w", err)
diff --git a/cli/cmd/files.go b/cli/cmd/files.go
index 27c9f81..378a80f 100644
--- a/cli/cmd/files.go
+++ b/cli/cmd/files.go
@@ -11,6 +11,7 @@ import (
var (
filesLimit int
filesProject string
+ filesName string
)
// filesCmd represents the files command
@@ -24,7 +25,8 @@ Useful for finding files when you know part of the name or path.
Examples:
cix files "auth"
cix files "controller" --limit 20
- cix files "config.yaml" -p /path/to/project`,
+ cix files "config.yaml" -p /path/to/project
+ cix files config --name github.com/MythicalGames/pf3-backend@main`,
Args: cobra.ExactArgs(1),
RunE: runFiles,
}
@@ -33,31 +35,39 @@ func init() {
rootCmd.AddCommand(filesCmd)
filesCmd.Flags().IntVarP(&filesLimit, "limit", "l", 20, "Maximum number of results")
filesCmd.Flags().StringVarP(&filesProject, "project", "p", "", "Project path (default: current directory)")
+ filesCmd.Flags().StringVarP(&filesName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ filesCmd.MarkFlagsMutuallyExclusive("project", "name")
}
func runFiles(cmd *cobra.Command, args []string) error {
pattern := args[0]
- projectPath := filesProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
- }
-
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
- }
-
apiClient, err := getClient()
if err != nil {
return err
}
- absPath = findProjectRoot(absPath, apiClient)
+ var absPath string
+ if filesName != "" {
+ absPath, err = resolveProjectByName(filesName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := filesProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
+ absPath = findProjectRoot(absPath, apiClient)
+ }
fmt.Printf("Searching files in %s...\n\n", absPath)
diff --git a/cli/cmd/references.go b/cli/cmd/references.go
index e89d18e..672a0db 100644
--- a/cli/cmd/references.go
+++ b/cli/cmd/references.go
@@ -13,6 +13,7 @@ var (
refsFile string
refsLimit int
refsProject string
+ refsName string
)
var referencesCmd = &cobra.Command{
@@ -24,7 +25,8 @@ var referencesCmd = &cobra.Command{
Examples:
cix references HandleRequest
cix refs AuthMiddleware --limit 50
- cix usages UserService --file ./internal/api/`,
+ cix usages UserService --file ./internal/api/
+ cix refs HandleRequest --name github.com/MythicalGames/pf3-backend@main`,
Args: cobra.ExactArgs(1),
RunE: runReferences,
}
@@ -34,23 +36,37 @@ func init() {
referencesCmd.Flags().StringVar(&refsFile, "file", "", "Narrow to a specific file")
referencesCmd.Flags().IntVarP(&refsLimit, "limit", "l", 30, "Maximum results")
referencesCmd.Flags().StringVarP(&refsProject, "project", "p", "", "Project path (default: current directory)")
+ referencesCmd.Flags().StringVarP(&refsName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ referencesCmd.MarkFlagsMutuallyExclusive("project", "name")
}
func runReferences(cmd *cobra.Command, args []string) error {
symbol := args[0]
- projectPath := refsProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
+ apiClient, err := getClient()
+ if err != nil {
+ return err
}
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
+ var absPath string
+ if refsName != "" {
+ absPath, err = resolveProjectByName(refsName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := refsProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
}
filePath := refsFile
@@ -61,11 +77,6 @@ func runReferences(cmd *cobra.Command, args []string) error {
}
}
- apiClient, err := getClient()
- if err != nil {
- return err
- }
-
results, err := apiClient.SearchReferences(absPath, symbol, filePath, refsLimit)
if err != nil {
return fmt.Errorf("search failed: %w", err)
diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index 8e36c89..c6c051e 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -75,6 +75,29 @@ func init() {
rootCmd.PersistentFlags().StringVar(&apiKey, "api-key", "", "API key (default from config)")
}
+// resolveProjectByName looks name up verbatim among the HostPath values of
+// the projects returned by apiClient.ListProjects. It is the strict
+// counterpart of findProjectRoot: the input is never passed through
+// filepath.Abs and no prefix walk is attempted. When nothing matches, the
+// returned error enumerates every registered HostPath (or states that no
+// projects are registered at all) so the user can copy the right value.
+func resolveProjectByName(name string, apiClient *client.Client) (string, error) {
+	all, listErr := apiClient.ListProjects()
+	if listErr != nil {
+		return "", fmt.Errorf("list projects: %w", listErr)
+	}
+
+	// Collect the non-matching HostPaths as we go; they are only needed
+	// for the error message on a miss.
+	known := make([]string, 0, len(all))
+	for _, proj := range all {
+		if proj.HostPath != name {
+			known = append(known, proj.HostPath)
+			continue
+		}
+		return proj.HostPath, nil
+	}
+
+	if len(known) > 0 {
+		return "", fmt.Errorf("project %q not found; registered projects:\n - %s", name, strings.Join(known, "\n - "))
+	}
+	return "", fmt.Errorf("project %q not found; no projects are registered (run `cix init` or attach a repo via the dashboard)", name)
+}
+
// findProjectRoot resolves a candidate path to a registered project root.
//
// If the candidate path exactly matches a registered project it is returned as-is.
diff --git a/cli/cmd/root_test.go b/cli/cmd/root_test.go
index 3ea7999..af8001a 100644
--- a/cli/cmd/root_test.go
+++ b/cli/cmd/root_test.go
@@ -78,6 +78,96 @@ func TestFindProjectRoot(t *testing.T) {
}
}
+// TestResolveProjectByName checks that resolveProjectByName accepts only an
+// exact HostPath match and that a miss yields an error quoting the requested
+// name together with the registered alternatives (or the "no projects are
+// registered" hint when the list is empty).
+func TestResolveProjectByName(t *testing.T) {
+	githubSlug := "github.com/MythicalGames/pf3-backend@main"
+	localPath := "/Users/me/proj"
+
+	tests := []struct {
+		name      string
+		projects  []string
+		input     string
+		wantPath  string
+		wantErr   bool
+		errSubstr []string // each must appear in err message
+	}{
+		{
+			name:     "exact github slug match",
+			projects: []string{githubSlug, localPath},
+			input:    githubSlug,
+			wantPath: githubSlug,
+		},
+		{
+			name:     "exact local path match",
+			projects: []string{githubSlug, localPath},
+			input:    localPath,
+			wantPath: localPath,
+		},
+		{
+			name:      "miss lists every registered project",
+			projects:  []string{githubSlug, localPath},
+			input:     "pf3-backend",
+			wantErr:   true,
+			errSubstr: []string{`"pf3-backend"`, githubSlug, localPath},
+		},
+		{
+			name:      "miss with no registered projects mentions cix init",
+			projects:  []string{},
+			input:     "anything",
+			wantErr:   true,
+			errSubstr: []string{`"anything"`, "no projects are registered"},
+		},
+		{
+			name:     "no prefix or substring matching",
+			projects: []string{githubSlug},
+			input:    "github.com/MythicalGames/pf3-backend", // no @main
+			wantErr:  true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			srv := mockServer(t, listProjectsHandler(tc.projects))
+			useAPI(t, srv)
+
+			// Fail loudly on a broken test setup instead of handing a nil
+			// client to the function under test.
+			c, err := getClient()
+			if err != nil {
+				t.Fatalf("getClient: %v", err)
+			}
+
+			got, err := resolveProjectByName(tc.input, c)
+			if tc.wantErr {
+				if err == nil {
+					t.Fatalf("expected error, got path %q", got)
+				}
+				for _, s := range tc.errSubstr {
+					if !strings.Contains(err.Error(), s) {
+						t.Errorf("error %q does not contain %q", err.Error(), s)
+					}
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			if got != tc.wantPath {
+				t.Errorf("resolveProjectByName(%q) = %q, want %q", tc.input, got, tc.wantPath)
+			}
+		})
+	}
+}
+
+// TestResolveProjectByName_APIError checks that a failing ListProjects call
+// is surfaced as an error wrapped with the "list projects" prefix.
+func TestResolveProjectByName_APIError(t *testing.T) {
+	srv := mockServer(t, func(w http.ResponseWriter, r *http.Request) {
+		apiError(w, 500, "server error")
+	})
+	useAPI(t, srv)
+
+	// A client-construction failure is a setup problem, not the condition
+	// under test — report it separately so it cannot satisfy the err check.
+	c, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	_, err = resolveProjectByName("anything", c)
+	if err == nil {
+		t.Fatal("expected error when ListProjects fails")
+	}
+	if !strings.Contains(err.Error(), "list projects") {
+		t.Errorf("expected 'list projects' in error, got: %v", err)
+	}
+}
+
func TestFindProjectRoot_APIError(t *testing.T) {
// When ListProjects fails, the original path should be returned unchanged.
srv := mockServer(t, func(w http.ResponseWriter, r *http.Request) {
diff --git a/cli/cmd/search.go b/cli/cmd/search.go
index 2873b19..199f5bc 100644
--- a/cli/cmd/search.go
+++ b/cli/cmd/search.go
@@ -17,6 +17,7 @@ var (
searchExcludes []string
searchMinScore float64
searchProject string
+ searchName string
)
// searchCmd represents the search command
@@ -39,7 +40,8 @@ Examples:
cix search "error handling" --in src/api/
cix search "config" --in README.md
cix search "routes" --in ./api --in ./mcp_server
- cix search "main entry point" --exclude bench/fixtures --exclude legacy`,
+ cix search "main entry point" --exclude bench/fixtures --exclude legacy
+ cix search "JWT" --name github.com/MythicalGames/pf3-backend@main`,
Args: cobra.ExactArgs(1),
RunE: runSearch,
}
@@ -56,6 +58,8 @@ func init() {
// long-tail queries via --min-score 0.2.
searchCmd.Flags().Float64Var(&searchMinScore, "min-score", 0.4, "Minimum relevance score (lower with --min-score 0.2 if your query returns nothing)")
searchCmd.Flags().StringVarP(&searchProject, "project", "p", "", "Project path (default: current directory)")
+ searchCmd.Flags().StringVarP(&searchName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ searchCmd.MarkFlagsMutuallyExclusive("project", "name")
}
// resolveFilterPaths normalises --in / --exclude inputs to absolute paths
@@ -78,28 +82,33 @@ func resolveFilterPaths(in []string) []string {
func runSearch(cmd *cobra.Command, args []string) error {
query := args[0]
- // Get project path
- projectPath := searchProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
- }
-
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
- }
-
// Get API client
apiClient, err := getClient()
if err != nil {
return err
}
- absPath = findProjectRoot(absPath, apiClient)
+ var absPath string
+ if searchName != "" {
+ absPath, err = resolveProjectByName(searchName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := searchProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
+ absPath = findProjectRoot(absPath, apiClient)
+ }
// Resolve --in paths to absolute
resolvedPaths := resolveFilterPaths(searchPaths)
diff --git a/cli/cmd/summary.go b/cli/cmd/summary.go
index 6be2d55..84fca8a 100644
--- a/cli/cmd/summary.go
+++ b/cli/cmd/summary.go
@@ -11,7 +11,10 @@ import (
"github.com/spf13/cobra"
)
-var summaryProject string
+var (
+ summaryProject string
+ summaryName string
+)
// summaryCmd represents the summary command
var summaryCmd = &cobra.Command{
@@ -24,36 +27,45 @@ var summaryCmd = &cobra.Command{
Examples:
cix summary
- cix summary -p /path/to/project`,
+ cix summary -p /path/to/project
+ cix summary --name github.com/MythicalGames/pf3-backend@main`,
RunE: runSummary,
}
func init() {
rootCmd.AddCommand(summaryCmd)
summaryCmd.Flags().StringVarP(&summaryProject, "project", "p", "", "Project path (default: current directory)")
+ summaryCmd.Flags().StringVarP(&summaryName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ summaryCmd.MarkFlagsMutuallyExclusive("project", "name")
}
func runSummary(cmd *cobra.Command, args []string) error {
- projectPath := summaryProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
- }
-
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
- }
-
apiClient, err := getClient()
if err != nil {
return err
}
- absPath = findProjectRoot(absPath, apiClient)
+ var absPath string
+ if summaryName != "" {
+ absPath, err = resolveProjectByName(summaryName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := summaryProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
+ absPath = findProjectRoot(absPath, apiClient)
+ }
summary, err := apiClient.GetSummary(absPath)
if err != nil {
diff --git a/cli/cmd/symbols.go b/cli/cmd/symbols.go
index c72ce62..2de8514 100644
--- a/cli/cmd/symbols.go
+++ b/cli/cmd/symbols.go
@@ -12,6 +12,7 @@ var (
symbolsLimit int
symbolsKinds []string
symbolsProject string
+ symbolsName string
)
// symbolsCmd represents the symbols command
@@ -25,7 +26,8 @@ Supported symbol kinds: function, class, method, type
Examples:
cix symbols handleRequest
cix symbols AuthMiddleware --kind function --kind method
- cix symbols User --kind class`,
+ cix symbols User --kind class
+ cix symbols Service --name github.com/MythicalGames/pf3-backend@main`,
Args: cobra.ExactArgs(1),
RunE: runSymbols,
}
@@ -35,33 +37,40 @@ func init() {
symbolsCmd.Flags().IntVarP(&symbolsLimit, "limit", "l", 20, "Maximum number of results")
symbolsCmd.Flags().StringSliceVar(&symbolsKinds, "kind", nil, "Filter by symbol kind")
symbolsCmd.Flags().StringVarP(&symbolsProject, "project", "p", "", "Project path (default: current directory)")
+ symbolsCmd.Flags().StringVarP(&symbolsName, "name", "n", "", "Project ID (exact match against `cix list`). Mutually exclusive with -p.")
+ symbolsCmd.MarkFlagsMutuallyExclusive("project", "name")
}
func runSymbols(cmd *cobra.Command, args []string) error {
query := args[0]
- // Get project path
- projectPath := symbolsProject
- if projectPath == "" {
- cwd, err := os.Getwd()
- if err != nil {
- return fmt.Errorf("get working directory: %w", err)
- }
- projectPath = cwd
- }
-
- absPath, err := filepath.Abs(projectPath)
- if err != nil {
- return fmt.Errorf("resolve path: %w", err)
- }
-
// Get API client
apiClient, err := getClient()
if err != nil {
return err
}
- absPath = findProjectRoot(absPath, apiClient)
+ var absPath string
+ if symbolsName != "" {
+ absPath, err = resolveProjectByName(symbolsName, apiClient)
+ if err != nil {
+ return err
+ }
+ } else {
+ projectPath := symbolsProject
+ if projectPath == "" {
+ cwd, err := os.Getwd()
+ if err != nil {
+ return fmt.Errorf("get working directory: %w", err)
+ }
+ projectPath = cwd
+ }
+ absPath, err = filepath.Abs(projectPath)
+ if err != nil {
+ return fmt.Errorf("resolve path: %w", err)
+ }
+ absPath = findProjectRoot(absPath, apiClient)
+ }
// Search symbols
fmt.Printf("Searching symbols in %s...\n\n", absPath)
diff --git a/cli/cmd/testutil_test.go b/cli/cmd/testutil_test.go
index d157353..a2427cc 100644
--- a/cli/cmd/testutil_test.go
+++ b/cli/cmd/testutil_test.go
@@ -2,20 +2,20 @@ package cmd
import (
"bytes"
- "crypto/sha1"
"encoding/json"
- "fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"testing"
+
+ "github.com/anthropics/code-index/cli/internal/client"
)
-// projectHash returns the same SHA1 prefix that the client uses for URL routing.
+// projectHash returns the same project URL hash the client uses for routing —
+// delegated to the real implementation so the per-machine namespacing matches.
func projectHash(path string) string {
- h := sha1.Sum([]byte(path))
- return fmt.Sprintf("%x", h)[:16]
+ return client.EncodeProjectPath(path)
}
// mockServer starts a test HTTP server and registers cleanup.
diff --git a/cli/cmd/workspace.go b/cli/cmd/workspace.go
new file mode 100644
index 0000000..4733535
--- /dev/null
+++ b/cli/cmd/workspace.go
@@ -0,0 +1,380 @@
+package cmd
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/anthropics/code-index/cli/internal/client"
+ "github.com/spf13/cobra"
+)
+
+// workspaceCmd routes every workspace-scoped CLI verb. The user-facing
+// argument grammar is name-first:
+//
+//	cix ws                        → list workspaces (default)
+//	cix ws list                   → list workspaces (alternate)
+//	cix ws <name>                 → describe workspace (list repos + status)
+//	cix ws <name> list            → list repos in the workspace
+//	cix ws <name> repos           → list repos (alias)
+//	cix ws <name> describe        → describe (same as `cix ws <name>`)
+//	cix ws <name> search <query>  → two-stage workspace search
+//
+// We deliberately roll the dispatch by hand instead of using cobra
+// subcommands so the workspace NAME can sit in the first positional
+// slot — cobra can't recognise a dynamic value (workspace name) as a
+// command name. The trade-off is no auto-completion on `<name>`; in
+// exchange the surface reads the way operators think about workspaces.
+//
+// Note: a lone `list` argument is always taken as the list verb, so a
+// workspace literally named "list" has to be addressed with an explicit
+// verb (`cix ws list describe`) or by its id.
+var workspaceCmd = &cobra.Command{
+	Use:     "workspace [name] [verb] [args...]",
+	Aliases: []string{"ws"},
+	Short:   "Cross-project semantic search via workspaces",
+	Long: `Workspaces group GitHub repositories for cross-project semantic search.
+
+Argument grammar — name-first:
+
+  cix ws                        list workspaces visible to me
+  cix ws list                   list workspaces (alternate form)
+  cix ws <name>                 describe a workspace (repos + status)
+  cix ws <name> list            list repos in <name>
+  cix ws <name> repos           same as list
+  cix ws <name> search <query>  two-stage semantic search in <name>
+
+Examples:
+  cix ws
+  cix ws platform
+  cix ws platform list
+  cix ws platform search "JWT validation"
+  cix ws platform search "rate limiting" --top-projects 8 --top-chunks 30 --json
+
+Workspace identifiers accept the opaque id OR the (case-insensitive)
+name. Repository attachment, GitHub token management, and the
+detailed dashboard view all live at /dashboard on the cix-server.`,
+	// Positionals are validated by runWorkspace, not by cobra.
+	Args: cobra.ArbitraryArgs,
+	RunE: runWorkspace,
+}
+
+// Flag-backed settings shared by every `cix ws` verb; populated by cobra
+// from the flags registered in init.
+var (
+	wsJSON              bool // --json
+	wsVerbose           bool // --verbose / -v
+	wsSearchTopProjects int  // --top-projects
+	wsSearchTopChunks   int  // --top-chunks
+)
+
+// init registers the workspace command's flags and attaches the command to
+// the root command.
+func init() {
+	// Every flag hangs off the parent command, so it applies to all verbs.
+	// cobra parses flags before runWorkspace routes the positionals, which
+	// is why `cix ws platform search "..." --json` works wherever the flag
+	// is placed.
+	flags := workspaceCmd.Flags()
+	flags.BoolVar(&wsJSON, "json", false, "Emit raw JSON instead of formatted output")
+	flags.BoolVarP(&wsVerbose, "verbose", "v", false, "Show extra columns on list / describe")
+	flags.IntVar(&wsSearchTopProjects, "top-projects", 10, "Search: top-N projects in the projects panel (1-50)")
+	flags.IntVar(&wsSearchTopChunks, "top-chunks", 20, "Search: top-K chunks returned overall (1-200)")
+
+	rootCmd.AddCommand(workspaceCmd)
+}
+
+// runWorkspace is the hand-rolled dispatcher behind `cix ws`.
+//
+// Routing by positional count:
+//   - no args, or the single arg "list" (any case): list workspaces;
+//   - one other arg: describe that workspace;
+//   - two or more: args[0] is the workspace identifier, args[1] the verb
+//     (lower-cased before matching), and the remainder belongs to the verb.
+//
+// It returns the error from getClient or from the selected sub-command, or
+// a usage error for an unknown verb or a wrong argument count.
+func runWorkspace(cmd *cobra.Command, args []string) error {
+	cli, err := getClient()
+	if err != nil {
+		return err
+	}
+
+	if len(args) == 0 {
+		return cmdListWorkspaces(cli)
+	}
+	if len(args) == 1 {
+		if strings.EqualFold(args[0], "list") {
+			return cmdListWorkspaces(cli)
+		}
+		// `cix ws <name>` — describe.
+		return cmdDescribeWorkspace(cli, args[0])
+	}
+
+	// 2+ args. First is the workspace name, second the verb.
+	name, verb, rest := args[0], strings.ToLower(args[1]), args[2:]
+
+	switch verb {
+	case "list", "repos":
+		if len(rest) > 0 {
+			return fmt.Errorf("%q takes no extra arguments", verb)
+		}
+		return cmdListRepos(cli, name)
+	case "describe":
+		if len(rest) > 0 {
+			return fmt.Errorf("describe takes no extra arguments")
+		}
+		return cmdDescribeWorkspace(cli, name)
+	case "search":
+		if len(rest) == 0 {
+			return errors.New("search needs a query string (cix ws <name> search \"<query>\")")
+		}
+		// An unquoted multi-word query arrives as several args; rejoin it.
+		return cmdWorkspaceSearch(cli, name, strings.Join(rest, " "))
+	default:
+		return fmt.Errorf("unknown verb %q — use one of: list, repos, describe, search", verb)
+	}
+}
+
+// ---------------------------------------------------------------------------
+// `cix ws list`
+// ---------------------------------------------------------------------------
+
+// cmdListWorkspaces prints every workspace returned by the server, one per
+// line as "<id> <name>" with " — <description>" appended when a description
+// is set. With --json the raw response is emitted instead. With --verbose
+// each workspace line is followed by its project count and how many of
+// those projects are in the "indexed" state.
+//
+// An empty list is not an error: a hint goes to stderr and stdout stays
+// empty. A failure of the per-workspace project lookup in verbose mode is
+// reported on stderr as a warning and the listing continues, so a missing
+// count row is never silent.
+func cmdListWorkspaces(cli *client.Client) error {
+	resp, err := cli.ListWorkspaces()
+	if err != nil {
+		return err
+	}
+	if wsJSON {
+		return emitJSON(resp)
+	}
+	if resp.Total == 0 {
+		fmt.Fprintln(os.Stderr, "no workspaces — create one at /dashboard/workspaces")
+		return nil
+	}
+	for _, w := range resp.Workspaces {
+		line := w.ID + " " + w.Name
+		if w.Description != "" {
+			line += " — " + w.Description
+		}
+		fmt.Println(line)
+		if !wsVerbose {
+			continue
+		}
+		// Verbose mode costs one extra HTTP call per workspace; acceptable
+		// at typical scale (<10 workspaces).
+		pr, perr := cli.ListWorkspaceProjects(w.ID)
+		if perr != nil {
+			// Best-effort detail: warn instead of aborting the whole list.
+			fmt.Fprintf(os.Stderr, " warning: project count unavailable for %s: %v\n", w.Name, perr)
+			continue
+		}
+		indexed := 0
+		for _, wp := range pr.Projects {
+			if wp.Project.Status == "indexed" {
+				indexed++
+			}
+		}
+		fmt.Printf(" %d projects (%d indexed)\n", pr.Total, indexed)
+	}
+	return nil
+}
+
+// ---------------------------------------------------------------------------
+// `cix ws <name> list` / `<name> repos`
+// ---------------------------------------------------------------------------
+
+// cmdListRepos lists the projects linked to one workspace, addressed by id
+// or name. Each project prints as "<status badge> <host path>"; --verbose
+// adds the path hash, last-indexed time (when set), languages (when any)
+// and the time the project was linked. --json emits the raw response.
+// A workspace with no projects yields a hint on stderr and no error.
+func cmdListRepos(cli *client.Client, identifier string) error {
+	wsID, err := resolveWorkspaceID(cli, identifier)
+	if err != nil {
+		return err
+	}
+	listing, err := cli.ListWorkspaceProjects(wsID)
+	if err != nil {
+		return err
+	}
+
+	switch {
+	case wsJSON:
+		return emitJSON(listing)
+	case listing.Total == 0:
+		fmt.Fprintln(os.Stderr, "no projects linked — add one at /dashboard/workspaces")
+		return nil
+	}
+
+	for i := range listing.Projects {
+		entry := &listing.Projects[i]
+		proj := &entry.Project
+		fmt.Printf("%s %s\n", projectStatusBadge(proj.Status), proj.HostPath)
+		if !wsVerbose {
+			continue
+		}
+		fmt.Printf(" path_hash: %s\n", proj.PathHash)
+		if ts := proj.LastIndexedAt; ts != nil {
+			fmt.Printf(" last indexed: %s\n", ts.Format(time.RFC3339))
+		}
+		if len(proj.Languages) != 0 {
+			fmt.Printf(" languages: %s\n", strings.Join(proj.Languages, ", "))
+		}
+		fmt.Printf(" linked: %s\n", entry.AddedAt.Format(time.RFC3339))
+	}
+	return nil
+}
+
+// ---------------------------------------------------------------------------
+// `cix ws <name>` / `<name> describe`
+// ---------------------------------------------------------------------------
+
+// cmdDescribeWorkspace prints a workspace header (name, id, optional
+// description, project count with the number indexed) followed by one
+// entry per linked project. The workspace is matched by exact id or by
+// case-insensitive name; no match is an error. With --json a single object
+// holding "workspace", "projects" and "total" is emitted instead.
+func cmdDescribeWorkspace(cli *client.Client, identifier string) error {
+	all, err := cli.ListWorkspaces()
+	if err != nil {
+		return err
+	}
+
+	// The lookup is done inline because the whole workspace record is
+	// needed for the header, not only its id.
+	var match *client.Workspace
+	for idx := range all.Workspaces {
+		cand := &all.Workspaces[idx]
+		if cand.ID != identifier && !strings.EqualFold(cand.Name, identifier) {
+			continue
+		}
+		match = cand
+		break
+	}
+	if match == nil {
+		return fmt.Errorf("workspace %q not found (run `cix ws list`)", identifier)
+	}
+
+	members, err := cli.ListWorkspaceProjects(match.ID)
+	if err != nil {
+		return err
+	}
+
+	if wsJSON {
+		payload := map[string]any{
+			"workspace": match,
+			"projects":  members.Projects,
+			"total":     members.Total,
+		}
+		return emitJSON(payload)
+	}
+
+	fmt.Printf("Workspace: %s\n", match.Name)
+	fmt.Printf(" id: %s\n", match.ID)
+	if match.Description != "" {
+		fmt.Printf(" description: %s\n", match.Description)
+	}
+
+	var indexedCount int
+	for i := range members.Projects {
+		if members.Projects[i].Project.Status == "indexed" {
+			indexedCount++
+		}
+	}
+	fmt.Printf(" projects: %d (%d indexed)\n", members.Total, indexedCount)
+
+	if members.Total == 0 {
+		fmt.Fprintln(os.Stderr, "\n (no projects linked — add at /dashboard/workspaces)")
+		return nil
+	}
+
+	fmt.Println()
+	for i := range members.Projects {
+		entry := &members.Projects[i]
+		proj := &entry.Project
+		fmt.Printf(" %s %s\n", projectStatusBadgeShort(proj.Status), proj.HostPath)
+		fmt.Printf(" path_hash: %s\n", proj.PathHash)
+		if ts := proj.LastIndexedAt; ts != nil {
+			fmt.Printf(" last indexed: %s\n", ts.Format(time.RFC3339))
+		}
+		fmt.Printf(" linked: %s\n", entry.AddedAt.Format(time.RFC3339))
+	}
+	return nil
+}
+
+// projectStatusBadge returns the long status label shown by the repo
+// listing. Known wire values map as follows:
+//
+//	indexed            → "✓ indexed"
+//	error              → "✗ error"
+//	indexing | created → "… " followed by the status itself
+//
+// Any other value is returned unchanged, so a status added to the enum
+// later still renders readably.
+func projectStatusBadge(status string) string {
+	if status == "indexed" {
+		return "✓ indexed"
+	}
+	if status == "error" {
+		return "✗ error"
+	}
+	if status == "indexing" || status == "created" {
+		return "… " + status
+	}
+	return status
+}
+
+// projectStatusBadgeShort returns the one-glyph badge used in the describe
+// view's project list: "✓" for indexed, "✗" for error, and "…" for every
+// other status, including unknown ones.
+func projectStatusBadgeShort(status string) string {
+	glyphs := map[string]string{
+		"indexed": "✓",
+		"error":   "✗",
+	}
+	if glyph, known := glyphs[status]; known {
+		return glyph
+	}
+	return "…"
+}
+
+// ---------------------------------------------------------------------------
+// `cix ws <name> search <query>`
+// ---------------------------------------------------------------------------
+
+// cmdWorkspaceSearch resolves the workspace identifier, runs the search
+// with the --top-projects / --top-chunks flag values, and either emits the
+// raw response (--json) or renders it in human-readable form.
+func cmdWorkspaceSearch(cli *client.Client, identifier, query string) error {
+	wsID, err := resolveWorkspaceID(cli, identifier)
+	if err != nil {
+		return err
+	}
+	result, err := cli.WorkspaceSearch(wsID, query, wsSearchTopProjects, wsSearchTopChunks)
+	if err != nil {
+		return err
+	}
+	if !wsJSON {
+		return renderSearch(result)
+	}
+	return emitJSON(result)
+}
+
+// resolveWorkspaceID turns what the user typed into the workspace id the
+// API expects. It fetches the workspace list once and returns the id of
+// the first entry whose id equals the identifier exactly or whose name
+// equals it ignoring case. When nothing matches, the error quotes the
+// identifier so a typo is easy to spot.
+func resolveWorkspaceID(cli *client.Client, identifier string) (string, error) {
+	all, err := cli.ListWorkspaces()
+	if err != nil {
+		return "", err
+	}
+	for _, ws := range all.Workspaces {
+		if ws.ID != identifier && !strings.EqualFold(ws.Name, identifier) {
+			continue
+		}
+		return ws.ID, nil
+	}
+	return "", fmt.Errorf("workspace %q not found (run `cix ws list`)", identifier)
+}
+
+// renderSearch prints a workspace search response for humans.
+//
+// Diagnostics go to stderr: the "empty" status prints a notice and ends
+// rendering, "partial_failure" prints a notice and continues, and any
+// stale-FTS repos are listed with a reindex hint. Results go to stdout:
+// an optional "Top projects:" panel followed by the "Top chunks:" list.
+// It always returns nil.
+func renderSearch(resp *client.WorkspaceSearchResponse) error {
+	if resp.Status == "empty" {
+		fmt.Fprintln(os.Stderr, "no chunks matched the query")
+		return nil
+	}
+	if resp.Status == "partial_failure" {
+		fmt.Fprintln(os.Stderr, "at least one repo errored — results below are incomplete; check server logs")
+	}
+
+	if n := len(resp.StaleFTSRepos); n > 0 {
+		stale := make([]string, 0, n)
+		for _, repo := range resp.StaleFTSRepos {
+			stale = append(stale, repo.ProjectPath)
+		}
+		fmt.Fprintf(os.Stderr,
+			"warning: %d repo(s) were indexed before BM25 was enabled; hybrid degrades to dense-only for them.\n"+
+				" reindex to fix: ", n)
+		fmt.Fprintln(os.Stderr, strings.Join(stale, ", "))
+		fmt.Fprintln(os.Stderr)
+	}
+
+	if len(resp.Projects) > 0 {
+		fmt.Println("Top projects:")
+		for i := range resp.Projects {
+			proj := &resp.Projects[i]
+			// Prefer the label; fall back to the path when it is blank.
+			name := proj.ProjectPath
+			if proj.Label != "" {
+				name = proj.Label
+			}
+			fmt.Printf(" [%.3f] %s — %d hits · bm25 %.3f · dense %.3f · %s\n",
+				proj.ProjectScore, name, proj.NumHits, proj.BM25Score, proj.DenseScore, proj.ProjectPath)
+		}
+		fmt.Println()
+	}
+
+	fmt.Println("Top chunks:")
+	for i := range resp.Chunks {
+		chunk := &resp.Chunks[i]
+		fmt.Printf(" [%.3f] %s:%d-%d\n", chunk.Score, chunk.FilePath, chunk.StartLine, chunk.EndLine)
+		fmt.Printf(" project: %s\n", chunk.ProjectPath)
+		if chunk.SymbolName != "" {
+			fmt.Printf(" symbol: %s\n", chunk.SymbolName)
+		}
+		fmt.Println()
+	}
+	return nil
+}
+
+// emitJSON marshals v as indented JSON and writes it, followed by a
+// newline, to stdout. A marshal or write failure is returned.
+func emitJSON(v any) error {
+	encoded, err := json.MarshalIndent(v, "", " ")
+	if err != nil {
+		return err
+	}
+	_, err = os.Stdout.Write(append(encoded, '\n'))
+	return err
+}
diff --git a/cli/cmd/workspace_test.go b/cli/cmd/workspace_test.go
new file mode 100644
index 0000000..ea0c979
--- /dev/null
+++ b/cli/cmd/workspace_test.go
@@ -0,0 +1,348 @@
+package cmd
+
+import (
+ "net/http"
+ "strings"
+ "testing"
+)
+
+// TestListWorkspaceProjects_DecodesPayload locks the acceptance from
+// docs/code-review-workspaces-link-local-projects.md (Fix #1, line 284):
+// the verbose repo listing (`cix ws <name> list`) must decode the project
+// payload and render status badges, host paths and the verbose extras.
+// It also guards against the literal "undefined" ever appearing — the
+// dashboard regression from Fix #2, where a missing branch field rendered
+// as "@undefined".
+func TestListWorkspaceProjects_DecodesPayload(t *testing.T) {
+	srv := mockServer(t, defaultWorkspaceHandler())
+	useAPI(t, srv)
+
+	cli, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	// Force verbose output for this test only.
+	saved := wsVerbose
+	t.Cleanup(func() { wsVerbose = saved })
+	wsVerbose = true
+
+	out, err := captureOutput(func() error { return cmdListRepos(cli, "platform") })
+	if err != nil {
+		t.Fatalf("cmdListRepos: %v", err)
+	}
+
+	wants := []struct {
+		needle  string
+		failure string
+	}{
+		// Status badges per the new enum.
+		{"✓ indexed", "expected '✓ indexed' badge, got:\n%s"},
+		{"… indexing", "expected '… indexing' badge, got:\n%s"},
+		// Host-paths render directly — github form already carries @branch.
+		{"github.com/owner/repo@main", "expected github host_path with @branch, got:\n%s"},
+		{"/Users/me/local-proj", "expected local host_path, got:\n%s"},
+		// Verbose extras for the indexed row.
+		{"path_hash: a1b2c3d4e5f60718", "expected path_hash in verbose output, got:\n%s"},
+		{"last indexed: 2026-05-14T12:30:45Z", "expected RFC3339 last_indexed in verbose output, got:\n%s"},
+		{"languages: go, typescript", "expected languages line for indexed row, got:\n%s"},
+	}
+	for _, want := range wants {
+		if !strings.Contains(out, want.needle) {
+			t.Errorf(want.failure, out)
+		}
+	}
+
+	// Regression canary: any "undefined" (which also covers "@undefined")
+	// in the output means a missing field leaked into the rendering.
+	if strings.Contains(out, "undefined") {
+		t.Errorf("unexpected 'undefined' in output:\n%s", out)
+	}
+}
+
+// TestListWorkspaces_VerboseProjectCount covers the silent-fail path
+// that broke `cix ws list -v` — it used to swallow 404s from the deleted
+// /repos endpoint and just omit the count row. After the fix the verbose
+// row must reappear with the new "projects" terminology.
+func TestListWorkspaces_VerboseProjectCount(t *testing.T) {
+ srv := mockServer(t, defaultWorkspaceHandler())
+ useAPI(t, srv)
+
+ cli, err := getClient()
+ if err != nil {
+ t.Fatalf("getClient: %v", err)
+ }
+
+ prevVerbose := wsVerbose
+ wsVerbose = true
+ t.Cleanup(func() { wsVerbose = prevVerbose })
+
+ out, err := captureOutput(func() error { return cmdListWorkspaces(cli) })
+ if err != nil {
+ t.Fatalf("cmdListWorkspaces: %v", err)
+ }
+
+ if !strings.Contains(out, "2 projects (1 indexed)") {
+ t.Errorf("expected '2 projects (1 indexed)' verbose count, got:\n%s", out)
+ }
+ // Sanity: the old wording must not leak back.
+ if strings.Contains(out, "repos (") {
+ t.Errorf("unexpected old 'repos (...)' wording in output:\n%s", out)
+ }
+}
+
+// TestListWorkspaceProjects_ServiceUnavailable pins the behaviour when the
+// projects endpoint answers 503 (the CIX_WORKSPACES_ENABLED=false case):
+// cmdListRepos must return an error that mentions both the status code and
+// the "disabled" hint, rather than succeed or hang.
+func TestListWorkspaceProjects_ServiceUnavailable(t *testing.T) {
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/api/v1/workspaces" {
+			writeJSON(w, 200, map[string]any{
+				"workspaces": []map[string]any{{"id": "ws_1", "name": "platform"}},
+				"total":      1,
+			})
+			return
+		}
+		if r.URL.Path == "/api/v1/workspaces/ws_1/projects" {
+			apiError(w, http.StatusServiceUnavailable,
+				"workspaces feature is disabled (set CIX_WORKSPACES_ENABLED=true and restart)")
+			return
+		}
+		http.NotFound(w, r)
+	}
+	srv := mockServer(t, handler)
+	useAPI(t, srv)
+
+	cli, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	_, err = captureOutput(func() error { return cmdListRepos(cli, "platform") })
+	if err == nil {
+		t.Fatal("expected error on 503, got nil")
+	}
+	msg := err.Error()
+	if !(strings.Contains(msg, "503") && strings.Contains(msg, "disabled")) {
+		t.Errorf("expected error to mention 503 + 'disabled', got: %v", err)
+	}
+}
+
+// TestDescribeWorkspace_ByCaseInsensitiveName drives cmdDescribeWorkspace
+// with an upper-cased name. The describe command carries its own inline
+// name-match loop (separate from resolveWorkspaceID), so this confirms
+// that loop ignores case and that the header, count line and per-project
+// details are rendered.
+func TestDescribeWorkspace_ByCaseInsensitiveName(t *testing.T) {
+	srv := mockServer(t, defaultWorkspaceHandler())
+	useAPI(t, srv)
+
+	cli, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	out, err := captureOutput(func() error { return cmdDescribeWorkspace(cli, "PLATFORM") })
+	if err != nil {
+		t.Fatalf("cmdDescribeWorkspace: %v", err)
+	}
+
+	wants := []struct {
+		needle  string
+		failure string
+	}{
+		{"Workspace: platform", "expected workspace header, got:\n%s"},
+		{"projects: 2 (1 indexed)", "expected per-workspace project count line, got:\n%s"},
+		{"github.com/owner/repo@main", "expected indexed project's host_path in describe output, got:\n%s"},
+		{"path_hash: a1b2c3d4e5f60718", "expected path_hash in describe output, got:\n%s"},
+	}
+	for _, want := range wants {
+		if !strings.Contains(out, want.needle) {
+			t.Errorf(want.failure, out)
+		}
+	}
+}
+
+// TestListWorkspaces_ParsesEmpty pins the empty-server path: for
+// `{"workspaces": [], "total": 0}` cmdListWorkspaces must return no error
+// and write nothing to stdout. The operator hint goes to stderr, which
+// captureOutput does not observe. Fix #17 minimum #1.
+func TestListWorkspaces_ParsesEmpty(t *testing.T) {
+	emptyList := func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v1/workspaces" {
+			http.NotFound(w, r)
+			return
+		}
+		writeJSON(w, 200, map[string]any{
+			"workspaces": []map[string]any{},
+			"total":      0,
+		})
+	}
+	srv := mockServer(t, emptyList)
+	useAPI(t, srv)
+
+	cli, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	out, err := captureOutput(func() error { return cmdListWorkspaces(cli) })
+	if err != nil {
+		t.Fatalf("cmdListWorkspaces on empty list: %v", err)
+	}
+	// Stdout must stay empty so a future regression that prints a header
+	// row (or a stray "0 workspaces" line) trips this assertion.
+	if len(out) != 0 {
+		t.Errorf("expected empty stdout for 0 workspaces, got: %q", out)
+	}
+}
+
+// TestProjectStatusBadge — exhaustive per-status formatting check for
+// the two badge helpers. Fix #17 minimum #2: a future renumber of the
+// status enum (e.g. dropping 'created' or adding 'archived') must trip
+// at least one of these table rows. Direct unit test bypasses the HTTP
+// harness — the two functions are pure mappings.
+func TestProjectStatusBadge(t *testing.T) {
+ cases := []struct {
+ in string
+ long string
+ short string
+ }{
+ {"indexed", "✓ indexed", "✓"},
+ {"indexing", "… indexing", "…"},
+ {"created", "… created", "…"},
+ {"error", "✗ error", "✗"},
+ // Default-arm coverage: unknown future statuses must surface
+ // verbatim (long) and degrade to the "still working" glyph
+ // (short) rather than crash or panic. This protects forward
+ // compatibility — the CLI should render whatever the server
+ // returns, not gate on the enum.
+ {"archived", "archived", "…"},
+ }
+ for _, c := range cases {
+ if got := projectStatusBadge(c.in); got != c.long {
+ t.Errorf("projectStatusBadge(%q) = %q, want %q", c.in, got, c.long)
+ }
+ if got := projectStatusBadgeShort(c.in); got != c.short {
+ t.Errorf("projectStatusBadgeShort(%q) = %q, want %q", c.in, got, c.short)
+ }
+ }
+}
+
+// TestResolveWorkspaceID_ByName covers Fix #17 minimum #3. The shared
+// resolver must accept an exact id, an exact name, and a name in any
+// letter case; an unknown identifier must produce an error that mentions
+// the input. This exercises resolveWorkspaceID itself, as opposed to
+// TestDescribeWorkspace_ByCaseInsensitiveName, which goes through the
+// describe command's inline match loop.
+func TestResolveWorkspaceID_ByName(t *testing.T) {
+	twoWorkspaces := func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/api/v1/workspaces" {
+			http.NotFound(w, r)
+			return
+		}
+		writeJSON(w, 200, map[string]any{
+			"workspaces": []map[string]any{
+				{"id": "ws_alpha", "name": "platform"},
+				{"id": "ws_beta", "name": "ML-Pipeline"},
+			},
+			"total": 2,
+		})
+	}
+	srv := mockServer(t, twoWorkspaces)
+	useAPI(t, srv)
+	cli, err := getClient()
+	if err != nil {
+		t.Fatalf("getClient: %v", err)
+	}
+
+	table := []struct {
+		in      string
+		wantID  string
+		wantErr bool
+	}{
+		{in: "platform", wantID: "ws_alpha"},    // exact name match
+		{in: "PLATFORM", wantID: "ws_alpha"},    // upper-case name match
+		{in: "PlatForm", wantID: "ws_alpha"},    // mixed-case name match
+		{in: "ml-pipeline", wantID: "ws_beta"},  // case-insensitive on hyphenated name
+		{in: "ML-PIPELINE", wantID: "ws_beta"},  // upper-case variant
+		{in: "ws_alpha", wantID: "ws_alpha"},    // exact ID match
+		{in: "nonexistent", wantErr: true},      // not found → error
+	}
+	for _, tc := range table {
+		got, rerr := resolveWorkspaceID(cli, tc.in)
+		switch {
+		case tc.wantErr && rerr == nil:
+			t.Errorf("resolveWorkspaceID(%q): expected error, got id=%q", tc.in, got)
+		case tc.wantErr:
+			if !strings.Contains(rerr.Error(), tc.in) {
+				t.Errorf("resolveWorkspaceID(%q): error should mention input, got: %v", tc.in, rerr)
+			}
+		case rerr != nil:
+			t.Errorf("resolveWorkspaceID(%q): unexpected error: %v", tc.in, rerr)
+		case got != tc.wantID:
+			t.Errorf("resolveWorkspaceID(%q) = %q, want %q", tc.in, got, tc.wantID)
+		}
+	}
+}
+
+// defaultWorkspaceHandler returns the standard fixture shared by the tests
+// in this file that do not need a custom server: one workspace ("ws_1",
+// named "platform") with two linked projects — an indexed GitHub project
+// and a local project still in the "indexing" state with no
+// last_indexed_at. Any other path answers 404. Factored out to avoid
+// copy-pasting the JSON literal across handlers.
+func defaultWorkspaceHandler() http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ switch r.URL.Path {
+ case "/api/v1/workspaces":
+ writeJSON(w, 200, map[string]any{
+ "workspaces": []map[string]any{
+ {"id": "ws_1", "name": "platform", "description": "core platform repos"},
+ },
+ "total": 1,
+ })
+ case "/api/v1/workspaces/ws_1/projects":
+ writeJSON(w, 200, map[string]any{
+ "projects": []map[string]any{
+ {
+ "added_at": "2026-05-10T08:15:00Z",
+ "project": map[string]any{
+ "path_hash": "a1b2c3d4e5f60718",
+ "host_path": "github.com/owner/repo@main",
+ "container_path": "/code/owner/repo",
+ "languages": []string{"go", "typescript"},
+ "settings": map[string]any{"exclude_patterns": []string{}, "max_file_size": 524288},
+ "stats": map[string]any{"total_files": 50, "indexed_files": 50, "total_chunks": 200, "total_symbols": 30},
+ "status": "indexed",
+ "created_at": "2026-05-01T00:00:00Z",
+ "updated_at": "2026-05-14T12:30:45Z",
+ "last_indexed_at": "2026-05-14T12:30:45Z",
+ },
+ },
+ {
+ "added_at": "2026-05-11T09:00:00Z",
+ "project": map[string]any{
+ "path_hash": "7f3e2c1a0d4b5e69",
+ "host_path": "/Users/me/local-proj",
+ "container_path": "/Users/me/local-proj",
+ "languages": []string{},
+ "settings": map[string]any{"exclude_patterns": []string{}, "max_file_size": 524288},
+ "stats": map[string]any{"total_files": 0, "indexed_files": 0, "total_chunks": 0, "total_symbols": 0},
+ "status": "indexing",
+ "created_at": "2026-05-11T08:55:00Z",
+ "updated_at": "2026-05-11T09:00:00Z",
+ "last_indexed_at": nil,
+ },
+ },
+ },
+ "total": 2,
+ })
+ default:
+ http.NotFound(w, r)
+ }
+ }
+}
diff --git a/cli/internal/client/client.go b/cli/internal/client/client.go
index 6723f12..e59039c 100644
--- a/cli/internal/client/client.go
+++ b/cli/internal/client/client.go
@@ -2,11 +2,17 @@ package client
import (
"bytes"
+ "crypto/rand"
"crypto/sha1"
+ "encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
"time"
)
@@ -84,13 +90,64 @@ func (c *Client) do(method, path string, body interface{}) (*http.Response, erro
return resp, nil
}
-// encodeProjectPath returns SHA1 hash (first 16 hex chars) of the project path.
-// This avoids all URL encoding issues with slashes in paths.
+// encodeProjectPath returns the project's URL hash (first 16 hex chars of
+// SHA1 of the identity key). Local projects are namespaced per machine —
+// "local:{machine_id}:{path}" — so the same filesystem path on different
+// machines/users maps to different projects. MUST stay byte-identical to the
+// server's projects.LocalProjectKey + hashPath (server/internal/projects).
func encodeProjectPath(path string) string {
- h := sha1.Sum([]byte(path))
+ key := "local:" + machineID() + ":" + path
+ h := sha1.Sum([]byte(key))
return fmt.Sprintf("%x", h)[:16]
}
+// EncodeProjectPath exposes encodeProjectPath to other packages, so tests
+// and tooling can compute the same project URL hash the client uses.
+func EncodeProjectPath(path string) string {
+	return encodeProjectPath(path)
+}
+
+// machineIDOnce guards the one-time load of machineIDVal, so the id is
+// resolved at most once per process.
+var (
+ machineIDOnce sync.Once
+ machineIDVal string
+)
+
+// machineID returns a stable per-machine (per-home) identifier, persisted at
+// ~/.cix/machine_id. Generated on first use. Used to namespace local project
+// identity so different developers' machines never collide on the same path.
+// The value is loaded by loadOrCreateMachineID on the first call and cached
+// for the rest of the process.
+func machineID() string {
+ machineIDOnce.Do(func() { machineIDVal = loadOrCreateMachineID() })
+ return machineIDVal
+}
+
+// loadOrCreateMachineID returns the identifier stored in
+// <home>/.cix/machine_id, creating the file with a fresh random 32-hex-char
+// id when it is missing or blank.
+//
+// Creation uses O_EXCL so that when two processes start for the first time
+// at once, only one of them publishes an id; the other re-reads the file
+// and adopts the winner's value instead of overwriting it with its own.
+// Persisting is best-effort: if the file cannot be written, the generated
+// id is still returned for this process. If the home directory or the
+// random source is unavailable, the placeholder "unknown-machine" is
+// returned.
+func loadOrCreateMachineID() string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return "unknown-machine"
+	}
+	path := filepath.Join(home, ".cix", "machine_id")
+	if id := readMachineID(path); id != "" {
+		return id
+	}
+
+	buf := make([]byte, 16)
+	if _, gerr := rand.Read(buf); gerr != nil {
+		return "unknown-machine"
+	}
+	id := hex.EncodeToString(buf)
+
+	_ = os.MkdirAll(filepath.Dir(path), 0o755)
+	f, cerr := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
+	if cerr != nil {
+		if os.IsExist(cerr) {
+			// Another process created the file first: prefer its id.
+			if existing := readMachineID(path); existing != "" {
+				return existing
+			}
+			// The file exists but is blank; replace its contents.
+			_ = os.WriteFile(path, []byte(id+"\n"), 0o600)
+		}
+		return id
+	}
+	_, _ = f.WriteString(id + "\n")
+	_ = f.Close()
+	return id
+}
+
+// readMachineID returns the trimmed contents of the id file at path, or ""
+// when the file cannot be read or holds only whitespace.
+func readMachineID(path string) string {
+	b, err := os.ReadFile(path)
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(b))
+}
+
+// machineLabel returns the OS hostname, which CreateProject sends to the
+// server as machine_label for display. It is best-effort: when the
+// hostname cannot be determined the result is the empty string.
+func machineLabel() string {
+	host, err := os.Hostname()
+	if err != nil {
+		return ""
+	}
+	return host
+}
+
// parseResponse reads and unmarshals JSON response
func parseResponse(resp *http.Response, v interface{}) error {
defer resp.Body.Close()
diff --git a/cli/internal/client/projects.go b/cli/internal/client/projects.go
index af70475..f447748 100644
--- a/cli/internal/client/projects.go
+++ b/cli/internal/client/projects.go
@@ -7,15 +7,21 @@ import (
// Project represents a code project
type Project struct {
- HostPath string `json:"host_path"`
- ContainerPath string `json:"container_path"`
- Languages []string `json:"languages"`
- Settings ProjectSettings `json:"settings"`
- Stats ProjectStats `json:"stats"`
- Status string `json:"status"` // created, indexing, indexed, error
- CreatedAt time.Time `json:"created_at"`
- UpdatedAt time.Time `json:"updated_at"`
- LastIndexedAt *time.Time `json:"last_indexed_at"`
+ PathHash string `json:"path_hash"`
+ HostPath string `json:"host_path"`
+ ContainerPath string `json:"container_path"`
+ Languages []string `json:"languages"`
+ Settings ProjectSettings `json:"settings"`
+ Stats ProjectStats `json:"stats"`
+ Status string `json:"status"` // created, indexing, indexed, error
+ CreatedAt time.Time `json:"created_at"`
+ UpdatedAt time.Time `json:"updated_at"`
+ LastIndexedAt *time.Time `json:"last_indexed_at"`
+ IndexedWithModel *string `json:"indexed_with_model,omitempty"`
+ SqlitePath *string `json:"sqlite_path,omitempty"`
+ SqliteSizeBytes *int64 `json:"sqlite_size_bytes,omitempty"`
+ ChromaPath *string `json:"chroma_path,omitempty"`
+ ChromaSizeBytes *int64 `json:"chroma_size_bytes,omitempty"`
}
type ProjectSettings struct {
@@ -67,8 +73,14 @@ func (c *Client) GetProject(path string) (*Project, error) {
// CreateProject creates a new project
func (c *Client) CreateProject(path string) (*Project, error) {
+ // machine_id namespaces the project's identity so the same filesystem path
+ // on a different machine/user is a distinct project; machine_label is the
+ // hostname for display. The server derives path_hash from
+ // local:{machine_id}:{host_path} — matching encodeProjectPath.
body := map[string]string{
- "host_path": path,
+ "host_path": path,
+ "machine_id": machineID(),
+ "machine_label": machineLabel(),
}
resp, err := c.do("POST", "/api/v1/projects", body)
diff --git a/cli/internal/client/workspace.go b/cli/internal/client/workspace.go
new file mode 100644
index 0000000..1d979ec
--- /dev/null
+++ b/cli/internal/client/workspace.go
@@ -0,0 +1,128 @@
+package client
+
+import (
+ "fmt"
+ "net/url"
+ "time"
+)
+
+// WorkspaceSearchProject mirrors the OpenAPI WorkspaceSearchProject
+// schema — one entry per surviving project in the hybrid candidacy
+// ranking. The CLI shows Label when it is set and falls back to
+// ProjectPath otherwise.
+type WorkspaceSearchProject struct {
+ ProjectPath string `json:"project_path"`
+ Label string `json:"label"`
+ ProjectScore float32 `json:"project_score"`
+ NumHits int `json:"num_hits"`
+ BM25Score float32 `json:"bm25_score"`
+ DenseScore float32 `json:"dense_score"`
+}
+
+// WorkspaceSearchChunk mirrors the OpenAPI WorkspaceSearchChunk schema —
+// one matched chunk, identified by its project, file and line span, with
+// its score. SymbolName and Language are optional on the wire.
+type WorkspaceSearchChunk struct {
+ ProjectPath string `json:"project_path"`
+ FilePath string `json:"file_path"`
+ StartLine int `json:"start_line"`
+ EndLine int `json:"end_line"`
+ SymbolName string `json:"symbol_name,omitempty"`
+ Language string `json:"language,omitempty"`
+ Score float32 `json:"score"`
+ Content string `json:"content"`
+}
+
+// WorkspaceSearchStaleFTSRepo names a repo whose BM25 index hasn't
+// been backfilled yet (indexed before chunks_fts existed); hybrid
+// degrades to dense-only for that entry until reindex.
+type WorkspaceSearchStaleFTSRepo struct {
+ ProjectPath string `json:"project_path"`
+}
+
+// WorkspaceSearchResponse mirrors the OpenAPI WorkspaceSearchResponse
+// schema. The CLI renderer special-cases the Status values "empty" and
+// "partial_failure"; StaleFTSRepos is optional on the wire.
+type WorkspaceSearchResponse struct {
+ Status string `json:"status"`
+ Projects []WorkspaceSearchProject `json:"projects"`
+ Chunks []WorkspaceSearchChunk `json:"chunks"`
+ StaleFTSRepos []WorkspaceSearchStaleFTSRepo `json:"stale_fts_repos,omitempty"`
+}
+
+// Workspace is the metadata projection of a workspace row: its opaque id,
+// display name and free-text description.
+type Workspace struct {
+ ID string `json:"id"`
+ Name string `json:"name"`
+ Description string `json:"description"`
+}
+
+// WorkspaceListResponse is the GET /workspaces shape: the workspaces plus
+// the server-reported total.
+type WorkspaceListResponse struct {
+ Workspaces []Workspace `json:"workspaces"`
+ Total int `json:"total"`
+}
+
+// WorkspaceProject mirrors the server's WorkspaceProject — the embedded
+// project (full Project shape, defined in projects.go) plus the
+// membership-added timestamp.
+type WorkspaceProject struct {
+ AddedAt time.Time `json:"added_at"`
+ Project Project `json:"project"`
+}
+
+// WorkspaceProjectListResponse is the GET /workspaces/{id}/projects shape:
+// the linked projects plus the server-reported total.
+type WorkspaceProjectListResponse struct {
+ Projects []WorkspaceProject `json:"projects"`
+ Total int `json:"total"`
+}
+
+// ListWorkspaces — GET /api/v1/workspaces. Returns
+// ServiceUnavailable as a typed error so callers can render a hint when
+// CIX_WORKSPACES_ENABLED is off on the server side.
+func (c *Client) ListWorkspaces() (*WorkspaceListResponse, error) {
+ resp, err := c.do("GET", "/api/v1/workspaces", nil)
+ if err != nil {
+ return nil, err
+ }
+ var out WorkspaceListResponse
+ if err := parseResponse(resp, &out); err != nil {
+ return nil, err
+ }
+ return &out, nil
+}
+
+// ListWorkspaceProjects — GET /api/v1/workspaces/{id}/projects. Returns
+// every linked project with its current status (created / indexing /
+// indexed / error), host path, path hash, and membership timestamp so
+// the CLI can render a readable per-project summary without a second
+// round-trip.
+func (c *Client) ListWorkspaceProjects(workspaceID string) (*WorkspaceProjectListResponse, error) {
+ resp, err := c.do("GET", "/api/v1/workspaces/"+url.PathEscape(workspaceID)+"/projects", nil)
+ if err != nil {
+ return nil, err
+ }
+ var out WorkspaceProjectListResponse
+ if err := parseResponse(resp, &out); err != nil {
+ return nil, err
+ }
+ return &out, nil
+}
+
+// WorkspaceSearch — GET /api/v1/workspaces/{id}/search. id is the
+// workspace's opaque ULID/UUID returned by ListWorkspaces.
+func (c *Client) WorkspaceSearch(id, query string, topProjects, topChunks int) (*WorkspaceSearchResponse, error) {
+ v := url.Values{}
+ v.Set("q", query)
+ if topProjects > 0 {
+ v.Set("top_projects", fmt.Sprintf("%d", topProjects))
+ }
+ if topChunks > 0 {
+ v.Set("top_chunks", fmt.Sprintf("%d", topChunks))
+ }
+ path := "/api/v1/workspaces/" + url.PathEscape(id) + "/search?" + v.Encode()
+ resp, err := c.do("GET", path, nil)
+ if err != nil {
+ return nil, err
+ }
+ var out WorkspaceSearchResponse
+ if err := parseResponse(resp, &out); err != nil {
+ return nil, err
+ }
+ return &out, nil
+}
diff --git a/cli/internal/indexer/indexer_test.go b/cli/internal/indexer/indexer_test.go
index fb490d6..56f1ca3 100644
--- a/cli/internal/indexer/indexer_test.go
+++ b/cli/internal/indexer/indexer_test.go
@@ -2,11 +2,9 @@ package indexer
import (
"context"
- "crypto/sha1"
"crypto/sha256"
"encoding/hex"
"encoding/json"
- "fmt"
"io"
"net/http"
"net/http/httptest"
@@ -18,10 +16,10 @@ import (
"github.com/anthropics/code-index/cli/internal/client"
)
-// projectHash mirrors the client's URL-encoding logic (SHA1, first 16 hex chars).
+// projectHash mirrors the client's project URL hash — delegated to the real
+// implementation so per-machine namespacing matches.
func projectHash(path string) string {
- h := sha1.Sum([]byte(path))
- return fmt.Sprintf("%x", h)[:16]
+ return client.EncodeProjectPath(path)
}
// sha256hex computes the hex-encoded SHA-256 of b, matching discovery.hashFile.
diff --git a/cli/internal/projectconfig/projectconfig_test.go b/cli/internal/projectconfig/projectconfig_test.go
index 1769681..1705afd 100644
--- a/cli/internal/projectconfig/projectconfig_test.go
+++ b/cli/internal/projectconfig/projectconfig_test.go
@@ -91,8 +91,8 @@ func TestSubmodulePaths_Standard(t *testing.T) {
writeFile(t, filepath.Join(root, ".gitmodules"), `[submodule "api/schema/acme-shared"]
path = api/schema/acme-shared
url = https://github.com/Example/acme-shared.git
-[submodule "api/generated/acme-models"]
- path = api/generated/acme-models
+[submodule "api/models/acme-models"]
+ path = api/models/acme-models
url = https://github.com/Example/acme-models.git
`)
@@ -102,7 +102,7 @@ func TestSubmodulePaths_Standard(t *testing.T) {
}
sort.Strings(paths)
- expected := []string{"api/schema/acme-shared", "api/generated/acme-models"}
+ expected := []string{"api/models/acme-models", "api/schema/acme-shared"}
if len(paths) != len(expected) {
t.Fatalf("expected %v, got %v", expected, paths)
diff --git a/cli/internal/watcher/watcher_test.go b/cli/internal/watcher/watcher_test.go
index d5705e8..b4375fe 100644
--- a/cli/internal/watcher/watcher_test.go
+++ b/cli/internal/watcher/watcher_test.go
@@ -1,9 +1,7 @@
package watcher
import (
- "crypto/sha1"
"encoding/json"
- "fmt"
"io"
"log"
"net/http"
@@ -19,10 +17,10 @@ import (
"github.com/rjeczalik/notify"
)
-// projectHash mirrors client.encodeProjectPath.
+// projectHash mirrors the client's project URL hash — delegated to the real
+// implementation so per-machine namespacing matches.
func projectHash(path string) string {
- h := sha1.Sum([]byte(path))
- return fmt.Sprintf("%x", h)[:16]
+ return client.EncodeProjectPath(path)
}
// mockEventInfo implements notify.EventInfo for testing.
diff --git a/doc/CONFIG_REFERENCE.md b/doc/CONFIG_REFERENCE.md
new file mode 100644
index 0000000..455ffc7
--- /dev/null
+++ b/doc/CONFIG_REFERENCE.md
@@ -0,0 +1,148 @@
+# Configuration Reference
+
+Complete environment-variable surface for `cix-server`. The
+operator-facing template lives in `.env.example`; the variables below
+are the authoritative list. README's *Configuration* section keeps
+only the must-know subset — for everything else, this is the doc.
+
+Anything in the **Tuning** group is also overridable at runtime from
+the dashboard's **Server** page (admin only). Dashboard writes go to
+the SQLite `runtime_config` table and trigger a sidecar restart; the
+env-var values become the boot-time fallback.
+
+---
+
+## Auth + bootstrap
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_API_KEY` | — | Header API key for direct CLI / CI traffic. On first boot it's imported as the bootstrap admin's `env-bootstrap` API key. |
+| `CIX_BOOTSTRAP_ADMIN_EMAIL` | — | **Required on a fresh DB.** Seeds the first admin user. Ignored once the `users` table is non-empty. |
+| `CIX_BOOTSTRAP_ADMIN_PASSWORD` | — | **Required on a fresh DB.** The user is flagged `must_change_password=true`, so this only works for the first login. |
+| `CIX_AUTH_DISABLED` | `false` | **Dev only.** Skips auth on every endpoint — every request behaves as admin. Never set in production. |
+
+On a fresh DB the server **refuses to start** unless both
+`CIX_BOOTSTRAP_ADMIN_EMAIL` and `CIX_BOOTSTRAP_ADMIN_PASSWORD` are
+set. After first login, drop them from `.env` — the user lives in
+the DB.
+
+## Networking + storage
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_PORT` | `21847` | Listen port (both Docker images bake this in). |
+| `CIX_SQLITE_PATH` | `/data/sqlite/projects.db` | SQLite path. Suffixed with the model-safe name on open. |
+| `CIX_CHROMA_PERSIST_DIR` | `/data/chroma` | Vector store directory. |
+| `CIX_GGUF_CACHE_DIR` | `/data/models` | Where downloaded GGUF files live. |
+| `CIX_PUBLIC_URL` | — | Externally-reachable URL used to build GitHub webhook delivery URLs. Empty disables webhook URL display. |
+
+## Indexing
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_EMBEDDING_MODEL` | `awhiteside/CodeRankEmbed-Q8_0-GGUF` | HuggingFace GGUF repo (or absolute path to a `.gguf`). |
+| `CIX_MAX_FILE_SIZE` | `524288` | Skip files larger than this (bytes). |
+| `CIX_EXCLUDED_DIRS` | `node_modules,.git,.venv,...` | Comma-separated dirs always skipped. |
+| `CIX_LANGUAGES` | all | Comma-separated allow-list of chunker languages. Empty = all baked-in. See [`LANGUAGES.md`](LANGUAGES.md). |
+| `CIX_EMBED_INCLUDE_PATH` | `true` | Path/language/symbol preamble before each chunk. Toggling requires `cix reindex --full`. |
+| `CIX_MAX_CHUNK_TOKENS` | `1500` | Max chunk size before falling back to sliding window. Must stay ≤ `CIX_LLAMA_CTX`. |
+
+## llama-server sidecar
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_EMBEDDINGS_ENABLED` | `true` | Set `false` to boot without the sidecar (read-only mode; auth/dashboard/symbol search keep working). |
+| `CIX_LLAMA_BIN_DIR` | `/app` (Docker) / `/llama` (native) | Directory containing `llama-server` + dylibs. |
+| `CIX_LLAMA_TRANSPORT` | `unix` | `unix` or `tcp`. Auto-falls-back to TCP if the socket path is too long. |
+| `CIX_LLAMA_SOCKET` | `${TMPDIR}/cix-llama-.sock` | Unix socket path. macOS `sun_path` cap = 104 bytes. |
+| `CIX_LLAMA_CTX` | `2048` | `--ctx-size` passed to llama-server. |
+| `CIX_N_GPU_LAYERS` | `-1` darwin / `0` else / `99` Docker CUDA | `99` offloads all layers; `0` forces CPU. |
+| `CIX_LLAMA_STARTUP_TIMEOUT` | `60` | Seconds to wait for the sidecar's readiness probe. |
+| `CIX_GGUF_PATH` | auto-resolve | Absolute path to a GGUF file. Empty → cache lookup → HF download. |
+| `CIX_BOOTSTRAP_GGUF_PATH` | — | Optional. If set, cix imports this `.gguf` into `CIX_GGUF_CACHE_DIR` once (atomic `.partial → rename`) and ignores the env on subsequent boots. Useful for air-gapped or rate-limited environments. |
+
+## Tuning (also editable from `/dashboard/server`)
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_LLAMA_THREADS` | `0` (auto = `runtime.NumCPU()/2`) | CPU threads passed to llama-server. |
+| `CIX_LLAMA_BATCH` | `0` (match `CIX_LLAMA_CTX`) | `-b` batch size. |
+| `CIX_MAX_EMBEDDING_CONCURRENCY` | `5` | Embedding queue parallelism. Drop to `1` if the GPU contends. |
+| `CIX_EMBEDDING_QUEUE_TIMEOUT` | `300` | Seconds before a queued embedding request is failed. |
+
+## Workspaces & GitHub repos
+
+Both surfaces ship in every release — there is no opt-in flag. The
+encryption key for `github_tokens` is required on first use; if neither
+`CIX_SECRET_KEY` nor `CIX_SECRET_KEYFILE` is set the server auto-generates
+one under `<CIX_SECRETS_DATA_DIR>/.secret_key`.
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `CIX_REPOS_DIR` | `/repos` | Base directory for cloned GitHub repos. Each clone lives at `/repos//`. Point this at a dedicated volume — cloned repos can be large. |
+| `CIX_WORKSPACES_DATA_DIR` | — | Legacy alias for `CIX_REPOS_DIR` (used when the latter is unset). Prefer `CIX_REPOS_DIR`. |
+| `CIX_WORKER_CONCURRENCY` | `2` | Parallel clone/index workers. |
+| `CIX_SECRET_KEY` | (auto-generate) | 32-byte AES key for GitHub token encryption. Hex or base64. |
+| `CIX_SECRET_KEYFILE` | — | Alternative — path to a 0600-perm key file. |
+| `CIX_SECRETS_DATA_DIR` | `dirname(CIX_SQLITE_PATH)` | Where the auto-generated keyfile lives. |
+| `CIX_DEFAULT_POLL_INTERVAL` | `5m` | Default git-polling cadence for polling repos without a per-repo interval. Go duration string. |
+| `CIX_MIN_POLL_INTERVAL` | `60s` | Floor applied to every effective poll interval. Go duration string. |
+| `CIX_POLL_SCHEDULER_TICK` | `30s` | How often the shared poll scheduler scans for due repos. Go duration string. |
+
+See [`WORKSPACES.md`](WORKSPACES.md) for the operator guide,
+[`WEBHOOKS.md`](WEBHOOKS.md) for webhook lifecycle, and
+[`POLLING.md`](POLLING.md) for the polling alternative.
+
+## Version-check banner
+
+| Variable | Default | Description |
+|---|---|---|
+| `CIX_VERSION_CHECK_ENABLED` | `true` | Set `false` to disable the outbound GitHub release poll. |
+| `CIX_VERSION_CHECK_INTERVAL` | `6h` | Go duration string (`30m`, `12h`, …). |
+| `CIX_VERSION_CHECK_REPO` | `dvcdsys/code-index` | Override only when running a fork with its own release stream. |
+
+See [`UPDATES.md`](UPDATES.md) for how the banner works end-to-end.
+
+## Resource usage
+
+| | Native (Apple Silicon) | Docker (CPU) | Docker (CUDA) |
+|--|---|---|---|
+| Image size | n/a | ~21 MB | ~1.0 GB |
+| Memory (idle) | ~1 GB | ~1 GB | ~1 GB (system) + ~0.7 GB VRAM |
+| Memory (indexing) | up to 2 GB | up to 2 GB | up to 2 GB system + ~0.7 GB VRAM |
+| GPU | Metal | none | NVIDIA CUDA 12.x |
+| Disk | `~/.cix/data/` (~50–200 MB/project) | same (mounted volume) | same |
+| Auto-restart | use `launchd` (see [`SETUP_MACOS_NATIVE.md`](SETUP_MACOS_NATIVE.md)) | yes | yes |
+
+## Switching embedding models
+
+The server ships with `awhiteside/CodeRankEmbed-Q8_0-GGUF` — a
+Q8-quantized build of CodeRankEmbed (137M params, 768d, ~145 MB on
+disk, ~0.5–0.7 GB idle VRAM/RAM). Inference runs via the
+`llama-server` sidecar, so **only GGUF repositories are supported**.
+Plain PyTorch / `sentence-transformers` repos won't work.
+
+You can switch in two places:
+
+- **Dashboard → Server → Embedding model.** Pick from the on-disk
+ cache (the dropdown lists `CIX_GGUF_CACHE_DIR`/*.gguf), or paste a
+ HuggingFace repo or absolute path. **Save & Restart** drains,
+ restarts the sidecar, and turns existing project cards red ("Stale
+ model") until you reindex.
+- **Env / `.env` file.** Set `CIX_EMBEDDING_MODEL=<hf-repo-or-path>` and
+ restart. The dashboard's runtime override (if any) wins; the env
+ value becomes the bootstrap default.
+
+ChromaDB and SQLite paths are suffixed by a sanitised form of the
+model name (e.g. `projects_awhiteside_coderankembed_q8_0_gguf.db`).
+This isolates vector spaces per model — switching back and forth
+keeps old indices intact and avoids dim-mismatch errors.
+Re-indexing under a model is not free (chunk count × embedding
+latency), but you don't lose state.
+
+## Related files
+
+- `server/internal/config/config.go` — env-var loading + defaults
+- `server/internal/runtimecfg/` — dashboard-editable overrides
+- `.env.example` — copy-paste-ready template
+- [`SECURITY_DEPLOYMENT.md`](SECURITY_DEPLOYMENT.md) — production hardening
diff --git a/doc/DOCKER_TAGS.md b/doc/DOCKER_TAGS.md
index 239821c..086a082 100644
--- a/doc/DOCKER_TAGS.md
+++ b/doc/DOCKER_TAGS.md
@@ -6,9 +6,26 @@
|---|---|---|---|---|
| `latest` | linux/amd64 + linux/arm64 | Go CPU (distroless/static) | ~100 MB | Use with `CIX_EMBEDDINGS_ENABLED=false` |
| `cu128` | linux/amd64 | distroless/cc-debian13 + CUDA libs | ~1.0 GB | RTX 3090 prod; embeddings via llama-server |
+| `develop-cu128` | linux/amd64 | same as cu128 | ~1.0 GB | Floating pre-release; force-updated on every merge to `develop` that touches `server/`. Not for production. |
| `go-cu128` | linux/amd64 | same as cu128 | ~1.0 GB | Dev alias — retire after v0.3.0 ships |
| `0.2-python-legacy` | linux/amd64 | Python FastAPI | ~5 GB | Frozen; rollback only |
+## Develop channels
+
+`develop` has a matched pair of floating pre-release artifacts:
+
+- **Server:** Docker tag `dvcdsys/code-index:develop-cu128` (CUDA only;
+ CPU image is published only on `server/v*` releases). Workflow:
+ `.github/workflows/prerelease-server.yml`.
+- **CLI:** GitHub release `cli/develop` (no `v` prefix, so the stable
+ installer's `^cli/v` filter ignores it). Installed via
+ `install-develop.sh`. Workflow:
+ `.github/workflows/prerelease-cli.yml`.
+
+Both are intended for staging the next release together against the
+RTX 3090 box without cutting a real tag. See [`UPDATES.md`](UPDATES.md#cli-install-channels)
+for the develop-channel workflow.
+
## Retired Tags (kept for historical reference)
| Tag | Retired | Reason |
diff --git a/doc/POLLING.md b/doc/POLLING.md
new file mode 100644
index 0000000..08ad1bf
--- /dev/null
+++ b/doc/POLLING.md
@@ -0,0 +1,109 @@
+# Git Polling Sync
+
+Polling is the alternative to [webhooks](WEBHOOKS.md) for keeping a
+server-cloned git project in sync. Use it for repos where you **cannot
+install a webhook** — typically because you are not an admin of the
+repository.
+
+Where a webhook is push-driven (GitHub calls the server when someone
+pushes), polling is pull-driven: the server periodically fetches the
+remote and re-indexes only when the branch HEAD has moved.
+
+Polling, like webhooks, is a **server-cloned repo** feature. A local
+project registered with `cix init` uses the file watcher (`cix watch`),
+not polling.
+
+## 1. Webhook XOR polling
+
+A `git_repos` row syncs via **either** webhook **or** polling, never
+both. Polling can only be enabled when `webhook_mode = 'disabled'`.
+Attempting to enable polling on a repo whose `webhook_mode` is `manual`
+or `auto` is rejected with HTTP 422.
+
+When you add a repo with `webhook_mode: auto` but the PAT lacks
+`admin:repo_hook` (or you are not a repo admin), auto-registration
+fails. In that case the server **automatically falls back to polling**:
+it flips `webhook_mode` to `disabled`, enables polling at the default
+interval, and notes this in the add-repo response (`auto_register_note`).
+
+## 2. PAT and rate limits
+
+Polling reuses the existing clone/fetch pipeline, so it authenticates
+the `git fetch` with the repo's stored PAT (`token_id`) exactly as the
+webhook-triggered path does — sent as HTTP basic-auth
+`x-access-token`. Authenticated fetches keep polling within GitHub's
+rate limits. Polling makes **no GitHub REST API calls**; it is a git
+fetch over HTTPS, and an unchanged remote costs only a ref-negotiation
+round-trip (no pack download, no re-index).
+
+Set a `token_id` on any private repo you poll. Public repos can be
+polled without a PAT but are subject to lower unauthenticated limits.
+
+## 3. Cadence — measured from the end of the last index run
+
+Each polling repo has a `next_poll_at` timestamp. The next poll is
+scheduled `interval` seconds **after the previous cycle finishes**
+(no-change fetch, successful index, or terminal failure) — not on a
+fixed wall-clock cadence. This prevents a slow index run from stacking
+up overlapping polls.
+
+The effective interval is:
+
+```
+poll_interval_seconds (per repo, if set)
+ └─ else CIX_DEFAULT_POLL_INTERVAL
+ └─ clamped up to CIX_MIN_POLL_INTERVAL (floor)
+```
+
+`next_poll_at` is exposed on the `GitRepo` payload
+(`GET /api/v1/projects/{hash}/git-repo`) so you can see when a repo is
+next due.
+
+## 4. One shared queue, bounded workers
+
+A single background scheduler
+(`server/internal/pollscheduler`) drives polling for **all** polling
+repos. Every tick (`CIX_POLL_SCHEDULER_TICK`, default 30s) it asks the
+DB which repos are due and enqueues a `clone_repo` job for each into the
+shared job queue (`server/internal/jobs`).
+
+That queue is bounded by `CIX_WORKER_CONCURRENCY` (default 2) and
+deduplicates by repo, so a fleet of repos coming due at the same moment
+can never spike indexing concurrency — the work simply queues and drains
+at the configured worker count. The `clone_repo → index_repo` pipeline
+is reused verbatim, including incremental `tree.Diff` reindex: an
+unchanged remote skips indexing entirely, and a changed remote
+re-indexes only the changed files. A full reindex happens only in the
+same edge cases as the webhook path (first index, missing diff base,
+embedding-model drift, or an explicit `?full=true` reindex).
+
+## 5. Configuration
+
+| Env var | Default | Meaning |
+|---|---|---|
+| `CIX_DEFAULT_POLL_INTERVAL` | `5m` | Interval for polling repos without a per-repo override. |
+| `CIX_MIN_POLL_INTERVAL` | `60s` | Floor applied to every effective interval. |
+| `CIX_POLL_SCHEDULER_TICK` | `30s` | How often the scheduler scans for due repos. |
+| `CIX_WORKER_CONCURRENCY` | `2` | Shared worker count that bounds concurrent clone+index work. |
+
+## 6. API
+
+```
+# Enable polling at create time (requires webhook_mode=disabled)
+POST /api/v1/git-repos
+{
+ "github_url": "https://github.com/owner/repo",
+ "branch": "main",
+ "token_id": "<token-id>",
+ "webhook_mode": "disabled",
+ "polling_enabled": true,
+ "poll_interval_seconds": 300
+}
+
+# Toggle polling / change interval on an existing repo
+PATCH /api/v1/projects/{hash}/git-repo
+{ "polling_enabled": true, "poll_interval_seconds": 300 }
+```
+
+`poll_interval_seconds` is optional (omit or `0` → server default).
+Enabling polling while `webhook_mode != 'disabled'` returns 422.
diff --git a/doc/RELEASES.md b/doc/RELEASES.md
new file mode 100644
index 0000000..9519610
--- /dev/null
+++ b/doc/RELEASES.md
@@ -0,0 +1,148 @@
+# Releases
+
+CLI and server ship on independent tag streams so a bugfix on one
+doesn't drag the other through a rebuild + retest cycle.
+
+| Component | Tag pattern | Workflow | Artifact |
+|---|---|---|---|
+| Server (`cix-server`) | `server/v*` (e.g. `server/v0.5.1`) | [`release-server.yml`](../.github/workflows/release-server.yml) | Docker images on Docker Hub: `:latest`, `:<version>`, `:cu128`, `:<version>-cu128` |
+| CLI (`cix`) | `cli/v*` (e.g. `cli/v0.5.0`) | [`release-cli.yml`](../.github/workflows/release-cli.yml) | `cix-{darwin,linux}-{amd64,arm64}.tar.gz` on a GitHub Release |
+
+Bare `v*` tags are the historical pre-split CLI line — the installer
+still falls back to them when no `cli/v*` release exists, but no new
+bare `v*` tags should be created. See [`DEPRECATION_POLICY.md`](DEPRECATION_POLICY.md).
+
+The two streams advance independently. Server and CLI must remain
+contract-compatible (the CLI is a thin HTTP client), so when changing
+shared shapes — endpoints, JSON payloads — update both sides in the
+same PR but release them on their own tags and verify the older CLI
+still speaks the newer server (and vice versa).
+
+For testing unreleased builds together, use the **develop channel** —
+see [`UPDATES.md`](UPDATES.md#cli-install-channels).
+
+---
+
+## Cutting a CLI release
+
+1. Bump `cli/cmd/version.go` to `var version = "0.6.0"` (no leading `v`).
+2. Tag and push:
+
+ ```bash
+ git tag cli/v0.6.0
+ git push origin cli/v0.6.0
+ ```
+
+3. CI (`release-cli.yml`) builds binaries for macOS + Linux (amd64 +
+ arm64), uploads them to a GitHub Release named `cli/v0.6.0`, and
+ updates the `cli/latest` floating tag. The stable installer
+ auto-picks them up on the next run.
+
+Local cross-build (no release — useful to test the archive shape
+before tagging):
+
+```bash
+cd cli && make release VERSION=v0.6.0
+```
+
+Produces archives in `cli/dist/` plus `checksums.txt`. Supported
+targets: `darwin-arm64`, `darwin-amd64`, `linux-arm64`, `linux-amd64`.
+
+## Cutting a server release
+
+The server release adds a pre-tag CVE scan and an image build that
+takes >30 min on CI, so this is more disciplined than the CLI path:
+
+1. **CVE scan** — run on a native amd64 builder:
+
+ ```bash
+ cd server && make scout-cuda
+ ```
+
+ Verify 0 CRITICAL / 0 HIGH. The workflow scans on `linux/amd64`
+ (CUDA image is amd64-only).
+
+2. **Bump version**: edit `server/cmd/cix-server/version.go` to
+ `var version = "0.5.2"`.
+
+3. **Tag and push**:
+
+ ```bash
+ git tag server/v0.5.2
+ git push origin server/v0.5.2
+ ```
+
+4. CI (`release-server.yml`) builds CPU multi-arch + CUDA `amd64`
+ images with provenance + SBOM attestations, pushes them to Docker
+ Hub with both pinned (`:0.5.2`, `:0.5.2-cu128`) and floating
+ (`:latest`, `:cu128`) tags, and creates a GitHub Release.
+
+5. **Promote** in production (Portainer, your compose file, etc.) by
+ updating the image tag to `:0.5.2` / `:0.5.2-cu128` and
+ redeploying.
+
+CI does not deploy to production. It stops at Docker Hub push by
+design — promotion is a manual operator step.
+
+## Docker Scout workflow (iterate before pushing)
+
+For non-tag iterations on the CUDA image (debugging a new layer,
+testing a base-image bump):
+
+```bash
+# 1. Build on native amd64 builder → push temp tag → scan
+cd server && make scout-cuda
+# prints SCOUT_TAG=scout-YYYYMMDD-HHMM
+
+# 2. If 0 HIGH/CRITICAL → promote (no rebuild, imagetools retag)
+make promote-cuda SCOUT_TAG=scout-YYYYMMDD-HHMM
+
+# 3. CPU image
+make scout-cpu # builds locally, no push
+```
+
+Key: always pass `--platform linux/amd64` to `docker scout cves` for
+the CUDA image — on Apple Silicon the default platform is `arm64` and
+the CUDA image is `amd64`-only. The `make scout-cuda` target handles
+this.
+
+## Server make targets (full list)
+
+```bash
+cd server
+make build # compile cix-server binary
+make bundle # build + fetch llama-server (macOS Metal)
+make run # bundle + launch with .env (dev)
+make test # go test ./...
+make test-gate # parity gate vs reference embeddings (requires GGUF)
+make docker-build-cuda # build + push CUDA image (uses cix-builder)
+make docker-build-cuda-dev # build + push :cu128-dev tag (smoke testing)
+make scout-cuda # safe pre-push CVE scan workflow
+make promote-cuda SCOUT_TAG=scout-… # retag without rebuild
+```
+
+The `cix-builder` buildx instance has two nodes — a local desktop
+arm64 node and an SSH-bound `linux/amd64` node on the RTX 3090 server.
+CUDA builds run natively on the amd64 node (no QEMU, full speed).
+
+## Pre-built Docker images
+
+See [`DOCKER_TAGS.md`](DOCKER_TAGS.md) for the full active-tag matrix
+and historical lifecycle. The quick version:
+
+| Tag | Architecture | Use case |
+|-----|-------------|----------|
+| `dvcdsys/code-index:latest` | linux/amd64 + linux/arm64 | CPU |
+| `dvcdsys/code-index:<version>` | linux/amd64 + linux/arm64 | CPU, version-pinned |
+| `dvcdsys/code-index:cu128` | linux/amd64 | NVIDIA GPU (CUDA 12.8) |
+| `dvcdsys/code-index:<version>-cu128` | linux/amd64 | NVIDIA, version-pinned |
+| `dvcdsys/code-index:develop-cu128` | linux/amd64 | Pre-release CUDA — pairs with the develop CLI channel |
+
+## Related files
+
+- `.github/workflows/release-server.yml` — stable server build/release pipeline
+- `.github/workflows/release-cli.yml` — stable CLI build/release pipeline
+- `.github/workflows/prerelease-server.yml` / `prerelease-cli.yml` — develop channels
+- [`DOCKER_TAGS.md`](DOCKER_TAGS.md) — Docker Hub tag lifecycle
+- [`DEPRECATION_POLICY.md`](DEPRECATION_POLICY.md) — when tags / behaviours retire
+- [`UPDATES.md`](UPDATES.md) — release-poll banner + install channels
diff --git a/doc/SEARCH_ALGORITHM.md b/doc/SEARCH_ALGORITHM.md
new file mode 100644
index 0000000..981baf0
--- /dev/null
+++ b/doc/SEARCH_ALGORITHM.md
@@ -0,0 +1,182 @@
+# Search Algorithm
+
+How cix ranks results for single-project semantic search, workspace
+hybrid search, and the symbol/files/refs/defs lookups. Use this when
+calibrating `--min-score`, choosing between modes, or debugging a query
+that "should have found" something.
+
+## 1. Per-project semantic flow
+
+```
+query string ──▶ "Represent this query for searching relevant code: " + query
+ │
+ ▼
+ llama-server sidecar (CodeRankEmbed Q8_0 GGUF) — 768-dim vector
+ │
+ ▼
+ chromem-go cosine search over the project's collection
+ │
+ ▼
+ per-chunk hits → merge windowed overlaps → group by file → top-N files
+```
+
+**Asymmetric model.** CodeRankEmbed is purpose-built for code retrieval
+and embeds queries with a fixed prefix
+(`"Represent this query for searching relevant code: "`). Queries and
+passages live in *different* regions of the 768-dim space, so cosine
+similarities are systematically lower than for symmetric models. A
+"strong" match here is ~0.55, not ~0.80. Do not compare these numbers
+against thresholds quoted for OpenAI, Voyage, or generic
+sentence-transformers — they aren't measuring the same thing.
+
+**Path-aware preamble.** Each chunk is embedded with its file path,
+language, and parent symbol prefixed to the body. This is why
+`cix search "auth middleware"` finds `auth.go` even when the file's
+prose uses different vocabulary. Toggle with `CIX_EMBED_INCLUDE_PATH`
+(default `true`); flipping it requires `cix reindex --full` because the
+vectors change.
+
+**Score landscape (Q8_0, path-aware on).**
+
+| Score | Meaning |
+|--------------|-------------------------------------------------------------|
+| ≥ 0.65 | Exact symbol/filename match — almost certainly relevant |
+| 0.50 – 0.65 | Strong concept match — usually relevant |
+| 0.40 – 0.50 | Weak match — sometimes useful |
+| < 0.40 | Noise — filtered by default |
+
+Default CLI floor is `--min-score 0.4`. Drop to `0.25` for sparse or
+single-token queries; below `0.2` is essentially random.
+
+**Result grouping.** `search.go` returns per-chunk hits, then
+`search_merge.go` merges overlapping line windows from the same file
+and groups everything by file path. The top-N flag (`--limit`) is N
+*files*, each containing all relevant matches in order.
+
+## 2. FTS5 / BM25 chunk mirror
+
+Every chunk that lands in chromem-go also lands as a row in two
+sister SQLite tables:
+
+- `chunks_meta` — regular indexed shadow (project_path, file_path,
+ start/end line, chunk type, symbol name, language) — lets the
+ indexer find and delete rows for a file efficiently.
+- `chunks_fts` — FTS5 virtual table over `(content, symbol_name,
+ file_path)` — provides BM25 scoring against literal tokens.
+
+Both tables share a rowid and are written inside the indexer's
+per-file SQL transaction, so a chunk is either in *both* stores or
+*neither*.
+
+Code: `server/internal/chunksfts/chunksfts.go`. Introduced by `f00e3d3`.
+
+The sparse signal does two jobs the dense model alone cannot do well:
+
+1. **Acronym / short-token precision.** Short product codes and unique
+ identifiers (`ACME-712`, `XYZId`) get diffuse cosine scores because
+ the embedding model spreads rare-token mass across many neighbours.
+ BM25 over the literal tokens recovers the precision.
+2. **Project-relevance gating in workspace search.** Dense fan-out
+ returns the N nearest vectors from every project's collection
+ regardless of semantic distance, so projects that share zero
+ vocabulary with the query can still surface at chunk_score ≈
+ 0.2–0.3. BM25 returning **zero hits** in a project is a strong
+ "nothing here" signal that dense cannot produce.
+
+`chunks_fts` is not used for single-project search today — that path
+stays pure dense — but the table is populated on every project so the
+workspace path can rely on it.
+
+## 3. Workspace hybrid search
+
+`GET /api/v1/workspaces/{id}/search` runs a two-stage hybrid:
+
+```
+ ┌──────────────────────────┐
+ │ query │
+ └────┬───────────────┬─────┘
+ │ │
+ ┌────────────────▼──┐ ┌──▼──────────────┐
+ │ dense fan-out │ │ BM25 fan-out │
+ │ (per-project │ │ (chunks_fts per │
+ │ chromem cosine) │ │ project) │
+ └────────┬──────────┘ └─────────┬───────┘
+ │ │
+ ▼ ▼
+ ┌──────────────────────────────────────────────┐
+ │ stage 1 — project gating + hybrid score │
+ │ • each project gets dense + bm25 scores │
+ │ • zero-bm25 projects are demoted │
+ │ • top-K projects survive │
+ └─────────────────┬────────────────────────────┘
+ │
+ ▼
+ ┌──────────────────────────────────────────────┐
+ │ stage 2 — within surviving projects, merge │
+ │ dense+bm25 chunk scores, group by file, │
+ │ return ranked top-N files │
+ └──────────────────────────────────────────────┘
+```
+
+Calibrated defaults live in `server/internal/httpapi/workspacesearch.go`
+(see `96b487d` for the calibration commit). The endpoint accepts
+explicit tunables (`alpha`, `topK`, `perProjectLimit`, …) so an operator
+can override on a per-query basis, but the defaults are tuned to the
+public eval corpus described in `docs/workspace-eval-2026-05-13/`.
+
+**Why two stages.** Workspaces span 5–30+ repos. A naive flat top-N
+across the union floods results from whichever project happens to have
+the highest-density vocabulary overlap. Gating at the project level
+first ("which repos should we even look in?") makes the result list
+reflect the workspace shape — typically 2–4 distinct projects per
+answer rather than 8 chunks from one repo.
+
+Trust rules for an agent consuming the response (`chunks[]` vs
+`projects[]` array) live in the `cix-workspace` skill at
+`skills/cix-workspace/SKILL.md` and in
+[`workspaces.md`](../workspaces.md#trust-rules).
+
+## 4. Symbols / definitions / references / files
+
+These bypass the embedding pipeline entirely. They run against
+SQLite-backed indexes that the chunker populates in the same per-file
+transaction as the vector store:
+
+- **`cix symbols <name>`** — substring-and-trigram lookup over
+ `symbols` (kind ∈ {function, class, method, type}). Fast (<50 ms on a
+ 10k-symbol project). Used when the agent already knows the name.
+- **`cix def <name>`** — same table, filtered to where the symbol is
+ *defined* (declaration site, not reference).
+- **`cix refs <name>`** — looks up `symbol_refs`, which the chunker
+ emits during AST traversal. The exact granularity varies by language
+ (`server/internal/chunker/chunker.go` `languageNodes` map).
+- **`cix files <pattern>`** — substring/glob over the `files` table.
+
+None of these consume embedding capacity, so they keep working with
+`CIX_EMBEDDINGS_ENABLED=false`.
+
+## 5. Tuning the floor
+
+The default `--min-score 0.4` works well on real codebases. Two
+common reasons to override:
+
+- **Too many results, too noisy.** Raise to `0.5` or `0.6`. Useful
+ when the agent's context is filling up with weak matches.
+- **No results, but you know the code exists.** Drop to `0.25` (or
+ `0.2` for a last resort). Single-word queries on rare identifiers
+ often need this. If `0.2` still returns nothing, the index is
+ probably stale — run `cix status` and `cix reindex` if needed.
+
+When in doubt: increase specificity in the query itself before
+lowering the floor. "validation" → "input validation in auth
+middleware" is usually a bigger improvement than threshold tuning,
+because the path-aware preamble rewards queries that say where the code lives.
+
+## 6. Related files
+
+- `server/internal/chunksfts/` — BM25 mirror schema and write path
+- `server/internal/httpapi/workspacesearch.go` — two-stage hybrid endpoint
+- `server/internal/httpapi/search.go` + `search_merge.go` — per-project search and result grouping
+- `server/internal/symbolindex/` — symbol/refs/defs SQLite tables
+- [`benchmarks.md`](benchmarks.md) — quantization vs retrieval-quality measurements
+- [`../workspaces.md`](../workspaces.md) — agent-facing workspace search guide
diff --git a/doc/SETUP_MACOS_NATIVE.md b/doc/SETUP_MACOS_NATIVE.md
new file mode 100644
index 0000000..d12cb43
--- /dev/null
+++ b/doc/SETUP_MACOS_NATIVE.md
@@ -0,0 +1,162 @@
+# Native macOS setup (Apple Silicon, Metal GPU)
+
+Docker Desktop on macOS runs containers inside a Linux VM, and the
+Metal GPU is **not accessible** from within that VM. For full Metal
+acceleration on Apple Silicon you must run cix-server natively. This
+doc covers the build, the env vars Metal cares about, and a working
+`launchd` plist for running it as a login agent.
+
+> For Docker (CPU) and Docker (CUDA) deployments, follow README's
+> *Quick Start* section instead. This doc is only for native macOS.
+
+## 1. Build
+
+Prerequisites:
+
+- Apple Silicon Mac (M1/M2/M3/M4 family). Intel Macs are not supported
+ by the bundled `llama-server` build.
+- Go 1.25+ (`brew install go` or [go.dev/dl](https://go.dev/dl)).
+- Xcode Command Line Tools — `xcode-select --install` if you don't
+ already have them.
+
+```bash
+git clone https://github.com/dvcdsys/code-index && cd code-index
+cd server && make bundle
+```
+
+`make bundle` builds `cix-server` and downloads the Metal-enabled
+`llama-server` (llama.cpp + `libggml-metal.dylib`). The binaries land
+in `server/dist/cix-darwin-arm64/`.
+
+> The bundled `llama-server` is re-signed at bundle time (commit
+> `8c56fc3`) so macOS amfid doesn't kill it on first launch. If you
+> see "killed: 9" on startup, re-run `make bundle` to refresh the
+> signature.
+
+## 2. Configure
+
+Copy the environment template and fill in the required values:
+
+```bash
+cp .env.example .env
+```
+
+The minimum env-var set for a Metal native run:
+
+| Variable | Recommended | Notes |
+|---|---|---|
+| `CIX_API_KEY` | (any 256-bit value) | Bearer token for CLI / agent traffic. |
+| `CIX_BOOTSTRAP_ADMIN_EMAIL` | (your email) | Required for the first boot only — fresh DB seeds. |
+| `CIX_BOOTSTRAP_ADMIN_PASSWORD` | (strong value) | Required for the first boot only — must be changed at first login. |
+| `CIX_N_GPU_LAYERS` | `99` | Offload all layers to Metal. `0` forces CPU. |
+| `CIX_EMBEDDINGS_ENABLED` | `true` | Default. Set `false` to skip the sidecar entirely. |
+| `CIX_LLAMA_BIN_DIR` | (set by `make run`) | Path to the `llama-server` bundle dir. The dev runner sets it; for `launchd` you set it yourself (see below). |
+
+The full env-var surface is documented in
+[`CONFIG_REFERENCE.md`](CONFIG_REFERENCE.md).
+
+## 3. Run (dev)
+
+```bash
+cd server && make run
+```
+
+`make run` runs `make bundle` first (no-op if already built), loads
+`.env`, and launches the server in the foreground. Tail logs in the
+terminal; Ctrl-C to stop.
+
+Verify:
+
+```bash
+curl http://localhost:21847/health # → {"status":"ok"}
+```
+
+Open `http://localhost:21847/dashboard` and sign in with the bootstrap
+admin email + password. You'll be forced to change the password on
+first login.
+
+## 4. Auto-start with launchd
+
+For a "runs in the background on login" setup, drop a `launchd` plist
+into `~/Library/LaunchAgents/`. Replace every `/ABSOLUTE/PATH/TO/`
+and `YOUR_USER` placeholder before loading.
+
+```xml
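+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0"><dict>
+  <key>Label</key><string>com.cix.server</string>
+
+  <key>ProgramArguments</key>
+  <array>
+    <string>/ABSOLUTE/PATH/TO/server/dist/cix-darwin-arm64/cix-server</string>
+  </array>
+
+  <key>EnvironmentVariables</key>
+  <dict>
+    <key>CIX_API_KEY</key><string>YOUR_KEY</string>
+    <key>CIX_BOOTSTRAP_ADMIN_EMAIL</key><string>admin@example.com</string>
+    <key>CIX_BOOTSTRAP_ADMIN_PASSWORD</key><string>change-me-on-first-login</string>
+    <key>CIX_LLAMA_BIN_DIR</key><string>/ABSOLUTE/PATH/TO/server/dist/cix-darwin-arm64/llama</string>
+    <key>CIX_N_GPU_LAYERS</key><string>99</string>
+    <key>CIX_PORT</key><string>21847</string>
+    <key>CIX_SQLITE_PATH</key><string>/Users/YOUR_USER/.cix/data/sqlite/projects.db</string>
+    <key>CIX_CHROMA_PERSIST_DIR</key><string>/Users/YOUR_USER/.cix/data/chroma</string>
+    <key>CIX_GGUF_CACHE_DIR</key><string>/Users/YOUR_USER/.cix/data/models</string>
+  </dict>
+
+  <key>RunAtLoad</key><true/>
+  <key>KeepAlive</key><true/>
+  <key>StandardOutPath</key><string>/tmp/cix-server.log</string>
+  <key>StandardErrorPath</key><string>/tmp/cix-server.err</string>
+</dict></plist>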
+```
+
+Save as `~/Library/LaunchAgents/com.cix.server.plist`, then:
+
+```bash
+launchctl load ~/Library/LaunchAgents/com.cix.server.plist
+launchctl start com.cix.server
+```
+
+The agent starts at login and respawns on crash. Logs:
+
+```bash
+tail -f /tmp/cix-server.log
+tail -f /tmp/cix-server.err
+```
+
+To stop / disable / reload:
+
+```bash
+launchctl stop com.cix.server
+launchctl unload ~/Library/LaunchAgents/com.cix.server.plist
+# re-load after editing the plist
+launchctl load ~/Library/LaunchAgents/com.cix.server.plist
+```
+
+After every `git pull` that updates `server/`, rebuild and the
+plist picks up the new binary automatically (the path doesn't
+change):
+
+```bash
+cd server && make bundle
+launchctl stop com.cix.server # KeepAlive will respawn the new binary
+```
+
+## 5. Troubleshooting
+
+| Symptom | Likely cause | Fix |
+|---|---|---|
+| `make bundle` fails downloading llama-server | Network blocked, or upstream release moved. | Inspect `server/Makefile`'s download URL; report if upstream changed. |
+| Server starts but `/health` 404s | Wrong port. | `lsof -i :21847` to confirm. Check `CIX_PORT`. |
+| GPU not used (CPU fallback) | `CIX_N_GPU_LAYERS` unset or `0`. | Set to `99`. `make run` logs the resolved value at startup. |
+| "killed: 9" on first llama-server launch | macOS amfid rejected the unsigned binary. | Re-run `make bundle` to refresh the local signature. |
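+| Server starts via terminal but not via `launchd` | `EnvironmentVariables` plist block missing a required var. | Run `launchctl print gui/$(id -u)/com.cix.server` and check its `environment` block for the missing variable (`launchctl getenv` only reports the session-wide launchd environment, not per-agent plist values). Re-edit the plist, then `launchctl unload` and `launchctl load` it again. |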
+
+## 6. Related files
+
+- `server/Makefile` — `bundle` / `run` targets
+- [`CONFIG_REFERENCE.md`](CONFIG_REFERENCE.md) — full env-var surface
+- [`SECURITY_DEPLOYMENT.md`](SECURITY_DEPLOYMENT.md) — production hardening
+- [`vram-profiling.md`](vram-profiling.md) — Metal memory profile
diff --git a/doc/UPDATES.md b/doc/UPDATES.md
new file mode 100644
index 0000000..a5e5d45
--- /dev/null
+++ b/doc/UPDATES.md
@@ -0,0 +1,143 @@
+# Keeping cix Up to Date
+
+cix ships in two release streams (server + CLI) and has a built-in
+release-poll banner on the dashboard so you know when an upgrade is
+available. This doc covers how the banner works, how to opt out, and
+how to use the **develop channel** for testing unreleased changes.
+
+## 1. Release-poll banner
+
+The dashboard shows a banner when a newer `server/v*` release is
+available on GitHub. The poll happens server-side, *not* in the
+browser — one outbound request per cix-server, regardless of how many
+clients have the dashboard open.
+
+How it works:
+
+- The server runs a goroutine
+ (`server/internal/versioncheck/`, commit `853c9e4`) on a ticker.
+- Every 6 hours (configurable) it calls
+ `GET https://api.github.com/repos/<owner>/<repo>/releases?per_page=30`.
+- Releases are filtered to tags with prefix `server/v`, drafts and
+ prereleases are dropped, and the highest semver tag wins.
+- ETag-based revalidation means subsequent polls usually return
+ `304 Not Modified` and consume almost no rate-limit budget. Default
+ interval (6h) keeps the unauthenticated usage near 4 req/day per
+ server — well under GitHub's anonymous 60/h ceiling.
+- The cached snapshot (current version, latest tag, release URL,
+ checked-at, last error) is exposed at
+ `GET /api/v1/admin/version` for the dashboard.
+
+The banner is informational only — it links to the release page on
+GitHub. cix does not self-update.
+
+### Configuration
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `CIX_VERSION_CHECK_ENABLED` | `true` | Master switch. Set `false` to disable all outbound HTTP for version checks. |
+| `CIX_VERSION_CHECK_INTERVAL` | `6h` | Go duration string (`30m`, `12h`, …). Floored to a sensible minimum to avoid hammering GitHub. |
+| `CIX_VERSION_CHECK_REPO` | `dvcdsys/code-index` | Override only if you're running a fork with its own release stream. |
+
+Disabling the check (`CIX_VERSION_CHECK_ENABLED=false`) is the right
+setting for air-gapped deployments — the dashboard hides the banner
+and the server never makes the GitHub call.
+
+A "0.0.0-dev" build (the local-make default; `server/cmd/cix-server/version.go`)
+always treats the latest release as "newer", so the banner shows up
+the first time you point a dev build at the dashboard. This is
+deliberate — it keeps dev builds honest about how far behind stable
+they are.
+
+## 2. CLI install channels
+
+Two channels share an `install.sh` family of scripts:
+
+| Channel | Tag stream | Installer | Pairing |
+|---|---|---|---|
+| **Stable** | `cli/v*` GitHub releases | `install.sh` | Pair with a `server/v*` Docker tag or native build. |
+| **Develop** | `cli/develop` floating tag | `install-develop.sh` | Pair with `dvcdsys/code-index:develop-cu128`. |
+
+### Stable (default)
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/main/install.sh | bash
+```
+
+The installer resolves the highest `cli/v*` GitHub release for the
+current OS/arch, downloads the tarball, and drops the `cix` binary in
+`/usr/local/bin` (override with `--bin-dir`). Re-running upgrades
+in place.
+
+Stable CLI releases ship binaries for `darwin-arm64`, `darwin-amd64`,
+`linux-arm64`, `linux-amd64`.
+
+### Develop channel
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/main/install-develop.sh | bash
+```
+
+What this gives you:
+
+- The CLI binary built from the head of the `develop` branch. The
+ `cli/develop` tag is **force-updated** on every PR merged into
+ `develop` that touches `cli/**`, so re-running the installer always
+ pulls the freshest build.
+- The stable installer's `cli/v*` filter ignores the `cli/develop`
+ tag (no `v` prefix), so the two channels do not collide.
+
+Pair with the matching server tag (CUDA only — the develop pre-release
+pipeline does not publish a CPU image):
+
+```yaml
+# docker-compose.develop.yml
+services:
+ cix-server:
+ image: dvcdsys/code-index:develop-cu128
+```
+
+CI gate: PRs into `develop` build the develop-cu128 image and the
+develop CLI release before merge
+(`.github/workflows/prerelease-server.yml`,
+`.github/workflows/prerelease-cli.yml`).
+
+**When to use the develop channel.** Staging the next release together
+against a real workload, reproducing a bug report from a `develop`-only
+build, or testing a server-side feature that depends on an in-progress
+CLI command. **Don't run this in production** — the develop pair has
+no compatibility guarantees and may break across merges.
+
+To switch back to stable, just re-run the stable installer — it
+overwrites the develop binary at the same path.
+
+## 3. Reindex after an upgrade
+
+Upgrading the **server** can require a reindex in two cases:
+
+1. **Embedding model changed.** If the new release changes the default
+ model (or you change it yourself via Dashboard → Server →
+ Embedding model), every project becomes stale. The dashboard's
+ drift indicator paints affected projects red with a "Stale model"
+ badge until you reindex. See README's *Drift indicator* section.
+2. **Schema migration adds chunk-level data.** Releases that backfill
+ new chunk metadata (e.g. the FTS5 mirror introduced by `f00e3d3`)
+ may prompt the dashboard to recommend a reindex on existing
+ projects. The recommendation is non-blocking — old projects keep
+ working, just without the new signal — but acting on it gets you
+ the full search quality.
+
+Reindexing from the dashboard uses the project page's **Reindex**
+button (`596748e`); from the CLI it's `cix reindex --full <path>`.
+
+Upgrading the **CLI** never requires a reindex — the CLI is a thin
+HTTP client.
+
+## 4. Related files
+
+- `server/internal/versioncheck/check.go` — release-poll service
+- `install.sh` / `install-develop.sh` — stable + develop installers
+- `.github/workflows/release-server.yml` / `release-cli.yml` — stable build pipelines
+- `.github/workflows/prerelease-server.yml` / `prerelease-cli.yml` — develop build pipelines
+- [`DOCKER_TAGS.md`](DOCKER_TAGS.md) — Docker tag lifecycle, including `develop-cu128`
+- [`RELEASES.md`](RELEASES.md) — how to cut a stable release
diff --git a/doc/WEBHOOKS.md b/doc/WEBHOOKS.md
new file mode 100644
index 0000000..61fe93d
--- /dev/null
+++ b/doc/WEBHOOKS.md
@@ -0,0 +1,190 @@
+# GitHub Webhooks for Workspaces
+
+The webhook path is the production answer to "how does my workspace
+re-index when a teammate pushes?". This doc covers the lifecycle,
+modes, HMAC validation, and how to register manually when auto-register
+isn't an option.
+
+Webhooks are a **workspaces feature** — they're only meaningful for
+repos the cix-server itself clones via the Workspaces page. A local
+project registered with `cix init` uses the file watcher
+(`cix watch`), not webhooks.
+
+> **Not a repo admin?** Webhooks require `admin:repo_hook` to install.
+> For repos you can only clone (not administer), use [polling](POLLING.md)
+> instead — the server periodically fetches and re-indexes. A repo syncs
+> via webhook **or** polling, never both.
+
+## 1. Modes
+
+Each `git_repos` row carries a `webhook_mode` enum:
+
+| Mode | When set | Behaviour |
+|---|---|---|
+| `off` | Default for repos added without a token, or when the operator opts out. | Server ignores incoming webhook deliveries (returns 200 `{"status":"ignored"}` after HMAC check) — there is no re-index trigger. |
+| `manual` | Default when a repo *has* a token but the operator unchecked "auto-register". | Server stores a secret + URL and shows them once on add-repo. Operator pastes them into GitHub by hand. |
+| `auto` | Set when the operator checks "Auto-register webhook" *and* the PAT carries `admin:repo_hook`. | Server calls GitHub's hooks API on the operator's behalf during add-repo, persists the hook id, and de-registers the hook on delete. |
+
+`auto` is preferred — it makes onboarding a repo a one-form action.
+`manual` exists for operators whose PATs intentionally lack
+`admin:repo_hook` (audit, principle-of-least-privilege).
+`off` is for repos where the operator wants explicit-only re-index
+(e.g. via the dashboard's Reindex button).
+
+## 2. Delivery endpoint
+
+```
+POST /api/v1/webhooks/github/<path_hash>
+```
+
+- **Public in the auth sense** — no Bearer token or session cookie.
+ Every body is HMAC-SHA256-validated against the per-row
+ `webhook_secret` stored on the matching `git_repos` row. The header
+ GitHub sends is `X-Hub-Signature-256: sha256=<hex-digest>`.
+- Validation lives in `server/internal/httpapi/webhooks.go`
+ (`validHMAC`). HMACs are compared with `hmac.Equal` (constant-time)
+ to prevent timing-side-channels on the secret.
+- The secret is shown to the operator on add-repo and, afterwards, only
+ through the dashboard's **Project → Webhook info** action. There is
+ no other retrieval path; rotating the secret means recreating
+ the `git_repos` row.
+
+Handled events:
+
+| Event | Behaviour |
+|---|---|
+| `push` (tracked branch) | Enqueue a `clone_repo` job — `dedupe_key` collapses bursts of pushes (force-pushes, branch races) into one job. |
+| `push` (other branch / delete) | 200 `{"status":"ignored"}`. The workspace tracks one branch per repo. |
+| `ping` | 200 `{"status":"ping"}`. GitHub sends this on add; use it to confirm setup. |
+| anything else | 200 `{"status":"ignored"}`, logged for audit. |
+
+Any HMAC mismatch returns 401 with no body, regardless of event.
+
+## 3. Public URL requirement
+
+GitHub will not deliver to a localhost or RFC1918 address. The server
+exposes webhook URLs based on `CIX_PUBLIC_URL` — set this to the
+externally-reachable origin of the server (e.g.
+`https://cix.example.com`). If unset, the dashboard hides the URL and
+prints a hint instead of a 404 trap.
+
+For self-hosted deployments without a static public IP, the simplest
+no-cost answer is a **Cloudflare Tunnel** — see
+[`WORKSPACES.md`](WORKSPACES.md#cloudflare-tunnel-recommended-for-self-hosted)
+for the full recipe (`cloudflared tunnel create`, DNS routing,
+`cloudflared tunnel run`).
+
+## 4. Auto-register flow
+
+When `webhook_mode=auto` and the PAT scope check passes:
+
+1. Operator submits the add-repo form. The server clones the repo
+ (`clone_repo` job) and starts indexing.
+2. In parallel, the server calls `POST /repos/{owner}/{repo}/hooks`
+ on GitHub via `server/internal/githubapi/`. The hook payload sets
+ `events: ["push"]`, `content_type: json`, and embeds the
+ server-generated `webhook_secret`.
+3. GitHub responds with the hook id. The id is stored on the
+ `git_repos` row so a later DELETE can call
+ `DELETE /repos/{owner}/{repo}/hooks/{id}` cleanly.
+4. The response payload includes `auto_registered: true` and the
+ webhook URL becomes immediately ready for delivery.
+
+Failure modes (all non-fatal — the response still succeeds with
+`auto_registered: false` and an operator-facing note):
+
+- PAT missing `admin:repo_hook`
+- PAT lacks access to the target repo (private repo on someone else's
+ org)
+- Network error reaching api.github.com
+- Repo already has a webhook pointing at this server's URL — server
+ reuses the existing hook id rather than creating a duplicate
+
+The operator sees the reason on the dashboard and can switch to
+manual mode or rotate the PAT.
+
+## 5. Manual register flow
+
+If `webhook_mode=manual`, the dashboard shows the URL + secret after
+add-repo and on the project detail page. Paste them into GitHub:
+
+1. Repo → **Settings → Webhooks → Add webhook**.
+2. **Payload URL** — the value from the dashboard.
+3. **Content type** — `application/json`.
+4. **Secret** — the value from the dashboard.
+5. **Which events?** — **Just the push event**.
+6. **Active** — ✓.
+
+GitHub sends a `ping` immediately. cix returns 200 and GitHub's
+webhook page marks the delivery green. After that, every `push` to the
+tracked branch triggers a `clone_repo` job.
+
+For automation, the same registration can be done with `gh`:
+
+```bash
+gh api -X POST \
+ "repos/<owner>/<repo>/hooks" \
+ -f name=web \
+ -F active=true \
+ -f 'events[]=push' \
+ -f "config[url]=$WEBHOOK_URL" \
+ -f 'config[content_type]=json' \
+ -f "config[secret]=$WEBHOOK_SECRET" \
+ -f 'config[insecure_ssl]=0'
+```
+
+## 6. Startup audit for stale URLs
+
+When `CIX_PUBLIC_URL` changes (host migration, tunnel rebuild), every
+`auto`-registered webhook in GitHub now points at the old origin.
+On boot the server runs a one-shot audit
+(`server/internal/workspaces/`, commit `9dac327`):
+
+- For each `git_repos` row with `webhook_mode=auto` and a stored hook
+ id, fetch the hook config from GitHub.
+- Compare `config.url` to the canonical URL the server *would* now
+ build from `CIX_PUBLIC_URL`.
+- On mismatch: log a `WARN` line naming the repo and the stale URL.
+ The server **does not auto-update** the hook — silently rewriting
+ webhook URLs on every PAT-bearing repo at boot is too aggressive.
+ The operator runs **Project → Reregister webhook** from the
+ dashboard to fix each repo intentionally.
+
+This is also why rotating `CIX_PUBLIC_URL` should be paired with a
+"reregister all" sweep in the dashboard — there's no automatic
+follow-up.
+
+## 7. What gets re-indexed on a push
+
+Each accepted `push` enqueues a `clone_repo` job, which:
+
+1. Fetches into the existing clone directory (`git fetch` + reset to
+ the tracked branch's new HEAD — no re-clone unless the local dir
+ is missing).
+2. Chains an `index_repo` job that runs the standard 3-phase
+ indexer (begin → files → finish) against the new HEAD.
+3. The indexer uses SHA-256 file hashes, so only changed files are
+ re-embedded. A typical 5-file PR finishes in seconds.
+
+The `dedupe_key` on the job table collapses bursts — five rapid
+force-pushes only run the pipeline once. If something *is* in flight
+when a new push arrives, the new push joins the same dedupe key and
+re-runs once on completion.
+
+## 8. Troubleshooting
+
+| Symptom | Likely cause | Fix |
+|---|---|---|
+| 401 from cix on every delivery | Secret in GitHub doesn't match what cix stored. | Click **Webhook info** in the dashboard, paste the canonical value into GitHub. |
+| 404 from cix | URL points at a stale `path_hash` (repo was deleted then re-added). | Run **Project → Reregister webhook**. |
+| 200 `{"status":"ignored"}` and no re-index | Push was to a non-tracked branch, or `webhook_mode=off`. | Confirm the workspace's tracked branch; flip mode to `manual`/`auto`. |
+| Auto-register failed with "missing scope" | PAT lacks `admin:repo_hook`. | Either grant the scope or switch the repo to `manual` and register by hand. |
+| Audit logged `stale URL detected` on boot | `CIX_PUBLIC_URL` changed. | Run **Reregister webhook** on each affected project. |
+
+## 9. Related files
+
+- `server/internal/httpapi/webhooks.go` — delivery endpoint + HMAC check
+- `server/internal/githubapi/` — GitHub REST client for hook CRUD
+- `server/internal/workspaces/` — webhook lifecycle + startup audit
+- [`WORKSPACES.md`](WORKSPACES.md) — operator guide (encryption keys, Cloudflare tunnel)
+- [`../workspaces.md`](../workspaces.md) — user-facing workspace guide
diff --git a/doc/WORKSPACES.md b/doc/WORKSPACES.md
new file mode 100644
index 0000000..d1cba6e
--- /dev/null
+++ b/doc/WORKSPACES.md
@@ -0,0 +1,218 @@
+# Workspaces — operator guide
+
+The workspaces feature lets cix index a group of GitHub repositories
+together and serve cross-project semantic search against the union.
+This document covers everything an operator needs to enable, configure,
+and troubleshoot the feature in production.
+
+For the user-facing workflow (when to reach for workspace search, the
+agent trust rules, query patterns), see [`../workspaces.md`](../workspaces.md).
+For the search algorithm, see [`SEARCH_ALGORITHM.md`](SEARCH_ALGORITHM.md).
+For the webhook lifecycle, see [`WEBHOOKS.md`](WEBHOOKS.md).
+
+> **No feature flag.** Workspaces + GitHub-repo support are part of
+> every release. The previous `CIX_WORKSPACES_ENABLED` gate was removed
+> in the 0.4.x line — the only failure mode that still surfaces as a
+> 503 from `/api/v1/workspaces/*` is when the encryption layer needed
+> for `github_tokens` fails to wire (see "Encryption key resolution"
+> below).
+
+## Schema
+
+A workspace is a *membership* layer over per-project indexes. Three
+tables underpin it (post-`e433fee` refactor; the older
+`workspace_repos` table no longer exists):
+
+| Table | Role |
+|---|---|
+| `workspaces` | The workspace itself (id, name, description). |
+| `git_repos` | Clone metadata, 1:1 with `projects` — github_url, branch, token_id, webhook_secret, webhook_id, webhook_mode, auto_webhook, last_sha. Only populated for repos cix cloned (workspace adds); local `cix init` projects don't have a row here. |
+| `workspace_projects` | Many-to-many junction. A project (cloned or local) can belong to multiple workspaces; deleting a workspace doesn't drop the underlying project. |
+
+Migrations live in `server/internal/db/migrations.go`. The split-out
+migration is `19226aa` (crash-safe + `schema_migrations` versioning)
+and the table rename is `e433fee`.
+
+## Quick start
+
+1. **Set the encryption key** so `github_tokens` rows can be sealed:
+ ```
+ CIX_SECRET_KEY=<32-byte key, hex or base64> # see "Encryption key resolution"
+ ```
+ Or skip and let the server auto-generate a keyfile under
+ `<CIX_SECRETS_DATA_DIR>/.secret_key` on first boot — fine for a
+ single-host dev setup, **back it up** before redeploying.
+2. **Open the dashboard** at `https://<your-host>/dashboard` and sign in.
+3. **Add a GitHub PAT** under **GitHub Tokens → Add token** if you need
+ to clone private repos. The plaintext value is encrypted before it
+ hits SQLite and is never returned in any subsequent response.
+4. **Create a workspace** under **Workspaces → New workspace**.
+5. **Attach a repository:** workspace detail → Add repo. Fill in URL,
+ branch, optional token, and choose **Auto-register webhook** if
+ your PAT carries `admin:repo_hook`. Otherwise check **I'll set it
+ up myself** and copy the displayed URL + secret into GitHub.
+6. The server clones the repo into its own subdirectory of `CIX_WORKSPACES_DATA_DIR`
+ and runs the existing indexer pipeline against it. Status transitions
+ visible on the workspace detail page: `created → indexing → indexed`.
+
+## Environment variables
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `CIX_SECRET_KEY` | (auto-generate) | 32-byte AES key used to encrypt GitHub tokens. Hex or base64. |
+| `CIX_SECRET_KEYFILE` | unset | Alternative — path to a 0600-perm key file. |
+| `CIX_SECRETS_DATA_DIR` | `dirname(CIX_SQLITE_PATH)` | Where the auto-generated keyfile lives. |
+| `CIX_REPOS_DIR` | `<data-dir>/repos` | Where cloned repos live. Legacy alias: `CIX_WORKSPACES_DATA_DIR`. |
+| `CIX_WORKER_CONCURRENCY` | `2` | Parallel job workers. Clone+index is mostly IO-bound. |
+| `CIX_PUBLIC_URL` | unset | Externally-reachable URL used to build webhook delivery URLs. |
+
+### Encryption key resolution
+
+Resolution order:
+
+1. `CIX_SECRET_KEY` (hex or base64 32-byte value)
+2. `CIX_SECRET_KEYFILE` (path; file must be `0600`)
+3. `<CIX_SECRETS_DATA_DIR>/.secret_key` — auto-generated on first
+ boot if neither of the above resolve. The server **refuses to
+ start** if `github_tokens` is non-empty and the resolved key
+ cannot decrypt the first row — protects against accidental key
+ rotation that would silently brick all tokens.
+
+For production, supply `CIX_SECRET_KEY` explicitly or mount a keyfile
+via `CIX_SECRET_KEYFILE`. The auto-generated keyfile is a single-host
+convenience for dev.
+
+## Webhooks
+
+GitHub deliveries hit `POST /api/v1/webhooks/github/<path_hash>`.
+The endpoint is **public** in the auth sense (no Bearer/session check)
+but every delivery is HMAC-SHA256-validated against the per-row
+`webhook_secret` stored on the matching `git_repos` row. The secret is
+shown exactly once on add-repo and on **Project → Webhook info**.
+
+Supported events:
+
+| Event | Behaviour |
+|---|---|
+| `push` (tracked branch) | Enqueues `clone_repo` job — dedupe collapses bursts. |
+| `push` (other branch / delete) | 200 `{"status":"ignored"}`. |
+| `ping` | 200 `{"status":"ping"}`. Use to confirm setup. |
+| anything else | 200 `{"status":"ignored"}`, logged for audit. |
+
+### Cloudflare tunnel (recommended for self-hosted)
+
+Webhooks require a public URL. The simplest no-cost option is a
+[Cloudflare Tunnel](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/).
+On the cix-server host:
+
+```bash
+# One-time: install + log in
+brew install cloudflared
+cloudflared tunnel login
+
+# Create a named tunnel
+cloudflared tunnel create cix
+
+# Route a hostname to the tunnel (replace cix.example.com with yours)
+cloudflared tunnel route dns cix cix.example.com
+
+# Run the tunnel — replace 21847 with your CIX_PORT
+cloudflared tunnel --url http://localhost:21847 run cix
+```
+
+Then set `CIX_PUBLIC_URL=https://cix.example.com` and restart the server.
+The dashboard's add-repo dialog and the webhook-info endpoint will
+generate fully-qualified URLs that GitHub can reach.
+
+For ad-hoc testing without DNS:
+
+```bash
+cloudflared tunnel --url http://localhost:21847
+# prints a one-shot https://*.trycloudflare.com URL
+```
+
+Set `CIX_PUBLIC_URL` to whatever cloudflared prints and restart.
+Single-process tunnels are torn down with the parent — not suitable for
+production but perfect for the first end-to-end smoke test.
+
+### Manual webhook setup
+
+If `webhook_mode=manual` (default) the dashboard surfaces the URL + secret
+after add-repo. Paste them into GitHub:
+
+1. Repo → **Settings → Webhooks → Add webhook**
+2. **Payload URL** = the value from the dashboard
+3. **Content type** = `application/json`
+4. **Secret** = the value from the dashboard
+5. **Which events?** → **Just the push event**
+6. **Active** ✓
+
+GitHub will send a `ping` immediately — the cix server returns 200, and
+GitHub's webhook page will mark the delivery green.
+
+### Auto-register
+
+When the PAT carries `admin:repo_hook` scope and `webhook_mode=auto`,
+the server uses GitHub's hooks API on your behalf during add-repo and
+persists the resulting hook id (used to de-register on delete). Failure
+is non-fatal — the response includes `auto_registered: false` and an
+operator-facing note explaining the specific reason (missing scope,
+network error, etc.).
+
+## Background workers
+
+A single in-process worker pool drains a SQLite-backed queue (`jobs`
+table). Concurrency is `CIX_WORKER_CONCURRENCY` (default 2). Job types
+in PR2–PR3:
+
+- `clone_repo` — clones (or fetches+resets on reuse) via go-git;
+ registers `projects` row; chains `index_repo`.
+- `index_repo` — runs the existing 3-phase indexer in-process against
+ the clone directory; flips repo status to `indexed`.
+
+Future PRs add `build_call_graph` and `compute_workspace_communities`.
+
+### Inspecting the queue
+
+`GET /api/v1/jobs` lists recent jobs with optional `status=` / `type=` /
+`limit=` filters. Useful for diagnosing stuck repos.
+
+## Troubleshooting
+
+- **Status stuck at `indexing`** — check `GET /api/v1/jobs?status=running` and
+ the cix-server logs. Most common cause: PAT missing `repo` scope on
+ a private repo, or network not reaching github.com.
+- **Status stuck at `error`** — the underlying job's error message is
+ surfaced on the project detail page. Common fixes: rotate the PAT,
+ confirm the branch name, verify the runtime model is loaded
+ (`GET /api/v1/admin/sidecar/status`).
+- **Webhook deliveries returning 401** — the secret in GitHub doesn't
+ match what cix stored. Click **Webhook info** in the dashboard to
+ see the canonical value, paste again. Secrets rotate when the
+ git_repos row is recreated.
+- **Encryption key mismatch on startup** — operator-readable error in
+ the boot log. Recover the prior `CIX_SECRET_KEY` from your secrets
+ manager or wipe `github_tokens` manually before retrying.
+
+## Shipped follow-ons (PR4 – PR8)
+
+The original `PR4–PR7` placeholders have all landed on `develop`:
+
+- **PR4** (`f244643`) — Intra-project call-graph extraction
+ (`call_edges` table) + eval harness.
+- **PR5** (`ec32744`) — Louvain community detection per workspace +
+ workspace centroid embeddings in a dedicated chromem collection.
+- **PR6** (`207bfaf`) — Two-stage workspace search endpoint
+ (`POST /api/v1/workspaces/{id}/search`). Hybrid BM25 + dense ranking
+ with project-level gating — see [`SEARCH_ALGORITHM.md`](SEARCH_ALGORITHM.md#3-workspace-hybrid-search).
+- **PR7** (`e1aa785`) — CLI subcommand (`cix workspace …`,
+ name-first grammar from PR8 / `5db28fd`) + `cix-workspace`
+ Claude Code skill + dashboard search dialog.
+- **PR8** (`5db28fd`) — Workspace discovery: dashboard expansion
+ panels per project + name-first CLI grammar so an agent can do
+ `cix ws "<name>" search "<query>"` without juggling
+ workspace ids.
+
+Subsequent fixes calibrated the hybrid defaults (`96b487d`), added
+the FTS5 chunk mirror across all projects (`f00e3d3`), and tightened
+webhook validation + PAT handling (`903d48f`, `57e091d`).
diff --git a/doc/benchmark-cix-vs-grep-2026-04-28.md b/doc/benchmark-cix-vs-grep-2026-04-28.md
deleted file mode 100644
index 66a4fe2..0000000
--- a/doc/benchmark-cix-vs-grep-2026-04-28.md
+++ /dev/null
@@ -1,329 +0,0 @@
-# Benchmark — CIX-first vs grep-only navigation (2026-04-28)
-
-Re-run of the 32-cell head-to-head from 2026-04-27 after a bundle of
-search-quality changes landed: path-aware embeddings, `--min-score` default
-0.4, `--exclude` flag, relative-path output. Same fixture, same prompts,
-same `claude-sonnet-4-6` workers, same 192.168.1.168 cix server — only
-the server binary differs from the 2026-04-27 run.
-
-The point is the **delta vs 2026-04-27**, not the absolute numbers.
-
-Raw transcripts and metric JSON live in `/tmp/cix-bench/results/runs/`;
-prior run preserved at `/tmp/cix-bench/results/runs.2026-04-27/` and
-`/tmp/cix-bench/results/results.2026-04-27.csv`.
-
----
-
-## 1. Headline comparison (16 runs each)
-
-| Metric | Worker A (grep-only) | Worker B (cix-first) | Δ (B − A) | Δ % |
-|--------------------------|----------------------|----------------------|-----------|---------|
-| Mean elapsed time (s) | 102.5 | **94.9** | −7.6 | −7.4 % |
-| Median elapsed time (s) | 78.5 | **77.0** | −1.5 | −1.9 % |
-| Mean tool calls | 20.3 | **19.3** | −1.0 | −4.6 % |
-| Mean tokens_in | 1629† | **43** | † | † |
-| Mean tokens_out | 3222 | **3111** | −112 | −3.4 % |
-| Pass rate | 13 / 16 | **15 / 16** | +2 | +15.4 % |
-
-† Worker A's `tokens_in` mean is dominated by a single anomaly:
-`refactor_04_A` reported 25 641 input tokens (likely a cache-miss accounting
-spike), versus 16–26 for the other 15 A cells. **Excluding that one cell, A's
-mean tokens_in is 28.9** — the cleaner number for comparison. Both workers'
-input-token totals are uncached `input_tokens` only; cache-creation tokens
-that dominate real cost on Sonnet are not included by `metrics.sh`.
-
-**One-glance read:** B is faster, leaner, and more reliable than A on every
-headline metric. This is the inverse of the 2026-04-27 run, where B was
-*slower and more expensive* than A on average. The pass-rate gap closed
-slightly (was 14/16 vs 16/16, now 13/16 vs 15/16) — both workers
-regressed by one cell each, but B is still the more reliable navigator.
-
----
-
-## 1.5 Delta vs 2026-04-27
-
-### Worker B (the cell where the new code is exercised)
-
-| Metric (Worker B) | 2026-04-27 | 2026-04-28 | Δ | Δ % |
-|--------------------|------------|------------|------|--------|
-| Mean elapsed s | 69.9 | 94.9 | +25.0 | +35.8 % |
-| Mean tool calls | 19.2 | 19.3 | +0.1 | +0.5 % |
-| Mean tokens_in | 38 | 43 | +5 | +13.2 % |
-| Mean tokens_out | 2754 | 3111 | +357 | +13.0 % |
-| Pass rate | 16/16 | 15/16 | −1 | −6.3 % |
-
-### Worker A (control — A doesn't use the cix server)
-
-| Metric (Worker A) | 2026-04-27 | 2026-04-28 | Δ | Δ % |
-|--------------------|------------|------------|-------|---------|
-| Mean elapsed s | 62.2 | 102.5 | +40.3 | +64.8 % |
-| Mean tool calls | 14.5 | 20.3 | +5.8 | +40.0 % |
-| Mean tokens_in† | 33 | 28.9 | −4.1 | −12.4 % |
-| Mean tokens_out | 2447 | 3222 | +775 | +31.7 % |
-| Pass rate | 14/16 | 13/16 | −1 | −7.1 % |
-
-† Excluding `refactor_04_A` token-count anomaly (25 641 in).
-
-**Both workers' absolute numbers grew.** This is Sonnet-side variance — A
-doesn't even talk to the cix server, yet it slowed down 65 % on elapsed and
-spent 32 % more output tokens. The dev box was idle and on the same
-hardware, so the most plausible explanation is run-to-run variance from
-the model itself. The 2026-04-27 run finished in ~75 minutes; this run
-took ~110 minutes, consistent with a slower-but-equally-clean execution.
-
-The honest story is therefore in the **A↔B gap within each run**, not the
-absolute deltas vs the prior run:
-
-- Prior run: B was +12 % slower, +32 % more tool calls, +13 % more
- output tokens than A. B's only win was pass rate.
-- New run: B is −7 % faster, −5 % fewer tool calls, −3 % fewer output
- tokens than A — *and* still wins on pass rate.
-
-The cix-first strategy went from "more expensive, more reliable" to
-"strictly better than grep on every headline metric." That flip is what
-the new code bought.
-
----
-
-## 2. Per-task comparison (where the gap moved)
-
-### bugfix — flat (cix overhead always negligible here)
-
-| Metric | A (new) | B (new) | Δ B−A | Δ % | (prior B−A %) |
-|-------------------|---------|---------|---------|---------|---------------|
-| Mean elapsed s | 70.3 | 69.0 | −1.3 | −1.8 % | (−10.2 %) |
-| Mean tool calls | 13.3 | 13.5 | +0.2 | +1.5 % | (+3.7 %) |
-| Mean tokens_in | 20.5 | 21.0 | +0.5 | +2.4 % | (−4.8 %) |
-| Mean tokens_out | 1600.0 | 1665.8 | +65.8 | +4.1 % | (−5.0 %) |
-| Pass rate | 4/4 | 4/4 | 0 | 0 % | (0 %) |
-
-bugfix is a draw both times — when there's a failing test pointing at the
-call site, neither navigator needs much exploration.
-
-### refactor — A regressed, B held steady, gap widened
-
-| Metric | A (new) | B (new) | Δ B−A | Δ % | (prior B−A %) |
-|-------------------|---------|---------|-----------|----------|---------------|
-| Mean elapsed s | 79.8 | 96.0 | +16.2 | +20.3 % | (+4.0 %) |
-| Mean tool calls | 16.8 | 19.8 | +3.0 | +18.0 % | (+6.2 %) |
-| Mean tokens_in† | 23.3 | 28.3 | +5.0 | +21.4 % | (+4.8 %) |
-| Mean tokens_out | 2497.5 | 2879.3 | +381.8 | +15.3 % | (−8.1 %) |
-| Pass rate | 1/4 | 3/4 | +2 | +200 % | (+50 %) |
-
-† A excludes refactor_04_A 25 641 anomaly.
-
-B is slower than A on time *and* tokens here — this is the one task type
-where the cix-first overhead still bites. But B's pass rate is 3× A's:
-A picked non-seeded ambient inefficiencies (`chunkSlidingWindow`, `topN`)
-in 3 of 4 variants, while B hit the seeded function in 3 of 4 (refactor_03
-was the only B-miss, where B picked `topN` instead of `joinLines`). Net:
-B trades wall-clock for a much higher chance of finding the right
-function.
-
-### tests — biggest win for B (was the prior tax cell)
-
-| Metric | A (new) | B (new) | Δ B−A | Δ % | (prior B−A %) |
-|-------------------|---------|---------|-----------|----------|---------------|
-| Mean elapsed s | 191.3 | **154.3** | −37.0 | **−19.3 %** | (+36.8 %) |
-| Mean tool calls | 36.3 | **26.8** | −9.5 | **−26.2 %** | (+103 %) |
-| Mean tokens_in | 52.5 | **37.8** | −14.7 | **−28.0 %** | (+79 %) |
-| Mean tokens_out | 6789.8 | **5728.8** | −1061.0 | **−15.6 %** | (+26.9 %) |
-| Pass rate | 4/4 | 4/4 | 0 | 0 % | (0 %) |
-
-This is the cell that motivated the search-quality work. **B paid a
-+103 % tool-call tax in the prior run; in this run it's a −26 % win.**
-And mechanically B did much less reading: B's per-cell `files_read_count`
-mean dropped to **7.25** vs A's **15.25** — half. The path-aware
-embeddings + min-score 0.4 made the top-K hits relevant enough that B
-didn't need to range-read the codebase.
-
-The most striking single cell: `tests_03_B` finished in 146 s with 6 files
-read; `tests_03_A` took 245 s and read 28 files. B chose the public
-`Service.CancelIndexing` method (a real exported function); A picked
-`splitPath` (unexported) on 3 of 4 variants — the runbook's verification
-gap from the prior report is still there.
-
-### summary — small but consistent flip
-
-| Metric | A (new) | B (new) | Δ B−A | Δ % | (prior B−A %) |
-|-------------------|---------|---------|-----------|----------|---------------|
-| Mean elapsed s | 68.8 | **60.3** | −8.5 | **−12.4 %** | (+11.7 %) |
-| Mean tool calls | 14.8 | 17.3 | +2.5 | +16.9 % | (+12.1 %) |
-| Mean tokens_in† | 17.8 | 19.3 | +1.5 | +8.6 % | (+3.1 %) |
-| Mean tokens_out | 2000.8 | 2168.5 | +167.7 | +8.4 % | (+24.0 %) |
-| Pass rate | 4/4 | 4/4 | 0 | 0 % | (0 %) |
-
-† B excludes summary_04_B 285-token-in anomaly.
-
-Both workers grounded the summaries; rubric scores are flat at 6/7 across
-all 8 cells (vs prior A=6,6,6,7 / B=6,5,6,6). B is now ~12 % faster and
-spent only +8 % output tokens (vs +24 % before).
-
----
-
-## 3. Per-run table (all 32 rows, sorted)
-
-| run_id | elapsed_s | tools | toks_total | toks_in | toks_out | cix_ops | grep_ops | files_read | outcome |
-|-----------------|-----------|-------|------------|---------|----------|---------|----------|------------|---------|
-| bugfix_01_A | 78 | 15 | 1643 | 24 | 1619 | 0 | 1 | 2 | pass |
-| bugfix_01_B | 75 | 13 | 1710 | 21 | 1689 | 0 | 0 | 2 | pass |
-| bugfix_02_A | 67 | 10 | 1190 | 16 | 1174 | 0 | 0 | 2 | pass |
-| bugfix_02_B | 48 | 11 | 1307 | 16 | 1291 | 0 | 0 | 2 | pass |
-| bugfix_03_A | 67 | 13 | 1760 | 19 | 1741 | 0 | 2 | 2 | pass |
-| bugfix_03_B | 83 | 15 | 1988 | 26 | 1962 | 0 | 2 | 2 | pass |
-| bugfix_04_A | 69 | 15 | 1889 | 23 | 1866 | 0 | 1 | 2 | pass |
-| bugfix_04_B | 70 | 15 | 1742 | 21 | 1721 | 0 | 1 | 2 | pass |
-| refactor_01_A | 68 | 15 | 2306 | 22 | 2284 | 0 | 3 | 5 | partial |
-| refactor_01_B | 104 | 19 | 3052 | 32 | 3020 | 2 | 3 | 1 | pass |
-| refactor_02_A | 86 | 16 | 2267 | 22 | 2245 | 0 | 4 | 1 | pass |
-| refactor_02_B | 90 | 21 | 2875 | 29 | 2846 | 2 | 5 | 1 | pass |
-| refactor_03_A | 80 | 18 | 2263 | 26 | 2237 | 0 | 5 | 1 | partial |
-| refactor_03_B | 91 | 18 | 3093 | 25 | 3068 | 2 | 6 | 2 | partial |
-| refactor_04_A | 85 | 18 | 28865 | 25641 | 3224 | 0 | 6 | 4 | partial |
-| refactor_04_B | 99 | 21 | 2610 | 27 | 2583 | 2 | 2 | 2 | pass |
-| summary_01_A | 65 | 12 | 1497 | 15 | 1482 | 0 | 0 | 0 | pass |
-| summary_01_B | 64 | 20 | 2156 | 23 | 2133 | 0 | 0 | 8 | pass |
-| summary_02_A | 65 | 15 | 2076 | 18 | 2058 | 0 | 0 | 7 | pass |
-| summary_02_B | 41 | 13 | 1829 | 16 | 1813 | 1 | 0 | 5 | pass |
-| summary_03_A | 79 | 19 | 2398 | 22 | 2376 | 0 | 1 | 10 | pass |
-| summary_03_B | 74 | 16 | 2043 | 19 | 2024 | 0 | 1 | 8 | pass |
-| summary_04_A | 66 | 13 | 2103 | 16 | 2087 | 0 | 0 | 0 | pass |
-| summary_04_B | 62 | 20 | 2989 | 285 | 2704 | 6 | 0 | 0 | pass |
-| tests_01_A | 200 | 37 | 7345 | 51 | 7294 | 0 | 3 | 16 | pass |
-| tests_01_B | 189 | 31 | 6482 | 46 | 6436 | 0 | 1 | 14 | pass |
-| tests_02_A | 163 | 29 | 6042 | 45 | 5997 | 0 | 4 | 9 | pass |
-| tests_02_B | 148 | 23 | 6689 | 32 | 6657 | 1 | 2 | 2 | pass |
-| tests_03_A | 245 | 50 | 8422 | 66 | 8356 | 0 | 6 | 28 | pass |
-| tests_03_B | 146 | 30 | 5490 | 39 | 5451 | 1 | 3 | 6 | pass |
-| tests_04_A | 157 | 29 | 5560 | 48 | 5512 | 0 | 5 | 8 | pass |
-| tests_04_B | 134 | 23 | 4405 | 34 | 4371 | 1 | 2 | 7 | pass |
-
-Pass = 28/32 (15 B + 13 A). Partial = 4/32 (3 A refactor + 1 B refactor).
-No `(violation)` rows: every A cell has `cix_ops = 0`.
-
-Summary rubric scores: A = {6, 6, 6, 6}, B = {6, 6, 6, 6}. Both pass
-(threshold ≥5).
-
----
-
-## 4. Methodology (abridged)
-
-Same as 2026-04-27 (see `docs/benchmark-runbook.md` for the runbook).
-Two procedural deviations from the runbook, **identical to the prior
-run unless noted**:
-
-1. PREAMBLE_B URL = `http://192.168.1.168:21847` (RTX 3090 prod box,
- not literal `localhost`). Same as prior run.
-2. **Per-cell unique workspace** at `/tmp/cix-bench-runs/${RUN_ID}/`
- instead of one shared `/tmp/cix-bench-run/`. Different paths produce
- different `projectHash` on the server, so each B-cell hits a fresh
- index — no residual chunks bleeding between cells. **This is new in
- this run.** Effect: every B cell pays a one-time index cost (180-s
- wait deadline; observed 30–60 s actual), absorbed inside cell setup
- and excluded from `elapsed_s`.
-
-The cix server on .168 ran the working-tree binary with
-`CIX_EMBED_INCLUDE_PATH=true` (default) and the new `min-score=0.4`
-default. Spot check before launch: `cix search "main entry point server"`
-ranked `server/cmd/cix-server/main.go` first at 0.52, confirming the
-path-aware embeddings were live.
-
-All 32 transcripts identify the worker model as `claude-sonnet-4-6` —
-audited via `grep -L 'claude-sonnet-4-6' /tmp/cix-bench/results/runs/*.log`
-returning zero lines.
-
-Fixture manifest (`fixture-manifest.txt`, 3744 hashed files) verified
-clean both before and after the run.
-
----
-
-## 5. Headline numbers (executive summary)
-
-The 2026-04-27 run found that cix-first navigation was *more reliable but
-no faster* than grep-only. The 2026-04-28 re-run, with path-aware
-embeddings + `min-score=0.4` shipped, finds cix-first is now
-**−7.4 % faster**, **−4.6 % fewer tool calls**, and **−3.4 % fewer
-output tokens** than grep-only — while still beating it on pass rate
-(15/16 vs 13/16). The single biggest gain is the **tests** task, which
-flipped from a +37 % B-tax to a −19 % B-win, with B reading half as many
-files per cell. The summary task also flipped (+12 % B-tax → −12 %
-B-win). Refactor remains the one task where B costs more wall-clock
-than A on average, but B's pass rate (3/4) is 3× A's (1/4) — same
-direction as the prior run.
-
----
-
-## 6. Caveats
-
-- **Both workers got slower in absolute terms vs 2026-04-27.** A grew
- +65 % on elapsed and +32 % on output tokens despite never talking to
- the cix server — pure Sonnet variance. B grew +36 % on elapsed.
- The honest comparison is therefore the *within-run gap* between A and
- B, not the absolute delta vs the prior run. Both within-run gap
- measurements are in §1.5 and §2.
-- **Per-cell unique paths** are new this run. Prior run reused a single
- `/tmp/cix-bench-run/` path so all 32 cells hit the same `projectHash`
- on the server. This run isolates each cell on a fresh hash. Effect on
- B should be small (server-side caches keyed by chunk content, not
- project), but it's a real procedural difference worth flagging.
-- **`refactor_04_A` token spike**: 25 641 input tokens vs 16–26 for the
- other 15 A cells. Almost certainly cache-miss accounting; treated as
- an outlier in the per-task means but kept in the per-run table.
-- **`tokens_in` is uncached input only.** Cache-creation and cache-read
- tokens dominate real Sonnet cost and are not summed by `metrics.sh`.
- This is consistent with the prior run's accounting — the relative gap
- is comparable, the absolute number is not the whole bill.
-- **Fixture is a snapshot of the cix project itself** — the model may
- recognise it from training. Same caveat as 2026-04-27.
-- **Tool restriction is enforced via prompt, not at the harness level.**
- No A cell violated (`cix_ops = 0` everywhere); we still trust the
- prompt because of post-hoc audit, not architecture.
-- **Single machine, single model (`claude-sonnet-4-6`), single embedding
- model, single random seed per worker.** No warm/cold cache split.
-- **Pre-run cix indexing time is excluded from `elapsed_s`** (B gets a
- "free" index), as before. Indexing took 30–60 s per B cell on .168 —
- not amortised in the workload comparison.
-- **Refactor verification still depends on naming the seeded function.**
- A's "asymptotically inefficient" picks (`chunkSlidingWindow`,
- insertion sort, `topN`) are real wins on the merits but score
- `partial` because they aren't the runbook's planted target. The
- runbook gap from the prior report (§7.2 too strict) hasn't been
- patched.
-- **Tests verification is exportedness-blind.** Both workers picked
- unexported helpers (`splitPath` and friends) on tests_01/02 and still
- scored `pass`. The new code didn't change this.
-
----
-
-## 7. Verbatim prompts
-
-Identical to 2026-04-27 (see `docs/benchmark-runbook.md` §3 and §4):
-COMMON_PREAMBLE, PREAMBLE_A, PREAMBLE_B, BUGFIX_PROMPT, REFACTOR_PROMPT,
-TESTS_PROMPT, SUMMARY_PROMPT — all unchanged. The only deltas in
-PREAMBLE_B vs the runbook's literal text are the api URL
-(`http://192.168.1.168:21847`) and the per-cell `cd` path
-(`/tmp/cix-bench-runs/${RUN_ID}/`).
-
-For Worker A, the runbook §5.2 auth-error gate line was appended to
-every assembled prompt:
-> Note: the env var CIX_API_KEY is set to an invalid value for this run;
-> any cix call will fail with an auth error.
-
----
-
-## 8. Where the artefacts live
-
-- This report:
- `doc/benchmark-cix-vs-grep-2026-04-28.md`
-- Prior report (preserved):
- `doc/benchmark-cix-vs-grep.md` (2026-04-27)
-- New CSV: `/tmp/cix-bench/results/results.csv`
-- Prior CSV (preserved): `/tmp/cix-bench/results/results.2026-04-27.csv`
-- New per-run logs + metrics: `/tmp/cix-bench/results/runs/`
-- Prior per-run logs + metrics (preserved):
- `/tmp/cix-bench/results/runs.2026-04-27/`
-- Summary rubric scores (this run only):
- `/tmp/cix-bench/results/rubric.json`
-- Fixture (frozen, byte-identical to 2026-04-27):
- `/tmp/cix-bench/baseline/`, `/tmp/cix-bench/variants/`,
- `/tmp/cix-bench/fixture-manifest.txt`
diff --git a/doc/benchmark-f16.md b/doc/benchmark-f16.md
deleted file mode 100644
index a9cf89d..0000000
--- a/doc/benchmark-f16.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Embedding Quality Benchmark — gguf/limcheekin/CodeRankEmbed-GGUF/coderankembed.F16.gguf vs fp16/nomic-ai/CodeRankEmbed
-
-
-**k** = 10 | **queries** = 20 | **dim ref/cand** = 768/768
-
-## Summary
-
-| Metric | Value | Acceptance |
-|---|---:|---:|
-| Jaccard@10 (mean) | 0.894 | ≥ 0.70 |
-| Recall@10 (mean) | 0.940 | ≥ 0.90 |
-| Kendall tau (mean) | 0.879 | ≥ 0.50 |
-| Reference embed time | 11.5s | — |
-| Candidate embed time | 4.2s | — |
-| Speedup (ref/cand) | 2.72× | — |
-
-## Per-query scores
-
-| Query | Jaccard | Recall | Kendall τ |
-|---|---:|---:|---:|
-| `async queue timeout` | 0.818 | 0.900 | 0.889 |
-| `parse tree-sitter chunk` | 1.000 | 1.000 | 0.911 |
-| `chroma collection upsert` | 0.818 | 0.900 | 1.000 |
-| `cli root command version` | 1.000 | 1.000 | 0.556 |
-| `embedding service load model` | 1.000 | 1.000 | 0.956 |
-| `project root detection` | 0.818 | 0.900 | 0.889 |
-| `file watcher branch switch` | 0.667 | 0.800 | 0.714 |
-| `config yaml migration legacy keys` | 1.000 | 1.000 | 0.689 |
-| `indexing status estimated finish` | 1.000 | 1.000 | 1.000 |
-| `search by meaning code` | 0.818 | 0.900 | 1.000 |
-| `api key authentication middleware` | 0.818 | 0.900 | 0.944 |
-| `health endpoint status response` | 1.000 | 1.000 | 1.000 |
-| `docker compose cuda healthcheck` | 0.818 | 0.900 | 0.944 |
-| `gitignore pattern matching` | 1.000 | 1.000 | 0.733 |
-| `sqlite projects table schema` | 1.000 | 1.000 | 1.000 |
-| `mean pooling embedding` | 1.000 | 1.000 | 0.911 |
-| `batch size inference throughput` | 0.818 | 0.900 | 0.778 |
-| `incremental reindex sha256` | 1.000 | 1.000 | 0.867 |
-| `client version header compatibility` | 0.818 | 0.900 | 0.944 |
-| `goroutine concurrent walk` | 0.667 | 0.800 | 0.857 |
-
-Raw top-k lists: `benchmark-data/benchmark-f16.json`
diff --git a/doc/benchmark-q4_k_m.md b/doc/benchmark-q4_k_m.md
deleted file mode 100644
index 46d0c6a..0000000
--- a/doc/benchmark-q4_k_m.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Embedding Quality Benchmark — gguf/limcheekin/CodeRankEmbed-GGUF/coderankembed.Q4_K_M.gguf vs fp16/nomic-ai/CodeRankEmbed
-
-
-**k** = 10 | **queries** = 20 | **dim ref/cand** = 768/768
-
-## Summary
-
-| Metric | Value | Acceptance |
-|---|---:|---:|
-| Jaccard@10 (mean) | 0.787 | ≥ 0.70 |
-| Recall@10 (mean) | 0.875 | ≥ 0.90 |
-| Kendall tau (mean) | 0.760 | ≥ 0.50 |
-| Reference embed time | 11.5s | — |
-| Candidate embed time | 4.6s | — |
-| Speedup (ref/cand) | 2.51× | — |
-
-## Per-query scores
-
-| Query | Jaccard | Recall | Kendall τ |
-|---|---:|---:|---:|
-| `async queue timeout` | 0.667 | 0.800 | 0.786 |
-| `parse tree-sitter chunk` | 1.000 | 1.000 | 0.867 |
-| `chroma collection upsert` | 0.818 | 0.900 | 0.778 |
-| `cli root command version` | 0.818 | 0.900 | 0.611 |
-| `embedding service load model` | 1.000 | 1.000 | 0.600 |
-| `project root detection` | 0.818 | 0.900 | 0.833 |
-| `file watcher branch switch` | 0.538 | 0.700 | 0.810 |
-| `config yaml migration legacy keys` | 0.818 | 0.900 | 0.667 |
-| `indexing status estimated finish` | 1.000 | 1.000 | 0.822 |
-| `search by meaning code` | 0.818 | 0.900 | 0.778 |
-| `api key authentication middleware` | 0.818 | 0.900 | 0.889 |
-| `health endpoint status response` | 0.818 | 0.900 | 0.833 |
-| `docker compose cuda healthcheck` | 0.818 | 0.900 | 0.667 |
-| `gitignore pattern matching` | 0.818 | 0.900 | 0.722 |
-| `sqlite projects table schema` | 0.818 | 0.900 | 0.944 |
-| `mean pooling embedding` | 0.818 | 0.900 | 0.944 |
-| `batch size inference throughput` | 0.667 | 0.800 | 0.714 |
-| `incremental reindex sha256` | 0.667 | 0.800 | 0.857 |
-| `client version header compatibility` | 0.667 | 0.800 | 0.929 |
-| `goroutine concurrent walk` | 0.538 | 0.700 | 0.143 |
-
-Raw top-k lists: `benchmark-data/benchmark-q4_k_m.json`
diff --git a/doc/benchmark-q5_k_m.md b/doc/benchmark-q5_k_m.md
deleted file mode 100644
index 34cc9b9..0000000
--- a/doc/benchmark-q5_k_m.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Embedding Quality Benchmark — gguf/limcheekin/CodeRankEmbed-GGUF/coderankembed.Q5_K_M.gguf vs fp16/nomic-ai/CodeRankEmbed
-
-
-**k** = 10 | **queries** = 20 | **dim ref/cand** = 768/768
-
-## Summary
-
-| Metric | Value | Acceptance |
-|---|---:|---:|
-| Jaccard@10 (mean) | 0.815 | ≥ 0.70 |
-| Recall@10 (mean) | 0.895 | ≥ 0.90 |
-| Kendall tau (mean) | 0.786 | ≥ 0.50 |
-| Reference embed time | 11.5s | — |
-| Candidate embed time | 4.8s | — |
-| Speedup (ref/cand) | 2.38× | — |
-
-## Per-query scores
-
-| Query | Jaccard | Recall | Kendall τ |
-|---|---:|---:|---:|
-| `async queue timeout` | 0.667 | 0.800 | 0.929 |
-| `parse tree-sitter chunk` | 0.818 | 0.900 | 0.889 |
-| `chroma collection upsert` | 0.818 | 0.900 | 0.722 |
-| `cli root command version` | 0.818 | 0.900 | 0.389 |
-| `embedding service load model` | 1.000 | 1.000 | 0.867 |
-| `project root detection` | 0.818 | 0.900 | 0.889 |
-| `file watcher branch switch` | 0.818 | 0.900 | 0.889 |
-| `config yaml migration legacy keys` | 0.818 | 0.900 | 0.556 |
-| `indexing status estimated finish` | 0.818 | 0.900 | 0.667 |
-| `search by meaning code` | 0.818 | 0.900 | 0.833 |
-| `api key authentication middleware` | 0.818 | 0.900 | 0.889 |
-| `health endpoint status response` | 1.000 | 1.000 | 1.000 |
-| `docker compose cuda healthcheck` | 0.818 | 0.900 | 0.889 |
-| `gitignore pattern matching` | 1.000 | 1.000 | 0.689 |
-| `sqlite projects table schema` | 0.818 | 0.900 | 1.000 |
-| `mean pooling embedding` | 0.818 | 0.900 | 0.889 |
-| `batch size inference throughput` | 0.667 | 0.800 | 0.857 |
-| `incremental reindex sha256` | 0.667 | 0.800 | 0.786 |
-| `client version header compatibility` | 0.818 | 0.900 | 0.944 |
-| `goroutine concurrent walk` | 0.667 | 0.800 | 0.143 |
-
-Raw top-k lists: `benchmark-data/benchmark-q5_k_m.json`
diff --git a/doc/benchmark-q8-vs-fp16.md b/doc/benchmark-q8-vs-fp16.md
index 1c5ba65..f37ca42 100644
--- a/doc/benchmark-q8-vs-fp16.md
+++ b/doc/benchmark-q8-vs-fp16.md
@@ -68,10 +68,9 @@ rm -rf ~/.cache/huggingface/hub/models--nomic-ai--CodeRankEmbed
## Per-query detail
-See supporting files:
-- `doc/benchmark-q8_0.md` — F16 ref vs Q8_0
-- `doc/benchmark-q5_k_m.md` — F16 ref vs Q5_K_M
-- `doc/benchmark-q4_k_m.md` — F16 ref vs Q4_K_M
-- `doc/benchmark-f16.md` — F16 ref vs F16 GGUF
-- `doc/benchmark-data/` — raw top-k JSON per quant (`benchmark-*.json`) and
- `fp16-cache.json` (reusable reference cache; safe to delete after review)
+Raw top-k JSON per quant (`benchmark-*.json`) and the reusable fp16
+reference cache live in `doc/benchmark-data/`. The earlier per-quant
+markdown summaries (`benchmark-q8_0.md`, `benchmark-q5_k_m.md`,
+`benchmark-q4_k_m.md`, `benchmark-f16.md`) only tabulated per-query
+scores from that JSON and were removed in the 2026-05 docs refactor —
+the table above is the authoritative comparison.
diff --git a/doc/benchmark-q8_0.md b/doc/benchmark-q8_0.md
deleted file mode 100644
index f1363a0..0000000
--- a/doc/benchmark-q8_0.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# Embedding Quality Benchmark — gguf/limcheekin/CodeRankEmbed-GGUF/coderankembed.Q8_0.gguf vs fp16/nomic-ai/CodeRankEmbed
-
-
-**k** = 10 | **queries** = 20 | **dim ref/cand** = 768/768
-
-## Summary
-
-| Metric | Value | Acceptance |
-|---|---:|---:|
-| Jaccard@10 (mean) | 0.894 | ≥ 0.70 |
-| Recall@10 (mean) | 0.940 | ≥ 0.90 |
-| Kendall tau (mean) | 0.861 | ≥ 0.50 |
-| Reference embed time | 11.5s | — |
-| Candidate embed time | 4.3s | — |
-| Speedup (ref/cand) | 2.65× | — |
-
-## Per-query scores
-
-| Query | Jaccard | Recall | Kendall τ |
-|---|---:|---:|---:|
-| `async queue timeout` | 0.818 | 0.900 | 0.889 |
-| `parse tree-sitter chunk` | 1.000 | 1.000 | 0.911 |
-| `chroma collection upsert` | 1.000 | 1.000 | 1.000 |
-| `cli root command version` | 1.000 | 1.000 | 0.467 |
-| `embedding service load model` | 1.000 | 1.000 | 0.911 |
-| `project root detection` | 0.818 | 0.900 | 0.889 |
-| `file watcher branch switch` | 0.667 | 0.800 | 0.643 |
-| `config yaml migration legacy keys` | 0.818 | 0.900 | 0.667 |
-| `indexing status estimated finish` | 1.000 | 1.000 | 1.000 |
-| `search by meaning code` | 0.818 | 0.900 | 1.000 |
-| `api key authentication middleware` | 0.818 | 0.900 | 0.944 |
-| `health endpoint status response` | 1.000 | 1.000 | 1.000 |
-| `docker compose cuda healthcheck` | 0.818 | 0.900 | 0.944 |
-| `gitignore pattern matching` | 0.818 | 0.900 | 0.667 |
-| `sqlite projects table schema` | 1.000 | 1.000 | 1.000 |
-| `mean pooling embedding` | 1.000 | 1.000 | 0.867 |
-| `batch size inference throughput` | 0.818 | 0.900 | 0.778 |
-| `incremental reindex sha256` | 1.000 | 1.000 | 0.822 |
-| `client version header compatibility` | 1.000 | 1.000 | 0.956 |
-| `goroutine concurrent walk` | 0.667 | 0.800 | 0.857 |
-
-Raw top-k lists: `benchmark-data/benchmark-q8_0.json`
diff --git a/doc/benchmarks.md b/doc/benchmarks.md
new file mode 100644
index 0000000..c4205c9
--- /dev/null
+++ b/doc/benchmarks.md
@@ -0,0 +1,88 @@
+# Benchmarks
+
+Index of dated benchmark snapshots. None of these are a continuously
+maintained dashboard — each is a measurement taken at a point in time
+against a frozen fixture, and they age as the product moves. Each
+section opens with a "last measured" date (or a status note) so a
+reader can decide whether the numbers are still trustworthy.
+
+If you re-run a benchmark and find different numbers, prefer adding a
+new dated section over editing the old one in place — the history is
+useful when reasoning about regressions.
+
+---
+
+## 1. cix-first vs grep-only navigation
+
+**File:** [`benchmark-cix-vs-grep.md`](benchmark-cix-vs-grep.md)
+**Last measured:** 2026-04-27
+**Setup:** 32 hint-free tasks (4 task types × 4 variants × 2 navigation
+strategies) on a frozen snapshot of `claude-code-index` itself, with
+`claude-sonnet-4-6` as worker and `claude-opus-4-7` as operator.
+
+**Headline.** cix-first is *more reliable* (16 / 16 pass vs 14 / 16
+for grep-only) but *not* faster or cheaper on average (+12 % elapsed,
++32 % tool calls, +12 % tokens). The reliability gap shows up on
+refactor tasks, where grep-only converged on the wrong target twice.
+On bugfix tasks where a failing test already points at the call site,
+grep is slightly faster because it skips the cix round-trip.
+
+See the file itself for per-task tables, the 32-row raw run table,
+methodology, and caveats.
+
+---
+
+## 2. CodeRankEmbed GGUF quantization
+
+**File:** [`benchmark-q8-vs-fp16.md`](benchmark-q8-vs-fp16.md)
+**Last measured:** 2026-04-23
+**Setup:** Apple Silicon (Metal), 218 code chunks + 20 queries from
+this repo, k=10. fp16 reference is `nomic-ai/CodeRankEmbed` via
+sentence-transformers; GGUFs from `limcheekin/CodeRankEmbed-GGUF`.
+
+| Quant | Size | Jaccard@10 | Recall@10 | Kendall τ | Verdict |
+|---|---:|---:|---:|---:|---|
+| fp16 reference | ~522 MB | — | — | — | reference |
+| F16 GGUF | 261 MB | 0.894 | 0.940 | 0.879 | pass |
+| **Q8_0** (current default) | **139 MB** | **0.894** | **0.940** | **0.861** | **pass** |
+| Q5_K_M | 98 MB | 0.815 | 0.895 | 0.786 | fail (Recall) |
+| Q4_K_M | 86 MB | 0.787 | 0.875 | 0.760 | fail (Recall) |
+
+Acceptance thresholds: Jaccard@10 ≥ 0.70, Recall@10 ≥ 0.90, Kendall
+τ ≥ 0.50.
+
+**Conclusion.** Q8_0 is the sweet spot — the same Jaccard@10 and
+Recall@10 as the F16 GGUF at roughly half its size, and ~2.6× faster than
+the sentence-transformers reference on Apple Silicon. Q5_K_M and Q4_K_M
+both miss Recall@10 by a hair or more and aren't recommended.
+
+The default shipped model is `awhiteside/CodeRankEmbed-Q8_0-GGUF`
+(equivalent quality, more reliable HF availability than the
+`limcheekin/*` repo).
+
+---
+
+## 3. VRAM profiling
+
+**File:** [`vram-profiling.md`](vram-profiling.md)
+**Status:** Methodology + expected baseline only; actual measured
+numbers have not been backfilled. The doc states "Once
+`profile_vram.py` has been run … this section should be replaced with
+actual measured deltas."
+
+Expected baseline (CodeRankEmbed Q8_0 on RTX 3090): ~0.5–0.7 GB idle
+VRAM (weights ~200–250 MB + pre-allocated `n_ctx=8192` context
+~200–400 MB).
+
+If you re-run the profiler, update `vram-profiling.md` in place — the
+file was always intended as a placeholder.
+
+---
+
+## Raw artefacts
+
+The dated grep-vs-cix run also produced raw transcripts and metric
+JSON. Per `benchmark-cix-vs-grep.md`, they live in
+`/tmp/cix-bench/results/runs/` and are not checked into the repo —
+the markdown file captures the headline numbers and methodology that
+remain useful once the raw logs are gone.
diff --git a/doc/openapi.yaml b/doc/openapi.yaml
index 32799c9..06fac13 100644
--- a/doc/openapi.yaml
+++ b/doc/openapi.yaml
@@ -80,6 +80,31 @@ tags:
description: Admin-only user management
- name: api-keys
description: Issue and revoke owner-scoped API keys for CLI/SDK use
+ - name: workspaces
+ description: |
+ Workspaces group GitHub repositories for cross-project semantic search.
+ Server-wide shared — every authenticated user can list, create, and
+ modify any workspace. Beyond CRUD, the feature covers repository
+ attachment, GitHub webhooks, and the two-stage workspace search
+ endpoint (`POST /api/v1/workspaces/{id}/search`).
+ - name: groups
+ description: |
+ View-groups: admin-managed sets of users. External projects and
+ workspaces are shared to a group, granting its members read/search
+ access. Group CRUD and membership are admin-only; GET /groups is
+ member-scoped for regular users so the share picker can populate.
+ - name: github-tokens
+ description: |
+ GitHub Personal Access Tokens used by the workspaces feature for
+ cloning private repos and (optionally) registering webhooks. Stored
+ encrypted-at-rest via AES-GCM; the plaintext is surfaced exactly
+ once on POST and never returned thereafter.
+ - name: tunnels
+ description: |
+ Managed Tunnels — a server-orchestrated outbound tunnel that gives
+ the server a public URL while it sits behind NAT. Cloudflare Tunnel
+ ships now; ngrok is reserved. The live tunnel URL is the preferred
+ origin for GitHub webhook delivery URLs.
paths:
/health:
@@ -707,6 +732,35 @@ paths:
"500":
$ref: "#/components/responses/InternalError"
+ /api/v1/projects/{path}/workspaces:
+ parameters:
+ - $ref: "#/components/parameters/ProjectHash"
+ get:
+ operationId: listProjectWorkspaces
+ tags: [projects]
+ summary: List workspaces that contain this project
+ description: |
+ Returns every workspace that has this project attached, owned or
+ linked. The project page uses this to show "Workspaces" chips
+ the user can click to jump to the workspace detail page.
+
+ Empty list when the project isn't part of any workspace yet —
+ either it was indexed directly via /projects without ever being
+ linked, or all its memberships have been detached.
+ responses:
+ "200":
+ description: Memberships
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ProjectWorkspaceList"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "500":
+ $ref: "#/components/responses/InternalError"
+
/api/v1/projects/{path}/search:
parameters:
- $ref: "#/components/parameters/ProjectHash"
@@ -722,9 +776,12 @@ paths:
then merged into per-file groups and ranked by best match score.
`min_score` semantics:
- - omitted → server default `0.4` (calibrated for CodeRankEmbed-Q8)
+ - omitted → server default `0.2` (light relevance floor that
+ doesn't silently drop results for abstract natural-language
+ queries whose best chunks score in [0.25, 0.35])
- explicit `0` → return everything above HNSW floor
- - explicit positive → that floor
+ - explicit positive → that floor (use `0.4+` for strict
+ code-symbol searches calibrated for CodeRankEmbed-Q8)
`query_time_ms` is rounded to 1 decimal place.
requestBody:
@@ -1090,1205 +1147,3773 @@ paths:
"404":
$ref: "#/components/responses/NotFound"
-components:
- securitySchemes:
- bearerAuth:
- type: http
- scheme: bearer
- description: "API key passed as `Authorization: Bearer `"
-
- parameters:
- ProjectHash:
- name: path
- in: path
- required: true
+ /api/v1/workspaces:
+ get:
+ operationId: listWorkspaces
+ tags: [workspaces]
+ summary: List all workspaces
description: |
- First 16 hex chars of `SHA1(host_path)`. See
- `internal/projects.HashPath`.
- schema:
- type: string
- pattern: "^[a-f0-9]{16}$"
- example: "5b7d2c9e1a3f8042"
-
- responses:
- Unauthorized:
- description: Missing or invalid API key
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- NotFound:
- description: Resource not found
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- Unprocessable:
- description: Malformed request body or missing required fields
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- InternalError:
- description: Unhandled server error
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- IndexerUnavailable:
- description: Indexing service not configured
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- Forbidden:
- description: Authenticated, but lacks the required role
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
- Conflict:
- description: Resource already exists (e.g. email taken)
- content:
- application/json:
- schema:
- $ref: "#/components/schemas/Error"
+ Returns every workspace in the system, newest first. Server-wide
+ shared visibility — the caller sees workspaces created by any user.
+ responses:
+ "200":
+ description: Workspace list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WorkspaceListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ post:
+ operationId: createWorkspace
+ tags: [workspaces]
+ summary: Create a new workspace
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateWorkspaceRequest"
+ responses:
+ "201":
+ description: Workspace created
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Workspace"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
- schemas:
- Error:
- type: object
- required: [detail]
- properties:
- detail:
+ /api/v1/workspaces/{id}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
type: string
+ description: Workspace ID (ULID-like string returned by createWorkspace).
+ get:
+ operationId: getWorkspace
+ tags: [workspaces]
+ summary: Get a single workspace
+ responses:
+ "200":
+ description: Workspace
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Workspace"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ patch:
+ operationId: updateWorkspace
+ tags: [workspaces]
+ summary: Update workspace metadata
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateWorkspaceRequest"
+ responses:
+ "200":
+ description: Updated workspace
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Workspace"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ delete:
+ operationId: deleteWorkspace
+ tags: [workspaces]
+ summary: Delete a workspace
+ description: |
+ Removes the workspace row. workspace_projects memberships
+ referencing this workspace are removed via ON DELETE CASCADE;
+ the underlying projects, git_repos peers, and on-disk clones
+ are preserved (delete those explicitly via /projects/{path}
+ when desired).
+ responses:
+ "204":
+ description: Deleted
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
- BootstrapStatusResponse:
- type: object
- required: [needs_bootstrap]
- properties:
- needs_bootstrap:
- type: boolean
- description: True when the users table is empty.
+ /api/v1/groups:
+ get:
+ operationId: listGroups
+ tags: [groups]
+ summary: List view-groups
+ description: |
+ Admins see every group; a regular user sees only the groups they
+ belong to (so the dashboard's "share to group" picker can populate).
+ responses:
+ "200":
+ description: Group list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GroupListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ post:
+ operationId: createGroup
+ tags: [groups]
+ summary: Create a view-group (admin only)
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateGroupRequest"
+ responses:
+ "201":
+ description: Group created
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Group"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
- User:
- type: object
- required: [id, email, role, must_change_password, created_at, updated_at, disabled]
- properties:
- id:
+ /api/v1/groups/{id}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
type: string
- email:
+ description: View-group ID.
+ get:
+ operationId: getGroup
+ tags: [groups]
+ summary: Get a view-group (admin only)
+ responses:
+ "200":
+ description: Group
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Group"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ patch:
+ operationId: updateGroup
+ tags: [groups]
+ summary: Update a view-group (admin only)
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateGroupRequest"
+ responses:
+ "200":
+ description: Updated group
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Group"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ delete:
+ operationId: deleteGroup
+ tags: [groups]
+ summary: Delete a view-group (admin only)
+ description: |
+ Cascades to memberships and project/workspace shares referencing
+ the group.
+ responses:
+ "204":
+ description: Deleted
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+
+ /api/v1/groups/{id}/members:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
type: string
- format: email
+ description: View-group ID.
+ get:
+ operationId: listGroupMembers
+ tags: [groups]
+ summary: List members of a view-group (admin only)
+ responses:
+ "200":
+ description: Member list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GroupMemberListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ post:
+ operationId: addGroupMember
+ tags: [groups]
+ summary: Add a user to a view-group (admin only)
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/AddGroupMemberRequest"
+ responses:
+ "204":
+ description: Member added (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+
+ /api/v1/groups/{id}/members/{userId}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: View-group ID.
+ - name: userId
+ in: path
+ required: true
+ schema:
+ type: string
+ description: User ID to remove.
+ delete:
+ operationId: removeGroupMember
+ tags: [groups]
+ summary: Remove a user from a view-group (admin only)
+ responses:
+ "204":
+ description: Member removed (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+
+ /api/v1/projects/{hash}/shares:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Project path hash.
+ get:
+ operationId: listProjectShares
+ tags: [groups]
+ summary: List the view-groups an external project is shared to (admin only)
+ responses:
+ "200":
+ description: Group ids the project is shared to
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GroupIdListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ post:
+ operationId: shareProjectToGroup
+ tags: [groups]
+ summary: Share an external project to a view-group (admin only)
+ description: |
+ Only EXTERNAL projects (with a git_repos peer) may be shared — sharing
+ a personal/local project returns 422.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ShareToGroupRequest"
+ responses:
+ "204":
+ description: Shared (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+
+ /api/v1/projects/{hash}/shares/{groupId}:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Project path hash.
+ - name: groupId
+ in: path
+ required: true
+ schema:
+ type: string
+ description: View-group ID.
+ delete:
+ operationId: unshareProjectFromGroup
+ tags: [groups]
+ summary: Revoke a project↔group share (admin only)
+ responses:
+ "204":
+ description: Unshared (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+
+ /api/v1/projects/{hash}/owner:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Project path hash.
+ put:
+ operationId: reassignProjectOwner
+ tags: [projects]
+ summary: Reassign the owner of a local project (admin only)
+ description: |
+ Only LOCAL projects (no git_repos peer) can be reassigned — external
+ projects are ownerless by design and return 422.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ReassignOwnerRequest"
+ responses:
+ "200":
+ description: Updated project
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Project"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+
+ /api/v1/workspaces/{id}/shares:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Workspace ID.
+ get:
+ operationId: listWorkspaceShares
+ tags: [workspaces]
+ summary: List the view-groups a workspace is shared to
+ responses:
+ "200":
+ description: Group ids the workspace is shared to
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GroupIdListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ post:
+ operationId: shareWorkspaceToGroup
+ tags: [workspaces]
+ summary: Share a workspace to a view-group
+ description: |
+ The workspace owner may share to a group they belong to; an admin may
+ share to any group. Visibility only — each project inside the workspace
+ is still access-checked per viewer.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ShareToGroupRequest"
+ responses:
+ "204":
+ description: Shared (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/workspaces/{id}/shares/{groupId}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Workspace ID.
+ - name: groupId
+ in: path
+ required: true
+ schema:
+ type: string
+ description: View-group ID.
+ delete:
+ operationId: unshareWorkspaceFromGroup
+ tags: [workspaces]
+ summary: Revoke a workspace↔group share (owner or admin)
+ responses:
+ "204":
+ description: Unshared (idempotent)
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "403":
+ $ref: "#/components/responses/Forbidden"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/git-repos:
+ post:
+ operationId: addGitRepo
+ tags: [projects]
+ summary: Clone + index a GitHub repository as a standalone project
+ description: |
+ Inserts a projects row (status=pending), a matching git_repos
+ row, and enqueues a `clone_repo` background job that is chained
+ to an `index_repo` job on success. The resulting project lives
+ in `/api/v1/projects` and is initially attached to no
+ workspaces — link it into specific workspaces via
+ `/api/v1/workspaces/{id}/projects` if desired.
+
+ `token_id` is required for private repos; `webhook_mode`
+ defaults to `manual`. The response carries a one-shot
+ `webhook_url` + `webhook_secret` so the operator can register
+ the webhook in GitHub by hand (`auto` mode does it for them).
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/AddGitRepoRequest"
+ responses:
+ "201":
+ description: Project created + clone enqueued
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GitRepoCreated"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/workspaces/{id}/projects:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Workspace ID.
+ get:
+ operationId: listWorkspaceProjects
+ tags: [workspaces]
+ summary: List projects currently linked to a workspace
+ responses:
+ "200":
+ description: Workspace project list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WorkspaceProjectListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ post:
+ operationId: linkProjectToWorkspace
+ tags: [workspaces]
+ summary: Link an existing project into this workspace
+ description: |
+ Inserts a (workspace_id, project_path) row into
+ `workspace_projects`. The project must already exist and be in
+ `status='indexed'`. Duplicates return 409. The project itself
+ is untouched — workspaces are pure membership collections.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/LinkProjectRequest"
+ responses:
+ "201":
+ description: Linked
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WorkspaceProjectMembership"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/workspaces/{id}/projects/{hash}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Project's path_hash.
+ delete:
+ operationId: unlinkProjectFromWorkspace
+ tags: [workspaces]
+ summary: Remove a project from this workspace (does not delete the project)
+ description: |
+ Drops the (workspace_id, project_path) row. The project itself,
+ its clone on disk, its indexed content, and its memberships in
+ other workspaces are all untouched.
+ responses:
+ "204":
+ description: Unlinked
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/workspaces/{id}/search:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ get:
+ operationId: workspaceSearch
+ tags: [workspaces]
+ summary: Hybrid BM25+dense search across all repos in a workspace
+ description: |
+ Embeds the query, then fans out two parallel sub-queries per
+ project: dense (chromem cosine) and sparse (SQLite FTS5 BM25
+ over chunks_fts). Per-project the two ranked lists are fused
+ via Reciprocal Rank Fusion (k=60).
+
+ Across projects an α-blended candidacy score (`α × bm25_norm
+ + (1-α) × dense_norm` with α=0.5, both signals min-max
+ normalized per query) plus a relative threshold
+ (`candidacy ≥ best × 0.4`) drops projects that share no
+ semantic and no lexical overlap with the query. Pure-dense
+ fan-out returned the N nearest vectors regardless of
+ absolute distance, so workspaces routinely surfaced
+ irrelevant repos at noise-level cosine similarity; the BM25
+ gate fixes that by requiring at least one of the two
+ signals to be a meaningful fraction of the best.
+
+ The chunks list is then built by round-robin interleaving:
+ rank-1 from every surviving project before any rank-2, etc.,
+ capped per-project so one dominant repo can't take every
+ slot. Always live — no background rebuild job, no debounce.
+ parameters:
+ - name: q
+ in: query
+ required: true
+ schema:
+ type: string
+ minLength: 1
+ - name: top_projects
+ in: query
+ required: false
+ schema:
+ type: integer
+ minimum: 1
+ maximum: 50
+ default: 10
+ - name: top_chunks
+ in: query
+ required: false
+ schema:
+ type: integer
+ minimum: 1
+ maximum: 200
+ default: 20
+ - name: min_score
+ in: query
+ required: false
+ description: |
+ Floor on raw cosine similarity. Chunks below this are
+ dropped before aggregation. Default 0.4 — stricter than
+ the per-project search default (0.2), so an unfiltered
+ workspace query doesn't return low-similarity cross-repo
+ noise. Pass 0 explicitly for
+ intentional cross-project sweeps that need long-tail
+ recall (e.g. "authentication and authorization" across a
+ mixed-domain workspace).
+ schema:
+ type: number
+ format: float
+ minimum: 0
+ maximum: 1
+ default: 0.4
+ responses:
+ "200":
+ description: Search results
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WorkspaceSearchResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/projects/{hash}/git-repo:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ get:
+ operationId: getProjectGitRepo
+ tags: [projects]
+ summary: Read the git_repos metadata for an external project
+ description: |
+ Returns clone + webhook metadata. 404 when the project is local
+ (has no git_repos row). The webhook_secret is included so the
+ operator can paste it into GitHub Settings → Webhooks.
+ responses:
+ "200":
+ description: git_repos row
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GitRepo"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ patch:
+ operationId: updateProjectGitRepoSync
+ tags: [projects]
+ summary: Reconfigure how an external project is kept in sync
+ description: |
+ Switches the project's sync method directly from the project page:
+
+ - `webhook` — GitHub push webhooks trigger reindex. Sets
+ webhook_mode='auto' and attempts to auto-register the hook
+ (needs CIX_PUBLIC_URL / a live tunnel and a PAT with
+ admin:repo_hook). If registration fails, the server falls back to
+ polling and says so in `note`.
+ - `polling` — the shared scheduler fetches on an interval
+ (`poll_interval_seconds`, optional). Sets webhook_mode='disabled'.
+ - `manual` — no automatic sync; reindex on demand. Sets
+ webhook_mode='disabled', polling off.
+
+ Switching away from `webhook` best-effort de-registers any hook the
+ server previously created. 404 when the project is local.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateGitRepoSyncRequest"
+ responses:
+ "200":
+ description: Updated sync configuration
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateGitRepoSyncResult"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ description: Invalid sync_method
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/projects/{hash}/webhook-info:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ get:
+ operationId: getProjectWebhookInfo
+ tags: [projects]
+ summary: Webhook URL + secret for manual GitHub setup
+ description: |
+ Returns the publicly-reachable webhook URL and the HMAC secret
+ for an external (GitHub-cloned) project. Only projects with a
+ `git_repos` peer participate in webhook delivery — local-path
+ projects have no clone lifecycle and no webhook.
+
+ The handler returns 404 in two distinct cases, indistinguishable
+ on the wire: (a) the `path_hash` doesn't resolve to any project
+ at all, and (b) the project exists but is local. Callers that
+ need to disambiguate must query `GET /projects/{hash}` first to
+ confirm existence, then treat a subsequent webhook-info 404 as
+ "local project, no webhook to configure".
+ responses:
+ "200":
+ description: Webhook coordinates
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WebhookInfoResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ description: |
+ Either the project does not exist, OR the project exists but
+ is local (no `git_repos` row, no webhook to surface).
+ Disambiguate by checking `GET /projects/{hash}` first.
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/projects/{hash}/reindex:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ post:
+ operationId: reindexProject
+ tags: [projects]
+ summary: Manually re-trigger the clone + index pipeline
+ description: |
+ Enqueues a fresh `clone_repo` job for the matching git_repos
+ row. 422 when the project is local (no clone pipeline — local
+ projects reindex via the CLI). Dedupe collapses repeated
+ triggers into the existing in-flight job.
+
+ Normally the clone job decides incremental vs full automatically
+ (incremental when an `indexed_sha` is recorded and tree.Diff
+ succeeds; full on first-index, partial-failure recovery,
+ or embedding-model change). Pass `full=true` to force a full
+ reindex — useful when index drift is suspected. With `full=true`
+ the endpoint clears `git_repos.indexed_sha` synchronously so the
+ next clone+index pass starts from a clean slate.
+ parameters:
+ - name: full
+ in: query
+ required: false
+ schema:
+ type: boolean
+ default: false
+ description: |
+ When true, clears `indexed_sha` for this repo before
+ enqueueing so the next index_repo run does a full reindex
+ (re-embeds every file, wipes prior chunks/symbols/refs/
+ file_hashes). Use to recover from suspected drift.
+ responses:
+ "202":
+ description: Reindex enqueued (or already running — dedupe)
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ReindexEnqueuedResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/projects/{hash}/force-stop:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ post:
+ operationId: forceStopIndex
+ tags: [projects]
+ summary: Force-stop an in-flight index for an external project
+ description: |
+ Hard-aborts the clone + index pipeline for an external
+ (GitHub-cloned) project. 422 when the project is local — local
+ projects index via the CLI and have no server-side pipeline to
+ stop (use the CLI's own cancel/Ctrl-C).
+
+ Two effects, both best-effort:
+ 1. Pending/running `clone_repo` + `index_repo` jobs for this
+ repo are deleted from the queue so the pipeline cannot
+ retry or resume.
+ 2. The active in-process index session (if any) is cancelled,
+ which unblocks the next `cix index/begin` and flips the
+ project status back to a terminal state.
+
+ Already-committed chunks/symbols are NOT rolled back (matches
+ the CLI's cancel semantics) — a subsequent sync/reindex
+ overwrites them.
+ responses:
+ "202":
+ description: Force-stop processed
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ForceStopResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/webhooks/github/{hash}:
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Project's path_hash (16 hex chars).
+ post:
+ operationId: receiveGithubWebhook
+ tags: [projects]
+ summary: Receive a GitHub webhook delivery (public, HMAC-authenticated)
+ description: |
+ Public endpoint — auth is bypassed. The `X-Hub-Signature-256`
+ header must be HMAC-SHA256 of the request body keyed by the
+ project's `git_repos.webhook_secret`. Mismatched signatures
+ return 401; an unknown `hash` returns 404. On a valid `push`
+ for the tracked branch the server enqueues a `clone_repo` job
+ (dedupe collapses burst deliveries).
+
+ GitHub `ping` deliveries return 200 with no side effects so
+ the setup confirmation flow works.
+ security: []
+ parameters:
+ - name: X-Hub-Signature-256
+ in: header
+ required: false
+ schema:
+ type: string
+ description: HMAC-SHA256 over the body, hex-encoded with sha256= prefix.
+ - name: X-GitHub-Event
+ in: header
+ required: false
+ schema:
+ type: string
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ responses:
+ "202":
+ description: Delivery accepted (enqueued or already in flight)
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WebhookAccepted"
+ "200":
+ description: Ping or no-op delivery
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WebhookAccepted"
+ "401":
+ description: HMAC signature mismatch
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "404":
+ description: Unknown project hash
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/jobs:
+ get:
+ operationId: listJobs
+ tags: [workspaces]
+ summary: List background jobs (status / type filter)
+ parameters:
+ - name: status
+ in: query
+ required: false
+ schema:
+ type: string
+ enum: [pending, running, completed, failed]
+ - name: type
+ in: query
+ required: false
+ schema:
+ type: string
+ - name: limit
+ in: query
+ required: false
+ schema:
+ type: integer
+ minimum: 1
+ maximum: 500
+ default: 100
+ responses:
+ "200":
+ description: Job list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/JobListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/github-tokens:
+ get:
+ operationId: listGithubTokens
+ tags: [github-tokens]
+ summary: List stored GitHub PATs (metadata only)
+ description: |
+ Returns metadata for every stored token — name, scopes, timestamps.
+ Plaintext values are NEVER returned by this endpoint; the only time
+ plaintext is surfaced is the response to POST /api/v1/github-tokens.
+ responses:
+ "200":
+ description: Token list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GithubTokenListResponse"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ post:
+ operationId: createGithubToken
+ tags: [github-tokens]
+ summary: Store a new GitHub PAT (encrypted-at-rest)
+ description: |
+ Accepts a plaintext token in the request body. The server encrypts
+ it with AES-GCM via internal/secrets and persists only the
+ ciphertext. The response carries metadata only — the plaintext is
+ already in the caller's hands. Scope validation against the GitHub
+ API is deferred to a later release; for now the server stores the
+ supplied scopes verbatim, if any.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateGithubTokenRequest"
+ responses:
+ "201":
+ description: Token stored
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GithubToken"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ $ref: "#/components/responses/Conflict"
+ "422":
+ $ref: "#/components/responses/Unprocessable"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/github-tokens/{id}:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ delete:
+ operationId: deleteGithubToken
+ tags: [github-tokens]
+ summary: Delete a stored GitHub PAT
+ description: |
+ Permanently removes the encrypted blob. Subsequent workspace
+ operations that reference this token id will fail. The
+ git_repos.token_id FK uses ON DELETE SET NULL, so existing
+ rows survive token deletion, but their re-clone / webhook
+ flows that need GitHub auth will fail until a token is
+ re-attached.
+ responses:
+ "204":
+ description: Deleted
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/github-tokens/{id}/accounts:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ get:
+ operationId: listTokenAccounts
+ tags: [github-tokens]
+ summary: List the GitHub accounts visible to a stored PAT
+ description: |
+ Returns the PAT owner's personal account plus every organisation
+ the PAT can see. The dashboard renders this as the first step of
+ the add-repo flow so the operator can drill into a specific
+ account before picking a repository — useful when /user/repos
+ doesn't surface every org repo (e.g. SAML-protected orgs only
+ appear under /orgs/{login}/repos).
+
+ The PAT plaintext never leaves the server; the dashboard only
+ addresses the token by id.
+ responses:
+ "200":
+ description: A list of accounts (1 user + 0..N orgs)
+ content:
+ application/json:
+ schema:
+ type: object
+ required: [accounts, total]
+ properties:
+ accounts:
+ type: array
+ items:
+ $ref: "#/components/schemas/GithubAccount"
+ total:
+ type: integer
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ description: GitHub rejected the token
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "502":
+ description: Could not reach GitHub
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/github-tokens/{id}/repos:
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ get:
+ operationId: listTokenRepos
+ tags: [github-tokens]
+ summary: List GitHub repositories visible to a stored PAT
+ description: |
+ Returns the repos the PAT can see, ordered by most recently
+ pushed. Used by the dashboard's add-repo flow to populate the
+ repo picker.
+
+ When `account` is omitted the response is the
+ affiliation-aggregated view (GET /user/repos) — every repo the
+ PAT can see as owner, collaborator, or organization member. When
+ `account` is set the server hits `/users/{login}/repos` for
+ a user account or `/orgs/{login}/repos` for an org, depending
+ on `account_type`. Use the account-scoped call when /user/repos
+ misses an org repo (typical for SAML-protected orgs).
+
+ The PAT plaintext never leaves the server; the dashboard only
+ addresses the token by id. Up to 500 repos are returned (5 pages
+ of 100). When the PAT can see more than that, rely on client-side
+ text filtering or pick a more specific account.
+ parameters:
+ - name: q
+ in: query
+ required: false
+ description: Optional case-insensitive substring filter on full_name.
+ schema:
+ type: string
+ - name: account
+ in: query
+ required: false
+ description: |
+ Optional account login to scope the listing to. When set,
+ `account_type` must also be set.
+ schema:
+ type: string
+ - name: account_type
+ in: query
+ required: false
+ description: |
+ Required when `account` is set; ignored otherwise.
+ schema:
+ type: string
+ enum: [user, org]
+ responses:
+ "200":
+ description: A list of repositories
+ content:
+ application/json:
+ schema:
+ type: object
+ required: [repos, total]
+ properties:
+ repos:
+ type: array
+ items:
+ $ref: "#/components/schemas/GithubRepo"
+ total:
+ type: integer
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "404":
+ $ref: "#/components/responses/NotFound"
+ "422":
+ description: GitHub rejected the token
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "502":
+ description: Could not reach GitHub
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/tunnels:
+ get:
+ operationId: listTunnels
+ tags: [tunnels]
+ summary: List tunnel providers and their status
+ description: |
+ Returns one entry per known provider (cloudflare, ngrok). The
+ active provider carries live status; others are reported as
+ unavailable / "coming soon".
+ responses:
+ "200":
+ description: Provider catalog
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelCatalog"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+
+ /api/v1/tunnels/config:
+ get:
+ operationId: getTunnelConfig
+ tags: [tunnels]
+ summary: Get the dashboard-managed tunnel configuration
+ description: |
+ Returns the persisted tunnel config (enable/mode/hostname). The
+ connector token is never returned — `token_set` reports whether one
+ is stored.
+ responses:
+ "200":
+ description: Current config
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelConfig"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+ put:
+ operationId: updateTunnelConfig
+ tags: [tunnels]
+ summary: Update tunnel configuration and apply it
+ description: |
+ Persists the config and immediately applies it: enabling starts the
+ tunnel, disabling stops it, changing mode/hostname/token restarts it.
+ Omit `token` to keep the existing one; send an empty string to clear.
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelConfigUpdate"
+ responses:
+ "200":
+ description: Applied; returns the resulting status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelStatus"
+ "400":
+ description: Invalid configuration
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/tunnels/binaries:
+ get:
+ operationId: listTunnelBinaries
+ tags: [tunnels]
+ summary: Agent-binary status per provider
+ description: |
+ Reports whether each provider's agent binary (cloudflared, ngrok) is
+ present on this server, its path/version, and whether the server can
+ install/update it from the UI (managed mode — set by the Docker
+ images). When not installed and not managed, the dashboard shows
+ manual install instructions (local runs only).
+ responses:
+ "200":
+ description: Per-provider binary status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelBinaryList"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+
+ /api/v1/tunnels/binaries/{provider}/install:
+ post:
+ operationId: installTunnelBinary
+ tags: [tunnels]
+ summary: Download/install a provider's agent binary (managed mode)
+ description: |
+ Downloads the latest stable agent binary into the managed directory.
+ Only available when binary management is enabled
+ (CIX_TUNNEL_BIN_MANAGED=true); otherwise returns 409.
+ parameters:
+ - name: provider
+ in: path
+ required: true
+ schema:
+ type: string
+ enum: [cloudflare, ngrok]
+ responses:
+ "200":
+ description: Installed; returns the new binary status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelBinaryList"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ description: Binary management is disabled
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "500":
+ description: Download/install failed
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /api/v1/tunnels/binaries/{provider}/update:
+ post:
+ operationId: updateTunnelBinary
+ tags: [tunnels]
+ summary: Update a provider's agent binary to the latest (managed mode)
+ description: |
+ Re-downloads the latest stable agent binary, replacing the managed
+ copy. Restart the tunnel afterwards to run the new version. Only
+ available when CIX_TUNNEL_BIN_MANAGED=true.
+ parameters:
+ - name: provider
+ in: path
+ required: true
+ schema:
+ type: string
+ enum: [cloudflare, ngrok]
+ responses:
+ "200":
+ description: Updated; returns the new binary status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelBinaryList"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ description: Binary management is disabled
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "500":
+ description: Download/update failed
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /api/v1/tunnels/status:
+ get:
+ operationId: getTunnelStatus
+ tags: [tunnels]
+ summary: Active tunnel status snapshot
+ responses:
+ "200":
+ description: Active provider status (or disabled placeholder)
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelStatus"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+
+ /api/v1/tunnels/test:
+ post:
+ operationId: testTunnel
+ tags: [tunnels]
+ summary: End-to-end connectivity test through the tunnel
+ description: |
+ Issues a GET to the tunnel's public URL + `/health`; the request
+ egresses to the provider edge and routes back through the tunnel
+ to this server — a true round-trip, not just a liveness check.
+ responses:
+ "200":
+ description: Test result (ok=false when unreachable)
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelTestResult"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ description: No active tunnel
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /api/v1/tunnels/restart:
+ post:
+ operationId: restartTunnel
+ tags: [tunnels]
+ summary: Restart the active tunnel subprocess
+ responses:
+ "200":
+ description: Restarted; returns the new status
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/TunnelStatus"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "409":
+ description: No active tunnel
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "500":
+ description: Restart failed
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /api/v1/github/webhooks/reconcile:
+ post:
+ operationId: reconcileWebhooks
+ tags: [tunnels]
+ summary: Re-register all webhook_mode=auto repos against the current public URL
+ description: |
+ Re-points every `webhook_mode=auto` repository's GitHub webhook at
+ the current public base URL (the live tunnel URL when present, else
+ CIX_PUBLIC_URL). Idempotent. Runs automatically on boot and on
+ tunnel URL changes; this endpoint triggers it manually.
+ responses:
+ "200":
+ description: Reconcile outcome
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WebhookReconcileResult"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+ "503":
+ $ref: "#/components/responses/WorkspacesDisabled"
+
+ /api/v1/github/webhooks/origin:
+ get:
+ operationId: getWebhookOrigin
+ tags: [tunnels]
+ summary: Effective public origin for GitHub webhook delivery
+ description: |
+ Reports the public origin that webhook delivery URLs are built
+ against, and where it comes from: a live managed tunnel, the
+ operator-set CIX_PUBLIC_URL (i.e. the server is made public by
+ infrastructure — reverse proxy, ingress, static IP), or none.
+
+ A managed tunnel is OPTIONAL: when CIX_PUBLIC_URL is set, webhooks
+ work without any tunnel. The dashboard uses this to avoid presenting
+ "no tunnel" as a problem when the origin is provided by infra.
+ responses:
+ "200":
+ description: Effective webhook delivery origin
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WebhookOrigin"
+ "401":
+ $ref: "#/components/responses/Unauthorized"
+
+components:
+ securitySchemes:
+ bearerAuth:
+ type: http
+ scheme: bearer
+ description: "API key passed as `Authorization: Bearer <api-key>`"
+
+ parameters:
+ ProjectHash:
+ name: path
+ in: path
+ required: true
+ description: |
+ First 16 hex chars of `SHA1(host_path)`. See
+ `internal/projects.HashPath`.
+ schema:
+ type: string
+ pattern: "^[a-f0-9]{16}$"
+ example: "5b7d2c9e1a3f8042"
+
+ responses:
+ Unauthorized:
+ description: Missing or invalid API key
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ NotFound:
+ description: Resource not found
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ Unprocessable:
+ description: Malformed request body or missing required fields
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ InternalError:
+ description: Unhandled server error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ IndexerUnavailable:
+ description: Indexing service not configured
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ Forbidden:
+ description: Authenticated, but lacks the required role
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ Conflict:
+ description: Resource already exists (e.g. email taken)
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ WorkspacesDisabled:
+ description: |
+ Required service is not configured on this server (commonly an
+ encryption-key boot failure preventing github_tokens from
+ wiring, or a partial-test Deps in non-production contexts).
+ Check the server logs and restart.
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ schemas:
+ Error:
+ type: object
+ required: [detail]
+ properties:
+ detail:
+ type: string
+
+ BootstrapStatusResponse:
+ type: object
+ required: [needs_bootstrap]
+ properties:
+ needs_bootstrap:
+ type: boolean
+ description: True when the users table is empty.
+
+ User:
+ type: object
+ required: [id, email, role, must_change_password, created_at, updated_at, disabled]
+ properties:
+ id:
+ type: string
+ email:
+ type: string
+ format: email
+ role:
+ type: string
+ enum: [admin, user]
+ must_change_password:
+ type: boolean
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+ disabled:
+ type: boolean
+ description: |
+ True when `disabled_at` is set. Disabled users cannot
+ authenticate via password OR API key.
+ disabled_at:
+ type: string
+ format: date-time
+ nullable: true
+
+ UserWithStats:
+ allOf:
+ - $ref: "#/components/schemas/User"
+ - type: object
+ required: [active_sessions_count, api_keys_count]
+ properties:
+ last_login_at:
+ type: string
+ format: date-time
+ nullable: true
+ description: |
+ Most recent session creation timestamp (RFC3339).
+ Null if the user has never logged in.
+ active_sessions_count:
+ type: integer
+ minimum: 0
+ description: Count of non-expired sessions for this user.
+ api_keys_count:
+ type: integer
+ minimum: 0
+ description: Count of non-revoked API keys owned by this user.
+
+ UserListResponse:
+ type: object
+ required: [users, total]
+ properties:
+ users:
+ type: array
+ items:
+ $ref: "#/components/schemas/UserWithStats"
+ total:
+ type: integer
+
+ LoginRequest:
+ type: object
+ required: [email, password]
+ properties:
+ email:
+ type: string
+ format: email
+ password:
+ type: string
+ minLength: 1
+
+ LoginResponse:
+ type: object
+ required: [user]
+ properties:
+ user:
+ $ref: "#/components/schemas/User"
+
+ MeResponse:
+ type: object
+ required: [user, auth_method, groups]
+ properties:
+ user:
+ $ref: "#/components/schemas/User"
+ auth_method:
+ type: string
+ enum: [session, api_key]
+ description: |
+ Tells the dashboard whether to surface "logout" (session) or
+ hide it (api_key access — there's nothing to log out of).
+ groups:
+ type: array
+ description: |
+ View-groups the caller belongs to. Lets the dashboard scope the
+ "share to group" picker without a second round-trip. Empty for a
+ user in no groups; admins still only list their own memberships
+ here (the full group list comes from GET /groups).
+ items:
+ $ref: "#/components/schemas/Group"
+
+ ChangePasswordRequest:
+ type: object
+ required: [current_password, new_password]
+ properties:
+ current_password:
+ type: string
+ minLength: 1
+ new_password:
+ type: string
+ minLength: 8
+ description: Minimum 8 characters. No upper bound.
+
+ CreateUserRequest:
+ type: object
+ required: [email, initial_password, role]
+ properties:
+ email:
+ type: string
+ format: email
+ initial_password:
+ type: string
+ minLength: 8
+ description: |
+ One-time password the new user must change on first login.
+ The admin shares this out-of-band.
+ role:
+ type: string
+ enum: [admin, user]
+
+ UpdateUserRequest:
+ type: object
+ properties:
role:
type: string
- enum: [admin, viewer]
- must_change_password:
- type: boolean
- created_at:
+ enum: [admin, user]
+ description: |
+ New role for the user. Refused for the last enabled admin
+ when set to `user`.
+ disabled:
+ type: boolean
+ description: |
+ When true, the user can no longer authenticate. Refused for
+ the last enabled admin when set to true.
+
+ RuntimeConfig:
+ type: object
+ required:
+ - embedding_model
+ - llama_ctx_size
+ - llama_n_gpu_layers
+ - llama_n_threads
+ - max_embedding_concurrency
+ - llama_batch_size
+ - source
+ properties:
+ embedding_model:
+ type: string
+ description: HF repo ID or absolute filesystem path to a .gguf file.
+ llama_ctx_size:
+ type: integer
+ minimum: 1
+ llama_n_gpu_layers:
+ type: integer
+ description: -1 = all layers (Metal/CUDA), 0 = CPU only.
+ llama_n_threads:
+ type: integer
+ minimum: 0
+ description: 0 = let llama-server auto-detect.
+ max_embedding_concurrency:
+ type: integer
+ minimum: 1
+ llama_batch_size:
+ type: integer
+ minimum: 1
+ source:
+ type: object
+ additionalProperties:
+ type: string
+ enum: [db, env, recommended]
+ description: |
+ Per-field origin label so the dashboard can render a "DB" /
+ "Env" / "Recommended" pill next to each value. Keys match the
+ other field names: `embedding_model`, `llama_ctx_size`, ...
+ recommended:
+ $ref: "#/components/schemas/RuntimeConfigRecommended"
+ updated_at:
+ type: string
+ format: date-time
+ nullable: true
+ description: When the runtime_settings row was last written, or null when only env/recommended are in effect.
+ updated_by:
+ type: string
+ nullable: true
+ description: Who issued the last PUT, captured from the active session.
+
+ RuntimeConfigRecommended:
+ type: object
+ required:
+ - embedding_model
+ - llama_ctx_size
+ - llama_n_gpu_layers
+ - llama_n_threads
+ - max_embedding_concurrency
+ - llama_batch_size
+ properties:
+ embedding_model: { type: string }
+ llama_ctx_size: { type: integer }
+ llama_n_gpu_layers: { type: integer }
+ llama_n_threads: { type: integer }
+ max_embedding_concurrency: { type: integer }
+ llama_batch_size: { type: integer }
+
+ RuntimeConfigUpdate:
+ type: object
+ description: |
+ All fields optional. Send a value to set/replace the override for
+ that field, send `""` (string fields) or `0` (numeric fields) to
+ CLEAR the override (next read falls back to env / recommended).
+ Omitted fields keep their current value.
+ properties:
+ embedding_model:
+ type: string
+ nullable: true
+ llama_ctx_size:
+ type: integer
+ nullable: true
+ llama_n_gpu_layers:
+ type: integer
+ nullable: true
+ llama_n_threads:
+ type: integer
+ nullable: true
+ max_embedding_concurrency:
+ type: integer
+ nullable: true
+ llama_batch_size:
+ type: integer
+ nullable: true
+
+ SidecarStatus:
+ type: object
+ required: [state, ready, in_flight]
+ properties:
+ state:
+ type: string
+ enum: [running, starting, restarting, failed, disabled]
+ pid:
+ type: integer
+ minimum: 0
+ description: 0 when no child process is alive (failed / disabled).
+ uptime_seconds:
+ type: integer
+ minimum: 0
+ model:
+ type: string
+ ready:
+ type: boolean
+ last_error:
+ type: string
+ in_flight:
+ type: integer
+ minimum: 0
+ description: Embedding queue depth at the moment of sampling.
+ restart_in_flight:
+ type: boolean
+ description: True between accept of POST /sidecar/restart and respawn completion.
+
+ RestartAccepted:
+ type: object
+ required: [restart_id]
+ properties:
+ restart_id:
+ type: string
+ description: Opaque ID; future versions may expose per-restart progress under this id.
+
+ ModelEntry:
+ type: object
+ required: [id, path, size_bytes]
+ properties:
+ id:
+ type: string
+ description: HF repo ID derived from the cache directory name (e.g. owner/model).
+ path:
+ type: string
+ description: Absolute path to the .gguf file on disk.
+ size_bytes:
+ type: integer
+ format: int64
+ minimum: 0
+
+ ModelList:
+ type: object
+ required: [models, cache_dir]
+ properties:
+ models:
+ type: array
+ items:
+ $ref: "#/components/schemas/ModelEntry"
+ cache_dir:
+ type: string
+ description: The CIX_GGUF_CACHE_DIR that was scanned. Empty list with non-empty cache_dir = no .gguf files found.
+
+ Session:
+ type: object
+ required: [id, created_at, expires_at, last_seen_at, is_current]
+ properties:
+ id:
+ type: string
+ created_at:
+ type: string
+ format: date-time
+ expires_at:
+ type: string
+ format: date-time
+ last_seen_at:
+ type: string
+ format: date-time
+ last_seen_ip:
+ type: string
+ nullable: true
+ last_seen_ua:
+ type: string
+ nullable: true
+ is_current:
+ type: boolean
+ description: True for the session carrying this request.
+
+ SessionListResponse:
+ type: object
+ required: [sessions, total]
+ properties:
+ sessions:
+ type: array
+ items:
+ $ref: "#/components/schemas/Session"
+ total:
+ type: integer
+
+ ApiKey:
+ type: object
+ required: [id, owner_user_id, name, prefix, created_at, revoked]
+ properties:
+ id:
+ type: string
+ owner_user_id:
+ type: string
+ name:
+ type: string
+ prefix:
+ type: string
+ description: |
+ Display-only prefix of the full key (e.g. `cix_a1b2c3d4`).
+ Long enough to recognise in lists, short enough that the
+ original key cannot be reconstructed from it.
+ created_at:
+ type: string
+ format: date-time
+ last_used_at:
+ type: string
+ format: date-time
+ nullable: true
+ last_used_ip:
+ type: string
+ nullable: true
+ last_used_ua:
+ type: string
+ nullable: true
+ revoked:
+ type: boolean
+ revoked_at:
+ type: string
+ format: date-time
+ nullable: true
+
+ ApiKeyListResponse:
+ type: object
+ required: [api_keys, total]
+ properties:
+ api_keys:
+ type: array
+ items:
+ $ref: "#/components/schemas/ApiKey"
+ total:
+ type: integer
+
+ CreateApiKeyRequest:
+ type: object
+ required: [name]
+ properties:
+ name:
+ type: string
+ minLength: 1
+ description: |
+ Human-friendly label shown in the dashboard. The full key
+ value is generated server-side and returned exactly once.
+
+ ApiKeyCreated:
+ type: object
+ required: [api_key, full_key]
+ properties:
+ api_key:
+ $ref: "#/components/schemas/ApiKey"
+ full_key:
+ type: string
+ description: |
+ The plaintext key value. **Returned exactly once.** Store it
+ securely — there is no way to retrieve it later.
+
+ HealthResponse:
+ type: object
+ required: [status]
+ properties:
+ status:
+ type: string
+ enum: [ok, unhealthy]
+ reason:
+ type: string
+ description: Set only when `status` is `unhealthy`.
+
+ StatusResponse:
+ type: object
+ required:
+ - status
+ - backend
+ - server_version
+ - api_version
+ - model_loaded
+ - embedding_model
+ - projects
+ - active_indexing_jobs
+ properties:
+ status:
+ type: string
+ enum: [ok]
+ backend:
+ type: string
+ description: Backend identifier (e.g. `go`).
+ server_version:
+ type: string
+ api_version:
+ type: string
+ example: v1
+ model_loaded:
+ type: boolean
+ description: |
+ Whether the llama-server sidecar reports ready within 500 ms.
+ False when the sidecar is starting or has crashed.
+ embedding_model:
+ type: string
+ description: Hugging Face model id (e.g. `awhiteside/CodeRankEmbed-Q8_0-GGUF`).
+ projects:
+ type: integer
+ minimum: 0
+ description: Total registered projects.
+ active_indexing_jobs:
+ type: integer
+ minimum: 0
+ description: Currently-running `index_runs` rows.
+ update_available:
+ type: boolean
+ description: |
+ True when the version-check service has found a `server/v*`
+ release on GitHub strictly newer than the running server.
+ Field is omitted entirely when version-check is not wired
+ (set `CIX_VERSION_CHECK_ENABLED=false` to disable polling).
+ latest_version:
+ type: string
+ nullable: true
+ description: |
+ Latest released server version (without the `server/v` prefix,
+ e.g. `0.5.1`). Null until the first successful poll completes.
+ release_url:
+ type: string
+ nullable: true
+ description: GitHub release page URL for `latest_version`. Null when unknown.
+ version_check:
+ $ref: "#/components/schemas/VersionCheckStatus"
+
+ VersionCheckStatus:
+ type: object
+ required: [enabled]
+ properties:
+ enabled:
+ type: boolean
+ description: Whether the periodic GitHub poll is running.
+ checked_at:
+ type: string
+ format: date-time
+ nullable: true
+ description: Last poll timestamp (UTC, RFC 3339). Null before the first poll.
+ error:
+ type: string
+ nullable: true
+ description: Last error message, if the most recent poll failed. Null on success.
+
+ ProjectSettings:
+ type: object
+ required: [exclude_patterns, max_file_size]
+ properties:
+ exclude_patterns:
+ type: array
+ items: { type: string }
+ max_file_size:
+ type: integer
+ minimum: 0
+
+ ProjectStats:
+ type: object
+ required: [total_files, indexed_files, total_chunks, total_symbols]
+ properties:
+ total_files:
+ type: integer
+ minimum: 0
+ indexed_files:
+ type: integer
+ minimum: 0
+ total_chunks:
+ type: integer
+ minimum: 0
+ total_symbols:
+ type: integer
+ minimum: 0
+
+ Project:
+ type: object
+ required:
+ - path_hash
+ - host_path
+ - container_path
+ - languages
+ - settings
+ - stats
+ - status
+ - created_at
+ - updated_at
+ - last_indexed_at
+ # owner_user_id is required-but-nullable so external (ownerless)
+ # projects serialize as `"owner_user_id": null` rather than the
+ # field being absent. Lets TS clients use `string | null` instead
+ # of `string | undefined`.
+ - owner_user_id
+ properties:
+ path_hash:
+ type: string
+ pattern: "^[a-f0-9]{16}$"
+ description: First 16 hex chars of SHA1(host_path) — stable URL identifier.
+ host_path:
+ type: string
+ description: Absolute filesystem path on the operator's machine.
+ container_path:
+ type: string
+ description: Path inside the container (often equal to host_path).
+ languages:
+ type: array
+ items: { type: string }
+ settings:
+ $ref: "#/components/schemas/ProjectSettings"
+ stats:
+ $ref: "#/components/schemas/ProjectStats"
+ status:
+ type: string
+ enum: [created, indexing, indexed, error]
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+ last_indexed_at:
+ type: string
+ format: date-time
+ nullable: true
+ indexed_with_model:
+ type: string
+ nullable: true
+ description: |
+ Embedding model identifier active when this project was last
+ (re)indexed. NULL on rows that pre-date drift tracking — the
+ dashboard treats NULL as "Unknown" rather than as drift.
+ owner_user_id:
+ type: string
+ nullable: true
+ description: |
+ User who owns this personal (locally indexed) project. NULL means
+ ownerless — the canonical state for EXTERNAL projects (those with a
+ git_repos peer), which are admin-administered and reachable only via
+ a view-group share.
+ display_path:
type: string
- format: date-time
- updated_at:
+ description: |
+ Human-readable path. The real filesystem path for local projects,
+ the GitHub path for external ones. host_path is the identity key
+ (namespaced per machine for locals) — clients should show display_path.
+ machine_id:
+ type: string
+ nullable: true
+ description: UUID of the machine a local project was indexed on. NULL for external/legacy projects.
+ machine_label:
+ type: string
+ nullable: true
+ description: os.Hostname() of the indexing machine — display only.
+ sqlite_path:
+ type: string
+ nullable: true
+ description: Resolved SQLite database path for the active model. NULL on dashboards that don't expose storage info.
+ chroma_path:
+ type: string
+ nullable: true
+ description: Resolved chromem-go collection directory for this project. NULL when not computed.
+ sqlite_size_bytes:
+ type: integer
+ format: int64
+ nullable: true
+ minimum: 0
+ chroma_size_bytes:
+ type: integer
+ format: int64
+ nullable: true
+ minimum: 0
+
+ ProjectListResponse:
+ type: object
+ required: [projects, total]
+ properties:
+ projects:
+ type: array
+ items:
+ $ref: "#/components/schemas/Project"
+ total:
+ type: integer
+ minimum: 0
+
+ CreateProjectRequest:
+ type: object
+ required: [host_path]
+ properties:
+ host_path:
+ type: string
+ description: The real filesystem path being registered (becomes display_path).
+ machine_id:
+ type: string
+ description: |
+ Per-machine UUID supplied by the CLI (from ~/.cix/machine_id). When
+ present the project is LOCAL and its identity key is namespaced
+ local:{machine_id}:{host_path} so the same path on different
+ machines/users does not collide. Omit for external repos.
+ machine_label:
+ type: string
+ description: os.Hostname() of the indexing machine — display only.
+
+ UpdateProjectRequest:
+ type: object
+ properties:
+ settings:
+ $ref: "#/components/schemas/ProjectSettings"
+
+ DirEntry:
+ type: object
+ required: [path, file_count]
+ properties:
+ path:
+ type: string
+ file_count:
+ type: integer
+ minimum: 0
+
+ SymbolEntry:
+ type: object
+ required: [name, kind, file_path, language]
+ properties:
+ name:
+ type: string
+ kind:
+ type: string
+ file_path:
+ type: string
+ language:
+ type: string
+
+ ProjectSummary:
+ type: object
+ required:
+ - path_hash
+ - host_path
+ - status
+ - languages
+ - total_files
+ - total_chunks
+ - total_symbols
+ - top_directories
+ - recent_symbols
+ properties:
+ path_hash:
+ type: string
+ pattern: "^[a-f0-9]{16}$"
+ description: First 16 hex chars of SHA1(host_path) — stable URL identifier.
+ host_path:
+ type: string
+ status:
+ type: string
+ languages:
+ type: array
+ items: { type: string }
+ total_files:
+ type: integer
+ minimum: 0
+ total_chunks:
+ type: integer
+ minimum: 0
+ total_symbols:
+ type: integer
+ minimum: 0
+ top_directories:
+ type: array
+ items:
+ $ref: "#/components/schemas/DirEntry"
+ recent_symbols:
+ type: array
+ items:
+ $ref: "#/components/schemas/SymbolEntry"
+
+ SymbolSearchRequest:
+ type: object
+ required: [query]
+ properties:
+ query:
+ type: string
+ minLength: 1
+ kinds:
+ type: array
+ items: { type: string }
+ limit:
+ type: integer
+ minimum: 0
+ default: 20
+
+ SymbolResultItem:
+ type: object
+ required: [name, kind, file_path, line, end_line, language]
+ properties:
+ name: { type: string }
+ kind: { type: string }
+ file_path: { type: string }
+ line: { type: integer }
+ end_line: { type: integer }
+ language: { type: string }
+ signature: { type: string }
+ parent_name: { type: string }
+
+ SymbolSearchResponse:
+ type: object
+ required: [results, total]
+ properties:
+ results:
+ type: array
+ items:
+ $ref: "#/components/schemas/SymbolResultItem"
+ total:
+ type: integer
+ minimum: 0
+
+ DefinitionRequest:
+ type: object
+ required: [symbol]
+ properties:
+ symbol:
+ type: string
+ minLength: 1
+ kind:
+ type: string
+ file_path:
+ type: string
+ limit:
+ type: integer
+ minimum: 0
+ default: 10
+
+ DefinitionItem:
+ type: object
+ required: [name, kind, file_path, line, end_line, language]
+ properties:
+ name: { type: string }
+ kind: { type: string }
+ file_path: { type: string }
+ line: { type: integer }
+ end_line: { type: integer }
+ language: { type: string }
+ signature: { type: string }
+ parent_name: { type: string }
+
+ DefinitionResponse:
+ type: object
+ required: [results, total]
+ properties:
+ results:
+ type: array
+ items:
+ $ref: "#/components/schemas/DefinitionItem"
+ total:
+ type: integer
+ minimum: 0
+
+ ReferenceRequest:
+ type: object
+ required: [symbol]
+ properties:
+ symbol:
+ type: string
+ minLength: 1
+ limit:
+ type: integer
+ minimum: 0
+ default: 50
+ file_path:
+ type: string
+
+ ReferenceItem:
+ type: object
+ required:
+ - file_path
+ - start_line
+ - end_line
+ - content
+ - chunk_type
+ - symbol_name
+ - language
+ properties:
+ file_path: { type: string }
+ start_line: { type: integer }
+ end_line:
+ type: integer
+ description: Always equal to `start_line` (refs table stores tokens, not ranges).
+ content:
+ type: string
+ description: Always empty — see endpoint description.
+ chunk_type:
+ type: string
+ enum: [reference]
+ symbol_name: { type: string }
+ language: { type: string }
+
+ ReferenceResponse:
+ type: object
+ required: [results, total]
+ properties:
+ results:
+ type: array
+ items:
+ $ref: "#/components/schemas/ReferenceItem"
+ total:
+ type: integer
+ minimum: 0
+
+ FileSearchRequest:
+ type: object
+ required: [query]
+ properties:
+ query:
type: string
- format: date-time
- disabled:
- type: boolean
- description: |
- True when `disabled_at` is set. Disabled users cannot
- authenticate via password OR API key.
- disabled_at:
+ minLength: 1
+ description: Substring matched against `file_path`.
+ limit:
+ type: integer
+ minimum: 0
+ default: 20
+
+ FileResultItem:
+ type: object
+ required: [file_path, language]
+ properties:
+ file_path: { type: string }
+ language:
type: string
- format: date-time
nullable: true
+ description: Detected language, or null if undetected.
- UserWithStats:
- allOf:
- - $ref: "#/components/schemas/User"
- - type: object
- required: [active_sessions_count, api_keys_count]
- properties:
- last_login_at:
- type: string
- format: date-time
- nullable: true
- description: |
- Most recent session creation timestamp (RFC3339).
- Null if the user has never logged in.
- active_sessions_count:
- type: integer
- minimum: 0
- description: Count of non-expired sessions for this user.
- api_keys_count:
- type: integer
- minimum: 0
- description: Count of non-revoked API keys owned by this user.
-
- UserListResponse:
+ FileSearchResponse:
type: object
- required: [users, total]
+ required: [results, total]
properties:
- users:
+ results:
type: array
items:
- $ref: "#/components/schemas/UserWithStats"
+ $ref: "#/components/schemas/FileResultItem"
total:
type: integer
+ minimum: 0
- LoginRequest:
+ SemanticSearchRequest:
type: object
- required: [email, password]
+ required: [query]
properties:
- email:
- type: string
- format: email
- password:
+ query:
type: string
minLength: 1
+ limit:
+ type: integer
+ minimum: 0
+ default: 10
+ description: Maximum number of FILE groups (not chunks) to return.
+ languages:
+ type: array
+ items: { type: string }
+ paths:
+ type: array
+ items: { type: string }
+ description: Whitelist — keep only results whose path matches any prefix or substring.
+ excludes:
+ type: array
+ items: { type: string }
+ description: Blacklist — drop results whose path matches any prefix or substring.
+ min_score:
+ type: number
+ format: float
+ description: |
+ Minimum cosine similarity. Omit for server default (0.2 —
+ light floor that keeps abstract NL queries non-empty). Send
+ `0` to disable; pass `0.4+` for strict code-symbol searches
+ calibrated for CodeRankEmbed-Q8.
- LoginResponse:
+ NestedHit:
type: object
- required: [user]
+ required: [start_line, end_line, chunk_type, score]
properties:
- user:
- $ref: "#/components/schemas/User"
+ start_line: { type: integer }
+ end_line: { type: integer }
+ symbol_name: { type: string }
+ chunk_type: { type: string }
+ score:
+ type: number
+ format: float
- MeResponse:
+ FileMatch:
type: object
- required: [user, auth_method]
+ required: [start_line, end_line, content, score, chunk_type]
properties:
- user:
- $ref: "#/components/schemas/User"
- auth_method:
- type: string
- enum: [session, api_key]
- description: |
- Tells the dashboard whether to surface "logout" (session) or
- hide it (api_key access — there's nothing to log out of).
+ start_line: { type: integer }
+ end_line: { type: integer }
+ content: { type: string }
+ score:
+ type: number
+ format: float
+ chunk_type: { type: string }
+ symbol_name: { type: string }
+ nested_hits:
+ type: array
+ items:
+ $ref: "#/components/schemas/NestedHit"
- ChangePasswordRequest:
+ FileGroupResult:
type: object
- required: [current_password, new_password]
+ required: [file_path, best_score, matches]
properties:
- current_password:
- type: string
- minLength: 1
- new_password:
- type: string
- minLength: 8
- description: Minimum 8 characters. No upper bound.
+ file_path: { type: string }
+ language: { type: string }
+ best_score:
+ type: number
+ format: float
+ matches:
+ type: array
+ items:
+ $ref: "#/components/schemas/FileMatch"
- CreateUserRequest:
+ SemanticSearchResponse:
type: object
- required: [email, initial_password, role]
+ required: [results, total, query_time_ms]
properties:
- email:
- type: string
- format: email
- initial_password:
- type: string
- minLength: 8
- description: |
- One-time password the new user must change on first login.
- The admin shares this out-of-band.
- role:
- type: string
- enum: [admin, viewer]
+ results:
+ type: array
+ items:
+ $ref: "#/components/schemas/FileGroupResult"
+ total:
+ type: integer
+ minimum: 0
+ query_time_ms:
+ type: number
+ format: double
+ description: Wall-clock query latency, rounded to 1 decimal place.
- UpdateUserRequest:
+ IndexBeginRequest:
type: object
properties:
- role:
- type: string
- enum: [admin, viewer]
- description: |
- New role for the user. Refused for the last enabled admin
- when set to `viewer`.
- disabled:
+ full:
type: boolean
- description: |
- When true, the user can no longer authenticate. Refused for
- the last enabled admin when set to true.
+ default: false
+ description: When true, wipes existing project state before opening the session.
- RuntimeConfig:
+ IndexBeginResponse:
type: object
- required:
- - embedding_model
- - llama_ctx_size
- - llama_n_gpu_layers
- - llama_n_threads
- - max_embedding_concurrency
- - llama_batch_size
- - source
+ required: [run_id, stored_hashes]
properties:
- embedding_model:
+ run_id:
type: string
- description: HF repo ID or absolute filesystem path to a .gguf file.
- llama_ctx_size:
- type: integer
- minimum: 1
- llama_n_gpu_layers:
- type: integer
- description: -1 = all layers (Metal/CUDA), 0 = CPU only.
- llama_n_threads:
- type: integer
- minimum: 0
- description: 0 = let llama-server auto-detect.
- max_embedding_concurrency:
- type: integer
- minimum: 1
- llama_batch_size:
- type: integer
- minimum: 1
- source:
+ stored_hashes:
type: object
additionalProperties:
type: string
- enum: [db, env, recommended]
description: |
- Per-field origin label so the dashboard can render a "DB" /
- "Env" / "Recommended" pill next to each value. Keys match the
- other field names: `embedding_model`, `llama_ctx_size`, ...
- recommended:
- $ref: "#/components/schemas/RuntimeConfigRecommended"
- updated_at:
- type: string
- format: date-time
- nullable: true
- description: When the runtime_settings row was last written, or null when only env/recommended are in effect.
- updated_by:
- type: string
- nullable: true
- description: Who issued the last PUT, captured from the active session.
+ Map from file path → SHA-256 of currently-stored content. Empty
+ when the project has never been indexed (or `full:true` was passed).
- RuntimeConfigRecommended:
+ FilePayload:
type: object
- required:
- - embedding_model
- - llama_ctx_size
- - llama_n_gpu_layers
- - llama_n_threads
- - max_embedding_concurrency
- - llama_batch_size
+ required: [path, content, content_hash, size]
properties:
- embedding_model: { type: string }
- llama_ctx_size: { type: integer }
- llama_n_gpu_layers: { type: integer }
- llama_n_threads: { type: integer }
- max_embedding_concurrency: { type: integer }
- llama_batch_size: { type: integer }
+ path:
+ type: string
+ content:
+ type: string
+ description: UTF-8 text. Binary files should not be submitted.
+ content_hash:
+ type: string
+ description: SHA-256 hex digest of `content`.
+ language:
+ type: string
+ size:
+ type: integer
+ minimum: 0
- RuntimeConfigUpdate:
+ IndexFilesRequest:
type: object
- description: |
- All fields optional. Send a value to set/replace the override for
- that field, send `""` (string fields) or `0` (numeric fields) to
- CLEAR the override (next read falls back to env / recommended).
- Omitted fields keep their current value.
+ required: [run_id, files]
properties:
- embedding_model:
+ run_id:
type: string
- nullable: true
- llama_ctx_size:
- type: integer
- nullable: true
- llama_n_gpu_layers:
+ minLength: 1
+ files:
+ type: array
+ maxItems: 50
+ items:
+ $ref: "#/components/schemas/FilePayload"
+
+ IndexFilesResponse:
+ type: object
+ required: [files_accepted, chunks_created, files_processed_total]
+ properties:
+ files_accepted:
type: integer
- nullable: true
- llama_n_threads:
+ minimum: 0
+ chunks_created:
type: integer
- nullable: true
- max_embedding_concurrency:
+ minimum: 0
+ files_processed_total:
type: integer
- nullable: true
- llama_batch_size:
+ minimum: 0
+
+ IndexFinishRequest:
+ type: object
+ required: [run_id]
+ properties:
+ run_id:
+ type: string
+ minLength: 1
+ deleted_paths:
+ type: array
+ items: { type: string }
+ total_files_discovered:
type: integer
- nullable: true
+ minimum: 0
- SidecarStatus:
+ IndexFinishResponse:
type: object
- required: [state, ready, in_flight]
+ required: [status, files_processed, chunks_created]
properties:
- state:
+ status:
type: string
- enum: [running, starting, restarting, failed, disabled]
- pid:
+ enum: [completed]
+ files_processed:
type: integer
minimum: 0
- description: 0 when no child process is alive (failed / disabled).
- uptime_seconds:
+ chunks_created:
type: integer
minimum: 0
- model:
- type: string
- ready:
+
+ IndexCancelResponse:
+ type: object
+ required: [cancelled]
+ properties:
+ cancelled:
type: boolean
- last_error:
+
+ IndexProgressInfo:
+ type: object
+ description: |
+ Progress payload. The active-session variant carries every field;
+ the historical-fallback variant only carries `files_processed`,
+ `files_total`, and `chunks_created`.
+ properties:
+ phase:
type: string
- in_flight:
+ enum: [receiving, completed]
+ files_discovered:
type: integer
minimum: 0
- description: Embedding queue depth at the moment of sampling.
- restart_in_flight:
- type: boolean
- description: True between accept of POST /sidecar/restart and respawn completion.
+ files_processed:
+ type: integer
+ minimum: 0
+ files_total:
+ type: integer
+ minimum: 0
+ chunks_created:
+ type: integer
+ minimum: 0
+ elapsed_seconds:
+ type: number
+ format: double
+ run_id:
+ type: string
+ current_files:
+ type: array
+ items:
+ type: string
+ description: |
+ Most recent files being indexed, newest first; up to 3. Lets a UI
+ show live forward motion during a run. Empty on the
+ historical-fallback variant.
- RestartAccepted:
+ IndexProgressResponse:
type: object
- required: [restart_id]
+ required: [status]
properties:
- restart_id:
+ status:
type: string
- description: Opaque ID; future versions may expose per-restart progress under this id.
+ enum: [idle, indexing, completed, cancelled, failed, running]
+ description: |
+ `idle` — no session ever / fallback unavailable.
+ `indexing` — session active.
+ `completed`/`cancelled`/`failed`/`running` — last-run status from `index_runs`.
+ progress:
+ $ref: "#/components/schemas/IndexProgressInfo"
- ModelEntry:
+ IndexProgressEvent:
type: object
- required: [id, path, size_bytes]
+ description: |
+ One event in the NDJSON stream emitted by `POST /index/files` when
+ the client sends `Accept: application/x-ndjson`. The `event` field
+ discriminates the variant; other fields are populated as relevant.
+ required: [event]
properties:
- id:
+ event:
+ type: string
+ enum:
+ - file_started
+ - file_chunked
+ - file_embedded
+ - file_done
+ - file_error
+ - heartbeat
+ - batch_done
+ - error
+ run_id:
type: string
- description: HF repo ID derived from the cache directory name (e.g. owner/model).
path:
type: string
- description: Absolute path to the .gguf file on disk.
- size_bytes:
+ file_index:
+ type: integer
+ batch_size:
+ type: integer
+ chunks:
+ type: integer
+ embed_ms:
type: integer
format: int64
- minimum: 0
-
- ModelList:
- type: object
- required: [models, cache_dir]
- properties:
- models:
- type: array
- items:
- $ref: "#/components/schemas/ModelEntry"
- cache_dir:
+ ts:
type: string
- description: The CIX_GGUF_CACHE_DIR that was scanned. Empty list with non-empty cache_dir = no .gguf files found.
+ format: date-time
+ message:
+ type: string
+ fatal:
+ type: boolean
+ files_accepted:
+ type: integer
+ chunks_created:
+ type: integer
+ files_processed_total:
+ type: integer
- Session:
+ Workspace:
type: object
- required: [id, created_at, expires_at, last_seen_at, is_current]
+ # owner_user_id is required-but-nullable for the same reason as on
+ # Project — clients can rely on the field being present and use
+ # `string | null` instead of optional.
+ required: [id, name, description, created_at, updated_at, owner_user_id]
properties:
id:
type: string
- created_at:
+ description: ULID-like opaque identifier.
+ name:
type: string
- format: date-time
- expires_at:
+ description: Unique workspace name.
+ description:
type: string
- format: date-time
- last_seen_at:
+ description: Free-form description. Empty string when absent.
+ created_at:
type: string
format: date-time
- last_seen_ip:
+ updated_at:
type: string
- nullable: true
- last_seen_ua:
+ format: date-time
+ owner_user_id:
type: string
nullable: true
- is_current:
- type: boolean
- description: True for the session carrying this request.
+ description: |
+ User who created the workspace. NULL only when orphaned by a user
+ deletion. The workspace is visible to its owner, members of any
+ view-group it is shared with, and admins.
- SessionListResponse:
+ WorkspaceListResponse:
type: object
- required: [sessions, total]
+ required: [workspaces, total]
properties:
- sessions:
+ workspaces:
type: array
items:
- $ref: "#/components/schemas/Session"
+ $ref: "#/components/schemas/Workspace"
total:
type: integer
- ApiKey:
+ CreateWorkspaceRequest:
type: object
- required: [id, owner_user_id, name, prefix, created_at, revoked]
+ required: [name]
properties:
- id:
+ name:
type: string
- owner_user_id:
+ minLength: 1
+ description:
type: string
+ description: Optional free-form description.
+
+ UpdateWorkspaceRequest:
+ type: object
+ description: |
+ Both fields are optional — omitting a field leaves the existing
+ value unchanged. Passing an empty string for `description` clears
+ it. `name` must be non-empty when provided.
+ properties:
name:
type: string
- prefix:
+ minLength: 1
+ description:
type: string
- description: |
- Display-only prefix of the full key (e.g. `cix_a1b2c3d4`).
- Long enough to recognise in lists, short enough that it
- cannot reconstruct the original.
+
+ Group:
+ type: object
+ required: [id, name, description, created_at, updated_at]
+ properties:
+ id:
+ type: string
+ name:
+ type: string
+ description:
+ type: string
+ description: Free-form description. Empty string when absent.
created_at:
type: string
format: date-time
- last_used_at:
+ updated_at:
type: string
format: date-time
- nullable: true
- last_used_ip:
+
+ GroupMember:
+ type: object
+ required: [user_id, email, role, added_at]
+ properties:
+ user_id:
type: string
- nullable: true
- last_used_ua:
+ email:
type: string
- nullable: true
- revoked:
- type: boolean
- revoked_at:
+ format: email
+ role:
+ type: string
+ enum: [admin, user]
+ added_at:
type: string
format: date-time
- nullable: true
- ApiKeyListResponse:
+ GroupListResponse:
+ type: object
+ required: [groups, total]
+ properties:
+ groups:
+ type: array
+ items:
+ $ref: "#/components/schemas/Group"
+ total:
+ type: integer
+
+ GroupMemberListResponse:
type: object
- required: [api_keys, total]
+ required: [members, total]
properties:
- api_keys:
+ members:
type: array
items:
- $ref: "#/components/schemas/ApiKey"
+ $ref: "#/components/schemas/GroupMember"
total:
type: integer
- CreateApiKeyRequest:
+ CreateGroupRequest:
type: object
required: [name]
properties:
name:
type: string
minLength: 1
- description: |
- Human-friendly label shown in the dashboard. The full key
- value is generated server-side and returned exactly once.
+ description:
+ type: string
- ApiKeyCreated:
+ UpdateGroupRequest:
type: object
- required: [api_key, full_key]
properties:
- api_key:
- $ref: "#/components/schemas/ApiKey"
- full_key:
+ name:
+ type: string
+ minLength: 1
+ description:
type: string
- description: |
- The plaintext key value. **Returned exactly once.** Store it
- securely — there is no way to retrieve it later.
- HealthResponse:
+ AddGroupMemberRequest:
type: object
- required: [status]
+ required: [user_id]
properties:
- status:
- type: string
- enum: [ok, unhealthy]
- reason:
+ user_id:
type: string
- description: Set only when `status` is `unhealthy`.
- StatusResponse:
+ ShareToGroupRequest:
type: object
- required:
- - status
- - backend
- - server_version
- - api_version
- - model_loaded
- - embedding_model
- - projects
- - active_indexing_jobs
+ required: [group_id]
properties:
- status:
- type: string
- enum: [ok]
- backend:
- type: string
- description: Backend identifier (e.g. `go`).
- server_version:
- type: string
- api_version:
- type: string
- example: v1
- model_loaded:
- type: boolean
- description: |
- Whether the llama-server sidecar reports ready within 500 ms.
- False when the sidecar is starting or has crashed.
- embedding_model:
- type: string
- description: Hugging Face model id (e.g. `awhiteside/CodeRankEmbed-Q8_0-GGUF`).
- projects:
- type: integer
- minimum: 0
- description: Total registered projects.
- active_indexing_jobs:
- type: integer
- minimum: 0
- description: Currently-running `index_runs` rows.
- update_available:
- type: boolean
- description: |
- True when the version-check service has found a `server/v*`
- release on GitHub strictly newer than the running server.
- Field is omitted entirely when version-check is not wired
- (set `CIX_VERSION_CHECK_ENABLED=false` to disable polling).
- latest_version:
+ group_id:
type: string
- nullable: true
- description: |
- Latest released server version (without the `server/v` prefix,
- e.g. `0.5.1`). Null until the first successful poll completes.
- release_url:
+
+ GroupIdListResponse:
+ type: object
+ required: [group_ids]
+ properties:
+ group_ids:
+ type: array
+ items:
+ type: string
+
+ ReassignOwnerRequest:
+ type: object
+ required: [owner_user_id]
+ properties:
+ owner_user_id:
type: string
- nullable: true
- description: GitHub release page URL for `latest_version`. Null when unknown.
- version_check:
- $ref: "#/components/schemas/VersionCheckStatus"
+ description: New owner; must be an existing, enabled user.
- VersionCheckStatus:
+ GithubToken:
type: object
- required: [enabled]
+ required: [id, name, scopes, created_at]
properties:
- enabled:
- type: boolean
- description: Whether the periodic GitHub poll is running.
- checked_at:
+ id:
+ type: string
+ name:
+ type: string
+ scopes:
+ type: array
+ items:
+ type: string
+ description: |
+ Best-effort scope list. The server derives it from GitHub's
+ X-OAuth-Scopes response header by calling GET /user with the
+ plaintext token; client-supplied scope hints are ignored.
+ created_at:
type: string
format: date-time
- nullable: true
- description: Last poll timestamp (UTC, RFC 3339). Null before the first poll.
- error:
+ last_used_at:
type: string
+ format: date-time
nullable: true
- description: Last error message, if the most recent poll failed. Null on success.
- ProjectSettings:
+ GithubTokenListResponse:
type: object
- required: [exclude_patterns, max_file_size]
+ required: [tokens, total]
properties:
- exclude_patterns:
+ tokens:
type: array
- items: { type: string }
- max_file_size:
+ items:
+ $ref: "#/components/schemas/GithubToken"
+ total:
type: integer
- minimum: 0
- ProjectStats:
+ CreateGithubTokenRequest:
type: object
- required: [total_files, indexed_files, total_chunks, total_symbols]
+ required: [name, token]
properties:
- total_files:
- type: integer
- minimum: 0
- indexed_files:
- type: integer
- minimum: 0
- total_chunks:
- type: integer
- minimum: 0
- total_symbols:
- type: integer
- minimum: 0
+ name:
+ type: string
+ minLength: 1
+ description: Human-friendly label shown in the dashboard.
+ token:
+ type: string
+ minLength: 1
+ description: |
+ The plaintext PAT. The server encrypts it with AES-GCM before
+ persisting; this is the only request body that ever carries
+ the plaintext value.
+ scopes:
+ type: array
+ items:
+ type: string
+ deprecated: true
+ description: |
+ Ignored. The server now derives real scopes from GitHub's
+ X-OAuth-Scopes response header by calling GET /user with the
+ supplied token, so user-supplied scope hints are no longer
+ consulted. Kept for backwards compatibility with older
+ clients that still send it.
- Project:
+ GitRepo:
type: object
required:
+ - project_path
- path_hash
- - host_path
- - container_path
- - languages
- - settings
- - stats
- - status
+ - github_url
+ - branch
+ - auto_webhook
+ - webhook_mode
- created_at
- updated_at
- - last_indexed_at
+ description: |
+ Clone + webhook metadata for an external (git-cloned) project.
+ Exactly 1:1 with the matching projects row; local projects have
+ no GitRepo row.
properties:
- path_hash:
- type: string
- pattern: "^[a-f0-9]{16}$"
- description: First 16 hex chars of SHA1(host_path) — stable URL identifier.
- host_path:
+ project_path:
type: string
- description: Absolute filesystem path on the operator's machine.
- container_path:
- type: string
- description: Path inside the container (often equal to host_path).
- languages:
- type: array
- items: { type: string }
- settings:
- $ref: "#/components/schemas/ProjectSettings"
- stats:
- $ref: "#/components/schemas/ProjectStats"
- status:
+ description: |
+ Matches projects.host_path — canonical
+ "github.com/owner/repo@branch" string.
+ path_hash:
type: string
- enum: [created, indexing, indexed, error]
- created_at:
+ description: 16-hex SHA1 prefix of project_path, used in URLs.
+ github_url:
type: string
- format: date-time
- updated_at:
+ branch:
type: string
- format: date-time
- last_indexed_at:
+ token_id:
type: string
- format: date-time
nullable: true
- indexed_with_model:
+ auto_webhook:
+ type: boolean
+ description: Legacy alias for `webhook_mode == "auto"`.
+ webhook_mode:
type: string
- nullable: true
- description: |
- Embedding model identifier active when this project was last
- (re)indexed. NULL on rows that pre-date drift tracking — the
- dashboard treats NULL as "Unknown" rather than as drift.
- sqlite_path:
+ enum: [manual, auto, disabled]
+ last_sha:
type: string
nullable: true
- description: Resolved SQLite database path for the active model. NULL on dashboards that don't expose storage info.
- chroma_path:
+ last_error:
type: string
nullable: true
- description: Resolved chromem-go collection directory for this project. NULL when not computed.
- sqlite_size_bytes:
+ polling_enabled:
+ type: boolean
+ description: |
+ When true, the shared poll scheduler fetches this repo on an
+ interval (the alternative to webhooks for non-admin repos).
+ Mutually exclusive with webhook_mode != 'disabled'.
+ poll_interval_seconds:
type: integer
- format: int64
nullable: true
- minimum: 0
- chroma_size_bytes:
- type: integer
- format: int64
+ description: |
+ Per-repo poll cadence in seconds. Null → server default
+ (CIX_DEFAULT_POLL_INTERVAL). Always clamped up to the floor
+ (CIX_MIN_POLL_INTERVAL).
+ next_poll_at:
+ type: string
+ format: date-time
nullable: true
- minimum: 0
+ description: |
+ Absolute time this repo is next due for a poll. Null when
+ polling is disabled. Measured from the end of the last index run.
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
- ProjectListResponse:
+ AddGitRepoRequest:
type: object
- required: [projects, total]
+ required: [github_url, branch]
properties:
- projects:
- type: array
- items:
- $ref: "#/components/schemas/Project"
- total:
+ github_url:
+ type: string
+ description: https://github.com/owner/repo URL.
+ branch:
+ type: string
+ minLength: 1
+ token_id:
+ type: string
+ description: Optional id of a stored GitHub PAT (required for private repos).
+ webhook_mode:
+ type: string
+ enum: [manual, auto, disabled]
+ default: manual
+ polling_enabled:
+ type: boolean
+ description: |
+ Enable polling sync. Requires webhook_mode='disabled'
+ (webhook XOR polling); otherwise the request is rejected 422.
+ poll_interval_seconds:
type: integer
- minimum: 0
+ description: Optional per-repo poll cadence in seconds (0/omitted → server default).
- CreateProjectRequest:
+ UpdateGitRepoSyncRequest:
type: object
- required: [host_path]
+ required: [sync_method]
properties:
- host_path:
+ sync_method:
type: string
+ enum: [webhook, polling, manual]
+ description: |
+ How the project should be kept in sync. `webhook` → push-driven
+ (auto-register, fall back to polling on failure); `polling` →
+ server fetches on an interval; `manual` → reindex on demand only.
+ poll_interval_seconds:
+ type: integer
+ description: |
+ Per-repo poll cadence in seconds (0/omitted → server default).
+ Only meaningful when sync_method is `polling` (also used as the
+ interval if `webhook` falls back to polling).
- UpdateProjectRequest:
+ UpdateGitRepoSyncResult:
type: object
+ required: [git_repo]
properties:
- settings:
- $ref: "#/components/schemas/ProjectSettings"
+ git_repo:
+ $ref: "#/components/schemas/GitRepo"
+ note:
+ type: string
+ description: |
+ Human-readable note about the outcome — e.g. that webhook
+ auto-registration failed and the server fell back to polling.
- DirEntry:
+ GithubRepo:
type: object
- required: [path, file_count]
+ required: [full_name, default_branch, private, html_url]
+ description: A repository visible to a stored PAT.
properties:
- path:
+ full_name:
+ type: string
+ description: "owner/name"
+ default_branch:
+ type: string
+ description: |
+ The repo's default branch; the dashboard pre-fills the branch
+ input with this when the user picks a repo from the list.
+ private:
+ type: boolean
+ html_url:
+ type: string
+ description:
type: string
- file_count:
- type: integer
- minimum: 0
- SymbolEntry:
+ GithubAccount:
type: object
- required: [name, kind, file_path, language]
+ required: [login, type]
+ description: |
+ A GitHub account the PAT can see. The user owning the PAT is
+ returned first, followed by every org accessible via /user/orgs.
+ The dashboard's add-repo flow shows these in a Select before
+ the repository picker so the operator can drill into a specific
+ org instead of relying on the affiliations-aggregated view.
properties:
- name:
- type: string
- kind:
+ login:
type: string
- file_path:
+ description: GitHub login (user name or org slug).
+ type:
type: string
- language:
+ enum: [user, org]
+ description: |
+ "user" for the PAT owner; "org" for organisations.
+ avatar_url:
type: string
- ProjectSummary:
+ GitRepoCreated:
type: object
- required:
- - path_hash
- - host_path
- - status
- - languages
- - total_files
- - total_chunks
- - total_symbols
- - top_directories
- - recent_symbols
+ required: [project, git_repo, webhook_url, webhook_secret]
properties:
- path_hash:
+ project:
+ $ref: "#/components/schemas/Project"
+ git_repo:
+ $ref: "#/components/schemas/GitRepo"
+ webhook_url:
type: string
- pattern: "^[a-f0-9]{16}$"
- description: First 16 hex chars of SHA1(host_path) — stable URL identifier.
- host_path:
+ description: |
+ Publicly-reachable POST endpoint to register in GitHub when
+ doing webhook setup manually.
+ webhook_secret:
type: string
- status:
+ description: |
+ HMAC secret. **Returned once on create + once via
+ /projects/{hash}/webhook-info.**
+ auto_registered:
+ type: boolean
+ description: |
+ True when webhook_mode was 'auto' AND the server
+ successfully registered the hook with GitHub.
+ auto_register_note:
type: string
- languages:
- type: array
- items: { type: string }
- total_files:
- type: integer
- minimum: 0
- total_chunks:
- type: integer
- minimum: 0
- total_symbols:
- type: integer
- minimum: 0
- top_directories:
- type: array
- items:
- $ref: "#/components/schemas/DirEntry"
- recent_symbols:
- type: array
- items:
- $ref: "#/components/schemas/SymbolEntry"
+ description: Human-readable reason when auto_registered is false.
- SymbolSearchRequest:
+ WorkspaceProjectMembership:
type: object
- required: [query]
+ required: [workspace_id, project_path, added_at]
properties:
- query:
+ workspace_id:
type: string
- minLength: 1
- kinds:
- type: array
- items: { type: string }
- limit:
- type: integer
- minimum: 0
- default: 20
+ project_path:
+ type: string
+ added_at:
+ type: string
+ format: date-time
- SymbolResultItem:
+ WorkspaceProject:
type: object
- required: [name, kind, file_path, line, end_line, language]
+ required: [project, added_at]
+ description: |
+ A project listed under a workspace, decorated with the membership
+ timestamp. The embedded Project carries the full project info
+ (status, languages, last_indexed_at) so the dashboard doesn't
+ need a second roundtrip.
properties:
- name: { type: string }
- kind: { type: string }
- file_path: { type: string }
- line: { type: integer }
- end_line: { type: integer }
- language: { type: string }
- signature: { type: string }
- parent_name: { type: string }
+ project:
+ $ref: "#/components/schemas/Project"
+ added_at:
+ type: string
+ format: date-time
- SymbolSearchResponse:
+ WorkspaceProjectListResponse:
type: object
- required: [results, total]
+ required: [projects, total]
properties:
- results:
+ projects:
type: array
items:
- $ref: "#/components/schemas/SymbolResultItem"
+ $ref: "#/components/schemas/WorkspaceProject"
total:
type: integer
- minimum: 0
- DefinitionRequest:
+ LinkProjectRequest:
type: object
- required: [symbol]
+ required: [project_hash]
properties:
- symbol:
- type: string
- minLength: 1
- kind:
- type: string
- file_path:
+ project_hash:
type: string
- limit:
- type: integer
- minimum: 0
- default: 10
-
- DefinitionItem:
- type: object
- required: [name, kind, file_path, line, end_line, language]
- properties:
- name: { type: string }
- kind: { type: string }
- file_path: { type: string }
- line: { type: integer }
- end_line: { type: integer }
- language: { type: string }
- signature: { type: string }
- parent_name: { type: string }
+ minLength: 16
+ maxLength: 16
+ description: |
+ The 16-hex `path_hash` of an indexed project. The server
+ resolves it to host_path and inserts the (workspace_id,
+ project_path) row. The project must be in status='indexed'.
- DefinitionResponse:
+ ProjectWorkspaceList:
type: object
- required: [results, total]
+ required: [workspaces]
properties:
- results:
+ workspaces:
type: array
items:
- $ref: "#/components/schemas/DefinitionItem"
- total:
- type: integer
- minimum: 0
+ $ref: "#/components/schemas/ProjectWorkspaceEntry"
- ReferenceRequest:
+ ProjectWorkspaceEntry:
type: object
- required: [symbol]
+ required: [workspace_id, workspace_name, added_at]
properties:
- symbol:
+ workspace_id:
type: string
- minLength: 1
- limit:
- type: integer
- minimum: 0
- default: 50
- file_path:
+ workspace_name:
type: string
+ added_at:
+ type: string
+ format: date-time
- ReferenceItem:
+ ReindexEnqueuedResponse:
type: object
- required:
- - file_path
- - start_line
- - end_line
- - content
- - chunk_type
- - symbol_name
- - language
+ required: [status]
properties:
- file_path: { type: string }
- start_line: { type: integer }
- end_line:
- type: integer
- description: Always equal to `start_line` (refs table stores tokens, not ranges).
- content:
+ status:
type: string
- description: Always empty — see endpoint description.
- chunk_type:
+ enum: [enqueued, already_running]
+ mode:
type: string
- enum: [reference]
- symbol_name: { type: string }
- language: { type: string }
+ enum: [full, incremental]
+ description: |
+ Which reindex flavour the server will run. `full` is set
+ when the request used `?full=true` (the server cleared
+ `indexed_sha` before enqueueing); otherwise `incremental`
+ and the clone job's mode determination picks the real
+ mode (which may still resolve to `full` at runtime for
+ first-index, model-change, or partial-failure cases).
+ project:
+ $ref: "#/components/schemas/Project"
- ReferenceResponse:
+ ForceStopResponse:
type: object
- required: [results, total]
+ required: [cancelled, jobs_cleared]
properties:
- results:
- type: array
- items:
- $ref: "#/components/schemas/ReferenceItem"
- total:
+ cancelled:
+ type: boolean
+ description: |
+ True when an active in-process index session was found and
+ cancelled. False when nothing was indexing (the job-queue
+ cleanup still ran — see `jobs_cleared`).
+ jobs_cleared:
type: integer
- minimum: 0
+ description: |
+ Count of pending/running clone_repo + index_repo jobs that
+ were deleted from the queue for this repo.
+ project:
+ $ref: "#/components/schemas/Project"
- FileSearchRequest:
+ Job:
type: object
- required: [query]
+ required:
+ - id
+ - type
+ - status
+ - attempts
+ - max_attempts
+ - scheduled_at
+ - created_at
properties:
- query:
+ id:
type: string
- minLength: 1
- description: Substring matched against `file_path`.
- limit:
+ type:
+ type: string
+ status:
+ type: string
+ enum: [pending, running, completed, failed]
+ dedupe_key:
+ type: string
+ nullable: true
+ payload:
+ type: object
+ additionalProperties: true
+ description: |
+ Raw JSON payload — shape depends on `type`. Render as-is in
+ the dashboard; don't assume structure.
+ attempts:
type: integer
minimum: 0
- default: 20
-
- FileResultItem:
- type: object
- required: [file_path, language]
- properties:
- file_path: { type: string }
- language:
+ max_attempts:
+ type: integer
+ minimum: 0
+ last_error:
type: string
nullable: true
- description: Detected language, or null if undetected.
+ scheduled_at:
+ type: string
+ format: date-time
+ started_at:
+ type: string
+ format: date-time
+ nullable: true
+ completed_at:
+ type: string
+ format: date-time
+ nullable: true
+ created_at:
+ type: string
+ format: date-time
- FileSearchResponse:
+ JobListResponse:
type: object
- required: [results, total]
+ required: [jobs, total]
properties:
- results:
+ jobs:
type: array
items:
- $ref: "#/components/schemas/FileResultItem"
+ $ref: "#/components/schemas/Job"
total:
type: integer
- minimum: 0
- SemanticSearchRequest:
+ WebhookInfoResponse:
type: object
- required: [query]
+ required: [webhook_url, webhook_secret, auto_registered]
properties:
- query:
+ webhook_url:
type: string
- minLength: 1
- limit:
- type: integer
- minimum: 0
- default: 10
- description: Maximum number of FILE groups (not chunks) to return.
- languages:
- type: array
- items: { type: string }
- paths:
- type: array
- items: { type: string }
- description: Whitelist — keep only results whose path matches any prefix or substring.
- excludes:
- type: array
- items: { type: string }
- description: Blacklist — drop results whose path matches any prefix or substring.
- min_score:
- type: number
- format: float
description: |
- Minimum cosine similarity. Omit for server default (0.4 for
- CodeRankEmbed-Q8). Send `0` explicitly to disable the floor.
+ Full URL to paste into GitHub's webhook config. Empty path-only
+ value when CIX_PUBLIC_URL is unset — prepend your tunnel origin.
+ webhook_secret:
+ type: string
+ description: HMAC secret. Treat as sensitive — rotates on repo recreate.
+ auto_registered:
+ type: boolean
+ description: |
+ True when the server successfully auto-registered the webhook
+ against the GitHub API (auto_webhook=true on create + PAT had
+ admin:repo_hook). When false, the operator must register manually.
- NestedHit:
+ WebhookAccepted:
type: object
- required: [start_line, end_line, chunk_type, score]
+ required: [status]
properties:
- start_line: { type: integer }
- end_line: { type: integer }
- symbol_name: { type: string }
- chunk_type: { type: string }
- score:
- type: number
- format: float
+ status:
+ type: string
+ enum: [enqueued, already_running, ignored, ping]
+ repo_id:
+ type: string
- FileMatch:
+ TunnelStatus:
+ type: object
+ required: [provider, state, available]
+ properties:
+ provider:
+ type: string
+ enum: [cloudflare, ngrok]
+ state:
+ type: string
+ enum: [disabled, connecting, live, failed]
+ mode:
+ type: string
+ description: '"named" or "quick" for cloudflare; omitted otherwise.'
+ public_url:
+ type: string
+ description: Public base URL (no trailing slash) when live.
+ pid:
+ type: integer
+ uptime_sec:
+ type: integer
+ format: int64
+ last_error:
+ type: string
+ available:
+ type: boolean
+ description: False for providers that are not yet implemented.
+ note:
+ type: string
+
+ TunnelCatalog:
type: object
- required: [start_line, end_line, content, score, chunk_type]
+ required: [providers]
properties:
- start_line: { type: integer }
- end_line: { type: integer }
- content: { type: string }
- score:
- type: number
- format: float
- chunk_type: { type: string }
- symbol_name: { type: string }
- nested_hits:
+ providers:
type: array
items:
- $ref: "#/components/schemas/NestedHit"
+ $ref: "#/components/schemas/TunnelStatus"
- FileGroupResult:
+ TunnelBinary:
type: object
- required: [file_path, best_score, matches]
+ required: [provider, installed, managed]
properties:
- file_path: { type: string }
- language: { type: string }
- best_score:
- type: number
- format: float
- matches:
- type: array
- items:
- $ref: "#/components/schemas/FileMatch"
+ provider:
+ type: string
+ enum: [cloudflare, ngrok]
+ installed:
+ type: boolean
+ managed:
+ type: boolean
+ description: Whether the server can install/update this binary from the UI.
+ path:
+ type: string
+ version:
+ type: string
- SemanticSearchResponse:
+ TunnelBinaryList:
type: object
- required: [results, total, query_time_ms]
+ required: [binaries]
properties:
- results:
+ binaries:
type: array
items:
- $ref: "#/components/schemas/FileGroupResult"
- total:
- type: integer
- minimum: 0
- query_time_ms:
- type: number
- format: double
- description: Wall-clock query latency, rounded to 1 decimal place.
+ $ref: "#/components/schemas/TunnelBinary"
- IndexBeginRequest:
+ TunnelConfig:
type: object
+ required: [enabled, provider, mode, hostname, token_set]
properties:
- full:
+ enabled:
type: boolean
- default: false
- description: When true, wipes existing project state before opening the session.
+ provider:
+ type: string
+ enum: [cloudflare, ngrok]
+ mode:
+ type: string
+ enum: [named, quick]
+ hostname:
+ type: string
+ description: Named-tunnel public hostname (named mode only).
+ token_set:
+ type: boolean
+ description: Whether a connector token is stored (never returned).
+ updated_at:
+ type: string
+ updated_by:
+ type: string
- IndexBeginResponse:
+ TunnelConfigUpdate:
type: object
- required: [run_id, stored_hashes]
properties:
- run_id:
+ enabled:
+ type: boolean
+ provider:
+ type: string
+ enum: [cloudflare, ngrok]
+ mode:
+ type: string
+ enum: [named, quick]
+ hostname:
+ type: string
+ token:
type: string
- stored_hashes:
- type: object
- additionalProperties:
- type: string
description: |
- Map from file path → SHA-256 of currently-stored content. Empty
- when the project has never been indexed (or `full:true` was passed).
+ Cloudflare named-tunnel connector token. Omit to keep the
+ existing token; send "" to clear it.
- FilePayload:
+ TunnelTestResult:
type: object
- required: [path, content, content_hash, size]
+ required: [ok]
properties:
- path:
+ ok:
+ type: boolean
+ public_url:
type: string
- content:
+ status_code:
+ type: integer
+ latency_ms:
+ type: integer
+ format: int64
+ detail:
type: string
- description: UTF-8 text. Binary files should not be submitted.
- content_hash:
+
+ WebhookReconcileOutcome:
+ type: object
+ required: [project_path, action]
+ properties:
+ project_path:
type: string
- description: SHA-256 hex digest of `content`.
- language:
+ action:
+ type: string
+ enum: [updated, created, skipped, failed]
+ note:
type: string
- size:
- type: integer
- minimum: 0
- IndexFilesRequest:
+ WebhookReconcileResult:
type: object
- required: [run_id, files]
+ required: [base_url, total, created, updated, skipped, failed]
properties:
- run_id:
+ base_url:
type: string
- minLength: 1
- files:
+ total:
+ type: integer
+ created:
+ type: integer
+ updated:
+ type: integer
+ skipped:
+ type: integer
+ failed:
+ type: integer
+ outcomes:
type: array
- maxItems: 50
items:
- $ref: "#/components/schemas/FilePayload"
+ $ref: "#/components/schemas/WebhookReconcileOutcome"
- IndexFilesResponse:
+ WebhookOrigin:
type: object
- required: [files_accepted, chunks_created, files_processed_total]
+ required: [origin, source]
properties:
- files_accepted:
- type: integer
- minimum: 0
- chunks_created:
- type: integer
- minimum: 0
- files_processed_total:
- type: integer
- minimum: 0
+ origin:
+ type: string
+ description: |
+ The public base URL webhook delivery URLs are built against
+ (no trailing slash). Empty when source is "none".
+ source:
+ type: string
+ enum: [tunnel, public_url, none]
+ description: |
+ Where `origin` comes from. `tunnel` — a live managed tunnel.
+ `public_url` — the operator-set CIX_PUBLIC_URL (infrastructure
+ provides public reachability; a tunnel is not needed). `none` —
+ no public origin is configured, so webhooks can't be delivered.
- IndexFinishRequest:
+ WorkspaceSearchResponse:
type: object
- required: [run_id]
+ required: [status, projects, chunks]
properties:
- run_id:
+ status:
type: string
- minLength: 1
- deleted_paths:
+ enum: [ok, empty, partial_failure]
+ description: |
+ `ok` — results follow. `empty` — workspace queried fine but
+ nothing cleared the `min_score` floor. `partial_failure` —
+ no chunks returned but at least one repo errored out during
+ the fan-out (see `failed_repos`).
+ projects:
type: array
- items: { type: string }
- total_files_discovered:
- type: integer
- minimum: 0
+ description: |
+ Top projects ranked by `project_score`. Surfaces which repos
+ in the workspace the query is most relevant to, independent
+ of which individual chunks rank highest globally.
+ items:
+ $ref: "#/components/schemas/WorkspaceSearchProject"
+ chunks:
+ type: array
+ items:
+ $ref: "#/components/schemas/WorkspaceSearchChunk"
+ pending_repos:
+ type: array
+ description: |
+ Repos that belong to the workspace but weren't queryable
+ yet — clone or index hasn't completed (or the last attempt
+ failed). Their matches will appear once they reach
+ `status=indexed`. Empty if every repo is ready.
+ items:
+ $ref: "#/components/schemas/WorkspaceSearchPendingRepo"
+ failed_repos:
+ type: array
+ description: |
+ Repos whose per-project vector search returned an error
+ during this request (e.g. corrupt collection on disk). The
+ rest of the workspace is still aggregated; surface this so
+ the operator knows the result set is incomplete.
+ items:
+ $ref: "#/components/schemas/WorkspaceSearchFailedRepo"
+ stale_fts_repos:
+ type: array
+ description: |
+ Repos that were indexed before the BM25 mirror
+ (`chunks_fts`) was added: they're queryable via dense
+ search but the sparse half of the hybrid is empty, which
+ collapses the algorithm to pure-dense fan-out for these
+ entries. Trigger a reindex on each to backfill the FTS
+ side. Empty once every workspace repo has been reindexed
+ under the new schema.
+ items:
+ $ref: "#/components/schemas/WorkspaceSearchStaleFTSRepo"
- IndexFinishResponse:
+ WorkspaceSearchPendingRepo:
type: object
- required: [status, files_processed, chunks_created]
+ required: [project_path, status]
properties:
+ project_path:
+ type: string
status:
type: string
- enum: [completed]
- files_processed:
- type: integer
- minimum: 0
- chunks_created:
- type: integer
- minimum: 0
+ enum: [pending, cloning, indexing, failed]
+ description: |
+ Current per-project status. Anything other than `indexed`
+ means the project hasn't contributed to this response.
- IndexCancelResponse:
+ WorkspaceSearchFailedRepo:
type: object
- required: [cancelled]
+ required: [project_path, reason]
properties:
- cancelled:
- type: boolean
+ project_path:
+ type: string
+ reason:
+ type: string
+ description: |
+ Short category for the failure — `vectorstore_error`,
+ `timeout`, etc. Intentionally not the raw error message so
+ internal details don't leak; check the server logs by
+ `workspace_id` for the full error.
- IndexProgressInfo:
+ WorkspaceSearchStaleFTSRepo:
type: object
- description: |
- Progress payload. The active-session variant carries every field;
- the historical-fallback variant only carries `files_processed`,
- `files_total`, and `chunks_created`.
+ required: [project_path]
properties:
- phase:
- type: string
- enum: [receiving, completed]
- files_discovered:
- type: integer
- minimum: 0
- files_processed:
- type: integer
- minimum: 0
- files_total:
- type: integer
- minimum: 0
- chunks_created:
- type: integer
- minimum: 0
- elapsed_seconds:
- type: number
- format: double
- run_id:
+ project_path:
type: string
- IndexProgressResponse:
+ WorkspaceSearchProject:
type: object
- required: [status]
+ required: [project_path, label, project_score, num_hits, bm25_score, dense_score]
properties:
- status:
+ project_path:
+ type: string
+ label:
type: string
- enum: [idle, indexing, completed, cancelled, failed, running]
description: |
- `idle` — no session ever / fallback unavailable.
- `indexing` — session active.
- `completed`/`cancelled`/`failed`/`running` — last-run status from `index_runs`.
- progress:
- $ref: "#/components/schemas/IndexProgressInfo"
+ Short human-readable label derived from the project_path's
+ last segment (e.g. "owner/repo@main" → "repo@main").
+ project_score:
+ type: number
+ format: float
+ description: |
+ Hybrid candidacy in [0,1] — the α-blend of per-query
+ min-max normalized BM25 and dense signals (α=0.5) the
+ project-relevance gate ranks by. The "Top projects"
+ panel sorts by this value.
+ num_hits:
+ type: integer
+ description: |
+ Chunks from this project that survived the per-project
+ chunk cap and made it into the global chunks list.
+ bm25_score:
+ type: number
+ format: float
+ description: |
+ Mean of the top-N raw BM25 scores in this project (sign
+ flipped from SQLite's bm25() so positive = better).
+ Surfaced so the dashboard can show "this repo surfaced
+ on literal token overlap" vs. "pure semantic similarity".
+ dense_score:
+ type: number
+ format: float
+ description: |
+ Mean of the top-N raw cosine similarities in this
+ project. Together with `bm25_score`, the two raw signals
+ that feed into `project_score`.
- IndexProgressEvent:
+ WorkspaceSearchChunk:
type: object
- description: |
- One event in the NDJSON stream emitted by `POST /index/files` when
- the client sends `Accept: application/x-ndjson`. The `event` field
- discriminates the variant; other fields are populated as relevant.
- required: [event]
+ required:
+ - project_path
+ - file_path
+ - start_line
+ - end_line
+ - score
+ - content
properties:
- event:
- type: string
- enum:
- - file_started
- - file_chunked
- - file_embedded
- - file_done
- - file_error
- - heartbeat
- - batch_done
- - error
- run_id:
+ project_path:
type: string
- path:
+ file_path:
type: string
- file_index:
- type: integer
- batch_size:
- type: integer
- chunks:
+ start_line:
type: integer
- embed_ms:
+ end_line:
type: integer
- format: int64
- ts:
+ symbol_name:
type: string
- format: date-time
- message:
+ language:
+ type: string
+ score:
+ type: number
+ format: float
+ description: |
+ Raw cosine similarity between the query and this chunk —
+ the value chunks are sorted by. No per-project boost is
+ applied (a previous revision multiplied this by
+ project_score, which let one repo dominate every result
+ for short queries like product-name acronyms).
+ content:
type: string
- fatal:
- type: boolean
- files_accepted:
- type: integer
- chunks_created:
- type: integer
- files_processed_total:
- type: integer
diff --git a/install-develop.sh b/install-develop.sh
new file mode 100755
index 0000000..1af2430
--- /dev/null
+++ b/install-develop.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# cix installer — DEVELOP channel
+#
+# Installs the latest CLI build from the `develop` branch via the floating
+# `cli/develop` GitHub release. This tag is force-updated on every PR merged
+# into develop that touches `cli/**`, so re-running this script always pulls
+# the freshest build.
+#
+# Usage:
+# curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/main/install-develop.sh | bash
+# ./install-develop.sh [--bin-dir /usr/local/bin]
+#
+# For stable releases use `install.sh` instead — that one tracks `cli/v*`
+# tags and is the right choice for production. This script is for testing
+# unreleased server features against an in-progress CLI.
+
+# GitHub "owner/repo" slug the release download URL is built from.
+REPO="dvcdsys/code-index"
+# Name of the executable inside the archive and at the install destination.
+BINARY_NAME="cix"
+# Release tag to download — the floating develop-channel tag.
+VERSION="cli/develop"
+# Install directory used when --bin-dir is not given.
+DEFAULT_BIN_DIR="/usr/local/bin"
+
+# ── Parse args ────────────────────────────────────────────────────────────────
+
+BIN_DIR="$DEFAULT_BIN_DIR"
+
+# Print the command-line help text to stdout (callers redirect to stderr
+# themselves when reporting an argument error).
+usage() {
+  # Unquoted heredoc delimiter so ${DEFAULT_BIN_DIR} expands in the help text.
+  cat <<EOF
+Usage: install-develop.sh [--bin-dir <path>]
+
+Installs the latest CLI from the develop channel (floating tag cli/develop).
+Always overwrites the existing binary — the dev tag moves on every merge,
+so skip-if-installed checks don't apply.
+
+Options:
+  --bin-dir <path>   Install directory. Default: ${DEFAULT_BIN_DIR}.
+  -h, --help         Show this help.
+EOF
+}
+
+# Walk the argument list; every recognised option consumes its own words.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --bin-dir)
+      # The script runs under `set -u`, so reading a missing "$2" would abort
+      # with a bare "unbound variable" error. Check for the value explicitly
+      # and fail with the usage text instead.
+      if [[ -z "${2:-}" ]]; then
+        echo "--bin-dir requires a directory argument" >&2
+        usage >&2
+        exit 1
+      fi
+      BIN_DIR="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+# ── Banner ────────────────────────────────────────────────────────────────────
+
+# Quoted heredoc delimiter: the banner is printed literally, with no
+# variable or command expansion.
+cat <<'EOF'
+⚠ Installing cix from the DEVELOP channel.
+
+ The cli/develop tag is force-updated on every merge to the develop
+ branch. Builds here are unreleased and may break. Use install.sh for
+ stable.
+
+EOF
+
+# ── Detect platform ───────────────────────────────────────────────────────────
+
+# Start from the raw values reported by uname, then normalise each one to the
+# lowercase token used in the release archive name.
+OS="$(uname -s)"
+ARCH="$(uname -m)"
+
+# Kernel name → OS token. Anything other than macOS or Linux is rejected.
+if [[ "$OS" == "Darwin" ]]; then
+  OS="darwin"
+elif [[ "$OS" == "Linux" ]]; then
+  OS="linux"
+else
+  echo "Unsupported OS: $OS (supported: macOS, Linux)" >&2
+  exit 1
+fi
+
+# Machine name → architecture token. Both spellings of 64-bit ARM map to arm64.
+if [[ "$ARCH" == "x86_64" ]]; then
+  ARCH="amd64"
+elif [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then
+  ARCH="arm64"
+else
+  echo "Unsupported architecture: $ARCH (supported: x86_64, arm64)" >&2
+  exit 1
+fi
+
+PLATFORM="${OS}-${ARCH}"
+
+echo "Installing cix from develop channel (${PLATFORM})..."
+
+# ── Download ──────────────────────────────────────────────────────────────────
+
+# Scratch directory for the download; removed on any exit path.
+TMP_DIR="$(mktemp -d)"
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+ARCHIVE="${BINARY_NAME}-${PLATFORM}.tar.gz"
+# The tag name goes into the URL as-is, slash included, so `cli/develop`
+# yields `.../releases/download/cli/develop/...`.
+DOWNLOAD_URL="https://github.com/${REPO}/releases/download/${VERSION}/${ARCHIVE}"
+
+echo "Downloading ${DOWNLOAD_URL}..."
+# -f makes curl exit non-zero on an HTTP error, which triggers the hint below.
+curl -fsSL "$DOWNLOAD_URL" -o "${TMP_DIR}/${ARCHIVE}" || {
+  echo "Failed to download ${DOWNLOAD_URL}" >&2
+  echo "The cli/develop release may not have been built yet — wait for the" >&2
+  echo "next merge to develop that touches cli/, or check Actions." >&2
+  exit 1
+}
+
+# Unpack next to the archive inside the scratch directory.
+tar -xzf "${TMP_DIR}/${ARCHIVE}" -C "$TMP_DIR"
+
+# ── Install ───────────────────────────────────────────────────────────────────
+
+# The archive is expected to contain the executable at its top level.
+BINARY="${TMP_DIR}/${BINARY_NAME}"
+if [ ! -f "$BINARY" ]; then
+  echo "Binary not found in archive: ${BINARY_NAME}" >&2
+  exit 1
+fi
+
+chmod +x "$BINARY"
+
+# Create the install directory when it is missing (e.g. a fresh
+# --bin-dir ~/.local/bin). Without this the writability test below fails and
+# the sudo fallback then errors out because the destination does not exist.
+# Try as the current user first; fall back to sudo only if that fails.
+if [ ! -d "$BIN_DIR" ]; then
+  mkdir -p "$BIN_DIR" 2>/dev/null || sudo mkdir -p "$BIN_DIR"
+fi
+
+# Move into place directly when the directory is writable; otherwise escalate.
+if [ -w "$BIN_DIR" ]; then
+  mv "$BINARY" "${BIN_DIR}/${BINARY_NAME}"
+else
+  echo "Installing to ${BIN_DIR} (requires sudo)..."
+  sudo mv "$BINARY" "${BIN_DIR}/${BINARY_NAME}"
+fi
+
+# ── Verify ────────────────────────────────────────────────────────────────────
+
+INSTALLED_PATH="${BIN_DIR}/${BINARY_NAME}"
+# Best-effort version probe: stderr is discarded, only the first output line
+# is kept, and `|| true` stops a failing `--version` from aborting the script
+# under `set -euo pipefail`. An empty result selects the generic message below.
+INSTALLED_VERSION_LINE=$("$INSTALLED_PATH" --version 2>/dev/null | head -1 || true)
+
+echo ""
+if [ -n "$INSTALLED_VERSION_LINE" ]; then
+ echo "✓ ${INSTALLED_VERSION_LINE}"
+ echo " installed at ${INSTALLED_PATH}"
+else
+ echo "✓ cix (develop) installed at ${INSTALLED_PATH}"
+fi
+
+# Warn if a different cix is shadowing this one on PATH, or hint at a PATH
+# change when no cix resolves at all. `|| true` keeps a failed lookup from
+# tripping `set -e`; PATH_BIN is then empty.
+PATH_BIN=$(command -v "$BINARY_NAME" 2>/dev/null || true)
+if [ -n "$PATH_BIN" ] && [ "$PATH_BIN" != "$INSTALLED_PATH" ]; then
+ echo ""
+ echo "⚠ Another cix is first on PATH: ${PATH_BIN}"
+ echo " Add ${BIN_DIR} earlier in PATH or remove the other binary."
+elif [ -z "$PATH_BIN" ]; then
+ echo " Add ${BIN_DIR} to your PATH if needed."
+fi
+
+echo ""
+echo "Re-run this script any time to pick up the latest develop build."
diff --git a/plugins/cix/.claude-plugin/plugin.json b/plugins/cix/.claude-plugin/plugin.json
new file mode 100644
index 0000000..d18aa6f
--- /dev/null
+++ b/plugins/cix/.claude-plugin/plugin.json
@@ -0,0 +1,26 @@
+{
+ "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
+ "name": "cix",
+ "version": "0.2.1",
+ "description": "Semantic code search and navigation for Claude Code via the cix index. Single-repo cix skill + commands, plus the experimental cix-workspace skill and cix-workspace-investigator sub-agent for cross-project research across cix workspaces.",
+ "author": {
+ "name": "dvcdsys",
+ "email": "dvcdsys@gmail.com"
+ },
+ "homepage": "https://github.com/dvcdsys/code-index",
+ "repository": "https://github.com/dvcdsys/code-index",
+ "license": "MIT",
+ "keywords": [
+ "search",
+ "code-search",
+ "semantic",
+ "navigation",
+ "indexing",
+ "embeddings",
+ "ai",
+ "workspace",
+ "cross-project",
+ "monorepo",
+ "sub-agent"
+ ]
+}
diff --git a/plugins/cix/README.md b/plugins/cix/README.md
new file mode 100644
index 0000000..858abfd
--- /dev/null
+++ b/plugins/cix/README.md
@@ -0,0 +1,275 @@
+# cix — Claude Code plugin
+
+Semantic code search and navigation for Claude Code, powered by the
+[cix](https://github.com/dvcdsys/code-index) index.
+
+## What you get
+
+- **`/cix:search`, `/cix:def`, `/cix:refs`, `/cix:init`, `/cix:status`,
+ `/cix:summary`** — slash commands wrapping the most-used `cix` CLI
+ operations.
+- **Bundled cix CLI** — the plugin auto-installs `cix` on first use if
+ it isn't already in your `PATH` (no sudo, installs to `~/.local/bin`).
+ If you already have `cix` installed via the official `install.sh`, the
+ plugin just uses it.
+- **`cix` skill (SKILL.md)** — lazy-loaded full instruction sheet
+ covering when to use cix vs Grep, query patterns, scoring landscape,
+ and CLI flags. Loads into the conversation only when Claude or you
+ invoke it (`/cix:search`, `/cix-skill`, or auto-trigger on a relevant
+ prompt). Stays in context for the rest of the session — never
+ duplicated.
+- **`cix-workspace` skill (SKILL.md)** *(experimental, **manual-only**)* —
+ companion workflow for tasks that span more than one repo. **Does
+ not auto-trigger** — invoke it explicitly with `/cix-workspace <task>`
+ when you want the full cross-project workflow guidance: which repos
+ are in scope, what code is relevant, what changes need to land.
+ Includes ten trust rules for interpreting `projects[]` vs `chunks[]`,
+ a four-part fan-out prompt template, and an anti-patterns list.
+- **`cix-workspace-investigator` sub-agent** *(experimental)* — thin
+ read-only shell around `cix search`/`cix def`/`cix refs` for parallel
+ per-repo fan-out from the workspace skill. Hard rules baked in: one
+ repo per spawn, no edits, no recursion. Methodology and output
+ format are the main agent's call per spawn; the sub-agent follows
+ instructions. Lives at `agents/cix-workspace-investigator.md` —
+ available as `subagent_type="cix-workspace-investigator"` in `Agent`
+ tool calls.
+- **Behavioral nudges (5 hooks):**
+ - **SessionStart** — calls `cix status` (2 s timeout). Caches the
+ yes/no verdict in `$CLAUDE_PLUGIN_DATA/cix-aware-$SESSION_ID-$DIR_HASH`,
+ injects a one-line reminder on success.
+ - **CwdChanged** — when Claude `cd`s into another directory mid-session,
+ re-runs `cix status` for the new dir and caches the verdict. Silent
+ (no reminder); PostToolUse handles the first-Grep-in-new-project
+ nudge through its per-project backoff.
+ - **PostToolUse(Grep|Glob|Bash)** — fires after a Grep/Glob call or a
+ Bash command that looks like `grep`/`rg`/`find`/`fd`/`ag`/`ack` (other Bash stays
+ silent). Reads the cache for the current `(session, project_dir)`
+ pair; no inline `cix` calls. If the verdict is "yes" (`1`),
+ suggests `cix search` with exponential backoff per project (fires
+ on call #1, 2, 4, 8, …). Missing cache or "no" (`0`) → silent for
+ the rest of the session in that project. (PostToolUse instead of
+ PreToolUse because current Claude Code only surfaces
+ `hookSpecificOutput.additionalContext` for PostToolUse,
+ UserPromptSubmit, and SessionStart — the model sees the nudge in
+ time for the NEXT decision, which is behaviorally equivalent for
+ an advisory hook. Rationale lives at `scripts/grep-nudge.sh:9-14`.)
+ - **PostCompact** — after auto-compaction in long sessions, re-injects
+ the SessionStart reminder if the current project is cix-aware
+ (skill body itself survives compaction natively; the SessionStart
+ one-liner does not).
+ - **SessionEnd** — glob-deletes every per-(session, dir) cache file
+ when the session terminates. Best-effort; the 30-day GC inside
+ SessionStart catches markers left over from forced kills.
+
+The cache key includes a project-dir hash (`shasum -a 256` first 8
+chars), so per-session, per-project state is isolated — Claude can
+move between projects mid-session and each one keeps its own verdict
+and backoff counter.
+
+## Install
+
+From an existing Claude Code marketplace:
+
+```
+/plugin marketplace add dvcdsys/code-index
+/plugin install cix@code-index
+/reload-plugins # or restart Claude Code
+```
+
+Or for local development against this repo:
+
+```
+/plugin marketplace add /path/to/code-index
+/plugin install cix@code-index --scope local
+```
+
+## Requirements
+
+- **Claude Code v2.1.0+** (uses `hookSpecificOutput.additionalContext`
+ for hook-driven nudges).
+- **`curl`** — only needed the first time, for the auto-bootstrap of
+ the `cix` CLI.
+- **A reachable `cix-server`** — the CLI is a thin client. If you don't
+ yet have a server, see the project README for Docker setup
+ instructions.
+
+## How adoption works (the design)
+
+The plugin uses a 4-layer approach so SKILL.md loads at most once and
+nudges don't spam the context:
+
+| Layer | Mechanism | Cost over a 100-prompt session |
+|---|---|---|
+| 1. Skill description | Native Claude Code (always-in-context, ~200 B) | ~200 B once |
+| 2. SessionStart hook | One-time reminder in indexed projects | ~200 B once |
+| 3. PostToolUse(Grep\|Glob\|Bash) hook | Exponential-backoff nudge | ~80 B × ~7 calls = ~560 B |
+| 4. SKILL.md body | Native lazy-load (skill mechanism) | ~7 KB **once** if invoked |
+
+Total plugin context overhead in a session that uses cix heavily:
+~8 KB. In a session that doesn't touch cix at all: ~400 B (skill
+description + slash command metadata).
+
+The SKILL.md body is **never duplicated** — Claude Code's skill
+mechanism guarantees a single insertion that stays in context for the
+session. See the [skill content lifecycle](https://code.claude.com/docs/en/skills#skill-content-lifecycle)
+docs.
+
+## Configuration
+
+### Where the bundled CLI is installed
+
+The wrapper installs `cix` to `~/.local/bin/cix` by default. To override
+the install location, set `CIX_PLUGIN_BIN_DIR` in your environment:
+
+```bash
+export CIX_PLUGIN_BIN_DIR=/usr/local/bin # if you want sudo-installed
+```
+
+If you've already installed `cix` system-wide (e.g. via the project's
+`install.sh`), the wrapper detects it and uses that binary — no second
+copy is downloaded.
+
+### Skipping the auto-install
+
+Set `CIX_PLUGIN_BIN_DIR` to a directory that already contains a working
+`cix` binary, or simply make sure `cix` is in your `$PATH` before
+enabling the plugin.
+
+For corporate, regulated, or air-gapped environments where the plugin
+must never reach out to the network, set `CIX_NO_AUTOINSTALL=1`. When
+`cix` is not already installed, the wrapper then fails with a clear
+message (including the exact manual-install command) instead of
+fetching `install.sh`.
+
+The auto-bootstrap is **pinned** to a specific CLI release tag (see
+`CIX_PINNED_VERSION` at the top of `scripts/cix-wrapper.sh`): both the
+`install.sh` script reference and the installed binary version are
+fixed, so a fresh install is reproducible rather than tracking whatever
+is on `main`.
+
+### Capping cix output (`CIX_MAX_OUTPUT_LINES`)
+
+By default the wrapper passes cix output through untouched. Set
+`CIX_MAX_OUTPUT_LINES` to a positive integer to cap stdout to that many
+lines and append a one-line truncation notice:
+
+```bash
+export CIX_MAX_OUTPUT_LINES=80
+```
+
+The cap is layered on top of any `--limit N` flag, not a replacement for
+it — `--limit` bounds how many *files* cix returns; `CIX_MAX_OUTPUT_LINES`
+is a hard ceiling on total printed lines, useful for keeping a single
+`cix search` from flooding an agent's context. Unset (the default),
+`0`, or any non-numeric value disables the cap entirely: the wrapper
+`exec`s cix directly with full output, streaming, and the original exit
+code. stderr is never capped.
+
+### Hook state cleanup
+
+Two per-session marker files live in `$CLAUDE_PLUGIN_DATA`
+(resolves to `~/.claude/plugins/data/cix-code-index/`):
+- `cix-aware-$SESSION_ID-$DIR_HASH` — written by SessionStart (and
+ refreshed by CwdChanged), read by the PostToolUse nudge.
+ Single-byte file (`0` or `1`). The `$DIR_HASH` suffix isolates the
+ verdict per project directory within a session.
+- `cix-grep-count-$SESSION_ID` — counter for the exponential backoff.
+
+This directory is plugin-managed and **not** cleaned by the OS
+(unlike `/tmp`, which macOS purges daily). The plugin manages cleanup
+in two tiers:
+1. **SessionEnd hook** — deletes both markers when the session
+ terminates normally. Covers the common case.
+2. **30-day GC in SessionStart** — opportunistically deletes markers
+ older than 30 days at every session start. Catches markers left
+ over from sessions that exited forcibly (kill -9, OOM).
+
+## Files
+
+| Path | Purpose |
+|---|---|
+| `.claude-plugin/plugin.json` | Plugin manifest |
+| `skills/cix/SKILL.md` | Lazy-loaded single-repo usage skill (~7 KB) |
+| `skills/cix-workspace/SKILL.md` | Cross-project workflow skill *(experimental)* |
+| `agents/cix-workspace-investigator.md` | Read-only per-repo investigator sub-agent *(experimental)* |
+| `commands/*.md` | Six slash commands |
+| `hooks/hooks.json` | SessionStart + PostToolUse(Grep\|Glob\|Bash) + CwdChanged + PostCompact + SessionEnd registration |
+| `scripts/cix-wrapper.sh` | "Use system or auto-install" CLI wrapper (pinned, opt-out via `CIX_NO_AUTOINSTALL`) |
+| `scripts/session-start.sh` | One-time session reminder |
+| `scripts/grep-nudge.sh` | Exponential-backoff Grep nudge |
+| `scripts/lib-cix-probe.sh` | Shared `cix status` probe helpers (3-state verdict), sourced by the hooks |
+| `bin/cix` | Symlink to wrapper, exposed on `$PATH` while plugin enabled |
+
+## Cross-project workflow (experimental, manual-only)
+
+For tasks that touch more than the repo you're cd'd into, the plugin
+ships a second skill — **`cix-workspace`** — plus a dedicated
+**`cix-workspace-investigator`** sub-agent for parallel per-repo
+fan-out. **Neither auto-triggers.** You invoke them explicitly when
+you actually need them — typically with `/cix-workspace <task>`.
+
+> *Why manual-only?* The workspace flow is heavier than single-repo
+> `cix search` (multi-repo fan-out, server-side clones, sub-agent
+> spawns) and only pays off when the task genuinely spans repos. We
+> don't want it firing on every request that vaguely mentions
+> "services". Load it deliberately, when you've decided cross-project
+> research is the right shape of work. This policy may change once
+> the heuristics around "is this really cross-project?" are more
+> reliable.
+
+The flow once you've invoked it:
+
+1. `cix-workspace` skill loads, structures the request around three
+ questions (which repos? what code? what changes?).
+2. Main agent runs a short, term-rich workspace search and reads the
+ `projects[]` panel.
+3. For each relevant repo, main agent spawns a `cix-workspace-investigator`
+ sub-agent with the task verbatim, the project_path, seed chunks
+ plus its own interpretive commentary on them, and an explicit
+ deliverable.
+4. Sub-agents run in parallel with isolated context. Main agent
+ synthesizes their reports.
+
+Requirements:
+
+- Configured cix server with **workspaces enabled**
+ (`CIX_WORKSPACES_ENABLED=true`).
+- At least one workspace containing the repos you're working across.
+
+See [`workspaces.md`](https://github.com/dvcdsys/code-index/blob/main/workspaces.md)
+in the parent project for setup details and the full search-algorithm
+reference.
+
+The skill body documents ten "trust rules" derived from internal
+calibration testing — how to read `chunk.score=0` (BM25-only literal
+match), when to drop down to per-project search, when adding a
+disambiguating token helps vs hurts, and so on. Load it via
+`/cix-workspace` when you need the full reference; it stays in
+context for the rest of the session.
+
+## Troubleshooting
+
+- **"cix: command not found" inside Claude Code Bash tool** — the
+ plugin isn't enabled or `bin/cix` isn't on `$PATH`. Run
+ `/plugin list` and `which cix` from inside a Claude Code session.
+- **Hooks not firing** — run Claude Code with `--debug` and look for
+ hook registration messages. Check `~/.claude/...` (or
+ your local cache path) for the hook scripts and verify they're
+ executable: `ls -la <plugin-install-dir>/scripts/`.
+- **Nudges feel too frequent / too rare** — edit the power-of-2 check
+ in `scripts/grep-nudge.sh` to your taste. The current schedule
+ (1, 2, 4, 8, 16, …) was chosen to balance "loud at start" with
+ "fade away".
+- **"This project has a cix semantic code index" never appears** —
+ the project must contain a `.cix/` directory. Run `/cix:init` first.
+- **Nudge does not fire on `grep` invoked via Bash** — the `PostToolUse`
+ matcher works on `tool_name`, not on the command string. The plugin
+ matches `Bash` explicitly and filters grep/rg/find/fd/ag/ack from
+ `tool_input.command` inside `grep-nudge.sh`. Confirm
+ `hooks/hooks.json` contains both `"matcher": "Grep|Glob"` and
+ `"matcher": "Bash"` entries; the regression test in
+ `tests/manifest.bats` enforces this.
+
+## License
+
+MIT — same as the parent project.
diff --git a/plugins/cix/agents/cix-workspace-investigator.md b/plugins/cix/agents/cix-workspace-investigator.md
new file mode 100644
index 0000000..6295327
--- /dev/null
+++ b/plugins/cix/agents/cix-workspace-investigator.md
@@ -0,0 +1,115 @@
+---
+name: cix-workspace-investigator
+description: Read-only deep-dive of ONE repository inside a workspace fan-out task. Receives the user task + project_path + seed chunks (with the main agent's commentary on what to trust and what to question) + an explicit deliverable. Returns whatever the main agent asked for, in the format they asked for. Use only when the main session is running the cix-workspace skill workflow and has identified one or more cross-project repos to investigate in parallel. Do not use for: single-repo questions (use cix search directly), tasks not framed by the cix-workspace skill, anything that requires editing or running code.
+tools: Bash, Read, Grep
+model: inherit
+---
+
+# `cix-workspace-investigator`
+
+You investigate ONE repository as part of a larger cross-project workspace task.
+The main agent has full context about the user's goal; you only see what they
+passed to you in this single prompt.
+
+## Where your assigned project lives — read this FIRST
+
+The `project_path` (or `project_name`) the main agent passed you comes in one
+of two shapes. Behave very differently depending on which:
+
+- **Local working tree** — looks like `/Users/.../some-repo` or `~/code/foo`.
+ The repo exists on this machine. `Read`, `Grep`, `ls`, `cat` all work
+ against its files. You can still pass `-n <project>` to cix for
+ precision, but plain `cd <project_path> && cix search …` also works.
+
+- **Remote-only cix project** — looks like `github.com/<owner>/<repo>@<ref>`
+ (the form `cix list` shows for GitHub-attached projects). **The repo is
+ NOT on disk.** `find`, `ls -R`, `locate`, `Grep`, and `Read` will return
+ nothing useful — there's nothing to read locally. The cix server has the
+ files, chunks, and symbols; you reach them only through the `cix` CLI.
+
+**How to tell which you have:** run `cix list` once, then `grep` for the
+exact identifier the main agent gave you.
+
+```bash
+cix list | grep -F "<project-identifier>"
+```
+
+- A line starting with `[✓] /` → local working tree.
+- A line starting with `[✓] github.com/` → remote-only.
+- No match → tell the main agent the project isn't indexed and stop.
+
+If the project is remote-only, **do not** waste calls on `find`, `ls -R`,
+`Grep`, or `Read`. They will silently return empty and look like you're
+making progress when you're not. Treat the cix CLI as your only window into
+the code.
+
+## Your tools
+
+You have a read-only toolkit for code investigation inside the assigned project:
+
+- **`cix search "<query>" -n <project>`** — semantic / hybrid lookups
+ *inside the assigned project*. **Always pass `-n <project>`** (the
+ identifier from `cix list`); without it, cix searches whatever project
+ matches the current working directory — i.e. the main session's project,
+ not yours.
+- **`cix def <symbol> -n <project>`** — go-to-definition, scoped to
+ the assigned project. Same `-n` rule.
+- **`cix refs <symbol> -n <project>`** — find every usage, scoped.
+- **`cix symbols <name> -n <project>`** — symbol search, scoped.
+- **`cix summary -n <project>`** — overview of languages, top dirs,
+ key symbols. Good first call to orient inside a remote-only project.
+- **Read** — open specific files. **Local projects only.** For remote-only
+ projects this returns nothing useful; rely on `cix search` chunk snippets
+ instead, and raise `--limit` if you need more context around a hit.
+- **Grep** — exact literal strings inside a **local** project. Not for
+ semantic search, not for remote-only projects.
+- **Bash** — for running the `cix` CLI itself. Do **not** use it to navigate
+ the filesystem hunting for the project (`find /`, `locate`, `ls -R ~`);
+ remote-only projects aren't there. Never mutate state.
+
+The cix index already covers this project — you don't need to (and can't)
+re-index.
+
+## Hard rules — non-negotiable
+
+1. **Stay inside the assigned project.** Every `cix` invocation MUST carry
+ `-n <project>`. Without it, cix searches the cwd's project — that's
+ the main session's repo, not yours. Don't read or query other workspace
+ repos. If a finding requires looking elsewhere, surface it as an
+ uncertainty for the main agent to fan out further.
+2. **Never hunt the filesystem for a remote-only project.** No
+ `find /`, no `locate`, no `ls -R ~`, no recursive Grep across `/`.
+ If `cix list` shows the project as `github.com/…@…`, the files do
+ not exist on this machine — the cix server is the only source. Pretending
+ to search will burn tool calls and return nothing.
+3. **Read-only.** No `Write`, no `Edit`, no `git` mutations, no shell side
+ effects. If you see a bug, describe it — don't fix it.
+4. **No recursion.** Don't spawn further sub-agents. You are one level of
+ fan-out; the main agent handles synthesis.
+5. **Follow the main agent's instructions exactly.** Output format, depth,
+ word budget, and what to look for are the main agent's call — not yours.
+ If they ask for three bullets, give three bullets. If they ask for a
+ five-step trace, give that. Don't volunteer extra structure.
+6. **Report what you can't do.** If a file is missing, if `cix` returns
+ empty for a term that should exist, if a seed chunk doesn't match what
+ the main agent suggested, if the project is remote-only and chunks alone
+ don't carry enough context — say so explicitly. Don't fabricate findings
+ to fill a template, and don't quietly fall back to grep against the
+ wrong tree.
+
+## Output contract
+
+Return exactly what the main agent asked for, in exactly the format they
+asked for. The main agent already knows how to parse the response they
+requested. Don't add a preamble, don't add a meta-summary unless asked,
+don't restate the task back at them.
+
+If the request is ambiguous, pick the most-likely interpretation, execute it,
+and flag the ambiguity in one short line at the end.
+
+## What you are NOT
+
+You are not a generic code-explorer. You are not a planner. You are not a
+reviewer. You are a focused, read-only investigator for one repo, working
+under explicit per-call instructions from a main agent that already knows
+the workspace and the user.
diff --git a/plugins/cix/bin/cix b/plugins/cix/bin/cix
new file mode 120000
index 0000000..4263b5f
--- /dev/null
+++ b/plugins/cix/bin/cix
@@ -0,0 +1 @@
+../scripts/cix-wrapper.sh
\ No newline at end of file
diff --git a/plugins/cix/commands/def.md b/plugins/cix/commands/def.md
new file mode 100644
index 0000000..c53303e
--- /dev/null
+++ b/plugins/cix/commands/def.md
@@ -0,0 +1,15 @@
+---
+description: Find symbol definition(s) via cix — go-to-definition across the indexed codebase
+argument-hint: <symbol> [--kind function|class|method|type] [--file <path>]
+allowed-tools: Bash(cix *)
+---
+
+Look up the definition of the symbol **$ARGUMENTS** in the cix index:
+
+```!
+cix definitions $ARGUMENTS
+```
+
+If multiple matches are returned, point out the most likely one based on
+context. If nothing is found, suggest `cix symbols $ARGUMENTS` for a
+broader name search.
diff --git a/plugins/cix/commands/init.md b/plugins/cix/commands/init.md
new file mode 100644
index 0000000..7fc49aa
--- /dev/null
+++ b/plugins/cix/commands/init.md
@@ -0,0 +1,17 @@
+---
+description: Initialize the cix index for the current project (registers, indexes, starts file watcher)
+allowed-tools: Bash(cix *)
+---
+
+Initialize the cix index for the current project. This registers the
+project with the cix server, performs a full initial index, and starts
+the file-watcher daemon for auto-reindex on changes.
+
+```!
+cix init
+```
+
+If the indexing run is in-progress, you can monitor it with `/cix:status`.
+If it fails, common causes are: cix-server not reachable, missing
+`CIX_API_KEY` env var, or `~/.cix/data` permission issues. Check
+`cix status` for details.
diff --git a/plugins/cix/commands/refs.md b/plugins/cix/commands/refs.md
new file mode 100644
index 0000000..a5e3adb
--- /dev/null
+++ b/plugins/cix/commands/refs.md
@@ -0,0 +1,14 @@
+---
+description: Find symbol references via cix — locate every usage of a symbol across the codebase
+argument-hint: <symbol> [--file <path>] [--limit <n>]
+allowed-tools: Bash(cix *)
+---
+
+Find references to the symbol **$ARGUMENTS** in the cix index:
+
+```!
+cix references $ARGUMENTS
+```
+
+Group the references by file and call out any high-traffic call sites or
+suspicious usage patterns. If you need fewer results, add `--limit 20`.
diff --git a/plugins/cix/commands/search.md b/plugins/cix/commands/search.md
new file mode 100644
index 0000000..7e3c1c7
--- /dev/null
+++ b/plugins/cix/commands/search.md
@@ -0,0 +1,18 @@
+---
+description: Semantic code search via cix — find code by meaning, not by exact strings
+argument-hint: <query>
+allowed-tools: Bash(cix *)
+---
+
+Run a semantic search through the cix index for the query: **$ARGUMENTS**
+
+```!
+cix search "$ARGUMENTS"
+```
+
+Summarize the most relevant matches above. If results look weak, try:
+- A more specific phrasing that names the area or symbol
+- `cix search "$ARGUMENTS" --min-score 0.2` to lower the relevance floor
+- `cix search "$ARGUMENTS" --in <path>` to narrow scope
+
+If `cix` is not yet initialized in this project, run `/cix:init` first.
diff --git a/plugins/cix/commands/status.md b/plugins/cix/commands/status.md
new file mode 100644
index 0000000..3b9326e
--- /dev/null
+++ b/plugins/cix/commands/status.md
@@ -0,0 +1,15 @@
+---
+description: Show cix indexing status and file-watcher state for the current project
+allowed-tools: Bash(cix *)
+---
+
+Show the current cix indexing status — last sync, number of indexed
+files, and whether the file watcher is active.
+
+```!
+cix status
+```
+
+If `Watcher: ✗ not running`, search results may be stale. Run
+`cix watch` to restart the auto-reindex daemon, or `cix reindex` for a
+one-off refresh.
diff --git a/plugins/cix/commands/summary.md b/plugins/cix/commands/summary.md
new file mode 100644
index 0000000..4d1b8b8
--- /dev/null
+++ b/plugins/cix/commands/summary.md
@@ -0,0 +1,16 @@
+---
+description: Show project overview from the cix index — languages, top directories, key symbols
+allowed-tools: Bash(cix *)
+---
+
+Print a project overview from the cix index — languages, file counts,
+top directories, and most-referenced symbols. Useful when starting work
+on an unfamiliar codebase.
+
+```!
+cix summary
+```
+
+Use this output to orient yourself before diving into specific
+subsystems. For deeper exploration, follow up with `cix search` on the
+top-level concepts you see here.
diff --git a/plugins/cix/hooks/hooks.json b/plugins/cix/hooks/hooks.json
new file mode 100644
index 0000000..12db2f4
--- /dev/null
+++ b/plugins/cix/hooks/hooks.json
@@ -0,0 +1,64 @@
+{
+ "hooks": {
+ "SessionStart": [
+ {
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/session-start.sh"
+ }
+ ]
+ }
+ ],
+ "PostToolUse": [
+ {
+ "matcher": "Grep|Glob",
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/grep-nudge.sh"
+ }
+ ]
+ },
+ {
+ "matcher": "Bash",
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/grep-nudge.sh"
+ }
+ ]
+ }
+ ],
+ "CwdChanged": [
+ {
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/cwd-changed.sh"
+ }
+ ]
+ }
+ ],
+ "PostCompact": [
+ {
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/post-compact.sh"
+ }
+ ]
+ }
+ ],
+ "SessionEnd": [
+ {
+ "hooks": [
+ {
+ "type": "command",
+ "command": "${CLAUDE_PLUGIN_ROOT}/scripts/session-end.sh"
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/plugins/cix/scripts/cix-wrapper.sh b/plugins/cix/scripts/cix-wrapper.sh
new file mode 100755
index 0000000..7391521
--- /dev/null
+++ b/plugins/cix/scripts/cix-wrapper.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+# cix CLI wrapper for the Claude Code plugin.
+#
+# Strategy: "use system cix if available, else bootstrap install via the
+# official install.sh script". We do NOT bundle the binary in git or
+# maintain a separate cache — install.sh is the single source of truth.
+#
+# Resolution order:
+# 1. If `cix` is found anywhere in PATH (excluding our own dir),
+# exec it directly.
+# 2. Otherwise, run install.sh with --bin-dir=$HOME/.local/bin
+# (no sudo required), then exec the freshly installed binary.
+
+set -euo pipefail
+
+# Pinned CLI release used for auto-bootstrap. Both the install.sh script ref
+# AND the --version arg below are pinned to this tag so a fresh bootstrap is
+# reproducible and auditable — never silently tracking whatever is on `main`.
+# Bump this as part of CLI releases (see CONTRIBUTING.md / doc/RELEASES.md).
+CIX_PINNED_VERSION="cli/v0.5.0"
+
+# ── Optional stdout cap (CIX_MAX_OUTPUT_LINES) ────────────────────────────────
+# When CIX_MAX_OUTPUT_LINES is set to a positive integer, cap stdout to that
+# many lines and append a one-line truncation notice (on stdout, so the cap is
+# observable through a pipe). Unset/empty/0/non-numeric → behave EXACTLY like a
+# bare cix call: exec the binary directly, full output, streamed, original exit
+# code, zero overhead. stderr always passes through uncapped so errors are
+# never hidden. The cap is layered on top of any user `--limit N` flag, not a
+# replacement for it.
+run_cix() {
+ local bin="$1"
+ shift
+
+ case "${CIX_MAX_OUTPUT_LINES:-}" in
+ ''|*[!0-9]*|0)
+ exec "$bin" "$@"
+ ;;
+ esac
+
+ local max="$CIX_MAX_OUTPUT_LINES"
+ local out rc
+ if out="$("$bin" "$@")"; then rc=0; else rc=$?; fi
+
+ local total
+ total=$(printf '%s\n' "$out" | wc -l | tr -d '[:space:]')
+ if [ "$total" -gt "$max" ]; then
+ printf '%s\n' "$out" | head -n "$max" || true
+ printf '… [cix output truncated to %s of %s lines; unset CIX_MAX_OUTPUT_LINES for full output]\n' "$max" "$total"
+ else
+ printf '%s\n' "$out"
+ fi
+ exit "$rc"
+}
+
+# ── Resolve our own directory (real path, dereferencing symlinks) ─────────────
+# bin/cix is a symlink to ../scripts/cix-wrapper.sh, so BASH_SOURCE points to
+# the real script under scripts/, not the symlink under bin/. We need the
+# directory of the symlink (which is what's actually on PATH) — derive it
+# from $0 instead, which preserves the invocation path.
+
+if [ -n "${0:-}" ] && [ "${0:0:1}" = "/" ]; then
+ INVOKED_PATH="$0"
+else
+ # When called as bare `cix` via PATH, $0 is just "cix" — fall back to
+ # which/command -v to find ourselves.
+ INVOKED_PATH="$(command -v "$0" 2>/dev/null || echo "$0")"
+fi
+
+SELF_DIR="$(cd "$(dirname "$INVOKED_PATH")" 2>/dev/null && pwd 2>/dev/null || echo "")"
+
+# ── Look for a cix binary elsewhere in PATH ───────────────────────────────────
+# Build a "safe PATH" that excludes our own directory so command -v doesn't
+# find us recursively.
+
+SYS_CIX=""
+if [ -n "$SELF_DIR" ]; then
+ SAFE_PATH=""
+ OLD_IFS="$IFS"
+ IFS=':'
+ # shellcheck disable=SC2086
+ for dir in $PATH; do
+ [ -z "$dir" ] && continue
+ DIR_REAL="$(cd "$dir" 2>/dev/null && pwd 2>/dev/null || echo "$dir")"
+ if [ "$DIR_REAL" != "$SELF_DIR" ]; then
+ SAFE_PATH="${SAFE_PATH:+$SAFE_PATH:}$dir"
+ fi
+ done
+ IFS="$OLD_IFS"
+ SYS_CIX="$(PATH="$SAFE_PATH" command -v cix 2>/dev/null || true)"
+else
+ SYS_CIX="$(command -v cix 2>/dev/null || true)"
+fi
+
+if [ -n "$SYS_CIX" ]; then
+ run_cix "$SYS_CIX" "$@"
+fi
+
+# ── Bootstrap install via install.sh (one-time) ───────────────────────────────
+TARGET="${CIX_PLUGIN_BIN_DIR:-$HOME/.local/bin}"
+CACHED_CIX="$TARGET/cix"
+
+if [ ! -x "$CACHED_CIX" ]; then
+ # Opt-out for corp / regulated / air-gapped environments: refuse to reach
+ # out to the network. Fail cleanly with manual instructions instead of
+ # silently fetching install.sh.
+ if [ -n "${CIX_NO_AUTOINSTALL:-}" ] && [ "${CIX_NO_AUTOINSTALL}" != "0" ]; then
+ echo "Error: cix is not installed and CIX_NO_AUTOINSTALL is set — refusing to auto-install." >&2
+ echo "Install cix manually (pinned to ${CIX_PINNED_VERSION}):" >&2
+ echo " curl -fsSL https://raw.githubusercontent.com/dvcdsys/code-index/${CIX_PINNED_VERSION}/install.sh | bash -s -- --version ${CIX_PINNED_VERSION}" >&2
+ echo "Or set CIX_PLUGIN_BIN_DIR to a directory that already contains cix." >&2
+ exit 1
+ fi
+
+ if ! command -v curl >/dev/null 2>&1; then
+ echo "Error: cix is not installed and curl is not available to bootstrap it." >&2
+ echo "Install cix manually: https://github.com/dvcdsys/code-index" >&2
+ exit 1
+ fi
+
+ mkdir -p "$TARGET"
+ echo "cix CLI not found — installing ${CIX_PINNED_VERSION} to $TARGET (one-time, no sudo)..." >&2
+
+ # Official install script, pinned to the release tag for reproducibility:
+ # the script ref AND the installed binary version are both fixed to
+ # $CIX_PINNED_VERSION. Bump the constant at the top of this file on CLI
+ # releases.
+ INSTALL_URL="https://raw.githubusercontent.com/dvcdsys/code-index/${CIX_PINNED_VERSION}/install.sh"
+
+ if ! curl -fsSL "$INSTALL_URL" | bash -s -- --bin-dir "$TARGET" --version "$CIX_PINNED_VERSION"; then
+ echo "Error: cix install failed. Check network connectivity and try again." >&2
+ echo "You can install manually: curl -fsSL $INSTALL_URL | bash -s -- --version ${CIX_PINNED_VERSION}" >&2
+ exit 1
+ fi
+
+ if [ ! -x "$CACHED_CIX" ]; then
+ echo "Error: install.sh ran but $CACHED_CIX was not created." >&2
+ exit 1
+ fi
+
+ echo "cix installed successfully at $CACHED_CIX" >&2
+fi
+
+run_cix "$CACHED_CIX" "$@"
diff --git a/plugins/cix/scripts/cwd-changed.sh b/plugins/cix/scripts/cwd-changed.sh
new file mode 100755
index 0000000..a76df99
--- /dev/null
+++ b/plugins/cix/scripts/cwd-changed.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# CwdChanged hook for the cix plugin.
+#
+# Behavior: when Claude changes working directory mid-session (e.g. via
+# `cd`), evaluate cix-awareness for the new directory and cache the
+# verdict. If we already have a verdict for this (session, project_dir)
+# pair, this is a no-op — Claude probably came back to a project we
+# already evaluated.
+#
+# Why no reminder injection: PostToolUse(Grep|Glob|Bash) handles the
+# "first nudge in a fresh project" case via its per-project backoff
+# counter (call #1 in a new project always fires). Re-injecting a
+# SessionStart reminder on every `cd` would be noisy if Claude bounces
+# between directories.
+#
+# Behavior matrix:
+# Cache exists for (session, NEW_DIR) → no-op (we know already)
+# Cache absent + cix status exit 0 → write "1" (cix-aware)
+# Cache absent + cix status exit ≠ 0 → write "0" (silent for this dir)
+# Cache absent + cix CLI not found → write "0"
+# Cache absent + cix status timeout → write "unknown" (grep-nudge re-probes)
+
+set -euo pipefail
+
+# Shared probe helpers (cix_resolve_bin, cix_probe_verdict).
+# shellcheck source=lib-cix-probe.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib-cix-probe.sh"
+
+INPUT=$(cat 2>/dev/null || echo "{}")
+if command -v jq >/dev/null 2>&1; then
+ SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null || echo "")
+else
+ SESSION_ID=$(printf '%s' "$INPUT" | sed -n 's/.*"session_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1)
+fi
+
+[ -z "$SESSION_ID" ] && exit 0
+
+CACHE_DIR="${CLAUDE_PLUGIN_DATA:-/tmp}"
+mkdir -p "$CACHE_DIR" 2>/dev/null || CACHE_DIR="/tmp"
+
+PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
+
+DIR_HASH=$(printf '%s' "$PROJECT_DIR" | shasum -a 256 2>/dev/null | cut -c1-8)
+if [ -z "$DIR_HASH" ]; then
+ DIR_HASH=$(printf '%s' "$PROJECT_DIR" | tr -c 'a-zA-Z0-9' '-' | tail -c 16)
+fi
+
+CACHE_FILE="$CACHE_DIR/cix-aware-$SESSION_ID-$DIR_HASH"
+
+# ── Already evaluated this (session, project) — no-op ─────────────────────────
+if [ -f "$CACHE_FILE" ]; then
+ exit 0
+fi
+
+# ── Resolve cix binary ────────────────────────────────────────────────────────
+CIX_BIN="$(cix_resolve_bin)"
+
+if [ -z "$CIX_BIN" ]; then
+ printf '0' > "$CACHE_FILE"
+ exit 0
+fi
+
+# ── Probe cix status (2s timeout) → three-state verdict ───────────────────────
+VERDICT="$(cix_probe_verdict "$CIX_BIN" "$PROJECT_DIR" 2)"
+# "1" cix-aware · "0" not indexed · "unknown" timed out (grep-nudge re-probes).
+printf '%s' "$VERDICT" > "$CACHE_FILE"
+
+# Silent — no context injection. PostToolUse(Grep|Glob|Bash) handles the
+# first-Grep-in-new-project nudge through its own backoff counter.
+exit 0
diff --git a/plugins/cix/scripts/grep-nudge.sh b/plugins/cix/scripts/grep-nudge.sh
new file mode 100755
index 0000000..a7a6a8c
--- /dev/null
+++ b/plugins/cix/scripts/grep-nudge.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+# PostToolUse(Grep|Glob|Bash) hook for the cix plugin.
+#
+# Behavior: if SessionStart (or CwdChanged) concluded the current
+# project is cix-indexed (cache file for this (session, project_dir)
+# pair contains "1"), occasionally inject a system reminder pointing
+# toward `cix search` instead of grep. Otherwise stay silent.
+#
+# Wired on PostToolUse (not PreToolUse) because current Claude Code
+# only surfaces hookSpecificOutput.additionalContext for PostToolUse,
+# UserPromptSubmit, and SessionStart — PreToolUse output without an
+# explicit permissionDecision goes nowhere visible to the model.
+# Firing after the grep means the model gets the nudge in time for
+# the NEXT decision; behaviorally equivalent for an advisory hook.
+#
+# Bash is matched in addition to Grep/Glob because real-session usage
+# of `grep`/`rg`/`find`/`fd`/`ag`/`ack` happens through the Bash tool
+# (pipelines, `| head`, `cd && grep …`). The Bash branch inspects
+# tool_input.command and only proceeds when it looks like a grep- or
+# find-family call; other Bash (ls, git status, make, go test) is
+# fully silent and does not even increment the backoff counter.
+#
+# `find`/`fd` are included because the agent often uses them to locate
+# files by name pattern when a cix symbol/semantic query would be faster
+# — `find . -name '*Auth*'` vs `cix search "auth"` / `cix def Auth`.
+# `ag` (the_silver_searcher) and `ack` are grep alternatives covered
+# for the same reason.
+#
+# Cache states (written by SessionStart / CwdChanged): "1" indexed,
+# "0" definitively not indexed, "unknown" cix status timed out at start.
+# This hook normally relies on that cache and does NOT call cix. The ONE
+# exception is "unknown": that means SessionStart couldn't reach the
+# server (slow/down). On the next Grep we re-probe `cix status` once
+# (short timeout) and upgrade the cache to "0"/"1" — so a server that
+# was down at session start but came up later still gets nudges, instead
+# of being silenced for the whole session. A definitive "0" is NOT
+# re-probed (the server answered "not indexed"; respect that).
+#
+# Per-(session, project) backoff: each project Claude visits has its
+# own exponential-backoff counter. A new `cd` into a fresh project
+# starts the backoff from scratch (call #1 → nudge), so the first Grep
+# in a new cix-aware project always gets a reminder.
+#
+# Throttling: exponential backoff. Reminders fire on the 1st, 2nd, 4th,
+# 8th, 16th, 32nd, 64th, ... Grep/Glob invocation in the current
+# project. ~7 reminders per 100-Grep span, loud at the start, fading
+# as the model "learns" the workflow.
+
+set -euo pipefail
+
+# Shared probe helpers (cix_resolve_bin, cix_probe_verdict) — used only for
+# the "unknown" re-probe path below.
+# shellcheck source=lib-cix-probe.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib-cix-probe.sh"
+
+INPUT=$(cat 2>/dev/null || echo "{}")
+if command -v jq >/dev/null 2>&1; then
+ SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null || echo "")
+else
+ SESSION_ID=$(printf '%s' "$INPUT" | sed -n 's/.*"session_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1)
+fi
+
+# No session_id → can't read the SessionStart cache. Stay silent.
+[ -z "$SESSION_ID" ] && exit 0
+
+# ── Gate by tool_name + command shape ─────────────────────────────────────────
+# For Grep/Glob the intent is unambiguous — always proceed (still subject to
+# the cache check and exponential backoff below). For Bash we additionally
+# inspect tool_input.command and only proceed when it looks like a grep- or
+# find-family command; other Bash exits silently here WITHOUT bumping the
+# backoff counter, so ls/git status/make/etc. stay invisible.
+#
+# Without jq the Bash branch falls through to "silent": parsing shell commands
+# out of a JSON blob with sed invites false positives, and silent is safer than
+# nudging on every Bash call.
+TOOL_NAME=""
+if command -v jq >/dev/null 2>&1; then
+ TOOL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null || echo "")
+fi
+
+case "$TOOL_NAME" in
+ Grep|Glob)
+ : # always proceed
+ ;;
+ Bash)
+ TOOL_CMD=""
+ if command -v jq >/dev/null 2>&1; then
+ TOOL_CMD=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "")
+ fi
+ # Match grep/egrep/fgrep/rg/ripgrep/find/fd/ag/ack as a standalone
+ # token. Anchors: start-of-string, whitespace, `|`, `;`, `&`,
+ # backtick, `(`. The regex rejects substring hits like `grepl`,
+ # `egrep_helper`, `findme`, `agent`, `package`, `$ag`. NOTE: any
+ # whitespace-delimited token matches, so `git grep foo` and
+ # `echo find me` also pass this gate. fd, ag, ack are short names with
+ # collision potential; the boundary anchors keep them safe (e.g.
+ # `$ag`, `myag --help`, `agent run`, `pack list`, `addr show` stay silent).
+ if ! [[ "$TOOL_CMD" =~ (^|[[:space:]\|\&\;\`\(])(grep|egrep|fgrep|rg|ripgrep|find|fd|ag|ack)([[:space:]]|$) ]]; then
+ exit 0
+ fi
+ ;;
+ *)
+ # Unknown tool_name, or no jq available → silent.
+ exit 0
+ ;;
+esac
+
+CACHE_DIR="${CLAUDE_PLUGIN_DATA:-/tmp}"
+PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
+
+# Compute per-project hash — same algorithm as session-start.sh.
+DIR_HASH=$(printf '%s' "$PROJECT_DIR" | shasum -a 256 2>/dev/null | cut -c1-8)
+if [ -z "$DIR_HASH" ]; then
+ DIR_HASH=$(printf '%s' "$PROJECT_DIR" | tr -c 'a-zA-Z0-9' '-' | tail -c 16)
+fi
+
+# ── Read SessionStart's verdict for THIS project ──────────────────────────────
+# Policy: only "1" allows nudges. Missing file or "0" → silent. "unknown"
+# (SessionStart timed out) → re-probe once and upgrade the cache.
+CACHE_FILE="$CACHE_DIR/cix-aware-$SESSION_ID-$DIR_HASH"
+
+if [ ! -f "$CACHE_FILE" ]; then
+ exit 0
+fi
+
+VERDICT="$(cat "$CACHE_FILE" 2>/dev/null || echo "")"
+
+if [ "$VERDICT" = "unknown" ]; then
+ # Server was unreachable at session start. Re-probe once now (short
+ # timeout) and persist the fresh verdict so future calls short-circuit.
+ CIX_BIN="$(cix_resolve_bin)"
+ if [ -n "$CIX_BIN" ]; then
+ VERDICT="$(cix_probe_verdict "$CIX_BIN" "$PROJECT_DIR" 2)"
+ # Only persist a DEFINITIVE result; if still "unknown", leave the
+ # cache as-is so the next Grep re-probes again (cheap, converges).
+ if [ "$VERDICT" != "unknown" ]; then
+ printf '%s' "$VERDICT" > "$CACHE_FILE" 2>/dev/null || true
+ fi
+ else
+ VERDICT="0"
+ fi
+fi
+
+if [ "$VERDICT" != "1" ]; then
+ exit 0
+fi
+
+# ── Increment per-(session, project) counter ──────────────────────────────────
+COUNTER_FILE="$CACHE_DIR/cix-grep-count-$SESSION_ID-$DIR_HASH"
+COUNT=$(cat "$COUNTER_FILE" 2>/dev/null || echo 0)
+case "$COUNT" in
+ ''|*[!0-9]*) COUNT=0 ;;
+esac
+COUNT=$((COUNT + 1))
+printf '%d' "$COUNT" > "$COUNTER_FILE"
+
+# Power-of-2 check: COUNT & (COUNT - 1) == 0 means COUNT is 1, 2, 4, 8, ...
+if [ "$((COUNT & (COUNT - 1)))" -ne 0 ]; then
+ exit 0
+fi
+
+# ── Emit nudge ────────────────────────────────────────────────────────────────
+MESSAGE="💡 You just ran a file/text search in this project (call #$COUNT this session). This project has a cix semantic index — next time, for queries by meaning (find by concept, cross-file lookups, symbol navigation, locating files by symbol name), the CLI commands \`cix search\` / \`cix def\` / \`cix refs\` outperform grep/find. grep and find are best for exact strings or filename patterns (error messages, config keys, import paths, glob extensions). Recommended to activate /cix SKILL to use cix effectively"
+
+if command -v jq >/dev/null 2>&1; then
+ jq -n --arg msg "$MESSAGE" \
+ '{hookSpecificOutput: {hookEventName: "PostToolUse", additionalContext: $msg}}'
+else
+ ESC=$(printf '%s' "$MESSAGE" | sed 's/\\/\\\\/g; s/"/\\"/g' | tr '\n' ' ')
+ printf '{"hookSpecificOutput":{"hookEventName":"PostToolUse","additionalContext":"%s"}}\n' "$ESC"
+fi
+
+exit 0
diff --git a/plugins/cix/scripts/lib-cix-probe.sh b/plugins/cix/scripts/lib-cix-probe.sh
new file mode 100644
index 0000000..a21abec
--- /dev/null
+++ b/plugins/cix/scripts/lib-cix-probe.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Shared cix-status probe helpers for the cix plugin hooks.
+#
+# Sourced (not executed) by session-start.sh, cwd-changed.sh, and
+# grep-nudge.sh. Defines functions only — no side effects on source.
+#
+# The three-state verdict ("1" / "0" / "unknown") is the contract these
+# hooks share via the cache file $CLAUDE_PLUGIN_DATA/cix-aware-<session_id>-<dir_hash>:
+# "1" — project is indexed; nudges allowed
+# "0" — definitively not indexed / not registered; stay silent
+# "unknown" — couldn't determine (cix status timed out / server slow);
+# a later grep-nudge re-probes and upgrades this to 0 or 1
+#
+# Why "unknown" matters: previously a timeout at SessionStart wrote "0",
+# which silenced the nudge for the ENTIRE session even if the cix server
+# came up seconds later. "unknown" lets the next Grep re-probe and recover.
+
+# cix_resolve_bin — echo a usable cix binary path, or empty string.
+# Prefers the plugin-bundled wrapper so behavior matches the slash commands.
+cix_resolve_bin() {
+ if [ -x "${CLAUDE_PLUGIN_ROOT:-}/bin/cix" ]; then
+ printf '%s' "${CLAUDE_PLUGIN_ROOT}/bin/cix"
+ elif command -v cix >/dev/null 2>&1; then
+ command -v cix
+ fi
+}
+
+# cix_probe_verdict <cix_bin> <project_dir> [timeout_secs]
+# Runs `<cix_bin> status -p <project_dir>` under a timeout and echoes a verdict:
+# "1" — exit 0 (indexed)
+# "0" — clean nonzero exit (not indexed / not registered)
+# "unknown" — had to be killed (timed out)
+# Uses timeout(1) / gtimeout(1) when available (coreutils); otherwise a
+# pure-bash poll loop (macOS without coreutils). The poll loop hard-kills
+# with SIGKILL, matching the prior hook behavior.
+cix_probe_verdict() {
+ local cix_bin="$1" project_dir="$2" secs="${3:-2}"
+ local rc=0
+
+ if command -v timeout >/dev/null 2>&1; then
+ timeout "$secs" "$cix_bin" status -p "$project_dir" >/dev/null 2>&1 || rc=$?
+ [ "$rc" = "124" ] && { printf 'unknown'; return 0; }
+ elif command -v gtimeout >/dev/null 2>&1; then
+ gtimeout "$secs" "$cix_bin" status -p "$project_dir" >/dev/null 2>&1 || rc=$?
+ [ "$rc" = "124" ] && { printf 'unknown'; return 0; }
+ else
+ # Pure-bash fallback: background the call and poll in 0.1s steps.
+ local exit_file pid slept iters
+ exit_file="$(mktemp 2>/dev/null || printf '/tmp/cix-probe-%s.exit' "$$")"
+ (
+ "$cix_bin" status -p "$project_dir" >/dev/null 2>&1
+ echo "$?" > "$exit_file" 2>/dev/null
+ ) &
+ pid=$!
+ slept=0
+ iters=$((secs * 10))
+ while kill -0 "$pid" 2>/dev/null && [ "$slept" -lt "$iters" ]; do
+ sleep 0.1
+ slept=$((slept + 1))
+ done
+ if kill -0 "$pid" 2>/dev/null; then
+ kill -9 "$pid" 2>/dev/null || true
+ wait "$pid" 2>/dev/null || true
+ rm -f "$exit_file"
+ printf 'unknown'
+ return 0
+ fi
+ wait "$pid" 2>/dev/null || true
+ rc=1
+ [ -f "$exit_file" ] && rc=$(cat "$exit_file" 2>/dev/null || echo 1)
+ rm -f "$exit_file"
+ case "$rc" in ''|*[!0-9]*) rc=1 ;; esac
+ fi
+
+ if [ "$rc" = "0" ]; then
+ printf '1'
+ else
+ printf '0'
+ fi
+}
diff --git a/plugins/cix/scripts/post-compact.sh b/plugins/cix/scripts/post-compact.sh
new file mode 100755
index 0000000..553fa4b
--- /dev/null
+++ b/plugins/cix/scripts/post-compact.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# PostCompact hook for the cix plugin.
+#
+# Behavior: after Claude Code compacts the conversation, re-inject the
+# SessionStart reminder if this (session, project) is cix-aware.
+#
+# Why this matters: skill bodies survive auto-compaction (Claude Code
+# re-attaches them with up to 5K tokens per skill, see
+# https://code.claude.com/docs/en/skills#skill-content-lifecycle).
+# But the SessionStart `additionalContext` reminder — and PreToolUse
+# nudges — are NOT skills. They live as regular tool result messages
+# and are dropped/summarised during compaction.
+#
+# In long sessions (8+ hours of work) where the cix skill hasn't been
+# invoked yet, the model may "forget" cix exists after compaction.
+# Re-injecting the same one-line reminder keeps cix-awareness alive.
+#
+# This is a no-op if SessionStart concluded the project is not indexed
+# (cache=0) or if no verdict exists yet.
+
+set -euo pipefail
+
+# The hook payload arrives as JSON on stdin; read session_id with jq, or sed without it.
+INPUT=$(cat 2>/dev/null || echo "{}")
+if command -v jq >/dev/null 2>&1; then
+  SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null || echo "")
+else
+  SESSION_ID=$(printf '%s' "$INPUT" | sed -n 's/.*"session_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1)
+fi
+
+[ -z "$SESSION_ID" ] && exit 0
+
+CACHE_DIR="${CLAUDE_PLUGIN_DATA:-/tmp}"
+PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
+
+# Same cache key as session-start.sh. With pipefail a missing shasum fails the
+# pipeline, so guard the assignment or `set -e` aborts before the fallback runs.
+DIR_HASH=$(printf '%s' "$PROJECT_DIR" | shasum -a 256 2>/dev/null | cut -c1-8) || DIR_HASH=""
+if [ -z "$DIR_HASH" ]; then
+  DIR_HASH=$(printf '%s' "$PROJECT_DIR" | tr -c 'a-zA-Z0-9' '-' | tail -c 16)
+fi
+
+CACHE_FILE="$CACHE_DIR/cix-aware-$SESSION_ID-$DIR_HASH"
+
+# ── Read verdict ──────────────────────────────────────────────────────────────
+# Strict policy mirrors grep-nudge.sh: only "1" triggers re-injection.
+if [ ! -f "$CACHE_FILE" ] || [ "$(cat "$CACHE_FILE" 2>/dev/null)" != "1" ]; then
+  exit 0
+fi
+
+# ── Re-inject the SessionStart reminder ───────────────────────────────────────
+MESSAGE='💡 (Post-compact reminder) This project has a cix semantic code index. For semantic queries — finding code by meaning, cross-file lookups, symbol navigation, "where is X used", "how does Y work" — use the CLI: `cix search`, `cix def`, `cix refs` (via Bash). Activate the /cix SKILL for guidance. Use Grep only for exact strings (error messages, config keys, import paths).'
+
+if command -v jq >/dev/null 2>&1; then
+  jq -n --arg msg "$MESSAGE" \
+    '{hookSpecificOutput: {hookEventName: "PostCompact", additionalContext: $msg}}'
+else
+  ESC=$(printf '%s' "$MESSAGE" | sed 's/\\/\\\\/g; s/"/\\"/g' | tr '\n' ' ')
+  printf '{"hookSpecificOutput":{"hookEventName":"PostCompact","additionalContext":"%s"}}\n' "$ESC"
+fi
+
+exit 0
diff --git a/plugins/cix/scripts/session-end.sh b/plugins/cix/scripts/session-end.sh
new file mode 100755
index 0000000..d400a37
--- /dev/null
+++ b/plugins/cix/scripts/session-end.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# SessionEnd hook for the cix plugin.
+#
+# Behavior: when the Claude Code session terminates, remove every
+# cache file belonging to this session from $CLAUDE_PLUGIN_DATA.
+# A single session may have visited multiple projects (via `cd`), so
+# we glob-delete by session_id prefix. Cleanup is best-effort:
+# SessionEnd may not fire if the process was killed forcibly (kill -9,
+# OOM, panic) — session-start.sh also runs a 30-day GC sweep as a
+# safety net.
+#
+# Files removed (per session_id, all directory hashes):
+# $CLAUDE_PLUGIN_DATA/cix-aware-$SESSION_ID-* (verdict caches)
+# $CLAUDE_PLUGIN_DATA/cix-grep-count-$SESSION_ID-* (backoff counters)
+#
+# Output: nothing. Failures are silently ignored.
+
+set -euo pipefail
+
+INPUT=$(cat 2>/dev/null || echo "{}")
+if command -v jq >/dev/null 2>&1; then
+  SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null || echo "")
+else
+  SESSION_ID=$(printf '%s' "$INPUT" | sed -n 's/.*"session_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1)
+fi
+
+# Without a session_id we don't know what to clean. Exit cleanly.
+[ -z "$SESSION_ID" ] && exit 0
+
+# $SESSION_ID is interpolated into find -name globs below. Claude Code
+# assigns UUIDs, but refuse anything carrying glob metacharacters or path
+# separators so a malformed id can never widen the match to other sessions.
+case "$SESSION_ID" in *[!A-Za-z0-9._-]*) exit 0 ;; esac
+
+CACHE_DIR="${CLAUDE_PLUGIN_DATA:-/tmp}"
+[ -d "$CACHE_DIR" ] || exit 0
+
+# Glob-delete every per-(session, dir) marker for this session.
+#
+# Safety is enforced by the find filters, not by where the cache dir is:
+#   -maxdepth 1 — never recurse into subdirectories
+#   -type f — files only (skips dirs and symlinks)
+#   -name 'cix-aware-$SESSION_ID-*' — exact prefix + this session_id
+#   -name 'cix-grep-count-$SESSION_ID-*' — exact prefix + this session_id
+#
+# We never use `rm -rf` and never recurse, so only files matching the
+# strict name patterns above can be removed. Failures are ignored.
+find "$CACHE_DIR" -maxdepth 1 -type f \
+  \( -name "cix-aware-$SESSION_ID-*" -o -name "cix-grep-count-$SESSION_ID-*" \) \
+  -delete 2>/dev/null || true
+
+exit 0
diff --git a/plugins/cix/scripts/session-start.sh b/plugins/cix/scripts/session-start.sh
new file mode 100755
index 0000000..ff810e0
--- /dev/null
+++ b/plugins/cix/scripts/session-start.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+# SessionStart hook for the cix plugin.
+#
+# Behavior: at session start, ask `cix status` whether the current
+# project is indexed. The result is cached for the (session, project)
+# pair in $CLAUDE_PLUGIN_DATA/cix-aware-$SESSION_ID-$DIR_HASH so the
+# PreToolUse hook can short-circuit without re-querying the server.
+#
+# Cache key includes a hash of the project directory, so a single
+# session that traverses multiple projects (via `cd`, see CwdChanged
+# hook) keeps a separate verdict per project — fresh backoff counter
+# per project, correct cix-aware state per directory.
+#
+# State location: $CLAUDE_PLUGIN_DATA is plugin-persistent storage
+# managed by Claude Code (resolves to ~/.claude/plugins/data/<plugin-id>/).
+# It survives plugin updates and is NOT periodically cleaned by the OS,
+# unlike /tmp (macOS daily cleanup of 3-day-old files; Linux on reboot).
+# Falls back to /tmp only when run outside a plugin context (tests).
+#
+# Decision contract (read by grep-nudge.sh, post-compact.sh):
+# File present with content "1" → project is indexed, nudge allowed
+# File present with content "0" → not indexed, nudge MUST stay silent
+# File present with content "unknown" → cix status timed out; grep-nudge
+# re-probes and upgrades to 0 or 1
+# File absent → no verdict yet, nudge stays silent
+#
+# Why "unknown" instead of "0" on timeout: a slow/unreachable server at
+# session start used to write "0", which silenced nudges for the WHOLE
+# session even if the server came up moments later. "unknown" lets the
+# next Grep re-probe (see grep-nudge.sh) and recover.
+#
+# Why grep-nudge still won't fabricate nudges from "0": if cix status
+# completed and said "not indexed" (project not registered, etc.), the
+# user should NOT see Grep nudges suggesting `cix search` — that's a
+# definitive negative, not a transient one.
+
+set -euo pipefail
+
+# Shared probe helpers (cix_resolve_bin, cix_probe_verdict).
+# shellcheck source=lib-cix-probe.sh
+. "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib-cix-probe.sh"
+
+# ── Read session_id from stdin JSON ───────────────────────────────────────────
+INPUT=$(cat 2>/dev/null || echo "{}")
+if command -v jq >/dev/null 2>&1; then
+  SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // empty' 2>/dev/null || echo "")
+else
+  SESSION_ID=$(printf '%s' "$INPUT" | sed -n 's/.*"session_id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1)
+fi
+
+# Without a session_id we can't write a session-scoped marker. Stay silent.
+if [ -z "$SESSION_ID" ]; then
+  exit 0
+fi
+
+# ── Resolve cache directory ───────────────────────────────────────────────────
+# Prefer plugin-persistent storage; fall back to /tmp for ad-hoc/test invocations.
+# We do NOT whitelist parent paths — users can have non-standard layouts
+# (custom $CLAUDE_PLUGIN_DATA, XDG dirs, corporate setups). Safety comes
+# from the file-level checks below: -maxdepth 1, -type f, exact -name
+# patterns matching only our session-id-prefixed markers.
+CACHE_DIR="${CLAUDE_PLUGIN_DATA:-/tmp}"
+mkdir -p "$CACHE_DIR" 2>/dev/null || CACHE_DIR="/tmp"
+[ -d "$CACHE_DIR" ] || exit 0
+
+PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
+
+# Hash the project dir so the cache file name is short and stable.
+# `shasum` ships with Perl and may be absent on minimal systems; with pipefail
+# that fails the pipeline, so `|| DIR_HASH=""` keeps `set -e` from aborting here.
+DIR_HASH=$(printf '%s' "$PROJECT_DIR" | shasum -a 256 2>/dev/null | cut -c1-8) || DIR_HASH=""
+if [ -z "$DIR_HASH" ]; then  # shasum unavailable → path-derived suffix
+  DIR_HASH=$(printf '%s' "$PROJECT_DIR" | tr -c 'a-zA-Z0-9' '-' | tail -c 16)
+fi
+
+CACHE_FILE="$CACHE_DIR/cix-aware-$SESSION_ID-$DIR_HASH"
+
+# ── Light maintenance: clear markers older than 30 days ───────────────────────
+# Long-running Claude Code installs would accumulate one-byte markers
+# otherwise. Cheap, runs once per session. Failures ignored.
+#
+# Safety constraints on the find:
+#   -maxdepth 1 — never recurse into subdirectories
+#   -type f — files only (skips dirs, symlinks)
+#   -name 'cix-aware-*' OR
+#   -name 'cix-grep-count-*' — exact prefix match on our marker names
+#   -mtime +30 — older than 30 days
+#
+# A file outside this prefix is invisible to find — it's never even
+# considered for deletion, regardless of how the cache dir is configured.
+find "$CACHE_DIR" -maxdepth 1 -type f \
+  \( -name 'cix-aware-*' -o -name 'cix-grep-count-*' \) \
+  -mtime +30 -delete 2>/dev/null || true
+
+# ── Resolve a working `cix` binary ────────────────────────────────────────────
+CIX_BIN="$(cix_resolve_bin)"
+
+if [ -z "$CIX_BIN" ]; then
+  # CLI not yet installed (would auto-bootstrap on first call). Mark off.
+  printf '0' > "$CACHE_FILE"
+  exit 0
+fi
+
+# ── Probe `cix status` (2s timeout) → three-state verdict ─────────────────────
+VERDICT="$(cix_probe_verdict "$CIX_BIN" "$PROJECT_DIR" 2)"
+
+if [ "$VERDICT" = "unknown" ]; then
+  # Timed out — record "unknown" so grep-nudge re-probes later instead of
+  # being silenced for the whole session.
+  printf 'unknown' > "$CACHE_FILE"
+  exit 0
+fi
+
+if [ "$VERDICT" != "1" ]; then
+  # Definitive "not indexed". Stay silent for the session in this project.
+  printf '0' > "$CACHE_FILE"
+  exit 0
+fi
+
+# ── Project IS indexed — cache + inject reminder ──────────────────────────────
+printf '1' > "$CACHE_FILE"
+
+MESSAGE='💡 This project has a cix semantic code index. For semantic queries — finding code by meaning, cross-file lookups, symbol navigation, "where is X used", "how does Y work" — use the CLI: `cix search`, `cix def`, `cix refs` (via Bash). Activate the /cix SKILL for guidance. Use Grep only for exact strings (error messages, config keys, import paths). Run `cix status` if results seem stale.'
+
+if command -v jq >/dev/null 2>&1; then
+  jq -n --arg msg "$MESSAGE" \
+    '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $msg}}'
+else
+  ESC=$(printf '%s' "$MESSAGE" | sed 's/\\/\\\\/g; s/"/\\"/g' | tr '\n' ' ')
+  printf '{"hookSpecificOutput":{"hookEventName":"SessionStart","additionalContext":"%s"}}\n' "$ESC"
+fi
+
+exit 0
diff --git a/plugins/cix/scripts/sync-skills.sh b/plugins/cix/scripts/sync-skills.sh
new file mode 100755
index 0000000..1a520d6
--- /dev/null
+++ b/plugins/cix/scripts/sync-skills.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# sync-skills.sh — keep plugin-bundled skill files byte-identical with
+# the canonical sources under skills/.
+#
+# Fix #19 acceptance: the plugin ships byte-identical copies of files
+# that have a single source of truth elsewhere in the repo. Without
+# this script, contributors edit one file and forget the mirror; the
+# two diverge silently until someone runs cix-workspace via the plugin
+# and gets a stale workflow.
+#
+# Files in scope (canonical source → plugin bundle destination):
+#
+# skills/cix-workspace/SKILL.md
+# → plugins/cix/skills/cix-workspace/SKILL.md
+#
+# skills/cix-workspace/agents/cix-workspace-investigator.md
+# → plugins/cix/agents/cix-workspace-investigator.md
+#
+# Out of scope: skills/cix/SKILL.md vs plugins/cix/skills/cix/SKILL.md —
+# those are INTENTIONALLY different. The plugin version carries extra
+# frontmatter (description, when_to_use, allowed-tools) the standalone
+# skill loader doesn't need; treating them as drift would be wrong.
+#
+# Usage:
+# sync-skills.sh # copy source → plugin, print what changed
+# sync-skills.sh --check # diff only, exit 1 on drift (for CI / pre-commit)
+
+set -euo pipefail
+
+# Resolve repo root from the script's location so the script works no
+# matter where it's invoked from (CI, IDE task runner, manual cd).
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+
+# (source, destination) pairs. Bash 3.2-compatible parallel arrays so
+# this also runs on macOS's default /bin/bash.
+SRC=(
+  "skills/cix-workspace/SKILL.md"
+  "skills/cix-workspace/agents/cix-workspace-investigator.md"
+)
+DST=(
+  "plugins/cix/skills/cix-workspace/SKILL.md"
+  "plugins/cix/agents/cix-workspace-investigator.md"
+)
+
+MODE="copy"
+if [[ "${1:-}" == "--check" ]]; then
+  MODE="check"
+elif [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  # Help = the leading header comment only: skip the shebang, stop at the first non-comment line.
+  awk 'NR > 1 { if (/^#/) print; else exit }' "$0"
+  exit 0
+elif [[ -n "${1:-}" ]]; then
+  echo "sync-skills.sh: unknown argument: $1" >&2
+  echo "Run with --help for usage." >&2
+  exit 2
+fi
+
+drift=0
+for i in "${!SRC[@]}"; do
+  src="$REPO_ROOT/${SRC[$i]}"
+  dst="$REPO_ROOT/${DST[$i]}"
+
+  if [[ ! -f "$src" ]]; then
+    echo "sync-skills.sh: source missing: $src" >&2
+    exit 3
+  fi
+
+  if [[ "$MODE" == "check" ]]; then
+    # Skip the copy; just compare. -q suppresses output, exit code 0
+    # = identical, 1 = differs, 2 = error (a missing destination counts as drift).
+    if ! diff -q "$src" "$dst" >/dev/null 2>&1; then
+      echo "drift: ${SRC[$i]} != ${DST[$i]}" >&2
+      drift=1
+    fi
+    continue
+  fi
+
+  # Copy mode — only act when the destination differs, so the log only
+  # mentions files that actually changed. cmp -s is the standard byte-identity
+  # test (returns 0 for identical, 1 for different, 2 for I/O error).
+  if ! cmp -s "$src" "$dst"; then
+    mkdir -p "$(dirname "$dst")"
+    cp "$src" "$dst"
+    echo "synced: ${SRC[$i]} → ${DST[$i]}"
+  fi
+done
+
+if [[ "$MODE" == "check" && $drift -ne 0 ]]; then
+  echo "" >&2
+  echo "Run plugins/cix/scripts/sync-skills.sh (no args) to fix." >&2
+  exit 1
+fi
diff --git a/plugins/cix/skills/cix-workspace/SKILL.md b/plugins/cix/skills/cix-workspace/SKILL.md
new file mode 100644
index 0000000..2eb983d
--- /dev/null
+++ b/plugins/cix/skills/cix-workspace/SKILL.md
@@ -0,0 +1,642 @@
+---
+name: cix-workspace
+description: Cross-project research workflow for cix workspaces. Manual-invocation skill — load explicitly via `/cix-workspace ` when a request spans multiple repos and you want the full workflow guidance (which repos? what code? what changes?) plus the trust rules for interpreting workspace search responses. Bundles the cix-workspace-investigator sub-agent for parallel per-repo fan-out. Do not auto-trigger.
+user-invocable: true
+allowed-tools: Bash(cix *), Agent
+---
+
+# `cix workspace` — Cross-Project Research Workflow
+
+You usually work inside one repo — your **primary project** — the
+directory the user opened you in. Most tasks are fully contained there
+and `cix search` / `cix definitions` / `cix references` are the right
+tools.
+
+But some tasks are not contained. A request like "wire feature X
+through the platform" can touch a half-dozen repos in different
+languages, layers, and shapes — a service, a shared library, the
+infra manifests, an API spec. Reading the primary repo alone gives
+you 1/N of the picture. Worse, you don't know which N repos are
+actually involved until you look.
+
+`cix workspace` is the tool for that. It searches every repo in a
+named workspace at once and tells you:
+
+1. **Which repos are actually relevant to this request.**
+2. **Which code in those repos is the entry point.**
+3. **What changes need to land in each, and in what order.**
+
+Those three questions are the *goal* of using this skill. Don't jump
+to implementation before you can answer all three with evidence.
+
+---
+
+## When to reach for workspace search
+
+| Signal in the user's request | What to do |
+|---|---|
+| Names a product / acronym you don't fully recognize from primary repo | Workspace search the acronym, see where it lives |
+| "Add X to the Y flow", "wire Z into A" | Workspace search Y or Z — likely cross-cutting |
+| "Across services", "between repos", "end-to-end" | Workspace search the feature |
+| Talks about an event / topic / contract / API endpoint | Workspace search the event name |
+| References infra / deployment alongside code | Workspace search — infra repo is probably in the workspace too |
+| "How do I change X in production / staging" | Workspace search BUT look past top-1 — the answer is usually a manifests/config/contract repo even when a code repo ranks higher (rule 7 below) |
+| Plain bugfix entirely inside one file | **Don't** workspace search. `cix search` is enough |
+| User points at a specific symbol / file path | **Don't.** `cix definitions <symbol>` or just Read the path |
+
+If you're not sure, run `cix ws` once to see whether the primary
+project is even part of a workspace. If it isn't, this skill doesn't
+apply.
+
+---
+
+## The workflow
+
+The goal-driven loop. Don't shortcut it. Each step is fast.
+
+### Step 0 — orient
+
+```bash
+cix ws # list workspaces; find the one your primary is in
+cix ws <name> # describe — confirm repos are indexed (✓ count)
+```
+
+If the workspace shows `stale_fts_repos` in any search response later,
+trust the dense ranking less — see the troubleshooting section.
+
+### Step 1 — answer "which repos?"
+
+Run workspace search with a **short, term-rich query**, not the full
+user sentence:
+
+```bash
+# GOOD — short, term-rich (a product acronym + an action verb)
+cix ws platform search "rate-limit middleware"
+
+# BAD — full sentence dilutes BM25 with stopwords ("add", "to", "a")
+cix ws platform search "Add a rate limit to every API endpoint"
+```
+
+Why short: the hybrid algorithm fuses BM25 (literal token match) with
+dense (semantic). BM25 carries the project-gating signal — repos that
+share zero vocabulary with the query drop out. Common words ("add",
+"flow", "for") match everywhere and dilute that signal.
+
+Read the response:
+
+- **`projects[]` is the answer to Q1.** Sorted by `project_score`
+ (candidacy). Each entry has `bm25_score` (literal-token overlap)
+ and `dense_score` (semantic similarity).
+- Projects below the per-query relative threshold are already
+ filtered out — you only see the survivors.
+- Top entry's `project_score` is your reference. Entries at 60-100%
+ of top are core relevant. Entries at 40-60% are secondary. Below
+ 40% would have been dropped server-side.
+
+**Always include the primary project** even if workspace search ranks
+it low — the user's task is rooted there. The workspace's other
+repos are dependencies / consumers / providers / counter-parties.
+
+### Step 2 — answer "what code is relevant?"
+
+For each repo from step 1, look at the chunks panel. The chunk list
+is interleaved by rank across surviving projects so each repo's top
+hit appears early. Use these chunks as **starting points** for a
+deeper read, not as the full answer.
+
+For repos other than the primary, you have two options:
+
+**A. Quick scan (≤ 2 repos to investigate):** use single-project
+search directly.
+
+```bash
+# Search inside one specific project
+curl -G -H "Authorization: Bearer $CIX_KEY" \
+ --data-urlencode "q=rate limit middleware handler" \
+ --data-urlencode "min_score=0" \
+ "$CIX_URL/api/v1/projects/$(project_hash)/search"
+```
+
+The per-project default `min_score` is `0.2` — light floor that
+keeps abstract NL queries non-empty. For drill-down on a natural-
+language question ("how does X work end-to-end"), pass `min_score=0`
+explicitly to be safe. For strict code-symbol matching, pass `0.4+`.
+
+**B. Fan-out to sub-agents (≥ 3 repos, or you need a thorough read):**
+spawn one `cix-workspace-investigator` sub-agent per relevant repo, in
+parallel. See the dedicated [Sub-agent fan-out pattern](#sub-agent-fan-out-pattern)
+section below for the four-part prompt template, including how to pass
+seed chunks with your interpretive commentary.
+
+Run them concurrently (one message, multiple Agent tool calls). When
+they report back, you have N independent reads to synthesize, not N
+sequential rabbit-holes.
+
+### Step 3 — answer "what changes?"
+
+This is your job, not a sub-agent's. Sub-agents report findings; you
+write the plan.
+
+For each relevant repo:
+
+- What needs to change (specific file:line, or a new file).
+- Why (which step of the data flow this implements).
+- Order constraints (e.g. "shared-models migration must deploy
+ before backend reads new field").
+- Tests that prove it works.
+
+Confirm with the user before any of this lands. The plan is the
+deliverable of this skill; the implementation is a separate step.
+
+### Throughout — ask, don't guess
+
+Trigger a clarifying question when:
+
+- Top-2 projects are at near-equal `project_score` and have different
+ labels — the request might fit either repo, ask which.
+- `bm25_score` is 0 across all projects → either the FTS index is
+ stale (see troubleshooting) OR the user's term doesn't exist
+ literally in any repo. Ask the user for the term that *would*
+ appear in code ("we call it `Order` in code, not `Trade`").
+- A sub-agent reports it can't find a clear entry point — surface
+ that uncertainty back to the user, don't paper over it.
+- The implementation plan needs a deploy-order assumption — confirm
+ who owns each repo and what their cycle looks like.
+
+Don't ask if the answer is obvious from the chunks. The bar is "I
+have two plausible interpretations and the wrong one costs the user
+real time."
+
+---
+
+## Reading the projects panel — what the numbers mean
+
+```
+project-a@main 0.500 5 hits bm25 0.421 dense 0.556
+project-b@main 0.412 5 hits bm25 0.318 dense 0.498
+project-c@main 0.288 3 hits bm25 0.155 dense 0.362
+```
+
+- `project_score` (first column): the α-blended candidacy in [0, 1].
+ Top = strongest signal across both retrieval modes.
+- `bm25_score` and `dense_score`: the raw per-mode signals. The
+ algorithm normalizes these per query before blending — useful for
+ diagnosis, not for sorting.
+- If `bm25_score` >> `dense_score` for a project: it's relevant
+ because of literal token overlap (product name appears in code).
+ Trust the surface area but verify semantic relevance manually.
+- If `dense_score` >> `bm25_score`: it's relevant because of
+ semantic similarity (handler shape matches the query intent) but
+ the literal term isn't there. Common when the user's term is a
+ product nickname not used in code.
+- If both are near zero: you're seeing the project because nothing
+ else cleared the gate either. Treat with skepticism.
+
+---
+
+## Trust rules — making sense of the response
+
+These ten rules were derived from a calibration eval (113 synthetic
+queries + 5 real engineering tasks against a mixed-domain workspace).
+Apply them before acting on workspace-search output. Numbers below
+are empirical, not vibes.
+
+### Rule 1 — `chunk.score >= 0.4` is the trust threshold
+
+Chunks with `score < 0.4` are noise about 75% of the time
+(rank-inversion and weak-signal FPs from the relative project gate).
+Skim them only when the higher-scored chunks don't answer the
+question. With the default `min_score=0.4` you usually won't see them
+at all; if you passed `min_score=0` on a narrow, single-target query,
+apply this rule yourself (rule 5 covers intentional broad sweeps).
+
+### Rule 2 — `chunk.score == 0` is a BM25-only hit, not low confidence
+
+The chunk's project matched the literal query tokens via FTS5 but the
+embedding side didn't surface it. These are valuable when the query
+carries project-specific identifiers (CamelCase symbols, file names,
+acronyms). Discount them when the query is a generic English word
+(`error`, `data`, `config`) — common-word BM25 hits are noise.
+
+### Rule 3 — Top-1 of `projects[]` is correct ~70% of the time in real tasks
+
+The synthetic eval measured 91% on single-target queries; real
+engineering tasks hit ~70% because real queries often span layers
+(see rule 7). When the top-1 project doesn't match your task's
+intent, **scan ranks 2–5 before reformulating** — the right repo is
+usually there. The `projects[]` panel is the answer to "where do
+the words live", not "where should the change happen".
+
+### Rule 4 — Drop down to single-project search for depth
+
+When `projects[]` shows the target at rank 1 with a clear lead
+(`project_score` ≥ 1.5× the next), switch to per-project search.
+You get file-grouped, deeper results without the cross-project
+round-robin cap of 5 chunks per repo.
+
+### Rule 5 — `min_score=0` for intentional cross-project sweeps
+
+Default workspace `min_score` is `0.4`. For queries that should
+legitimately span many repos ("authentication", "configuration
+loading", "Kafka consumers"), pass `min_score=0` explicitly.
+Expect `projects[]` to list 5–8 entries — that's the feature, not a
+bug. Ignore rule 1 in this mode: many real positives sit below 0.4
+in genuine cross-cutting queries.
+
+### Rule 6 — Add a 3rd disambiguating token, carefully
+
+If two query words are each domain-overloaded (e.g. "client SDK"
+could be the generated API client, the shared library, or a model
+type), add a third word. **Prefer meta-tokens** (`endpoint`,
+`route`, `handler`, `manifest`, `migration`, `config file`) over
+tech-stack guesses (`grpc`, `kafka`, `terraform`) — wrong stack
+guesses actively rotate the ranking away from the right answer. If
+unsure of the stack, run the query without a disambiguator first,
+read the top-1 project's language/path patterns, then refine.
+
+### Rule 7 — "Change X in production" → manifests repo, not code repo
+
+For tasks framed as deploying / configuring / overriding a feature,
+the answer usually lives in a manifests / config / contract repo
+(K8s overlays, Helm charts, OpenAPI specs, environment-specific
+yaml). Workspace search ranks by token frequency, so the code repo
+typically wins. Look at `projects[]` for repos with **manifests,
+config, platform, deploy, contract, openapi, infra** in their
+names — those are often the right targets even at rank 3–5.
+
+### Rule 8 — When top-1 doesn't fit, scan first, reformulate second
+
+If you think top-1 is wrong:
+
+1. First, scan ranks 2–5. The right project is there ~80% of the
+ time when the layer mismatch caused rule 3 to fail.
+2. Only after scanning, reformulate. Reformulating before scanning
+ wastes a round-trip and risks the new query introducing fresh
+ layer confusion.
+
+### Rule 9 — For per-project NL drill-down, pass `min_score=0` explicitly
+
+When dropping from workspace to per-project search with a natural-
+language query (e.g. "how does X work"), pass `min_score=0` to be
+safe. The per-project default `min_score=0.2` is lighter than it
+used to be (`0.4`) and usually fine, but abstract semantic queries
+can still score below 0.2, which the default rejects.
+
+### Rule 10 — Words ≠ change location (the intent-vs-tokens watchword)
+
+Workspace search ranks projects by *where the words live*. Your
+task is usually about *where the change should happen*. These
+coincide ~70% of the time, not 91%. When in doubt: read the
+chunks in ranks 2–5 before committing to a target repo.
+
+### Quick example — when rules 7 and 10 save you
+
+> User: "Change the database timeout for the staging environment of
+> the order service."
+
+Workspace search ranks the **order-service code repo** at #1 (it's
+where the word "database" appears most). But the change needs to
+land in the **environment-platform manifests repo** at rank #4. If
+you stopped at top-1 you'd edit the wrong file. Rules 7 and 10
+remind you to scan further.
+
+---
+
+## Primary project nuance
+
+You are typically `cd`'d into a single repo. That's the *primary
+project*. The user's task is framed *from* that repo — they're
+extending it, integrating with something it depends on, or wiring up
+something that consumes it.
+
+Patterns:
+
+- **The change centers on primary, others are consumers/providers.**
+ Most common. Primary gets the bulk of the implementation; the
+ other repos get small adapter changes (new field consumption, new
+ webhook subscriber, new client method).
+- **The change is in another repo, primary just calls it.** Less
+ common but real. Primary's role is the integration test or the
+ feature-flag flip; the heavy lifting is elsewhere.
+- **The change is genuinely distributed.** Migrations, schema changes
+ rolling through many services, protocol bumps. Each repo gets a
+ coordinated change with deploy-order constraints.
+
+Workspace search tells you which pattern you're in. Don't assume.
+
+---
+
+## Sub-agent fan-out pattern
+
+When you have 3+ relevant repos, fan out. Sub-agents run with isolated
+context — the main session stays clean (no per-repo code chunks bloating
+it) and the investigations run in parallel.
+
+Use the dedicated **`cix-workspace-investigator`** sub-agent, which ships
+with this skill. It's a thin, read-only shell around `cix search` / `cix
+def` / `cix refs` / `Read` / `Grep` with three hard rules baked in:
+stay inside the assigned project, no edits, no recursion. The
+methodology — what to look for, what to report, in what format — is
+**your** call, per spawn. The sub-agent follows your instructions; it
+doesn't second-guess them.
+
+### The four parts of a good per-spawn prompt
+
+You'll write one prompt per repo. A good one has four parts:
+
+#### 1. The user's task, verbatim
+
+Sub-agents have zero prior context. Paste the original user request even
+if it feels redundant — your interpretation might be wrong, and the
+user's wording is the ground truth the sub-agent should reason from.
+
+#### 2. The project identifier you're assigning
+
+Pass it in the exact form `cix list` shows. Two shapes are possible:
+
+- **Local working tree:** `/Users/.../some-repo`. The repo exists on disk;
+ the sub-agent can use `Read`/`Grep` on top of cix.
+- **Remote-only:** `github.com/<owner>/<repo>@<branch>`. The repo is *not* on
+ disk — it's a GitHub-attached project indexed only by the cix server.
+ The sub-agent must rely entirely on `cix search/def/refs` (passing
+ `-n `) and the chunks they return.
+
+Workspace search output gives you the identifier as `project_path` on each
+entry in `projects[]`. Paste that string verbatim into the sub-agent prompt,
+and tell the sub-agent explicitly which shape it is so it doesn't waste
+calls grepping a tree that isn't there. One repo per spawn.
+
+#### 3. Seed chunks **with your commentary**
+
+This is the part most often done badly. Don't just paste raw chunk
+pointers and hope the sub-agent figures out what matters. You saw the
+workspace search response; you have hunches about which chunks are real
+entry points and which are noise; pass that down.
+
+For each chunk you cite, add one short line of interpretation. For
+the response as a whole, flag suspicious signals:
+
+- Which chunk looks like the most likely entry point and why
+- Which chunks look like test fixtures / dead code / wrong-layer the
+ sub-agent should de-prioritize
+- Numeric signals that need a second opinion: `score=0` (BM25-only
+ literal — verify the token isn't a false friend), `score < 0.4` (low
+ confidence, possible rank-inversion), `bm25_score` high + `dense_score`
+ near zero (literal-only match — concept may not actually live here)
+- Whether you suspect this repo is wrong-layer (rule 7) — tell the
+ sub-agent to confirm relevance before diving into the chunks
+
+**Example "good chunk block":**
+
+```
+Seed chunks from workspace search:
+- `internal/gateway/server.go:412-418` (score 0.55) — looks like the
+ HTTP handler entry point for the rate-limit feature; confirm it
+ invokes the limiter middleware rather than just returning 429.
+- `internal/gateway/middleware.go:89-93` (score 0.49) — middleware
+ registration site. Verify whether rate-limit is wired here or
+ elsewhere.
+- `tests/integration/rate_limit_test.go` (score 0.41) — integration
+ test. Useful for understanding the expected shape, but not where
+ the change lands. Skim only.
+- `pkg/shared/util.go:1-30` (score 0) — BM25-only hit, "limit"
+ appears in a comment. Almost certainly noise; skip unless you need
+ shared utilities.
+
+Panel-level notes:
+- Workspace ranked this project #1 with a clear lead (project_score
+ 1.000 vs next 0.860). High confidence this is the right repo.
+- bm25_score=8.5, dense_score=0.54 — strong on both signals, not a
+ wrong-layer concern.
+```
+
+#### 4. Explicit deliverable
+
+Tell the sub-agent **exactly** what to return and in what shape. Each
+task has different needs:
+
+- "Confirm whether this repo is in scope. Yes / no / partial + one
+ sentence why."
+- "Find the entry point for the rate-limit middleware. Report
+ file:line of the entry and a five-step trace through the call
+ graph."
+- "List every file that would need to change to add a new audit-log
+ event type. No code, just file path + one-line per-file reason."
+
+Vague deliverables (`"investigate this repo"`) → vague answers.
+
+### Anti-patterns to avoid
+
+- **"Investigate this repo for rate-limit"** — no deliverable. The
+ sub-agent guesses scope and you can't verify the result.
+- **Three paragraphs of context with nested questions** — sub-agent
+ answers the wrong question. Pick one deliverable per spawn.
+- **"Read all the auth code"** — unbounded. Either fails or returns a
+ wall of text.
+- **Pasting raw chunks without interpretation** — you saw the
+ response, you have hunches about what matters. Sub-agent doesn't.
+ Skipping commentary throws away the most valuable thing you can pass
+ down.
+
+### Mechanics
+
+Run all sub-agents in **one message with multiple Agent calls** so they
+execute in parallel. Wait for completion. Synthesize their reports
+yourself — sub-agents don't see each other's work; you do. Surface
+inconsistencies (e.g. two repos disagree on which event format is
+canonical) back to the user.
+
+**Model inheritance.** `cix-workspace-investigator` declares `model:
+inherit` in its frontmatter, so each spawn runs on the same model as the
+main session. You don't need to pass `model:` on Agent calls. If you do
+pass it, you'll override inheritance — only do that intentionally (e.g.
+forcing a smaller/faster model for a trivially-bounded look-up).
+
+---
+
+## Worked example — why this skill exists
+
+A representative failure mode that motivated the hybrid algorithm:
+
+**The naïve approach:** running workspace search with a full natural-
+language sentence ("Add feature X to product Y"). The pre-hybrid
+implementation was pure-dense — it returned the N nearest vectors
+regardless of how far away "nearest" actually was. Every repo in the
+workspace surfaced, including repos that contained **zero literal
+mentions** of either the feature name or the product code. Confidently
+reporting all of them as "relevant" wasted time on completely
+unrelated repos.
+
+**The structural failure:**
+
+1. Pure-dense fan-out cannot tell "no signal" apart from "weak
+ signal" — chromem always returns the K nearest vectors.
+2. Long natural-language queries dilute the few tokens that carry
+ the actual gating signal.
+3. Without a sparse-retrieval channel, an acronym or unique
+ identifier query has nothing to lock onto.
+
+**What this skill teaches instead:**
+
+1. Query with **just the high-precision term** first — the product
+ acronym, the feature name, the unique symbol. Everything else
+ is noise.
+2. Verify that projects with `bm25_score = 0` aren't masquerading
+ as relevant. After the hybrid landed, repos with no literal
+ matches AND only marginal dense similarity drop out automatically
+ via the project gate.
+3. Confirm with the user before treating "this repo surfaced in
+ search" as "this repo is in scope for the change".
+
+**The lesson encoded in this skill:**
+
+- Step 1: query the term, not the sentence.
+- Step 1: trust the project gate; if a repo dropped out, it dropped
+ out for a reason.
+- Step 2: read the surface area from `projects[]` first, then read
+ the chunks as starting points.
+- Step 3: never assume "in search results" == "in scope". Verify.
+
+---
+
+## Troubleshooting
+
+### `bm25_score` is 0.000 on every project
+
+The workspace was indexed before the FTS5 mirror existed and the
+sparse half of the hybrid is empty. Hybrid degrades to pure-dense
+fan-out — the same algorithm that produces the false-positive
+failure mode described in the worked example above.
+
+The response includes `stale_fts_repos` listing the affected
+project_paths. Fix: reindex each project (dashboard → project card →
+reindex button, or `POST /api/v1/projects/{hash}/reindex`).
+After reindex, BM25 populates incrementally per-file as chunks are
+written.
+
+Until reindex completes, **don't trust the project gating** — the
+algorithm is producing the old failure mode. Verify project relevance
+by literal grep on the term.
+
+### `status: "empty"` despite obviously-relevant repos in the workspace
+
+Either:
+
+- The query terms don't appear literally in any repo AND the dense
+ similarity is below threshold for everything (project-gate dropped
+ everyone). Re-phrase with the term the code actually uses, or
+ lower `min_score`.
+- Every workspace repo is still indexing. Check `pending_repos` in
+ the response.
+
+### `status: "partial_failure"`
+
+At least one repo errored out (`failed_repos` array names them).
+Common cause: corrupt chromem collection. The remaining repos still
+returned results. Surface to the user; don't silently treat as
+complete.
+
+### Top-2 projects are at near-equal candidacy
+
+The algorithm isn't confident which repo is more relevant. Possible
+causes:
+
+- The feature genuinely lives in both. Ask the user which they
+ intended as primary scope.
+- The query is too broad — both repos match generic vocabulary.
+ Re-query with a more specific term.
+- One repo is a fork or duplicate. Confirm with
+ `cix ws <name> describe`.
+
+### One project absolutely dominates everything else
+
+Could be legit (the user's task is mostly contained in one repo and
+that repo is just very dense with relevant content). Or could be a
+single repo accidentally matching the user's stopwords across many
+files. Spot-check: is the project's `bm25_score` driven by the
+high-IDF term (the product name) or by common words?
+
+### Top-1 is wrong-layer (rule 7 / rule 10 in action)
+
+The top-1 project contains the words but isn't where the change
+should land. Classic example: "deploy X to staging" → workspace
+ranks the code repo for X at #1, but the staging overlay lives in
+a manifests repo at rank #4. Or: "add API endpoint Y" → ranks the
+backend implementation at #1, but the OpenAPI contract repo at #3
+must be updated first.
+
+**Fix:** scan ranks 2–5 explicitly. Look for projects whose names
+hint at a different layer (`*-platform`, `*-manifests`,
+`*-contracts`, `*-config`, `*-infra`, `openapi*`). If you see one,
+that's probably your real target.
+
+### Disambiguator backfired — the query lost its grip
+
+You added a 3rd word to discriminate between two overloaded terms,
+and the response is *worse* — top projects all have mediocre scores
+and the right repo isn't among them anymore. This usually happens
+when the added token belongs to a different stack than your target
+(e.g. you guessed a transport / framework / library that the canonical
+repo doesn't use), so the extra token rotates the ranking toward
+unrelated repos.
+
+**Fix:** strip the guessed-stack token. Try a meta-token instead
+(`endpoint`, `route`, `handler`, `manifest`, `migration`). Or: run
+the 2-word query as-is, scan the top-1 project's path patterns and
+language to see what stack it actually uses, then refine.
+
+---
+
+## Quick command reference
+
+```bash
+# List workspaces
+cix ws
+cix ws list --json
+
+# Describe one workspace (always do this before searching)
+cix ws platform
+cix ws platform describe --json
+
+# List repos attached to a workspace
+cix ws platform list
+cix ws platform repos --verbose
+
+# Search a workspace
+cix ws platform search "rate-limit middleware"
+cix ws platform search "JWT validation" --top-projects 8 --top-chunks 30
+cix ws platform search "audit logging" --json
+```
+
+Flags:
+
+- `--top-projects N` — surface up to N projects in the panel
+ (default 10, max 50). Increase for very broad explorations.
+- `--top-chunks K` — return up to K chunks total (default 20, max
+ 200). Round-robin interleaved across surviving projects.
+- `--min-score F` — drop dense hits below cosine F before scoring.
+ **Default 0.4** (symmetric with per-project search default).
+ Pass `0` explicitly for intentional cross-project sweeps that
+ need long-tail recall — broad concepts like "authentication" or
+ "Kafka consumers" that legitimately live in many repos. Higher
+ values (0.5+) for queries you want laser-focused.
+- `--json` — raw machine-readable response.
+
+---
+
+## TL;DR
+
+When the user's task plausibly spans more than one repo:
+
+1. `cix ws` → find the workspace, then `cix ws <name>` to describe it.
+2. Workspace search with a **short, term-rich** query.
+3. Read `projects[]` → that's your scope (Q1 answered).
+4. For each repo in scope, either single-project search or spawn a
+ `cix-workspace-investigator` sub-agent — in parallel, with seed
+ chunks AND your interpretive commentary on what to trust.
+5. Synthesize the sub-agent reports → plan changes per repo, with
+ order constraints (Q2 + Q3 answered).
+6. Ask the user to confirm the scope and plan before implementing.
+
+If `bm25_score` is 0 across the board, the FTS index is stale —
+fix it before trusting the result.
diff --git a/plugins/cix/skills/cix/SKILL.md b/plugins/cix/skills/cix/SKILL.md
new file mode 100644
index 0000000..10f2a48
--- /dev/null
+++ b/plugins/cix/skills/cix/SKILL.md
@@ -0,0 +1,249 @@
+---
+name: cix
+description: Semantic code search and navigation via the cix index. Use this when finding code by meaning rather than exact strings — cross-file lookups, symbol navigation, "where is X used", "how does Y work", "find authentication middleware", or exploring an unfamiliar codebase. Covers search, definitions, references, symbol search, file lookup, and indexing.
+when_to_use: |
+ Trigger this skill when the user asks anything that requires semantic understanding of the codebase:
+ - "find authentication middleware" / "find the auth code"
+ - "where is X defined?" / "show me the definition of Y"
+ - "how does Z work in this codebase?"
+ - "what calls this function?" / "find references to ..."
+ - "search the codebase for ..." / "find by meaning"
+ - "explore this repo" / "give me an overview"
+ - Any time you would otherwise reach for Grep on a non-literal query
+
+ Skip this skill (use Grep / Read instead) when:
+ - A stack trace or error already names file:line — just Read it
+ - Searching for an exact literal (specific error string, config key name, import path)
+ - Inside dependencies (node_modules, vendor, .venv) — they aren't indexed
+ - Editing a non-code file (Dockerfile, yaml, lockfile)
+user-invocable: true
+allowed-tools: Bash(cix *)
+---
+
+# Code Index (`cix`) — Semantic Code Search & Navigation
+
+You have access to `cix`, a semantic code index that understands the
+codebase via embeddings + AST parsing. The right reflex is **"cix when
+you don't have a pointer; grep when you do."**
+
+**Always invoke `cix` through the Bash tool — do not call the
+`/cix:search`, `/cix:def`, … slash commands from inside a turn.** Those
+shortcuts exist for humans typing in the UI; an agent driving its own
+work should run `cix search …` / `cix def …` / `cix refs …` as Bash so
+the output flows through the normal tool-result pipeline and stays
+machine-parseable. The `cix` CLI is bundled — the plugin auto-installs
+it on first use if your system doesn't have it.
+
+## When to use which
+
+**Reach for `cix` first when:**
+- The starting point is open-ended ("how does indexing work?", "find the
+ authentication middleware", "where is the main entry point?")
+- You need cross-file navigation (definitions / references / callers)
+- You're searching by *meaning*, not by an exact string
+ (`"JWT validation"` should find `verifyToken` even without that phrase)
+- You're exploring an unfamiliar package or codebase
+
+**Skip `cix`, use Read / Grep / Glob directly when:**
+- A failing test or stack trace already names the file and function —
+ just `Read` it
+- You're chasing an exact literal: a specific error message, a config
+ key, a commit-message phrase, an import path
+- You're inside dependencies (`node_modules`, `vendor`, `.venv`) — they
+ aren't indexed
+- You're editing a non-code file (Dockerfile, yaml, lockfile)
+
+If `cix` returns nothing relevant after one well-formed query, fall
+back to grep — don't loop on cix.
+
+---
+
+## Pick the cheapest tool that answers the question
+
+When you already know a symbol's **name**, reach for `cix def` / `cix refs`
+before `cix search`. They return **metadata only** (file, line, signature,
+call sites) — no source bodies — so they cost roughly an order of magnitude
+fewer tokens. Measured on one real symbol in this codebase:
+
+| Command | Returns | Output size |
+|---|---|---|
+| `cix def <symbol>` | definition location + signature | ~250 B |
+| `cix refs <symbol>` | every call site (file:line) | ~1 KB |
+| `cix search "<symbol>"` | matching code **with full source bodies** | ~7 KB |
+
+So `cix search` is ~28× the bytes of `cix def` and ~6× `cix refs` for the
+same target. Rule of thumb:
+
+- Know the name, want "where is it defined / who calls it" → `cix def` /
+ `cix refs`. Cheap, precise, no source noise.
+- Don't know the name, searching by *meaning* → `cix search`.
+- Only escalate to `cix search` for a *known* symbol when you actually need
+ to read the surrounding implementation, not merely locate it.
+
+---
+
+## Commands Reference
+
+### Semantic Search — find code by meaning
+```bash
+cix search "authentication middleware"
+cix search "database connection retry logic"
+cix search "error handling in payment flow" --limit 20
+cix search "config parsing" --in ./internal/config/
+cix search "API routes" --lang go
+cix search "main entry point" --exclude bench/fixtures --exclude legacy
+```
+
+**Flags:**
+- `--in <path>` — restrict to file or directory (can repeat)
+- `--exclude <pattern>` — drop a directory or substring from results (can repeat)
+- `--lang <language>` — filter by language (can repeat)
+- `--limit <n>` — max **files** returned (CLI default: 10) — output is
+ grouped per file with all matches inside, so 10 files ≈ many snippets.
+ **For agent use, prefer `--limit 5`**: five files is enough for most
+ lookups and keeps the result compact. This is a usage recommendation,
+ not a change to the CLI default — bump it back up when you genuinely
+ need broader exploration.
+- `--min-score <float>` — minimum relevance 0.0–1.0 (default: **0.4**)
+
+### Go to Definition — find where a symbol is defined
+```bash
+cix definitions HandleRequest
+cix def AuthMiddleware --kind function
+cix def Config --file ./internal/config.go
+```
+Aliases: `definitions`, `def`, `goto`. Flags: `--kind`, `--file`, `--limit`.
+
+### Find References — find where a symbol is used
+```bash
+cix references HandleRequest
+cix refs AuthMiddleware --limit 50
+cix usages UserService --file ./internal/api/
+```
+Aliases: `references`, `refs`, `usages`. Flags: `--file`, `--limit`.
+
+### Symbol Search — find symbols by name
+```bash
+cix symbols handleRequest
+cix symbols User --kind class
+cix symbols Auth --kind function --kind method
+```
+Flags: `--kind` (function/class/method/type, repeatable), `--limit`.
+
+### File Search — find files by path pattern
+```bash
+cix files "config"
+cix files "middleware" --limit 20
+```
+
+### Project Overview
+```bash
+cix summary # languages, top dirs, key symbols
+cix status # indexing status + file watcher status
+cix list # all indexed projects
+```
+
+### Indexing
+```bash
+cix init [path] # register + index + start watcher
+cix reindex # incremental
+cix reindex --full # full reindex
+cix cancel # cancel an in-flight indexing run
+cix watch # start file-change auto-reindex daemon
+cix watch stop # stop daemon
+```
+
+The watcher auto-reindexes on file change — manual `reindex` is rarely
+needed. `cix status` shows whether the watcher is running and the
+last-sync timestamp.
+
+---
+
+## Search quality — what scores mean
+
+Default `--min-score 0.4` is calibrated for the production embedding
+model (CodeRankEmbed-Q8 with path-aware preamble). Rough landscape:
+
+| Score | Meaning |
+|----------|---------------------------------------------------------|
+| 0.65+ | Exact / very strong match — almost certainly relevant |
+| 0.50–0.65| Strong match — usually relevant |
+| 0.40–0.50| Weaker match — sometimes useful, sometimes not |
+| <0.40 | Noise — filtered out by default |
+
+**If a query returns nothing**, lower the floor explicitly:
+`--min-score 0.2` for very specific or long-tail queries. Don't drop
+below 0.2 — results below that are noise.
+
+---
+
+## Writing better queries — leverage path-aware embedding
+
+Each chunk is embedded with its file path, language, and symbol name in
+the preamble. This means **mentioning a file/dir/symbol you already
+know about boosts ranking**:
+
+```bash
+# Generic
+cix search "validation"
+# Better — pins the search to the auth area
+cix search "validation in auth middleware"
+# Even better when you know the symbol — jump straight to its definition
+cix def ValidateToken --kind function
+```
+
+Natural-language queries that name the *kind of thing* and *where it
+lives* outperform single-word queries.
+
+---
+
+## Usage Patterns
+
+### Exploring unfamiliar code (`cix`'s strongest case)
+```bash
+cix summary # project structure, top dirs
+cix search "main entry point server" # find where it starts
+cix search "database connection setup" # find DB wiring
+cix search "request handler" --in ./api # narrow to API
+```
+
+### Tracing a symbol end-to-end
+```bash
+cix def HandleRequest # where is it defined?
+cix refs HandleRequest # who calls it?
+cix search "HandleRequest error handling" # how are errors handled?
+```
+
+### Chasing a known target (often grep is enough)
+```bash
+# Stack trace says "internal/auth/middleware.go:42 — invalid token"
+# → just Read that file. No cix needed.
+
+# Config key "max_concurrent_requests" used somewhere?
+# → grep is more precise.
+```
+
+### Narrowing scope
+```bash
+cix search "middleware" --in ./api/
+cix search "config" --in ./cmd/ --exclude legacy
+cix refs Config --file ./internal/server.go
+```
+
+---
+
+## Tips
+
+- Search queries are natural language, not regex. Write what you'd ask
+ a colleague.
+- Output groups by file: each result line is a file with all relevant
+ matches inside, ordered top-to-bottom by line number. The
+ `[best 0.NN]` is the score of the top hit in that file.
+- `cix def` is a faster path than `cix symbols` when you already know
+ the exact name.
+- `--exclude` complements `--in` — use it to drop noisy dirs (`bench/`,
+ `legacy/`, vendored code) inline without touching `.cixignore`.
+- The watcher keeps the index fresh. If results feel stale, check
+ `cix status` first — `Watcher: ✗ not running` is the usual cause.
+- Don't loop. If a query returns nothing useful after one well-phrased
+ attempt + one `--min-score 0.2` retry, drop to grep.
diff --git a/plugins/cix/tests/README.md b/plugins/cix/tests/README.md
new file mode 100644
index 0000000..51274dc
--- /dev/null
+++ b/plugins/cix/tests/README.md
@@ -0,0 +1,94 @@
+# Plugin tests
+
+Hook script tests for the cix Claude Code plugin. Uses
+[bats-core](https://bats-core.readthedocs.io/) with a mocked `cix` binary,
+isolated `$CLAUDE_PLUGIN_DATA`, and a per-test scratch project directory.
+
+## Run locally
+
+```bash
+# Install bats + jq + shellcheck
+brew install bats-core jq shellcheck # macOS
+sudo apt-get install bats jq shellcheck # Debian / Ubuntu
+
+# From repo root:
+bats plugins/cix/tests/*.bats
+
+# Or pick one suite:
+bats plugins/cix/tests/session-end.bats
+
+# TAP-formatted output (what CI uses):
+bats --tap plugins/cix/tests/*.bats
+```
+
+Each test runs in an isolated `$BATS_TMPDIR` scratch directory and
+cleans up after itself — no state leaks between tests.
+
+## What's covered
+
+| Suite | Focus |
+|---|---|
+| `session-start.bats` | cix-status flow, cache write, 30-day GC, **non-matching files preserved** |
+| `cwd-changed.bats` | First-cd evaluation, no-op on cached dir, multi-dir state |
+| `grep-nudge.bats` | Exponential backoff (1, 2, 4, 8, 16), per-(session, dir) counters |
+| `post-compact.bats` | Re-injection only when cache="1" |
+| `session-end.bats` | **Security:** deletion never leaks to other sessions, non-cix files, or subdirs — even with custom `$CLAUDE_PLUGIN_DATA` |
+| `cix-wrapper.bats` | System-cix passthrough, exit code propagation, self-recursion guard |
+
+## Security tests (the most important ones)
+
+Bash scripts that call `find -delete` get extra scrutiny. Safety comes
+from **what** we delete (strict `-name` patterns + `-type f` +
+`-maxdepth 1`), not **where** the cache dir lives. The plugin
+deliberately does not whitelist parent paths, so users with custom
+`$CLAUDE_PLUGIN_DATA` (corporate setups, XDG-style layouts) are
+supported.
+
+The `session-end.bats` and `session-start.bats` suites contain explicit
+adversarial cases:
+
+- Other sessions' cache files → must NOT be touched
+- Files with confusable names (`cix-other-pattern`,
+ `X-cix-aware-fake-...`, `cix` alone) → must NOT be touched
+- Random files (`config.yaml`, `.env`, `secrets.json`) in cache dir
+ → must NOT be touched
+- Subdirectories in cache dir + nested files → must NOT be touched
+ (only `-maxdepth 1`)
+- 30-day GC → must spare files outside the `cix-aware-*` /
+ `cix-grep-count-*` prefixes, even if they're old
+- `session_id` containing shell metacharacters → must NOT trigger
+ command injection (canary file survives)
+- Custom non-standard `$CLAUDE_PLUGIN_DATA` → script proceeds without
+ refusing, deletes only matching files
+
+If any of these fail in CI, the offending change cannot land.
+
+## Mocks
+
+`tests/mocks/bin/cix` is a fake `cix` CLI controlled via env vars:
+
+- `MOCK_CIX_EXIT` — exit code (default 0)
+- `MOCK_CIX_DELAY` — sleep before exit (for timeout tests)
+- `MOCK_CIX_LOG_FILE` — append every invocation here so tests can
+ assert "was the script called with the right args?"
+
+`helpers.bash` puts the mock first on `$PATH` for every hook invocation,
+so unqualified `cix` calls inside the hook scripts hit the mock.
+
+## Adding a new test
+
+1. Pick (or create) the right `.bats` file.
+2. Use `setup() { setup_test_env; }` and `teardown() { teardown_test_env; }`.
+3. Use `run_hook