diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9c6bed04..12686175 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [main] + branches: [main, "integration/**"] pull_request: - branches: [main] + branches: [main, "integration/**"] env: CARGO_TERM_COLOR: always diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index be6c57f0..f6b8156e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,5 +1,28 @@ name: Release +# Triggered by a version tag push (e.g. v0.2.0). +# Builds Rust binaries per-platform, publishes each @khive/kernel-{platform} +# npm subpackage, then publishes the umbrella khive package. +# +# Per ADR-026 §"Atomic release semantics": all subpackage publishes must +# succeed before the umbrella publishes. This is enforced by separating the +# workflow into two phases: +# +# Phase 1 (build-platform matrix): compile binaries, stage into subpackage +# bin/, remove .gitkeep, and upload the whole subpackage as a GH Actions +# artifact. No `npm publish` happens here. +# +# Phase 2 (publish-all, single job): downloads all six platform artifacts, +# updates versions, publishes the six subpackages, then publishes the +# umbrella — in a single sequential job. If any subpackage publish fails, +# the umbrella is never published. Because publishing is serialised in one +# job, a partial failure leaves only unreferenced subpackages on the registry +# (they may be unpublished within 72h via `npm unpublish`, but npm never lets +# a name@version be reused, so the retry must be tagged as a new version). +# +# This satisfies the ADR guarantee: partial failure leaves the user able to +# install the previous khive version unchanged. 
+ on: push: tags: @@ -8,63 +31,207 @@ on: permissions: contents: write +env: + CARGO_TERM_COLOR: always + jobs: - compile: + # ───────────────────────────────────────────────────────────────────────── + # Phase 1: Build Rust binaries per platform and upload as GH artifacts. + # No npm publish happens here — that is deferred to Phase 2. + # ───────────────────────────────────────────────────────────────────────── + build-platform: strategy: + fail-fast: true matrix: include: - - os: macos-latest + - platform: darwin-arm64 + os: macos-latest target: aarch64-apple-darwin - binary: khive-darwin-arm64 - - os: macos-13 + cross: false + + - platform: darwin-x64 + os: macos-latest target: x86_64-apple-darwin - binary: khive-darwin-x64 - - os: ubuntu-latest + cross: false + + - platform: linux-x64-gnu + os: ubuntu-latest target: x86_64-unknown-linux-gnu - binary: khive-linux-x64 - - os: ubuntu-latest + cross: false + + - platform: linux-x64-musl + os: ubuntu-latest + target: x86_64-unknown-linux-musl + cross: true + + - platform: linux-arm64 + os: ubuntu-latest target: aarch64-unknown-linux-gnu - binary: khive-linux-arm64 - - os: windows-latest + cross: true + + - platform: win32-x64 + os: windows-latest target: x86_64-pc-windows-msvc - binary: khive-win32-x64.exe + cross: false + runs-on: ${{ matrix.os }} + steps: - uses: actions/checkout@v4 - - uses: denoland/setup-deno@v2 + + - uses: dtolnay/rust-toolchain@1.94.1 with: - deno-version: v2.x - - name: Compile + targets: ${{ matrix.target }} + + - name: Install cargo-zigbuild (cross-compile targets only) + if: ${{ matrix.cross }} + run: pip3 install ziglang && cargo install cargo-zigbuild + + - name: Build binaries (native) + if: ${{ !matrix.cross }} + working-directory: crates + run: | + cargo build --release --target ${{ matrix.target }} -p kkernel -p khive-mcp + + - name: Build binaries (zigbuild cross) + if: ${{ matrix.cross }} + working-directory: crates run: | - deno compile \ - --allow-read --allow-write --allow-run 
--allow-env \ - --target ${{ matrix.target }} \ - --output npm/bin/${{ matrix.binary }} \ - cli/main.ts - - uses: actions/upload-artifact@v4 + cargo zigbuild --release --target ${{ matrix.target }} -p kkernel -p khive-mcp + + - name: Stage binaries into subpackage bin/ + shell: bash + run: | + PKG_DIR="npm/kernel-${{ matrix.platform }}/bin" + SRC="crates/target/${{ matrix.target }}/release" + # Remove the placeholder so it is not shipped in the published tarball (MIN-1). + rm -f "${PKG_DIR}/.gitkeep" + if [[ "${{ matrix.os }}" == "windows-latest" ]]; then + cp "${SRC}/kkernel.exe" "${PKG_DIR}/kkernel.exe" + cp "${SRC}/khive-mcp.exe" "${PKG_DIR}/khive-mcp.exe" + else + cp "${SRC}/kkernel" "${PKG_DIR}/kkernel" + cp "${SRC}/khive-mcp" "${PKG_DIR}/khive-mcp" + chmod +x "${PKG_DIR}/kkernel" "${PKG_DIR}/khive-mcp" + fi + + - name: Upload subpackage artifact + uses: actions/upload-artifact@v4 with: - name: ${{ matrix.binary }} - path: npm/bin/${{ matrix.binary }} + name: kernel-${{ matrix.platform }} + path: npm/kernel-${{ matrix.platform }}/ + # Retain for 1 day — only needed during this release run. + retention-days: 1 - publish: - needs: compile + # ───────────────────────────────────────────────────────────────────────── + # Phase 2: Publish all subpackages + umbrella atomically in a single job. + # Runs only after ALL Phase 1 jobs succeed (needs: build-platform with + # fail-fast: true propagates any single failure here). 
+  # ─────────────────────────────────────────────────────────────────────────
+  publish-all:
+    needs: build-platform
     runs-on: ubuntu-latest
+
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-        with:
-          path: npm/bin
-          merge-multiple: true
-      - name: Set version from tag
-        run: |
-          VERSION="${GITHUB_REF#refs/tags/v}"
-          jq --arg v "$VERSION" '.version = $v' npm/package.json > tmp.json
-          mv tmp.json npm/package.json
+
       - uses: actions/setup-node@v4
         with:
           node-version: 20
           registry-url: https://registry.npmjs.org
-      - name: Publish
+
+      # Extract the version from the git tag (strip leading "v").
+      # Passed via env var to avoid shell-interpolation issues in node -e (MIN-3).
+      - name: Extract version
+        run: |
+          echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_ENV"
+
+      # Download all six platform artifacts into their respective directories.
+      - name: Download all platform artifacts
+        uses: actions/download-artifact@v4
+        with:
+          # Download to npm/ — each artifact named kernel-{platform} lands in
+          # npm/kernel-{platform}/ matching the checkout layout.
+          path: npm/
+          # pattern matches kernel-* artifacts from Phase 1
+          pattern: kernel-*
+
+      # actions/checkout@v4 above restored npm/kernel-*/bin/.gitkeep (tracked in
+      # git so bin/ exists in fresh clones) and download-artifact does not delete
+      # it, so remove it here. Artifacts also drop file modes (the Phase 1 chmod
+      # is lost), so re-apply the executable bit. (NEW-1 round-2 fix.)
+      - name: Remove .gitkeep placeholders and restore executable bits
+        run: rm -f npm/kernel-*/bin/.gitkeep && chmod +x npm/kernel-*/bin/*
+
+      # Set the version in each subpackage's package.json.
+      # VERSION and PKG_JSON are passed via env vars to avoid shell-interpolation
+      # of special characters inside the node -e script (MIN-3).
+      - name: Set subpackage versions  # VERSION is inherited from $GITHUB_ENV ("Extract version" step)
+        run: |
+          for platform in darwin-arm64 darwin-x64 linux-x64-gnu linux-x64-musl linux-arm64 win32-x64; do
+            PKG_JSON="npm/kernel-${platform}/package.json" node -e "
+              const fs = require('fs');
+              const pkg = JSON.parse(fs.readFileSync(process.env.PKG_JSON));
+              pkg.version = process.env.VERSION;
+              fs.writeFileSync(process.env.PKG_JSON, JSON.stringify(pkg, null, 2) + '\n');
+            "
+          done
+
+      # Publish all six subpackages. If any fails, subsequent steps (including
+      # umbrella publish) do not run. GitHub Actions sequential steps in a
+      # single job guarantee this ordering.
+      - name: Publish @khive/kernel-darwin-arm64
+        working-directory: npm/kernel-darwin-arm64
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Publish @khive/kernel-darwin-x64
+        working-directory: npm/kernel-darwin-x64
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Publish @khive/kernel-linux-x64-gnu
+        working-directory: npm/kernel-linux-x64-gnu
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Publish @khive/kernel-linux-x64-musl
+        working-directory: npm/kernel-linux-x64-musl
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Publish @khive/kernel-linux-arm64
+        working-directory: npm/kernel-linux-arm64
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      - name: Publish @khive/kernel-win32-x64
+        working-directory: npm/kernel-win32-x64
+        run: npm publish --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      # Only reached if all six subpackage publishes succeeded.
+ - name: Set umbrella version and pin subpackage versions + env: + VERSION: ${{ env.VERSION }} + run: | + node -e " + const fs = require('fs'); + const pkg = JSON.parse(fs.readFileSync('npm/package.json')); + pkg.version = process.env.VERSION; + for (const k of Object.keys(pkg.optionalDependencies || {})) { + pkg.optionalDependencies[k] = process.env.VERSION; + } + fs.writeFileSync('npm/package.json', JSON.stringify(pkg, null, 2) + '\n'); + " + + - name: Publish khive (umbrella) working-directory: npm run: npm publish --access public env: diff --git a/.gitignore b/.gitignore index 7145d20d..99b7d8b6 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,5 @@ docs/adr/_review/ # Compiled CLI binaries (built by scripts/compile.sh, not committed) npm/bin/khive-* +# Exception: npm/bin/khive-mcp is the Node shim for the MCP binary, not a compiled binary. +!npm/bin/khive-mcp diff --git a/Makefile b/Makefile index ca17ba65..d0974ec0 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: check clippy test contract-test fmt fmt-check build clean ci docs-check publish publish-dry local +.PHONY: check clippy test contract-test fmt fmt-check build clean ci docs-check publish publish-dry local proof-check check: cd crates && cargo check --workspace @@ -23,6 +23,9 @@ fmt-check: build: cd crates && cargo build --workspace --release +proof-check: + ./scripts/check-proof-references.sh + clean: cd crates && cargo clean diff --git a/cli/deno.json b/cli/deno.json index d57188dc..105e4dbc 100644 --- a/cli/deno.json +++ b/cli/deno.json @@ -1,6 +1,6 @@ { "name": "@khive/cli", - "version": "0.2.0", + "version": "0.2.1", "description": "khive — research knowledge graph CLI", "license": "Apache-2.0", "tasks": { diff --git a/cli/kg/doctor.ts b/cli/kg/doctor.ts index fed7ac49..45395079 100644 --- a/cli/kg/doctor.ts +++ b/cli/kg/doctor.ts @@ -98,8 +98,7 @@ export async function inspectKg(repoRoot: string): Promise { code: "DUPLICATE_ID", file: ENTITIES_FILE, line: entry.line, - message: 
- `Duplicate entity id '${id}' (first seen on line ${entityFirstLine.get(id)})`, + message: `Duplicate entity id '${id}' (first seen on line ${entityFirstLine.get(id)})`, }); } else { entityIds.add(id); @@ -237,8 +236,7 @@ export async function inspectKg(repoRoot: string): Promise { code: "DUPLICATE_NATURAL_KEY", file: EDGES_FILE, line: entry.line, - message: - `Duplicate edge (source=${source}, target=${target}, relation=${relation})`, + message: `Duplicate edge (source=${source}, target=${target}, relation=${relation})`, }); } else { naturalKeys.add(naturalKey); @@ -292,9 +290,7 @@ function formatDoctor(report: DoctorReport, json: boolean): string { ]; for (const issue of report.issues) { - const loc = issue.line !== undefined - ? `${issue.file}:${issue.line}` - : issue.file; + const loc = issue.line !== undefined ? `${issue.file}:${issue.line}` : issue.file; const prefix = issue.severity === "error" ? "ERROR" : "WARN "; lines.push(` [${prefix}] ${issue.code}: ${issue.message} (${loc})`); } diff --git a/cli/kg/doctor_test.ts b/cli/kg/doctor_test.ts index f9f22f84..a9bdff9e 100644 --- a/cli/kg/doctor_test.ts +++ b/cli/kg/doctor_test.ts @@ -21,10 +21,10 @@ async function setupKg( edges: unknown[], ): Promise { await Deno.mkdir(`${dir}/.khive/kg`, { recursive: true }); - const entitiesNdjson = - entities.map((e) => JSON.stringify(e)).join("\n") + (entities.length > 0 ? "\n" : ""); - const edgesNdjson = - edges.map((e) => JSON.stringify(e)).join("\n") + (edges.length > 0 ? "\n" : ""); + const entitiesNdjson = entities.map((e) => JSON.stringify(e)).join("\n") + + (entities.length > 0 ? "\n" : ""); + const edgesNdjson = edges.map((e) => JSON.stringify(e)).join("\n") + + (edges.length > 0 ? 
"\n" : ""); await Deno.writeTextFile(`${dir}/.khive/kg/entities.ndjson`, entitiesNdjson); await Deno.writeTextFile(`${dir}/.khive/kg/edges.ndjson`, edgesNdjson); } diff --git a/cli/kg/export.ts b/cli/kg/export.ts index a3954126..1a7fb6f9 100644 --- a/cli/kg/export.ts +++ b/cli/kg/export.ts @@ -273,6 +273,17 @@ export async function runExport(repoRoot: string, args: string[]): Promise return; } + if (format !== "ndjson") { + // ADR-036 §8: the --format flag on export is reserved; any non-ndjson value + // is rejected with a "not yet implemented" error until P1/P2 adapters ship. + console.error( + `Error: --format ${JSON.stringify(format)} is not yet implemented for export.\n` + + `Supported: ndjson (default), archive.\n` + + `Non-NDJSON export formats are deferred to P1/P2 (ADR-036 §8).`, + ); + Deno.exit(1); + } + // Default: canonical NDJSON export try { await exportCanonical(repoRoot); diff --git a/cli/kg/import.ts b/cli/kg/import.ts index a123fa74..f56a4585 100644 --- a/cli/kg/import.ts +++ b/cli/kg/import.ts @@ -23,6 +23,9 @@ import { DEFAULT_SCHEMA_YAML } from "../lib/schema.ts"; import { canonicalEdgeJson, canonicalEntityJson } from "../lib/canonical.ts"; import { readNdjson } from "../lib/ndjson.ts"; import { validate } from "./validate.ts"; +import { adaptCsv } from "../lib/importers/csv.ts"; +import { adaptJson } from "../lib/importers/json.ts"; +import type { EdgeRecord, EntityRecord } from "../lib/importers/types.ts"; // ─── KgArchive types ────────────────────────────────────────────────────────── @@ -93,13 +96,14 @@ interface ImportJournal { // ─── Conflict resolution (for --on-conflict) ────────────────────────────────── /** - * Per-record conflict policy when importing into an existing KG. + * Per-record conflict policy when importing into an existing KG (ADR-036 §5). 
* error — default; fail if any live files exist (file-level, not record-level) * skip — keep the existing record, ignore the incoming one - * replace — overwrite the existing record with the incoming one - * merge — deep-merge properties, union tags, preserve existing scalars + * replace — overwrite the existing record with the incoming one (legacy alias for update) + * merge — deep-merge properties, union tags, preserve existing scalars (legacy alias for update) + * update — patch existing record: deep-merge properties, union tags (ADR-036 canonical name) */ -export type ConflictPolicy = "error" | "skip" | "replace" | "merge"; +export type ConflictPolicy = "error" | "skip" | "replace" | "merge" | "update"; async function readExistingArchive(repoRoot: string): Promise { const entities: KgArchiveEntity[] = []; @@ -149,7 +153,8 @@ function mergeEntityConflict( ): KgArchiveEntity | null { if (policy === "skip") return null; if (policy === "replace") return incoming; - // merge: deep-merge properties, union+sort tags, prefer existing scalar fields + // update / merge: deep-merge properties, union+sort tags, prefer existing scalar fields. + // ADR-036 §5 canonical name is "update"; "merge" is a legacy alias. const mergedProperties = deepMergeObjects( existing.properties ?? {}, incoming.properties ?? {}, @@ -174,7 +179,7 @@ function mergeEdgeConflict( ): KgArchiveEdge | null { if (policy === "skip") return null; if (policy === "replace") return incoming; - // merge: deep-merge properties, prefer incoming weight when present + // update / merge: deep-merge properties, prefer incoming weight when present. const mergedProperties = deepMergeObjects( existing.properties ?? {}, incoming.properties ?? {}, @@ -809,54 +814,261 @@ export async function importArchive( ); } +// ─── Format adapter helpers ─────────────────────────────────────────────────── + +/** + * Detect format from a file path extension (ADR-036 §1 extension table). 
+ * Returns the format string or undefined when the extension is ambiguous. + */ +function detectFormat(filePath: string): string | undefined { + const lower = filePath.toLowerCase(); + if (lower.endsWith(".ndjson")) return "ndjson"; + if (lower.endsWith(".csv")) return "csv"; + if (lower.endsWith(".tsv")) return "tsv"; + // .json is intentionally excluded: both KgArchive and generic JSON use .json. + // Use --format json explicitly to invoke the JSON adapter; without the flag, + // .json files fall through to the default (ndjson / archive path). + return undefined; +} + +/** + * Convert adapter records (EntityRecord[] + EdgeRecord[]) into a KgArchive + * so they can be passed to `importArchive` for durable, validated publish. + */ +function adapterResultToArchive( + entities: EntityRecord[], + edges: EdgeRecord[], +): KgArchive { + const archiveEntities: KgArchiveEntity[] = entities.map((e) => ({ + id: e.id, + kind: e.kind, + name: e.name, + description: e.description, + properties: e.properties as Record, + tags: e.tags, + })); + const archiveEdges: KgArchiveEdge[] = edges.map((e) => ({ + edge_id: e.edge_id, + source: e.source, + target: e.target, + relation: e.relation, + weight: e.weight, + properties: e.properties as Record, + })); + return { + format: "khive-kg", + version: "0.1", + entities: archiveEntities, + edges: archiveEdges, + }; +} + +/** + * Import via a format adapter (CSV, TSV, JSON). + * + * Reads the source file, converts records using the appropriate adapter, + * builds a KgArchive, then delegates to `importArchive` for durable publish. + * + * @param repoRoot Repository root. + * @param sourcePath Path to the source file. + * @param format Normalized format name: "csv", "tsv", or "json". + * @param defaultKind Default entity kind when source rows omit `kind`. + * @param options Import options forwarded to `importArchive`. 
+ */ +async function importViaAdapter( + repoRoot: string, + sourcePath: string, + format: string, + defaultKind: string | undefined, + options: { + overwrite?: boolean; + onConflict?: ConflictPolicy; + } = {}, +): Promise { + let text: string; + try { + text = await Deno.readTextFile(sourcePath); + } catch (err) { + if (err instanceof Deno.errors.NotFound) { + throw new Error(`source file not found: ${sourcePath}`); + } + throw new Error(`Error reading source file: ${(err as Error).message}`); + } + + let entities: EntityRecord[]; + let edges: EdgeRecord[]; + + if (format === "csv" || format === "tsv") { + const result = adaptCsv(text, { + separator: format === "tsv" ? "\t" : ",", + defaultKind, + }); + entities = result.entities; + edges = result.edges; + if (result.warnings.length > 0) { + for (const w of result.warnings) console.warn(`Warning: ${w}`); + } + } else if (format === "json") { + const result = adaptJson(text, defaultKind); + entities = result.entities; + edges = result.edges; + if (result.warnings.length > 0) { + for (const w of result.warnings) console.warn(`Warning: ${w}`); + } + } else { + throw new Error( + `format '${format}' is not yet implemented.\n` + + `Supported formats (P0): ndjson, csv, tsv, json.\n` + + `See ADR-036 for the deferred format roadmap.`, + ); + } + + const archive = adapterResultToArchive(entities, edges); + + // Write archive to a temp JSON file so importArchive can read it. 
+ const tmpFile = await Deno.makeTempFile({ prefix: ".khive-import-adapter-", suffix: ".json" }); + try { + await Deno.writeTextFile(tmpFile, JSON.stringify(archive)); + await importArchive(repoRoot, tmpFile, options); + } finally { + await Deno.remove(tmpFile).catch(() => {}); + } + + console.log( + `Imported ${entities.length} entities and ${edges.length} edges from ${sourcePath} (format: ${format})`, + ); +} + // ─── CLI entry point ────────────────────────────────────────────────────────── /** - * `khive kg import [--overwrite] [--on-conflict ] ` + * `khive kg import [--format ] [--default-kind ] [--overwrite] + * [--on-conflict ] ` * * Args: - * Path to a KgArchive JSON file (required). + * Path to the source file (required). + * --format Source format: ndjson (default), csv, tsv, json. + * Inferred from file extension when absent (ADR-036 §1). + * --default-kind Default entity kind when source rows omit `kind`. * --overwrite Replace existing NDJSON files wholesale. - * --on-conflict Per-record conflict handling: skip | replace | merge. - * Bypasses the file-level overwrite check. + * --on-conflict Per-record conflict: error | skip | update | replace | merge. + * `update` is the ADR-036 canonical name; `replace` and `merge` + * are legacy aliases retained for backward compatibility. + * + * Deferred flags (ADR-036 §9 — CLI rejects with "not yet implemented"): + * --mapping Column/field mapping file (P1). + * --schema-mode Schema validation behavior (P1). * * Validates against schema.yaml before writing. Publishes durably via journal * protocol (crash-safe: recoverImportJournal handles process death mid-publish). * Exits 0 on success, 1 on error. */ export async function runImport(repoRoot: string, args: string[]): Promise { + // Reject deferred flags with a clear "not yet implemented" message (ADR-036 §9). 
+ if (args.includes("--mapping")) { + console.error( + "Error: --mapping is not yet implemented (deferred to P1 per ADR-036).", + ); + Deno.exit(1); + } + if (args.includes("--schema-mode")) { + console.error( + "Error: --schema-mode is not yet implemented (deferred to P1 per ADR-036).", + ); + Deno.exit(1); + } + const overwrite = args.includes("--overwrite"); + const isContinue = args.includes("--continue"); - // Parse --on-conflict + // Parse --on-conflict (ADR-036 canonical: error|skip|update; legacy: replace|merge) let onConflict: ConflictPolicy | undefined; const conflictIdx = args.indexOf("--on-conflict"); if (conflictIdx !== -1) { const value = args[conflictIdx + 1]; - if (value === "skip" || value === "replace" || value === "merge") { + if (value === "skip" || value === "replace" || value === "merge" || value === "update") { onConflict = value; + } else if (value === "error") { + // "error" is the default; no-op but explicit. + onConflict = undefined; } else { console.error( - `Error: --on-conflict value must be 'skip', 'replace', or 'merge'; ` + + `Error: --on-conflict value must be 'error', 'skip', 'update', 'replace', or 'merge'; ` + `got '${value ?? "(missing)"}'`, ); Deno.exit(1); } } - // Positional arg: first non-flag argument, excluding the --on-conflict value - const archivePath = args.find((a, i) => !a.startsWith("-") && args[i - 1] !== "--on-conflict"); - if (!archivePath) { + // --continue is sugar for --on-conflict skip (ADR-036 §5). 
+ if (isContinue) { + if (onConflict !== undefined) { + console.error( + "Error: --continue and --on-conflict cannot be combined (ADR-036 §5).", + ); + Deno.exit(1); + } + onConflict = "skip"; + } + + // Parse --format + let explicitFormat: string | undefined; + const formatIdx = args.indexOf("--format"); + if (formatIdx !== -1) { + explicitFormat = args[formatIdx + 1]; + if (!explicitFormat || explicitFormat.startsWith("-")) { + console.error("Error: --format requires a format argument"); + Deno.exit(1); + } + } + + // Parse --default-kind + let defaultKind: string | undefined; + const kindIdx = args.indexOf("--default-kind"); + if (kindIdx !== -1) { + defaultKind = args[kindIdx + 1]; + if (!defaultKind || defaultKind.startsWith("-")) { + console.error("Error: --default-kind requires a kind argument"); + Deno.exit(1); + } + } + + // Positional arg: first non-flag argument, excluding known flag values. + const flagsWithValues = new Set(["--on-conflict", "--format", "--default-kind"]); + const sourcePath = args.find((a, i) => { + if (a.startsWith("-")) return false; + const prev = args[i - 1]; + return !flagsWithValues.has(prev); + }); + if (!sourcePath) { console.error( - "Usage: khive kg import [--overwrite] [--on-conflict ] ", + "Usage: khive kg import [--format ] [--default-kind ]\n" + + " [--overwrite] [--on-conflict ] ", ); - console.error(" Path to a KgArchive JSON file (required)"); + console.error(" Path to the source file (required)"); + console.error(" --format ndjson (default), csv, tsv, json"); + console.error(" --default-kind Default entity kind when source omits kind"); console.error(" --overwrite Replace existing NDJSON files without error"); - console.error(" --on-conflict Per-record conflict: skip | replace | merge"); + console.error( + " --on-conflict Per-record: error (default) | skip | update | replace | merge", + ); Deno.exit(1); } + // Resolve the format (explicit flag > file extension detection). + const resolvedFormat = explicitFormat ?? 
detectFormat(sourcePath) ?? "ndjson"; + try { - await importArchive(repoRoot, archivePath, { overwrite, onConflict }); + if (resolvedFormat === "ndjson") { + // Native NDJSON/archive path: source must be a KgArchive JSON file. + await importArchive(repoRoot, sourcePath, { overwrite, onConflict }); + } else { + // Adapter path: CSV, TSV, or JSON format via format adapters (ADR-036). + await importViaAdapter(repoRoot, sourcePath, resolvedFormat, defaultKind, { + overwrite, + onConflict, + }); + } } catch (err) { console.error(`Error: ${(err as Error).message}`); Deno.exit(1); diff --git a/cli/kg/init.ts b/cli/kg/init.ts index 3e952503..103bf905 100644 --- a/cli/kg/init.ts +++ b/cli/kg/init.ts @@ -33,7 +33,7 @@ import { const DEFAULT_CONFIG_TOML = `\ # .khive/config.toml — project KG configuration # Committed to git. All collaborators use these settings. -# See: https://khive.ai/docs/adr/ADR-057 +# See: https://github.com/ohdearquant/khive/blob/main/docs/adr/ADR-035-cli-config-and-auto-embed.md [embed] model = "mE5-small" diff --git a/cli/kg/stats.ts b/cli/kg/stats.ts index 0e71b64d..534f616d 100644 --- a/cli/kg/stats.ts +++ b/cli/kg/stats.ts @@ -126,7 +126,9 @@ function formatStats(stats: KgStats, json: boolean): string { const cov = stats.schemaCoverage; lines.push(`\n Schema coverage:`); - lines.push(` Entity kinds: ${cov.entityKindsKnown} known, ${cov.entityKindsUnknown} unknown`); + lines.push( + ` Entity kinds: ${cov.entityKindsKnown} known, ${cov.entityKindsUnknown} unknown`, + ); lines.push( ` Edge relations: ${cov.edgeRelationsKnown} known, ${cov.edgeRelationsUnknown} unknown`, ); diff --git a/cli/kg/stats_test.ts b/cli/kg/stats_test.ts index df74065a..ec6f9d0c 100644 --- a/cli/kg/stats_test.ts +++ b/cli/kg/stats_test.ts @@ -21,10 +21,10 @@ async function setupKg( edges: unknown[], ): Promise { await Deno.mkdir(`${dir}/.khive/kg`, { recursive: true }); - const entitiesNdjson = - entities.map((e) => JSON.stringify(e)).join("\n") + (entities.length > 0 ? 
"\n" : ""); - const edgesNdjson = - edges.map((e) => JSON.stringify(e)).join("\n") + (edges.length > 0 ? "\n" : ""); + const entitiesNdjson = entities.map((e) => JSON.stringify(e)).join("\n") + + (entities.length > 0 ? "\n" : ""); + const edgesNdjson = edges.map((e) => JSON.stringify(e)).join("\n") + + (edges.length > 0 ? "\n" : ""); await Deno.writeTextFile(`${dir}/.khive/kg/entities.ndjson`, entitiesNdjson); await Deno.writeTextFile(`${dir}/.khive/kg/edges.ndjson`, edgesNdjson); } diff --git a/cli/kg/validate.ts b/cli/kg/validate.ts index 4163a11b..725f1a91 100644 --- a/cli/kg/validate.ts +++ b/cli/kg/validate.ts @@ -84,21 +84,33 @@ export async function validate(repoRoot: string): Promise { } if (schema.remotes && schema.remotes.length > 0) { for (const r of schema.remotes) { - if (!r.name || !r.repo || !r.path || !r.commit) { + // ADR-037 §schema.yaml remotes section: required fields are name, url, ref, namespace. + const missing: string[] = []; + if (!r.name) missing.push("name"); + if (!r.url) missing.push("url"); + if (!r.ref) missing.push("ref"); + if (!r.namespace) missing.push("namespace"); + if (missing.length > 0) { errors.push({ file: SCHEMA_FILE, line: 0, - message: `Remote '${ - r.name || "(unnamed)" - }' missing required fields (name, repo, path, commit)`, - }); - } else if (!/^[0-9a-f]{40}$/i.test(r.commit)) { - errors.push({ - file: SCHEMA_FILE, - line: 0, - message: `Remote '${r.name}' commit must be a 40-character SHA, got '${r.commit}'`, + message: `Remote '${r.name || "(unnamed)"}' missing required fields: ${ + missing.join(", ") + }`, }); } + // Optional pin must be exactly "sha256:" + 64 lowercase hex chars (ADR-037 §pin format). 
+ if (r.pin !== undefined && r.pin !== null) { + if (!/^sha256:[0-9a-f]{64}$/.test(r.pin)) { + errors.push({ + file: SCHEMA_FILE, + line: 0, + message: `Remote '${ + r.name || "(unnamed)" + }' pin must be "sha256:" followed by 64 lowercase hex chars, got '${r.pin}'`, + }); + } + } if (r.name) schemaRemotes.add(r.name); } } diff --git a/cli/lib/config.ts b/cli/lib/config.ts index 6d3fd76a..a13ef2b5 100644 --- a/cli/lib/config.ts +++ b/cli/lib/config.ts @@ -109,6 +109,7 @@ const DEFAULTS: KhiveConfig = { fields: { include: ["name", "description"] }, }, schema: { strict: true }, + // TODO: replace this placeholder with the real auth endpoint before commercial auth ships. auth: { api_url: "https://api.khive.ai" }, }; diff --git a/cli/lib/importers/csv.ts b/cli/lib/importers/csv.ts index 60ac4966..403ff36d 100644 --- a/cli/lib/importers/csv.ts +++ b/cli/lib/importers/csv.ts @@ -1,5 +1,5 @@ /** - * CSV / TSV adapter (ADR-055 §2 — P0 format). + * CSV / TSV adapter (ADR-036 §2 — P0 format). * * Parses a CSV file into entity + edge records. Auto-detects whether the file * is an entity list or an edge list from the presence of `source` and `target` @@ -7,7 +7,7 @@ * - file with source + target columns → edges * - otherwise → entities (name required) * - * Mapping files (ADR-055 §2 P0) are a future extension; this Phase E1 adapter + * Mapping files (ADR-036 §2 P0) are a future extension; this Phase E1 adapter * accepts only auto-detected schemas. * * Fatal errors (throw): empty/no-header CSV, missing required column, missing diff --git a/cli/lib/importers/json.ts b/cli/lib/importers/json.ts index f8fde3f6..9235249a 100644 --- a/cli/lib/importers/json.ts +++ b/cli/lib/importers/json.ts @@ -1,12 +1,12 @@ /** - * JSON array adapter (ADR-055 §2 P0 — "JSON" format). + * JSON array adapter (ADR-036 §2 P0 — "JSON" format). * * Reads a JSON file containing an array of objects. 
Each object is either an * entity or an edge depending on which fields are present: * - has source + target → edge * - otherwise → entity (name required) * - * Entity fields recognized case-insensitively (ADR-055 §JSON-detection): + * Entity fields recognized case-insensitively (ADR-036 §JSON-detection): * id, name, kind, description, tags. * Everything else collects into `properties`. Edge fields recognized: * edge_id, source, target, relation, weight; everything else → properties. @@ -85,7 +85,7 @@ export function adaptJson( for (let i = 0; i < parsed.length; i++) { const item = parsed[i]; - // Non-object items are a fatal structural error (ADR-055 §5: all-or-nothing). + // Non-object items are a fatal structural error (ADR-036 §5: all-or-nothing). if (!item || typeof item !== "object" || Array.isArray(item)) { throw new Error( `item ${i}: expected a JSON object, got ${Array.isArray(item) ? "array" : typeof item}`, diff --git a/cli/lib/importers/types.ts b/cli/lib/importers/types.ts index c96dc5e4..1f491c48 100644 --- a/cli/lib/importers/types.ts +++ b/cli/lib/importers/types.ts @@ -1,5 +1,5 @@ /** - * Shared types for the format adapters under cli/lib/importers/ (ADR-055). + * Shared types for the format adapters under cli/lib/importers/ (ADR-036). * * Each adapter returns these records; the dispatcher merges them and writes * them as sorted NDJSON via the standard `khive kg import` pipeline. diff --git a/cli/lib/kernel.ts b/cli/lib/kernel.ts index 8df8e6d6..d0168674 100644 --- a/cli/lib/kernel.ts +++ b/cli/lib/kernel.ts @@ -1,10 +1,10 @@ /** - * Resolve the path to the `kkernel` Rust binary (ADR-076, ADR-077). + * Resolve the path to the `kkernel` Rust binary (ADR-026). * * Strategy (in order): * 1. `KKERNEL_BINARY` env var — explicit override, used in dev and tests. * 2. `@khive/kernel-/bin/kkernel` under node_modules — production - * install via npm optional dependencies (ADR-077). + * install via npm optional dependencies (ADR-026). * 3. 
`/crates/target/release/kkernel` — monorepo dev convenience. * 4. `/crates/target/debug/kkernel` — last-resort dev fallback. * @@ -13,14 +13,74 @@ import { dirname, fromFileUrl, join } from "@std/path"; +/** + * Detect whether the Linux runtime links against musl (Alpine etc.) or glibc. + * Returns "gnu" or "musl". Defaults to "gnu" if detection is inconclusive. + * + * Detection order (most-reliable first): + * 1. `ldd --version` — invokes the actual system linker (same as the Node + * shim in npm/bin/khive). More reliable than /proc/self/maps which + * reflects the Deno process's own loader, not the child binary's. + * 2. `/lib/ld-musl-*` scan — fallback used only when `ldd` cannot be run; fast filesystem check, no subprocess. + * + * NOTE: npm/bin/khive and npm/bin/khive-mcp use the same ordered detection. + * Keep all three in sync. + */ +function detectLibc(): "gnu" | "musl" { + try { + const result = new Deno.Command("ldd", { + args: ["--version"], + stdin: "null", + stdout: "piped", + stderr: "piped", + }).outputSync(); + const out = new TextDecoder() + .decode(result.stdout) + .toLowerCase() + .concat(new TextDecoder().decode(result.stderr).toLowerCase()); + if (out.includes("musl")) return "musl"; + return "gnu"; + } catch { + // ldd not available — fall through + } + try { + for (const entry of Deno.readDirSync("/lib")) { + if (entry.name.startsWith("ld-musl-")) return "musl"; + } + } catch { + // /lib not readable — fall through + } + return "gnu"; +} + +/** + * Resolve the platform suffix for the @khive/kernel-{platform} subpackage on + * Linux. Returns the suffix string, or throws with a clear "unsupported" + * message for musl arm64 (not in the v1 matrix).
+ */ +function linuxVariant(arch: "x86_64" | "aarch64"): string { + const libc = detectLibc(); + if (arch === "aarch64") { + if (libc === "musl") { + throw new Error( + "khive does not support linux-arm64 with musl libc in v1.\n" + + "linux-arm64 with musl is not in the v1 release matrix.\n" + + "Supported: darwin-arm64, darwin-x64, linux-x64-gnu, linux-x64-musl, linux-arm64 (glibc), win32-x64.\n" + + "File an issue at https://github.com/ohdearquant/khive/issues if you need this target.", + ); + } + return "linux-arm64"; + } + return libc === "musl" ? "linux-x64-musl" : "linux-x64-gnu"; +} + function platformKey(): string { const os = Deno.build.os; const arch = Deno.build.arch; + if (os === "linux") return linuxVariant(arch as "x86_64" | "aarch64"); const map: Record = { "darwin-aarch64": "darwin-arm64", "darwin-x86_64": "darwin-x64", - "linux-x86_64": "linux-x64-gnu", - "linux-aarch64": "linux-arm64", "windows-x86_64": "win32-x64", }; const key = `${os}-${arch}`; @@ -103,7 +163,7 @@ export function kkernelPath(repoRoot?: string): string { ` @khive/kernel-${platformKey()}/bin/${exe} (npm install)\n` + ` ${candidates.join("\n ")}\n` + `If you're developing locally, run: (cd crates && cargo build --release -p kkernel)\n` + - `Supported platforms: darwin-arm64, darwin-x64, linux-x64-gnu, linux-arm64, win32-x64.`, + `Supported platforms: darwin-arm64, darwin-x64, linux-x64-gnu, linux-x64-musl, linux-arm64, win32-x64.`, ); } diff --git a/cli/lib/schema.ts b/cli/lib/schema.ts index 08d2694e..463b0aa6 100644 --- a/cli/lib/schema.ts +++ b/cli/lib/schema.ts @@ -85,12 +85,29 @@ export interface EdgeRelationDef { description?: string; } -/** A remote KG reference as defined in ADR-048 §3. */ +/** + * A remote KG reference (ADR-037 §Reference syntax). + * + * Fields `url`, `ref`, and `namespace` are required (ADR-037 §schema.yaml remotes section). 
+ * `pin` is optional: a SHA-256 content hash (`sha256:<64hexchars>`); when present, + * sync verifies the fetched archive against this hash before accepting it. + * + * Note: the legacy `repo`/`path`/`commit` field shape from ADR-020 v0 is superseded + * by this `url`/`ref`/`namespace`/`pin` shape. Schema validation rejects the old shape. + */ export interface RemoteDef { name: string; - repo: string; - path: string; - commit: string; + /** Git remote URL (required). */ + url: string; + /** Branch or tag to resolve against (required). */ + ref: string; + /** Namespace scoping entity resolution for this remote (required). */ + namespace: string; + /** + * Optional SHA-256 content hash pin (`sha256:<64hexchars>`). + * When present, sync is mandatory-verify (ADR-037 §pin format). + */ + pin?: string; } export interface PackRef { @@ -103,7 +120,7 @@ export interface Schema { entity_kinds: string[]; edge_relations: EdgeRelationDef[]; note_kinds?: string[]; - /** Remotes are a list of {name, repo, path, commit} entries (ADR-048 §3). */ + /** Remotes are a list of {name, url, ref, namespace, pin?} entries (ADR-037 §remotes). */ remotes?: RemoteDef[]; packs?: PackRef[]; } diff --git a/cli/lib/schema_test.ts b/cli/lib/schema_test.ts index 316e61e6..5b60cd4f 100644 --- a/cli/lib/schema_test.ts +++ b/cli/lib/schema_test.ts @@ -1,6 +1,6 @@ /** - * Tests for schema.yaml parser — specifically the ADR-048 remotes format - * (list of {name, repo, path, commit} entries). + * Tests for schema.yaml parser — specifically the ADR-037 remotes format + * (list of {name, url, ref, namespace, pin?} entries). 
*/ import { assertEquals } from "@std/assert"; @@ -22,9 +22,9 @@ async function writeSchema(dir: string, content: string): Promise { return dir; } -// ─── remotes (ADR-048 §3 format) ───────────────────────────────────────────── +// ─── remotes (ADR-037 shape: {name, url, ref, namespace, pin?}) ────────────── -Deno.test("loadSchema: parses ADR-048 remotes as list of {name, repo, path, commit}", async () => { +Deno.test("loadSchema: parses ADR-037 remotes as list of {name, url, ref, namespace}", async () => { const dir = await makeTempDir(); try { await writeSchema( @@ -37,13 +37,14 @@ Deno.test("loadSchema: parses ADR-048 remotes as list of {name, repo, path, comm " - relation: implements", "remotes:", " - name: lattice", - " repo: ohdearquant/lattice", - " path: .khive/kg", - " commit: a1b2c3d4e5f6789012345678901234567890abcd", + " url: https://github.com/ohdearquant/lattice.git", + " ref: main", + " namespace: lattice", " - name: atlas", - " repo: ohdearquant/atlas", - " path: .khive/kg", - " commit: f9e8d7c6b5a4321098765432109876543210fedc", + " url: https://github.com/ohdearquant/atlas.git", + " ref: main", + " namespace: atlas", + " pin: sha256:a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef12345678", ].join("\n") + "\n", ); @@ -53,15 +54,20 @@ Deno.test("loadSchema: parses ADR-048 remotes as list of {name, repo, path, comm const lattice = schema.remotes![0]; assertEquals(lattice.name, "lattice"); - assertEquals(lattice.repo, "ohdearquant/lattice"); - assertEquals(lattice.path, ".khive/kg"); - assertEquals(lattice.commit, "a1b2c3d4e5f6789012345678901234567890abcd"); + assertEquals(lattice.url, "https://github.com/ohdearquant/lattice.git"); + assertEquals(lattice.ref, "main"); + assertEquals(lattice.namespace, "lattice"); + assertEquals(lattice.pin, undefined); const atlas = schema.remotes![1]; assertEquals(atlas.name, "atlas"); - assertEquals(atlas.repo, "ohdearquant/atlas"); - assertEquals(atlas.path, ".khive/kg"); - assertEquals(atlas.commit, 
"f9e8d7c6b5a4321098765432109876543210fedc"); + assertEquals(atlas.url, "https://github.com/ohdearquant/atlas.git"); + assertEquals(atlas.ref, "main"); + assertEquals(atlas.namespace, "atlas"); + assertEquals( + atlas.pin, + "sha256:a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef12345678", + ); } finally { await removeDir(dir); } diff --git a/cli/main.ts b/cli/main.ts index 9c9ac90c..2c04df63 100644 --- a/cli/main.ts +++ b/cli/main.ts @@ -33,7 +33,7 @@ function printUsage(): void { Usage: khive kg Manage the git-native knowledge graph khive pack Author and validate declarative packs (ADR-050) - khive auth Authenticate with khive.ai (optional) + khive auth Authenticate with khive (optional) KG subcommands: init Initialise .khive/kg/ in the current git repo @@ -59,11 +59,11 @@ Pack subcommands (ADR-050): check Validate a pack.yaml manifest Auth subcommands: - login Sign in to khive.ai via GitHub OAuth + login Sign in via GitHub OAuth status Show current authentication state logout Remove stored credentials -All 'khive kg' commands work without a khive.ai account. +All 'khive kg' commands work without a khive auth account. Run 'khive --help' for detailed usage.`); } @@ -97,7 +97,7 @@ function printAuthUsage(): void { console.log(`Usage: khive auth Subcommands: - login Sign in to khive.ai + login Sign in via GitHub OAuth status Show authentication state logout Remove stored credentials`); } diff --git a/crates/Cargo.toml b/crates/Cargo.toml index 7c8ff3c4..4c4e11c9 100644 --- a/crates/Cargo.toml +++ b/crates/Cargo.toml @@ -18,21 +18,27 @@ members = [ "khive-pack-gtd", "khive-pack-memory", "khive-pack-brain", + "khive-pack-comm", + "khive-pack-schedule", + "khive-pack-template", "khive-mcp", "khive-vcs", + "khive-vcs-adapters", "kkernel", "khive-retrieval", ] -# khive-merge excluded — forward-deployed (ADR-043) but not yet compilable -# against restructured khive-vcs. Will be re-added when ADR-043 integrates. 
+# khive-merge removed — the custom three-way merge engine was superseded for v1 +# by git's line merge on sorted NDJSON (ADR-010, ADR-020). The conflict taxonomy +# from ADR-043 is retained for a future conflict-resolution ADR; the crate is +# excluded until that work is scoped. [workspace.package] -version = "0.2.0" +version = "0.2.1" edition = "2021" authors = ["Ocean "] license = "Apache-2.0" repository = "https://github.com/ohdearquant/khive" -homepage = "https://khive.ai" +homepage = "https://github.com/ohdearquant" keywords = ["knowledge-graph", "semantic-search", "mcp", "ai-tools", "graph-database"] categories = ["database", "command-line-utilities"] @@ -49,7 +55,7 @@ uuid = { version = "1.10", features = ["v4", "serde"] } chrono = { version = "0.4", default-features = false, features = ["serde", "clock"] } async-trait = "0.1" clap = { version = "4.5", features = ["derive", "env"] } -lattice-embed = "0.1.2" +lattice-embed = "0.2.3" parking_lot = "0.12" [profile.release] diff --git a/crates/khive-bm25/Cargo.toml b/crates/khive-bm25/Cargo.toml index 9c915117..5847ab63 100644 --- a/crates/khive-bm25/Cargo.toml +++ b/crates/khive-bm25/Cargo.toml @@ -11,7 +11,7 @@ categories.workspace = true description = "BM25 (Okapi BM25) keyword index with deterministic scoring" [dependencies] -khive-score = { version = "0.2.0", path = "../khive-score" } +khive-score = { version = "0.2.1", path = "../khive-score" } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/crates/khive-bm25/src/index/mod.rs b/crates/khive-bm25/src/index/mod.rs index ca8c77ff..0009c16e 100644 --- a/crates/khive-bm25/src/index/mod.rs +++ b/crates/khive-bm25/src/index/mod.rs @@ -824,7 +824,7 @@ impl Bm25Index { /// Compute IDF from document frequency using the Robertson-Walker variant. 
/// -/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_nonneg` +/// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.idf_nonneg` /// With +1 inside ln(), IDF(t) >= 0 for all terms regardless of document frequency. #[inline] pub(crate) fn idf_from_doc_freq(doc_freq: usize, doc_count: usize) -> f64 { @@ -835,7 +835,7 @@ pub(crate) fn idf_from_doc_freq(doc_freq: usize, doc_count: usize) -> f64 { /// Compute a single-term BM25 contribution for a posting. /// -/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.tf_bounded` +/// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.tf_bounded` /// TF saturation: tf * (k1 + 1) / (tf + k1 * ...) < k1 + 1 for all tf >= 0. #[inline] pub(crate) fn bm25_term_score( diff --git a/crates/khive-bm25/src/index/search.rs b/crates/khive-bm25/src/index/search.rs index a89a9a3c..616bea80 100644 --- a/crates/khive-bm25/src/index/search.rs +++ b/crates/khive-bm25/src/index/search.rs @@ -599,7 +599,7 @@ impl Bm25Index { /// Emits `bm25.search.duration_ms`, `bm25.search.count`, and /// `bm25.search.results` metrics when a sink is attached. /// - /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.bm25_nonneg` + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.bm25_nonneg` /// Total BM25 score >= 0 for any query and document, since it is a sum of /// non-negative IDF values multiplied by non-negative TF components. /// Returns up to `k` (id, score) pairs sorted by BM25 score descending. @@ -792,7 +792,7 @@ impl Bm25Index { /// on other targets). Pre-converted f32 document lengths avoid per-scoring /// integer-to-float conversion. /// - /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.tf_bounded` + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.tf_bounded` /// TF saturation: tf * (k1 + 1) / (tf + k1 * ...) < k1 + 1 for all tf >= 0. pub(crate) fn search_brute_force( &self, @@ -1068,10 +1068,10 @@ impl Bm25Index { /// This variant always returns non-negative IDF (Robertson-Walker variant). 
/// Uses interior mutability for cache updates to enable concurrent reads. /// - /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_nonneg` + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.idf_nonneg` /// With +1 inside ln(), IDF(t) >= 0 for all terms regardless of document frequency. /// - /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_mono` + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.BM25.idf_mono` /// Rarer terms have higher IDF: n1 < n2 implies IDF(n1) > IDF(n2). pub(super) fn compute_idf(&self, term: &str, doc_count: usize) -> f64 { use std::sync::atomic::Ordering as AtomicOrdering; diff --git a/crates/khive-db/Cargo.toml b/crates/khive-db/Cargo.toml index a4c418bb..840462e8 100644 --- a/crates/khive-db/Cargo.toml +++ b/crates/khive-db/Cargo.toml @@ -11,9 +11,9 @@ categories.workspace = true description = "SQLite storage backend: entities, edges, notes, events, FTS5, sqlite-vec vectors." [dependencies] -khive-storage = { version = "0.2.0", path = "../khive-storage" } -khive-score = { version = "0.2.0", path = "../khive-score" } -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-score = { version = "0.2.1", path = "../khive-score" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } tokio = { workspace = true } async-trait = { workspace = true } uuid = { workspace = true } @@ -31,6 +31,10 @@ sqlite-vec = { version = "0.1.9", optional = true } [dev-dependencies] tokio = { workspace = true, features = ["full", "test-util"] } tempfile = "3" +rusqlite = { version = "0.33", features = ["bundled", "column_decltype"] } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +uuid = { workspace = true } [features] default = [] diff --git a/crates/khive-db/src/backend.rs b/crates/khive-db/src/backend.rs index 
8c3f401b..de2e5124 100644 --- a/crates/khive-db/src/backend.rs +++ b/crates/khive-db/src/backend.rs @@ -22,10 +22,12 @@ use std::path::Path; use std::sync::Arc; +use rusqlite::OptionalExtension; + use crate::error::SqliteError; use crate::pool::{ConnectionPool, PoolConfig}; use crate::sql_bridge::SqlBridge; -use crate::stores::{entity, event, graph, note, text, vectors}; +use crate::stores::{entity, event, graph, note, sparse, text, vectors}; /// Concrete storage backend providing capability traits. pub struct StorageBackend { @@ -90,6 +92,34 @@ impl StorageBackend { crate::migrations::apply_schema_plan(writer.conn(), plan) } + /// Apply pack-auxiliary DDL statements (ADR-017 §Storage profile and + /// pack-auxiliary schema). + /// + /// Executes each DDL statement idempotently via `execute_batch`. Each + /// statement MUST be self-contained and use `CREATE TABLE IF NOT EXISTS` + /// (or equivalent idempotent DDL) so that calling this method more than + /// once does not fail. + /// + /// Pack auxiliary tables are NOT tracked in `_schema_versions` — they are + /// non-versioned in v1 (ADR-017). Use `apply_schema` with a + /// `ServiceSchemaPlan` when version tracking is needed. + /// + /// This method is lower-level than `PackRuntime::schema_plan()` — the + /// runtime bootstrap calls `pack.schema_plan().statements` and passes the + /// slice here. The `SchemaPlan` type lives in `khive-runtime` (above this + /// crate in the dep chain); this method accepts a plain `&[&'static str]` + /// to avoid a circular dependency. + pub fn apply_pack_ddl_statements( + &self, + statements: &[&'static str], + ) -> Result<(), SqliteError> { + let writer = self.pool.try_writer()?; + for &stmt in statements { + writer.conn().execute_batch(stmt)?; + } + Ok(()) + } + /// Get an EntityStore. Applies the entities DDL if not already present. /// /// Idempotent — safe to call multiple times. 
@@ -247,17 +277,78 @@ impl StorageBackend { // Ensure sqlite-vec is registered before creating vec0 tables. crate::extension::ensure_extensions_loaded(); - // Create the vec0 virtual table. Idempotent. + let table = format!("vec_{}", model_key); + let writer = self.pool.try_writer()?; + + // Detect old-schema vec0 tables that predate the `field` column (ADR-044). + // vec0 virtual tables do not support ALTER TABLE, so we must drop and recreate + // the table if it exists without the `field` column. Vector data is a cache — + // callers can re-embed from the source record after the table is rebuilt. + // Use PRAGMA table_xinfo to check columns directly; substring matching on the + // CREATE DDL is fragile (a model_key containing "field" would false-match). + let table_exists: bool = writer + .conn() + .query_row( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?1", + rusqlite::params![&table], + |row| row.get::<_, i64>(0), + ) + .optional() + .map_err(SqliteError::Rusqlite)? + .is_some(); + + if table_exists { + let has_field: bool = { + let pragma = format!("PRAGMA table_xinfo({})", table); + let mut stmt = writer.conn().prepare(&pragma)?; + let mut rows = stmt.query([])?; + let mut found = false; + while let Some(row) = rows.next()? { + let name: String = row.get(1)?; + if name == "field" { + found = true; + break; + } + } + found + }; + if !has_field { + let drop_ddl = format!("DROP TABLE IF EXISTS {}", table); + writer.conn().execute_batch(&drop_ddl)?; + } + } + + // Ensure the _embedding_models registry table exists (ADR-043 §1). + // This is a no-op when the table already exists. Running it here ensures + // the registry is present for any caller that opens a vector store without + // first calling run_migrations() (e.g., tests that create stores directly). + // Production callers are expected to call run_migrations() at startup, which + // creates the registry via V14; this is a belt-and-suspenders fallback.
+ // Schema is defined in `migrations::EMBEDDING_MODELS_DDL` (single source of + // truth) to prevent the two copies from silently drifting. + writer + .conn() + .execute_batch(crate::migrations::EMBEDDING_MODELS_DDL)?; + + // Create the vec0 virtual table. Idempotent on fresh databases and after the + // old-schema rebuild above. + // + // NOTE: `embedding_model_id` is NOT included in this DDL because sqlite-vec + // enforces NOT NULL on TEXT metadata columns at insert time, so the column + // cannot be added at virtual-table creation as a nullable FK. The column will + // be present after the ADR-043 §8 startup backfill rebuild (steps 2-4), which + // is deferred to a follow-up PR — see the tracking issue filed against MAJ-2 + // of codex round-1 review of PR #374. let ddl = format!( "CREATE VIRTUAL TABLE IF NOT EXISTS vec_{} USING vec0(\ subject_id TEXT PRIMARY KEY, \ namespace TEXT NOT NULL, \ kind TEXT NOT NULL, \ + field TEXT NOT NULL, \ embedding float[{}] distance_metric=cosine\ )", model_key, dimensions ); - let writer = self.pool.try_writer()?; writer.conn().execute_batch(&ddl)?; Ok(Arc::new(vectors::SqliteVecStore::new( @@ -269,6 +360,51 @@ impl StorageBackend { )?)) } + /// Get a SparseStore for a specific model key, scoped to the default namespace. + /// + /// Creates the sparse table if it does not already exist. + pub fn sparse( + &self, + model_key: &str, + ) -> Result, SqliteError> { + self.sparse_for_namespace(model_key, "local") + } + + /// Get a SparseStore for a specific model key with an explicit default namespace. + /// + /// The `model_key` must contain only ASCII alphanumeric/underscore characters. 
+ pub fn sparse_for_namespace( + &self, + model_key: &str, + namespace: &str, + ) -> Result, SqliteError> { + if model_key.is_empty() + || !model_key + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_') + { + return Err(SqliteError::InvalidData(format!( + "invalid model_key '{}': must be non-empty and contain only alphanumeric/underscore characters", + model_key + ))); + } + if namespace.trim().is_empty() { + return Err(SqliteError::InvalidData( + "sparse store namespace must be non-empty".to_string(), + )); + } + + let writer = self.pool.try_writer()?; + sparse::ensure_sparse_schema(writer.conn(), model_key).map_err(SqliteError::Rusqlite)?; + + Ok(Arc::new(sparse::SqliteSparseStore::new( + Arc::clone(&self.pool), + self.is_file_backed, + model_key.to_string(), + namespace.trim().to_string(), + )?)) + } + /// Get a TextSearch for a specific table key. /// /// Creates the FTS5 virtual table if it does not already exist. Uses the @@ -471,17 +607,20 @@ mod tests { id, khive_types::SubstrateKind::Entity, "local", - vec![1.0, 0.0, 0.0], + "content", + vec![vec![1.0, 0.0, 0.0]], ) .await .unwrap(); let hits = store .search(khive_storage::types::VectorSearchRequest { - query_embedding: vec![1.0, 0.0, 0.0], + query_vectors: vec![vec![1.0, 0.0, 0.0]], top_k: 1, namespace: None, kind: None, + filter: None, + backend_hints: None, }) .await .unwrap(); @@ -505,7 +644,8 @@ mod tests { id, khive_types::SubstrateKind::Entity, "local", - vec![1.0, 0.0, 0.0], + "content", + vec![vec![1.0, 0.0, 0.0]], ) .await .unwrap(); diff --git a/crates/khive-db/src/migrations.rs b/crates/khive-db/src/migrations.rs index f29ac63d..7d727289 100644 --- a/crates/khive-db/src/migrations.rs +++ b/crates/khive-db/src/migrations.rs @@ -68,7 +68,7 @@ pub fn apply_schema_plan(conn: &Connection, plan: &ServiceSchemaPlan) -> Result< } // ============================================================================= -// Versioned migration system (ADR-022) +// Versioned migration system (ADR-015) // 
============================================================================= /// A single forward-only schema migration. @@ -170,6 +170,24 @@ const V1_UP: &str = "\ /// V4 note: Deduplicates existing graph_edges rows that share the same /// (namespace, source_id, target_id, relation) triple, keeping the earliest /// rowid, then adds a unique index enforcing the constraint going forward. +/// +/// V5 note: `ENTITIES_DDL` in `stores/entity.rs` already includes `entity_type TEXT` +/// so that in-process schema creation has the column from the start. When +/// `run_migrations` is called on such a DB, the V5 `ALTER TABLE` would fail with +/// "duplicate column name". The migration runner handles this by checking column +/// existence before applying V5 — see `run_migrations`. +/// +/// V9 note: Adds lifecycle columns (updated_at, deleted_at) and backend routing +/// metadata (target_backend) to graph_edges. Uses table rebuild to work around +/// SQLite's limited ALTER TABLE support. Backfills updated_at = created_at for +/// existing rows and sets deleted_at = NULL, target_backend = NULL. +/// +/// V13 note: Adds event observability + provenance columns (kind, payload, +/// payload_schema_version, profile_state_version, session_id, aggregate_kind, +/// aggregate_id) and the event_observations table. The DDL is computed at runtime +/// via `build_v13_event_observability_sql` so that running migrations on a DB +/// already bootstrapped by `ensure_events_schema` does not fail with "duplicate +/// column name". 
const V4_DEDUPE_GRAPH_EDGE_TRIPLES: &str = "\ DELETE FROM graph_edges \ WHERE rowid NOT IN (\ @@ -181,6 +199,204 @@ const V4_DEDUPE_GRAPH_EDGE_TRIPLES: &str = "\ ON graph_edges(namespace, source_id, target_id, relation);\ "; +const V5_ADD_ENTITY_TYPE_TO_ENTITIES: &str = "\ + ALTER TABLE entities ADD COLUMN entity_type TEXT NULL;\ + CREATE INDEX IF NOT EXISTS idx_entities_kind_entity_type \ + ON entities(namespace, kind, entity_type);\ +"; + +const V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND: &str = "\ + DROP INDEX IF EXISTS idx_graph_edges_unique_triple;\ + DROP INDEX IF EXISTS idx_graph_edges_ns_source;\ + DROP INDEX IF EXISTS idx_graph_edges_ns_target;\ + DROP INDEX IF EXISTS idx_graph_edges_ns_relation;\ + DROP INDEX IF EXISTS idx_graph_edges_ns_src_rel;\ + DROP INDEX IF EXISTS idx_graph_edges_ns_tgt_rel;\ + CREATE TABLE graph_edges_new (\ + namespace TEXT NOT NULL,\ + id TEXT NOT NULL,\ + source_id TEXT NOT NULL,\ + target_id TEXT NOT NULL,\ + relation TEXT NOT NULL,\ + weight REAL NOT NULL DEFAULT 1.0,\ + created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + deleted_at INTEGER,\ + metadata TEXT,\ + target_backend TEXT,\ + PRIMARY KEY (namespace, id)\ + );\ + INSERT INTO graph_edges_new \ + (namespace, id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, metadata, target_backend) \ + SELECT namespace, id, source_id, target_id, relation, weight, created_at, created_at, NULL, metadata, NULL \ + FROM graph_edges;\ + DROP TABLE graph_edges;\ + ALTER TABLE graph_edges_new RENAME TO graph_edges;\ + CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple ON graph_edges(namespace, source_id, target_id, relation);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_source ON graph_edges(namespace, source_id);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_target ON graph_edges(namespace, target_id);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\ + CREATE INDEX IF NOT EXISTS 
idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_target_backend ON graph_edges(target_backend) WHERE target_backend IS NOT NULL;\ +"; + +/// V10: Add `status` column to notes; make `salience` and `decay_factor` nullable. +/// +/// SQLite does not support `ALTER COLUMN` to change NOT NULL constraints, so the +/// salience/decay_factor nullability change is handled by rewriting the column +/// defaults: the columns already exist (added in V1) and will accept NULL when +/// inserted without a value. The `NOT NULL DEFAULT` constraint in V1 means any +/// existing rows already have a value; to allow NULLs going forward, SQLite +/// requires a full table rebuild — but since all existing values are valid f64, +/// we leave the constraint in place for existing rows and rely on application- +/// level logic (`NOTES_DDL` in stores/note.rs) to use nullable columns for new +/// tables. For production databases that went through V1, the application layer +/// handles NULLs via `Option` and the `NOT NULL DEFAULT` remains harmless +/// (inserts from the application always set these columns or leave them NULL via +/// the new nullable DDL path). The only structural change this migration makes +/// is adding the `status` column with a sensible default. +const V10_NOTE_STATUS_AND_NULLABLE_METRICS: &str = "\ + ALTER TABLE notes ADD COLUMN status TEXT NOT NULL DEFAULT 'active';\ +"; + +/// V11: Add merge tombstone columns to entities. +/// +/// `merged_into` stores the UUID of the entity this one was merged into. +/// `merge_event_id` is an opaque event ID for auditing. Both are nullable; +/// non-NULL only when the entity has been tombstoned by a merge. +/// The index on (namespace, merged_into) allows efficient lookup of all +/// entities that were merged into a given target. 
+/// +/// ENTITIES_DDL in stores/entity.rs already includes these columns for new +/// databases (created via `CREATE TABLE IF NOT EXISTS`). The migration handles +/// the upgrade path for existing production databases. +const V11_ENTITY_TOMBSTONE_COLUMNS: &str = "\ + ALTER TABLE entities ADD COLUMN merged_into TEXT;\ + ALTER TABLE entities ADD COLUMN merge_event_id TEXT;\ + CREATE INDEX IF NOT EXISTS idx_entities_merged_into ON entities(namespace, merged_into);\ +"; + +/// V12: Make `salience` and `decay_factor` nullable in the notes table. +/// +/// V1 created notes with `salience REAL NOT NULL DEFAULT 0.5` and +/// `decay_factor REAL NOT NULL DEFAULT 0.0`. SQLite does not support +/// `ALTER COLUMN` to remove a NOT NULL constraint, so a full table rebuild +/// is required. This migration rebuilds notes with the canonical nullable +/// schema that `NOTES_DDL` in stores/note.rs uses for fresh databases. +/// +/// On databases bootstrapped via `NOTES_DDL` (all test paths and new +/// installs), salience/decay_factor are already nullable — the V12 idempotency +/// check detects this and skips the rebuild, recording V12 as applied. 
+const V12_NULLABLE_NOTE_METRICS: &str = "\ + CREATE TABLE notes_new (\ + id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + kind TEXT NOT NULL,\ + status TEXT NOT NULL DEFAULT 'active',\ + name TEXT,\ + content TEXT NOT NULL DEFAULT '',\ + salience REAL,\ + decay_factor REAL,\ + expires_at INTEGER,\ + properties TEXT,\ + created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + deleted_at INTEGER\ + );\ + INSERT INTO notes_new \ + (id, namespace, kind, status, name, content, salience, decay_factor, \ + expires_at, properties, created_at, updated_at, deleted_at) \ + SELECT \ + id, namespace, kind, status, name, content, salience, decay_factor, \ + expires_at, properties, created_at, updated_at, deleted_at \ + FROM notes;\ + DROP TABLE notes;\ + ALTER TABLE notes_new RENAME TO notes;\ + CREATE INDEX IF NOT EXISTS idx_notes_namespace ON notes(namespace);\ + CREATE INDEX IF NOT EXISTS idx_notes_kind ON notes(namespace, kind);\ + CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);\ +"; + +// V13 adds event observability + provenance columns and the event_observations table. +// The DDL is computed at runtime via `build_v13_event_observability_sql` so that +// running migrations on a database already bootstrapped by `ensure_events_schema` +// (which includes the new columns) does not fail with "duplicate column name". +const V13_EVENT_OBSERVABILITY_PROVENANCE: &str = "__v13_computed_at_runtime__"; + +/// DDL for the `_embedding_models` registry table (ADR-043 §1). +/// +/// Shared between the V14 migration (`build_v14_embedding_model_registry_sql`) and +/// the belt-and-suspenders creation in `StorageBackend::vectors_for_namespace`. +/// Both sites reference this constant so the schema cannot silently diverge if the +/// registry evolves (ADR-043 §8 step 4 mandates a future schema tightening). 
+pub const EMBEDDING_MODELS_DDL: &str = "\ + CREATE TABLE IF NOT EXISTS _embedding_models (\ + id BLOB PRIMARY KEY,\ + engine_name TEXT NOT NULL,\ + model_id TEXT NOT NULL,\ + key_version TEXT NOT NULL,\ + dim INTEGER NOT NULL,\ + output_dim INTEGER,\ + status TEXT NOT NULL CHECK (status IN ('pending', 'active', 'superseded', 'archived')),\ + activated_at INTEGER,\ + superseded_at INTEGER,\ + superseded_by BLOB,\ + canonical_key BLOB NOT NULL UNIQUE,\ + created_at INTEGER NOT NULL\ + );\ + CREATE UNIQUE INDEX IF NOT EXISTS idx_embed_models_one_active \ + ON _embedding_models(engine_name) WHERE status = 'active';\ + CREATE INDEX IF NOT EXISTS idx_embed_models_engine_status \ + ON _embedding_models(engine_name, status);"; + +/// V14: Embedding model registry (`_embedding_models`) and per-engine model FK column. +/// +/// Creates the `_embedding_models` registry table that tracks which embedding model +/// is active for each vector engine (ADR-043 §1). Also adds the `embedding_model_id` +/// FK column to any existing regular `vec_` tables found in sqlite_master +/// so that stored vectors can be traced back to the model that produced them. +/// +/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`; +/// for those tables the column is added during the startup backfill rebuild +/// (ADR-043 §8 steps 2-4), which is deferred to a follow-up PR — see the tracking +/// issue filed in MAJ-2 of codex round-1. +/// +/// New `vec_` tables created via `StorageBackend::vectors_for_namespace` +/// after V14 do NOT yet include `embedding_model_id` at creation time; that column +/// will be present only after the ADR-043 §8 step-4 rebuild lands. +/// +/// The migration SQL is computed at runtime via `build_v14_embedding_model_registry_sql` +/// to discover existing `vec_` tables dynamically and skip the `ALTER TABLE` +/// step for any table that already has the column. 
+const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__"; + +/// V15: proposals_open projection table (ADR-046). +/// +/// Maintains a fold-derived view of the four proposal EventKinds so that +/// `list(kind=proposal, status="open")` is an index scan rather than a full +/// event-log fold. The `idx_events_payload_proposal_id` expression index +/// (already created in V13) backs the per-proposal event history query. +const V15_PROPOSALS_OPEN: &str = "\ + CREATE TABLE IF NOT EXISTS proposals_open (\ + proposal_id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + proposer TEXT NOT NULL,\ + title TEXT NOT NULL,\ + status TEXT NOT NULL CHECK (status IN ('open', 'changes_requested', 'approved', 'rejected', 'applied', 'withdrawn')),\ + created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + expiry INTEGER,\ + last_decision TEXT,\ + review_count INTEGER NOT NULL DEFAULT 0,\ + approve_count INTEGER NOT NULL DEFAULT 0,\ + reject_count INTEGER NOT NULL DEFAULT 0\ + );\ + CREATE INDEX IF NOT EXISTS idx_proposals_open_ns_status ON proposals_open(namespace, status);\ + CREATE INDEX IF NOT EXISTS idx_proposals_open_proposer ON proposals_open(namespace, proposer);\ + CREATE INDEX IF NOT EXISTS idx_proposals_open_updated ON proposals_open(namespace, updated_at DESC);\ +"; + pub const MIGRATIONS: &[VersionedMigration] = &[ VersionedMigration { version: 1, @@ -202,6 +418,73 @@ pub const MIGRATIONS: &[VersionedMigration] = &[ name: "dedupe_graph_edge_triples", up: V4_DEDUPE_GRAPH_EDGE_TRIPLES, }, + VersionedMigration { + version: 5, + name: "add_entity_type_to_entities", + up: V5_ADD_ENTITY_TYPE_TO_ENTITIES, + }, + // V6–V8: no-op placeholder slots originally reserved in the ADR-015 ledger for + // ADR-043, ADR-046, and ADR-041 respectively. During the v1 parallel cluster + // landings (c01/c03/c04/c06) the concrete migrations from those ADRs landed at + // V5, V9, and V13 instead (slot assignments shifted as clusters merged). 
V6–V8 + // were absorbed as no-ops to keep the contiguity check passing. Their names are + // frozen — V1-V13 are production schema. + // + // NOTE: V6 was originally named "reserved_adr043_embedding_pipeline_extensions" + // because it was intended to hold ADR-043 work. The actual ADR-043 migration + // landed at V14 (cluster-20). V6 retains its original name to avoid breaking the + // production tracking table on existing deployments. + VersionedMigration { + version: 6, + name: "reserved_adr043_embedding_pipeline_extensions", + up: "SELECT 1;", + }, + VersionedMigration { + version: 7, + name: "reserved_adr046_event_sourced_proposals_index", + up: "SELECT 1;", + }, + VersionedMigration { + version: 8, + name: "reserved_adr041_event_observations_and_session_id", + up: "SELECT 1;", + }, + VersionedMigration { + version: 9, + name: "edge_lifecycle_and_target_backend", + up: V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND, + }, + VersionedMigration { + version: 10, + name: "note_status_and_nullable_metrics", + up: V10_NOTE_STATUS_AND_NULLABLE_METRICS, + }, + VersionedMigration { + version: 11, + name: "entity_tombstone_columns", + up: V11_ENTITY_TOMBSTONE_COLUMNS, + }, + VersionedMigration { + version: 12, + name: "nullable_note_metrics", + up: V12_NULLABLE_NOTE_METRICS, + }, + VersionedMigration { + version: 13, + name: "event_observability_provenance", + up: V13_EVENT_OBSERVABILITY_PROVENANCE, + }, + VersionedMigration { + version: 14, + name: "embedding_model_registry", + up: V14_EMBEDDING_MODEL_REGISTRY, + }, + // V15: proposals_open projection table (ADR-046, cluster-22). + VersionedMigration { + version: 15, + name: "proposals_open", + up: V15_PROPOSALS_OPEN, + }, ]; const MIGRATION_TRACKING_TABLE: &str = "\ @@ -291,12 +574,138 @@ pub fn run_migrations(conn: &mut Connection) -> Result { } } + // V5 adds `entity_type` to entities. ENTITIES_DDL already includes the + // column so in-process DBs created via ensure_entities_schema already have + // it. 
Same idempotency pattern as V2. + if migration.version == 5 { + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'entity_type'", + [], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + let now = chrono::Utc::now().timestamp_micros(); + conn.execute( + "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \ + VALUES (?1, ?2, ?3)", + rusqlite::params![migration.version, migration.name, now], + ) + .map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })?; + applied_version = migration.version; + continue; + } + } + + // V10 adds `status` to notes. NOTES_DDL in stores/note.rs already includes + // `status`, so when a fresh schema is created via the store path (e.g. in + // tests or StorageBackend::notes()), the column exists before V10 runs. + // Detect and skip idempotently, recording the migration as applied. + if migration.version == 10 { + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'status'", + [], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + let now = chrono::Utc::now().timestamp_micros(); + conn.execute( + "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \ + VALUES (?1, ?2, ?3)", + rusqlite::params![migration.version, migration.name, now], + ) + .map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })?; + applied_version = migration.version; + continue; + } + } + + // V11 adds `merged_into` and `merge_event_id` to entities. ENTITIES_DDL in + // stores/entity.rs already includes these columns for databases created via + // the store path (e.g. in tests or StorageBackend::entities()). Detect and + // skip idempotently, recording the migration as applied. 
+ if migration.version == 11 { + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'merged_into'", + [], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + let now = chrono::Utc::now().timestamp_micros(); + conn.execute( + "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \ + VALUES (?1, ?2, ?3)", + rusqlite::params![migration.version, migration.name, now], + ) + .map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })?; + applied_version = migration.version; + continue; + } + } + + // V12 rebuilds the notes table to make salience/decay_factor nullable. + // NOTES_DDL in stores/note.rs already declares them nullable for databases + // created via the store path. If salience is already nullable (notnull=0), + // skip the rebuild and record V12 as applied. + if migration.version == 12 { + let already_nullable: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') \ + WHERE name = 'salience' AND \"notnull\" = 0", + [], + |row| row.get(0), + ) + .unwrap_or(false); + if already_nullable { + let now = chrono::Utc::now().timestamp_micros(); + conn.execute( + "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \ + VALUES (?1, ?2, ?3)", + rusqlite::params![migration.version, migration.name, now], + ) + .map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })?; + applied_version = migration.version; + continue; + } + } + let tx = conn.transaction().map_err(|e| SqliteError::Migration { version: migration.version, error: e.to_string(), })?; - tx.execute_batch(migration.up) + let up_sql = if migration.version == 13 { + build_v13_event_observability_sql(&tx).map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })? 
+ } else if migration.version == 14 { + build_v14_embedding_model_registry_sql(&tx).map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })? + } else { + migration.up.to_string() + }; + + tx.execute_batch(&up_sql) .map_err(|e| SqliteError::Migration { version: migration.version, error: e.to_string(), @@ -323,6 +732,150 @@ pub fn run_migrations(conn: &mut Connection) -> Result { Ok(applied_version) } +fn table_has_column( + conn: &Connection, + table: &'static str, + column: &'static str, +) -> Result { + conn.query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = ?2", + rusqlite::params![table, column], + |row| row.get(0), + ) +} + +fn build_v13_event_observability_sql(conn: &Connection) -> Result { + let mut sql = String::new(); + for (column, ddl) in [ + ( + "kind", + "ALTER TABLE events ADD COLUMN kind TEXT NOT NULL DEFAULT 'audit';", + ), + ( + "payload", + "ALTER TABLE events ADD COLUMN payload TEXT NOT NULL DEFAULT '{}';", + ), + ( + "payload_schema_version", + "ALTER TABLE events ADD COLUMN payload_schema_version INTEGER NOT NULL DEFAULT 1;", + ), + ( + "profile_state_version", + "ALTER TABLE events ADD COLUMN profile_state_version INTEGER;", + ), + ( + "session_id", + "ALTER TABLE events ADD COLUMN session_id TEXT;", + ), + ( + "aggregate_kind", + "ALTER TABLE events ADD COLUMN aggregate_kind TEXT;", + ), + ( + "aggregate_id", + "ALTER TABLE events ADD COLUMN aggregate_id TEXT;", + ), + ] { + if !table_has_column(conn, "events", column)? { + sql.push_str(ddl); + } + } + // Migrate legacy data column into payload if both exist. NOTE(review): both checks run while the SQL is still being built, i.e. before the ALTER statements collected above are executed; when this migration is the one adding `payload`, the condition is false and legacy `data` is never copied — confirm whether that is intended. + if table_has_column(conn, "events", "data")? && table_has_column(conn, "events", "payload")? 
{ + sql.push_str("UPDATE events SET payload = data WHERE data IS NOT NULL AND data <> '';"); + } + sql.push_str( + "CREATE TABLE IF NOT EXISTS event_observations (\ + event_id TEXT NOT NULL,\ + entity_id TEXT NOT NULL,\ + referent_kind TEXT NOT NULL,\ + role TEXT NOT NULL,\ + position INTEGER NOT NULL,\ + PRIMARY KEY (event_id, role, position)\ + );\ + CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);\ + CREATE INDEX IF NOT EXISTS idx_events_session ON events(namespace, session_id, created_at, id);\ + CREATE INDEX IF NOT EXISTS idx_events_ns_created_id ON events(namespace, created_at DESC, id DESC);\ + CREATE INDEX IF NOT EXISTS idx_events_payload_proposal_id ON events(json_extract(payload, '$.proposal_id'));\ + CREATE INDEX IF NOT EXISTS idx_event_obs_entity ON event_observations(entity_id, role);\ + CREATE INDEX IF NOT EXISTS idx_event_obs_event_role ON event_observations(event_id, role);", + ); + Ok(sql) +} + +/// Build V14 migration SQL at runtime. +/// +/// Creates the `_embedding_models` registry table and its indexes (ADR-043 §1). +/// Then discovers any existing regular (non-virtual) `vec_` tables in +/// sqlite_master and adds the `embedding_model_id` FK column where absent. +/// +/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`; +/// those tables are handled by the startup backfill rebuild (ADR-043 §8) which +/// runs after the SQL migration completes. New `vec_` tables created +/// after V14 do NOT yet include `embedding_model_id` at creation — that column +/// will be present only after the ADR-043 §8 step-4 rebuild lands (follow-up). +fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result { + let mut sql = String::from(EMBEDDING_MODELS_DDL); + + // Discover existing regular (non-virtual) vec_ tables. + // + // Exclusion rationale: + // - `sql NOT LIKE '%VIRTUAL%'` drops vec0 virtual tables (type='table' but DDL + // starts with "CREATE VIRTUAL TABLE"). 
+ // - `sql NOT LIKE '%vec0%'` is a belt-and-suspenders drop for any DDL that still + // contains the vec0 keyword. + // - The four `NOT LIKE` suffix clauses exclude the sqlite-vec internal shadow tables + // that are created as plain regular tables alongside each vec0 virtual table: + // vec__chunks, vec__rowids, vec__info, vec__vector_chunks00 + // (see sqlite-vec 0.1.9 sqlite-vec.c:3423-3468; these tables own sqlite-vec's + // internal layout and must never receive extraneous columns). + // The ESCAPE '\' form is required because '%' and '_' are SQL LIKE wildcards. + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master \ + WHERE type = 'table' \ + AND name LIKE 'vec_%' \ + AND sql NOT LIKE '%VIRTUAL%' \ + AND sql NOT LIKE '%vec0%' \ + AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \ + AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \ + AND name NOT LIKE '%\\_info' ESCAPE '\\' \ + AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\'", + )?; + let vec_tables: Vec = stmt + .query_map([], |row| row.get(0))? + .filter_map(|r| r.ok()) + .collect(); + + for table in &vec_tables { + // Validate table name: only alphanumeric and underscores after the 'vec_' prefix. + let valid = table.starts_with("vec_") + && table[4..] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + if !valid { + continue; + } + // Check whether the column already exists. 
+ let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model_id'", + rusqlite::params![table], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + continue; + } + sql.push_str(&format!( + "ALTER TABLE {t} ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id);\ + CREATE INDEX IF NOT EXISTS idx_{t}_model ON {t}(embedding_model_id);", + t = table, + )); + } + + Ok(sql) +} + // ============================================================================= // Tests // ============================================================================= @@ -339,17 +892,17 @@ mod tests { fn fresh_db_migrates_to_latest() { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 4); + assert_eq!(version, 15); - // Verify the tracking table has rows for V1, V2, V3, and V4. + // Verify the tracking table has rows for V1 through V15. let count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(count, 4); + assert_eq!(count, 15); // Verify the entities table was created. let tbl_count: i64 = conn @@ -370,6 +923,159 @@ mod tests { ) .unwrap(); assert_eq!(col_count, 1, "V2 must add name column to notes"); + + // Verify V5 added entity_type column to entities. + let et_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('entities') WHERE name = 'entity_type'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(et_count, 1, "V5 must add entity_type column to entities"); + + // Verify V5 added the kind+entity_type index. 
+ let idx_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='index' \ + AND name='idx_entities_kind_entity_type'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(idx_count, 1, "V5 must create idx_entities_kind_entity_type"); + + // Verify V10 added the status column to notes. + let status_col: i64 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('notes') WHERE name = 'status'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(status_col, 1, "V10 must add status column to notes"); + + // Verify V11 added merged_into column to entities. + let merged_into_col: i64 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('entities') WHERE name = 'merged_into'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + merged_into_col, 1, + "V11 must add merged_into column to entities" + ); + + // Verify V12 made salience nullable (notnull=0). + let salience_notnull: i64 = conn + .query_row( + "SELECT \"notnull\" FROM pragma_table_info('notes') WHERE name = 'salience'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(salience_notnull, 0, "V12 must make salience nullable"); + + // Verify V13 added event observability columns to events. + for col in [ + "kind", + "payload", + "payload_schema_version", + "profile_state_version", + "session_id", + "aggregate_kind", + "aggregate_id", + ] { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('events') WHERE name = ?1", + [col], + |r| r.get(0), + ) + .unwrap(); + assert!(exists, "V13 must add events.{col}"); + } + + // Verify event_observations table exists. + let obs_tbl: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='event_observations'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(obs_tbl, 1, "V13 must create event_observations table"); + + // Verify V13 indexes exist. 
+ for idx in [ + "idx_events_ns_created_id", + "idx_events_session", + "idx_events_payload_proposal_id", + "idx_event_obs_entity", + "idx_event_obs_event_role", + ] { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1", + [idx], + |r| r.get(0), + ) + .unwrap(); + assert!(exists, "V13 must create index {idx}"); + } + + // Verify V14 created the _embedding_models registry table. + let embed_tbl: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='_embedding_models'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(embed_tbl, 1, "V14 must create _embedding_models table"); + + // Verify V14 indexes exist. + for idx in [ + "idx_embed_models_one_active", + "idx_embed_models_engine_status", + ] { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1", + [idx], + |r| r.get(0), + ) + .unwrap(); + assert!(exists, "V14 must create index {idx}"); + } + + // Verify V15 created the proposals_open table. + let proposals_tbl: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='proposals_open'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(proposals_tbl, 1, "V15 must create proposals_open table"); + + // Verify V15 indexes on proposals_open. 
+ for idx in [ + "idx_proposals_open_ns_status", + "idx_proposals_open_proposer", + "idx_proposals_open_updated", + ] { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1", + [idx], + |r| r.get(0), + ) + .unwrap(); + assert!(exists, "V15 must create index {idx}"); + } } #[test] @@ -377,61 +1083,93 @@ mod tests { let mut conn = open_memory(); let v1 = run_migrations(&mut conn).expect("first run"); let v2 = run_migrations(&mut conn).expect("second run"); - assert_eq!(v1, 4); - assert_eq!(v2, 4); + assert_eq!(v1, 15); + assert_eq!(v2, 15); - // Should still have exactly four rows in the tracking table (V1 + V2 + V3 + V4). + // Should still have exactly fifteen rows in the tracking table (V1..V15). let count: i64 = conn .query_row("SELECT COUNT(*) FROM _schema_migrations", [], |row| { row.get(0) }) .unwrap(); - assert_eq!(count, 4); + assert_eq!(count, 15); + } + + // F052 (CRIT): V9 migration must add target_backend column + partial index on graph_edges. + // ADR-009 requires target_backend for backend routing. 
+ #[test] + fn migration_v9_adds_target_backend_index() { + let mut conn = open_memory(); + let version = run_migrations(&mut conn).expect("migrations should succeed"); + assert_eq!( + version, 15, + "F052: latest migration must be V15 (proposals_open)" + ); + let col: i64 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('graph_edges') WHERE name = 'target_backend'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + col, 1, + "F052: graph_edges must have target_backend column after V9 migration" + ); + let idx: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_graph_edges_target_backend'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + idx, 1, + "F052: idx_graph_edges_target_backend partial index must exist after V9 migration" + ); } #[test] fn failed_migration_rolls_back() { - let bad_v5 = VersionedMigration { - version: 5, + let bad_v16 = VersionedMigration { + version: 16, name: "bad_migration", up: "THIS IS NOT VALID SQL;", }; let mut conn = open_memory(); - // Apply all real migrations (V1 + V2 + V3 + V4) so the DB is at V4. - run_migrations(&mut conn).expect("V1+V2+V3+V4 should apply cleanly"); + // Apply all real migrations (V1..V15) so the DB is at V15. + run_migrations(&mut conn).expect("V1..V15 should apply cleanly"); - // Now manually drive the bad V5 migration to check rollback behaviour. - let result = apply_single_migration(&mut conn, &bad_v5); + // Now manually drive the bad V16 migration to check rollback behaviour. + let result = apply_single_migration(&mut conn, &bad_v16); assert!(result.is_err(), "bad migration should return error"); - // DB should still be at V4 — no V5 row in tracking. - let v5_count: i64 = conn + // DB should still be at V15 — no V16 row in tracking. 
+ let v16_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version = 5", + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 16", [], |row| row.get(0), ) .unwrap(); - assert_eq!(v5_count, 0, "V5 must not be recorded after rollback"); + assert_eq!(v16_count, 0, "V16 must not be recorded after rollback"); - // V1, V2, V3, and V4 should still be there. + // V1..V15 should still be there. let applied_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", [], |row| row.get(0), ) .unwrap(); - assert_eq!( - applied_count, 4, - "V1, V2, V3, and V4 must still be recorded" - ); + assert_eq!(applied_count, 15, "V1..V15 must still be recorded"); } #[test] fn store_ddl_then_migrations_is_idempotent() { + use crate::stores::entity::ensure_entities_schema; use crate::stores::note::ensure_notes_schema; let mut conn = open_memory(); @@ -440,6 +1178,9 @@ mod tests { // WITH the name column (NOTES_DDL includes it for test convenience). ensure_notes_schema(&conn).expect("store DDL should create notes"); + // Simulate entity DDL creation (includes merged_into, merge_event_id). + ensure_entities_schema(&conn).expect("store DDL should create entities"); + // Verify name column exists from DDL. let has_name: bool = conn .query_row( @@ -450,10 +1191,16 @@ mod tests { .unwrap(); assert!(has_name, "NOTES_DDL should include name column"); - // Now run versioned migrations — V2 should detect the existing column - // and skip the ALTER TABLE without error. V4 adds the unique triple index. 
+ // Now run versioned migrations — V2 should detect the existing name column + // and skip; V5 should detect entity_type already present via ENTITIES_DDL and skip; + // V9 rebuilds graph_edges with lifecycle columns; V10 should detect the existing + // status column and skip; V11 should detect the existing merged_into column and skip; + // V12 should detect that salience is already nullable and skip; + // V13 adds event observability columns and event_observations table; + // V14 creates the _embedding_models registry table; + // V15 creates the proposals_open table. let version = run_migrations(&mut conn).expect("migrations after store DDL"); - assert_eq!(version, 4); + assert_eq!(version, 15); // V2 should be recorded as applied (skipped but tracked). let v2_count: i64 = conn @@ -467,6 +1214,437 @@ mod tests { v2_count, 1, "V2 must be recorded even when column pre-exists" ); + + // V5 should be recorded as applied (skipped but tracked). + let v5_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 5", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v5_count, 1, + "V5 must be recorded even when entity_type column pre-exists" + ); + + // V9 (edge lifecycle + target_backend) must be recorded. + let v9_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 9", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v9_count, 1, + "V9 must be recorded after store-DDL + migrations" + ); + + // V10 should be recorded as applied (skipped but tracked). + let v10_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 10", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v10_count, 1, + "V10 must be recorded even when status column pre-exists via NOTES_DDL" + ); + + // V11 should be recorded as applied (skipped but tracked). 
+ let v11_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 11", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v11_count, 1, + "V11 must be recorded even when merged_into column pre-exists via ENTITIES_DDL" + ); + + // V12 should be recorded as applied (skipped but tracked — NOTES_DDL already + // creates salience as nullable). + let v12_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 12", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v12_count, 1, + "V12 must be recorded even when salience is already nullable via NOTES_DDL" + ); + + // V13 (event observability) must be recorded. + let v13_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 13", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v13_count, 1, + "V13 must be recorded after store-DDL + migrations" + ); + + // V14 (embedding model registry) must be recorded. + let v14_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 14", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v14_count, 1, + "V14 must be recorded after store-DDL + migrations" + ); + + // V15 (proposals_open) must be recorded. + let v15_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 15", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!( + v15_count, 1, + "V15 must be recorded after store-DDL + migrations" + ); + } + + /// Verify that V12 rebuilds a V1-era notes table so salience/decay_factor + /// accept NULL, unblocking `create_note` with `salience=None` on migrated DBs. + #[test] + fn v1_to_v12_allows_null_salience() { + let mut conn = open_memory(); + + // Bootstrap the schema tracking table and create the full V1-era schema. + // The notes table uses NOT NULL DEFAULT on salience/decay_factor as V1 did. 
+ conn.execute_batch(MIGRATION_TRACKING_TABLE).unwrap(); + conn.execute_batch( + "CREATE TABLE entities (\ + id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + kind TEXT NOT NULL,\ + name TEXT NOT NULL,\ + description TEXT,\ + properties TEXT,\ + tags TEXT NOT NULL DEFAULT '[]',\ + created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + deleted_at INTEGER\ + );\ + CREATE TABLE graph_edges (\ + namespace TEXT NOT NULL,\ + id TEXT NOT NULL,\ + source_id TEXT NOT NULL,\ + target_id TEXT NOT NULL,\ + relation TEXT NOT NULL,\ + weight REAL NOT NULL DEFAULT 1.0,\ + created_at INTEGER NOT NULL,\ + metadata TEXT,\ + PRIMARY KEY (namespace, id)\ + );\ + CREATE TABLE notes (\ + id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + kind TEXT NOT NULL,\ + content TEXT NOT NULL DEFAULT '',\ + salience REAL NOT NULL DEFAULT 0.5,\ + decay_factor REAL NOT NULL DEFAULT 0.0,\ + expires_at INTEGER,\ + properties TEXT,\ + created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + deleted_at INTEGER\ + );\ + CREATE TABLE events (\ + id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + verb TEXT NOT NULL,\ + substrate TEXT NOT NULL,\ + actor TEXT NOT NULL,\ + outcome TEXT NOT NULL,\ + data TEXT,\ + duration_us INTEGER NOT NULL DEFAULT 0,\ + target_id TEXT,\ + created_at INTEGER NOT NULL\ + );", + ) + .unwrap(); + + // Record V1 as already applied so run_migrations starts at V2. + let now = chrono::Utc::now().timestamp_micros(); + conn.execute( + "INSERT INTO _schema_migrations (version, name, applied_at) VALUES (1, 'initial_schema', ?1)", + rusqlite::params![now], + ) + .unwrap(); + + // Run V2-V15 migrations. + let version = run_migrations(&mut conn).expect("migrations should succeed"); + assert_eq!(version, 15); + + // After V12, salience must be nullable (notnull=0). 
+ let notnull: i64 = conn + .query_row( + "SELECT \"notnull\" FROM pragma_table_info('notes') WHERE name = 'salience'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(notnull, 0, "salience must be nullable after V12"); + + // Inserting a note without salience must succeed. + conn.execute( + "INSERT INTO notes (id, namespace, kind, status, content, created_at, updated_at) \ + VALUES ('test-id', 'ns', 'observation', 'active', '', 1, 1)", + [], + ) + .expect("inserting note with NULL salience must succeed after V12"); + + let stored_salience: Option = conn + .query_row( + "SELECT salience FROM notes WHERE id = 'test-id'", + [], + |row| row.get(0), + ) + .unwrap(); + assert!( + stored_salience.is_none(), + "salience must be NULL when not supplied" + ); + } + + #[test] + fn store_ddl_then_event_migration_is_idempotent() { + use crate::stores::event::ensure_events_schema; + + let mut conn = open_memory(); + + // Simulate the StorageBackend path: ensure_events_schema creates the + // events table WITH the new columns. Running V13 on top must not fail. + ensure_events_schema(&conn).expect("store DDL should create events"); + + let version = run_migrations(&mut conn).expect("migrations after events store DDL"); + assert_eq!(version, 15, "must reach V15 even when events DDL ran first"); + + let v13_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 13", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(v13_count, 1, "V13 must be recorded"); + + let v14_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 14", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(v14_count, 1, "V14 must be recorded"); + + let v15_count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 15", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(v15_count, 1, "V15 must be recorded"); + } + + /// F227/F228: V14 must create the _embedding_models registry table and its indexes. 
+ /// + /// F227: MIGRATIONS previously stopped at V4 (dedupe_graph_edge_triples); no + /// embedding registry existed. + /// F228: vec_ tables previously lacked the embedding_model_id FK column. + /// V14 adds it only to regular vec_ tables that already exist; tables created after V14 do NOT yet include it at creation. + #[test] + fn migration_v14_creates_embedding_model_registry() { + let mut conn = open_memory(); + let version = run_migrations(&mut conn).expect("migrations should succeed"); + assert_eq!( + version, 15, + "F227: latest migration must be V15 (proposals_open)" + ); + + // Verify _embedding_models table exists. + let tbl: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='_embedding_models'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(tbl, 1, "F227: _embedding_models table must exist after V14"); + + // Verify the partial unique index for one-active-per-engine constraint. + let one_active_idx: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_embed_models_one_active'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!( + one_active_idx, 1, + "V14 must create idx_embed_models_one_active partial unique index" + ); + + // Verify the engine+status composite index. + let engine_status_idx: i64 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_embed_models_engine_status'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!( + engine_status_idx, 1, + "V14 must create idx_embed_models_engine_status index" + ); + + // Verify the _embedding_models schema contains required columns. 
+ for col in [ + "id", + "engine_name", + "model_id", + "key_version", + "dim", + "output_dim", + "status", + "activated_at", + "superseded_at", + "superseded_by", + "canonical_key", + "created_at", + ] { + let exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('_embedding_models') WHERE name = ?1", + [col], + |r| r.get(0), + ) + .unwrap(); + assert!( + exists, + "F227: _embedding_models must have column '{col}' after V14" + ); + } + } + + /// F228: V14 adds the embedding_model_id FK column to regular vec_ tables that + /// already exist when the migration runs. + /// + /// This test verifies the migration adds embedding_model_id to a pre-existing + /// regular (non-virtual) vec_ table that was created before V14 ran. + #[test] + fn migration_v14_adds_embedding_model_id_to_existing_regular_vec_tables() { + let mut conn = open_memory(); + + // Simulate a database that already holds a regular vec_ table when V14 runs: + // on the freshly opened connection, first create a regular (non-virtual) vec_ table + // without the embedding_model_id column, then run the full migration suite once. + // + // We use a real SQLite table here (not a vec0 virtual table) because + // sqlite-vec is not available in the unit test environment. The migration + // correctly detects and skips virtual tables. + conn.execute_batch( + "CREATE TABLE vec_legacy_model (\ + subject_id TEXT PRIMARY KEY,\ + namespace TEXT NOT NULL,\ + kind TEXT NOT NULL,\ + field TEXT NOT NULL\ + );", + ) + .unwrap(); + + // Run the full migration suite — V14 should add embedding_model_id to the + // regular vec_legacy_model table. + let version = run_migrations(&mut conn).expect("migrations should succeed"); + assert_eq!(version, 15); + + // The embedding_model_id column must now exist. 
+ let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info('vec_legacy_model') WHERE name = 'embedding_model_id'", + [], + |r| r.get(0), + ) + .unwrap(); + assert!( + col_exists, + "F228: V14 must add embedding_model_id to existing regular vec_ tables" + ); + + // Running migrations again must be idempotent (column already present). + let version2 = run_migrations(&mut conn).expect("second run must succeed"); + assert_eq!(version2, 15); + } + + /// CRIT-2 regression: V14 discovery filter must NOT match sqlite-vec internal + /// shadow tables (`vec__chunks`, `_rowids`, `_info`, `_vector_chunks00`). + /// + /// sqlite-vec 0.1.9 creates these as plain `CREATE TABLE` entries (no VIRTUAL, + /// no vec0 keyword in their DDL) for each vec0 virtual table. The filter added + /// in PR #374 c20 must exclude them via explicit suffix negation so that + /// `ALTER TABLE … ADD COLUMN` is never issued against sqlite-vec's internal tables. + /// + /// We simulate the shadow tables as plain regular tables (sqlite-vec is not + /// available in the unit-test environment) because the sqlite_master DDL format + /// is what the filter inspects — the table content is irrelevant for this test. + #[test] + fn migration_v14_does_not_alter_sqlite_vec_shadow_tables() { + let mut conn = open_memory(); + + // Create the four canonical sqlite-vec shadow table shapes for a notional + // vec0 table named `vec_test`. Their DDL intentionally lacks VIRTUAL/vec0 + // so they would have matched the old (pre-fix) filter. + conn.execute_batch( + "CREATE TABLE vec_test_chunks (x INTEGER);\ + CREATE TABLE vec_test_rowids (x INTEGER);\ + CREATE TABLE vec_test_info (x INTEGER);\ + CREATE TABLE vec_test_vector_chunks00 (x INTEGER);", + ) + .unwrap(); + + // Run the full migration suite — V14 must not add `embedding_model_id` to + // any of the four shadow tables above. 
+ let version = run_migrations(&mut conn).expect("migrations should succeed"); + assert_eq!(version, 15); + + for shadow in [ + "vec_test_chunks", + "vec_test_rowids", + "vec_test_info", + "vec_test_vector_chunks00", + ] { + let col_added: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) \ + WHERE name = 'embedding_model_id'", + rusqlite::params![shadow], + |r| r.get(0), + ) + .unwrap(); + assert!( + !col_added, + "CRIT-2: V14 must NOT add embedding_model_id to sqlite-vec shadow table '{shadow}'" + ); + } } /// Helper: apply a single migration in a transaction, recording it in the diff --git a/crates/khive-db/src/stores/entity.rs b/crates/khive-db/src/stores/entity.rs index d1d78967..3af09fc4 100644 --- a/crates/khive-db/src/stores/entity.rs +++ b/crates/khive-db/src/stores/entity.rs @@ -109,13 +109,16 @@ fn read_entity(row: &rusqlite::Row<'_>) -> Result { let id_str: String = row.get(0)?; let namespace: String = row.get(1)?; let kind: String = row.get(2)?; - let name: String = row.get(3)?; - let description: Option = row.get(4)?; - let properties_str: Option = row.get(5)?; - let tags_str: String = row.get(6)?; - let created_at: i64 = row.get(7)?; - let updated_at: i64 = row.get(8)?; - let deleted_at: Option = row.get(9)?; + let entity_type: Option = row.get(3)?; + let name: String = row.get(4)?; + let description: Option = row.get(5)?; + let properties_str: Option = row.get(6)?; + let tags_str: String = row.get(7)?; + let created_at: i64 = row.get(8)?; + let updated_at: i64 = row.get(9)?; + let deleted_at: Option = row.get(10)?; + let merged_into_str: Option = row.get(11)?; + let merge_event_id_str: Option = row.get(12)?; let id = parse_uuid(&id_str)?; @@ -123,7 +126,7 @@ fn read_entity(row: &rusqlite::Row<'_>) -> Result { .map(|s| { serde_json::from_str(&s).map_err(|e| { rusqlite::Error::FromSqlConversionFailure( - 5, + 6, rusqlite::types::Type::Text, Box::new(e), ) @@ -132,13 +135,30 @@ fn read_entity(row: &rusqlite::Row<'_>) -> Result 
{ .transpose()?; let tags: Vec = serde_json::from_str(&tags_str).map_err(|e| { - rusqlite::Error::FromSqlConversionFailure(6, rusqlite::types::Type::Text, Box::new(e)) + rusqlite::Error::FromSqlConversionFailure(7, rusqlite::types::Type::Text, Box::new(e)) })?; + // merged_into is read with row.get(11); report that column index on a parse + // failure (index 10 is deleted_at). + let merged_into = merged_into_str + .as_deref() + .map(Uuid::parse_str) + .transpose() + .map_err(|e| { + rusqlite::Error::FromSqlConversionFailure(11, rusqlite::types::Type::Text, Box::new(e)) + })?; + + // merge_event_id is read with row.get(12); index 11 is merged_into. + let merge_event_id = merge_event_id_str + .as_deref() + .map(Uuid::parse_str) + .transpose() + .map_err(|e| { + rusqlite::Error::FromSqlConversionFailure(12, rusqlite::types::Type::Text, Box::new(e)) + })?; + Ok(Entity { id, namespace, kind, + entity_type, name, description, properties, @@ -146,6 +166,8 @@ fn read_entity(row: &rusqlite::Row<'_>) -> Result { created_at, updated_at, deleted_at, + merged_into, + merge_event_id, }) } @@ -189,6 +211,18 @@ fn build_entity_where( conditions.push(format!("kind IN ({})", placeholders.join(", "))); } + if !filter.entity_types.is_empty() { + let placeholders: Vec = filter + .entity_types + .iter() + .map(|t| { + params.push(Box::new(t.clone())); + format!("?{}", params.len()) + }) + .collect(); + conditions.push(format!("entity_type IN ({})", placeholders.join(", "))); + } + if let Some(ref prefix) = filter.name_prefix { params.push(Box::new(format!("{}%", prefix))); conditions.push(format!("name LIKE ?{}", params.len())); @@ -228,16 +262,20 @@ impl EntityStore for SqlEntityStore { .map(|v| serde_json::to_string(v).unwrap_or_default()); let tags_str = serde_json::to_string(&entity.tags).unwrap_or_else(|_| "[]".to_string()); + let merged_into_str = entity.merged_into.map(|u| u.to_string()); + let merge_event_id_str = entity.merge_event_id.map(|u| u.to_string()); + self.with_writer("upsert_entity", move |conn| { conn.execute( "INSERT OR REPLACE INTO entities \ - (id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at) \ -
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", + (id, namespace, kind, entity_type, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", rusqlite::params![ id_str, namespace, entity.kind, + entity.entity_type, entity.name, entity.description, properties_str, @@ -245,6 +283,8 @@ impl EntityStore for SqlEntityStore { entity.created_at, entity.updated_at, entity.deleted_at, + merged_into_str, + merge_event_id_str, ], )?; Ok(()) @@ -273,15 +313,18 @@ impl EntityStore for SqlEntityStore { let tags_str = serde_json::to_string(&entity.tags).unwrap_or_else(|_| "[]".to_string()); + let merged_into_str = entity.merged_into.map(|u| u.to_string()); + let merge_event_id_str = entity.merge_event_id.map(|u| u.to_string()); match conn.execute( "INSERT OR REPLACE INTO entities \ - (id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", + (id, namespace, kind, entity_type, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", rusqlite::params![ id_str, &entity.namespace, entity.kind, + entity.entity_type, entity.name, entity.description, properties_str, @@ -289,6 +332,8 @@ impl EntityStore for SqlEntityStore { entity.created_at, entity.updated_at, entity.deleted_at, + merged_into_str, + merge_event_id_str, ], ) { Ok(_) => affected += 1, @@ -320,8 +365,8 @@ impl EntityStore for SqlEntityStore { self.with_reader("get_entity", move |conn| { let mut stmt = conn.prepare( - "SELECT id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at \ + "SELECT id, namespace, kind, entity_type, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id \ FROM entities WHERE 
id = ?1 AND deleted_at IS NULL", )?; let mut rows = stmt.query(rusqlite::params![id_str])?; @@ -388,8 +433,8 @@ impl EntityStore for SqlEntityStore { let offset_idx = data_params.len(); let data_sql = format!( - "SELECT id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at \ + "SELECT id, namespace, kind, entity_type, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id \ FROM entities{} ORDER BY created_at DESC LIMIT ?{} OFFSET ?{}", where_sql, limit_idx, offset_idx, ); @@ -441,18 +486,23 @@ const ENTITIES_DDL: &str = "\ id TEXT PRIMARY KEY,\ namespace TEXT NOT NULL,\ kind TEXT NOT NULL,\ + entity_type TEXT,\ name TEXT NOT NULL,\ description TEXT,\ properties TEXT,\ tags TEXT NOT NULL DEFAULT '[]',\ created_at INTEGER NOT NULL,\ updated_at INTEGER NOT NULL,\ - deleted_at INTEGER\ + deleted_at INTEGER,\ + merged_into TEXT,\ + merge_event_id TEXT\ );\ CREATE INDEX IF NOT EXISTS idx_entities_namespace ON entities(namespace);\ CREATE INDEX IF NOT EXISTS idx_entities_kind ON entities(namespace, kind);\ + CREATE INDEX IF NOT EXISTS idx_entities_kind_entity_type ON entities(namespace, kind, entity_type);\ CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(namespace, name);\ CREATE INDEX IF NOT EXISTS idx_entities_created ON entities(created_at DESC);\ + CREATE INDEX IF NOT EXISTS idx_entities_merged_into ON entities(namespace, merged_into);\ "; pub(crate) fn ensure_entities_schema(conn: &rusqlite::Connection) -> Result<(), rusqlite::Error> { @@ -491,6 +541,7 @@ mod tests { id: Uuid::new_v4(), namespace: namespace.to_string(), kind: kind.to_string(), + entity_type: None, name: name.to_string(), description: None, properties: None, @@ -498,6 +549,8 @@ mod tests { created_at: now, updated_at: now, deleted_at: None, + merged_into: None, + merge_event_id: None, } } @@ -821,6 +874,50 @@ mod tests { assert!(!names.contains(&"E2")); } + #[tokio::test] + async fn 
test_entity_type_roundtrip() { + let store = setup_memory_store(); + + let entity = + Entity::new("default", "document", "ResearchPaper").with_entity_type(Some("paper")); + let id = entity.id; + + store.upsert_entity(entity).await.unwrap(); + + let fetched = store.get_entity(id).await.unwrap().unwrap(); + assert_eq!(fetched.entity_type, Some("paper".to_string())); + assert_eq!(fetched.kind, "document"); + assert_eq!(fetched.name, "ResearchPaper"); + } + + #[tokio::test] + async fn test_query_by_kind_and_entity_type() { + let store = setup_memory_store_ns("et_ns"); + + let typed = + Entity::new("et_ns", "person", "Researcher").with_entity_type(Some("researcher")); + let untyped = make_entity("et_ns", "person", "Generic"); + + store.upsert_entity(typed).await.unwrap(); + store.upsert_entity(untyped).await.unwrap(); + + let result = store + .query_entities( + "et_ns", + EntityFilter { + entity_types: vec!["researcher".to_string()], + ..Default::default() + }, + PageRequest::default(), + ) + .await + .unwrap(); + + assert_eq!(result.items.len(), 1); + assert_eq!(result.items[0].name, "Researcher"); + assert_eq!(result.items[0].entity_type, Some("researcher".to_string())); + } + /// UUID is globally unique (id TEXT PRIMARY KEY). Upserting the same UUID in a /// different namespace overwrites the row (INSERT OR REPLACE). get_entity by ID /// returns whichever namespace currently owns that UUID. 
@@ -836,6 +933,7 @@ mod tests { id: shared_id, namespace: "ns_a".to_string(), kind: "concept".to_string(), + entity_type: None, name: "SharedInA".to_string(), description: None, properties: None, @@ -843,6 +941,8 @@ mod tests { created_at: now, updated_at: now, deleted_at: None, + merged_into: None, + merge_event_id: None, }; store.upsert_entity(entity_a).await.unwrap(); @@ -856,6 +956,7 @@ mod tests { id: shared_id, namespace: "ns_b".to_string(), kind: "concept".to_string(), + entity_type: None, name: "SharedInB".to_string(), description: None, properties: None, @@ -863,6 +964,8 @@ mod tests { created_at: now, updated_at: now, deleted_at: None, + merged_into: None, + merge_event_id: None, }; store.upsert_entity(entity_b).await.unwrap(); diff --git a/crates/khive-db/src/stores/event.rs b/crates/khive-db/src/stores/event.rs index 0520d440..dccf4a70 100644 --- a/crates/khive-db/src/stores/event.rs +++ b/crates/khive-db/src/stores/event.rs @@ -6,21 +6,21 @@ use async_trait::async_trait; use uuid::Uuid; use khive_storage::error::StorageError; -use khive_storage::event::{Event, EventFilter}; +use khive_storage::event::{Event, EventFilter, EventObservation, ObservationRole, ReferentKind}; use khive_storage::types::{BatchWriteSummary, Page, PageRequest}; use khive_storage::EventStore; use khive_storage::StorageCapability; -use khive_types::{EventOutcome, SubstrateKind}; +use khive_types::{EventKind, EventOutcome, SubstrateKind}; use crate::error::SqliteError; use crate::pool::ConnectionPool; fn map_err(e: rusqlite::Error, op: &'static str) -> StorageError { - StorageError::driver(StorageCapability::Event, op, e) + StorageError::driver(StorageCapability::Events, op, e) } fn map_sqlite_err(e: SqliteError, op: &'static str) -> StorageError { - StorageError::driver(StorageCapability::Event, op, e) + StorageError::driver(StorageCapability::Events, op, e) } /// An EventStore backed by SQLite tables. 
@@ -103,7 +103,7 @@ impl SqlEventStore { let conn = self.open_standalone_writer()?; tokio::task::spawn_blocking(move || f(&conn).map_err(|e| map_err(e, op))) .await - .map_err(|e| StorageError::driver(StorageCapability::Event, op, e))? + .map_err(|e| StorageError::driver(StorageCapability::Events, op, e))? } else { let pool = Arc::clone(&self.pool); tokio::task::spawn_blocking(move || { @@ -111,7 +111,7 @@ impl SqlEventStore { f(guard.conn()).map_err(|e| map_err(e, op)) }) .await - .map_err(|e| StorageError::driver(StorageCapability::Event, op, e))? + .map_err(|e| StorageError::driver(StorageCapability::Events, op, e))? } } @@ -124,7 +124,7 @@ impl SqlEventStore { let conn = self.open_standalone_reader()?; tokio::task::spawn_blocking(move || f(&conn).map_err(|e| map_err(e, op))) .await - .map_err(|e| StorageError::driver(StorageCapability::Event, op, e))? + .map_err(|e| StorageError::driver(StorageCapability::Events, op, e))? } else { let pool = Arc::clone(&self.pool); tokio::task::spawn_blocking(move || { @@ -132,13 +132,13 @@ impl SqlEventStore { f(guard.conn()).map_err(|e| map_err(e, op)) }) .await - .map_err(|e| StorageError::driver(StorageCapability::Event, op, e))? + .map_err(|e| StorageError::driver(StorageCapability::Events, op, e))? 
} } } // ============================================================================= -// Helpers: parse SubstrateKind / EventOutcome from DB strings +// Helpers: parse SubstrateKind / EventOutcome / EventKind from DB strings // ============================================================================= fn substrate_from_str(s: &str) -> Result { @@ -164,6 +164,16 @@ fn outcome_from_str(s: &str) -> Result { } } +fn kind_from_str(s: &str) -> Result { + s.parse::().map_err(|_| { + rusqlite::Error::FromSqlConversionFailure( + 0, + rusqlite::types::Type::Text, + format!("unknown EventKind: {s}").into(), + ) + }) +} + fn parse_uuid(s: &str) -> Result { Uuid::parse_str(s).map_err(|e| { rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)) @@ -171,30 +181,37 @@ fn parse_uuid(s: &str) -> Result { } // Column order: id(0), namespace(1), verb(2), substrate(3), actor(4), -// outcome(5), data(6), duration_us(7), target_id(8), created_at(9) +// kind(5), outcome(6), payload(7), payload_schema_version(8), +// profile_state_version(9), duration_us(10), target_id(11), +// session_id(12), aggregate_kind(13), aggregate_id(14), created_at(15) fn read_event(row: &rusqlite::Row<'_>) -> Result { let id_str: String = row.get(0)?; let namespace: String = row.get(1)?; let verb: String = row.get(2)?; let substrate_str: String = row.get(3)?; let actor: String = row.get(4)?; - let outcome_str: String = row.get(5)?; - let data_str: Option = row.get(6)?; - let duration_us: i64 = row.get(7)?; - let target_str: Option = row.get(8)?; - let created_at: i64 = row.get(9)?; + let kind_str: String = row.get(5)?; + let outcome_str: String = row.get(6)?; + let payload_str: String = row.get(7)?; + let payload_schema_version: i64 = row.get(8)?; + let profile_state_version: Option = row.get(9)?; + let duration_us: i64 = row.get(10)?; + let target_str: Option = row.get(11)?; + let session_str: Option = row.get(12)?; + let aggregate_kind: Option = row.get(13)?; + let 
aggregate_str: Option = row.get(14)?; + let created_at: i64 = row.get(15)?; let id = parse_uuid(&id_str)?; let substrate = substrate_from_str(&substrate_str)?; + let kind = kind_from_str(&kind_str)?; let outcome = outcome_from_str(&outcome_str)?; - let data = data_str - .as_deref() - .map(serde_json::from_str) - .transpose() - .map_err(|e| { - rusqlite::Error::FromSqlConversionFailure(6, rusqlite::types::Type::Text, Box::new(e)) - })?; + let payload: serde_json::Value = serde_json::from_str(&payload_str).map_err(|e| { + rusqlite::Error::FromSqlConversionFailure(7, rusqlite::types::Type::Text, Box::new(e)) + })?; let target_id = target_str.as_deref().map(parse_uuid).transpose()?; + let session_id = session_str.as_deref().map(parse_uuid).transpose()?; + let aggregate_id = aggregate_str.as_deref().map(parse_uuid).transpose()?; Ok(Event { id, @@ -202,87 +219,271 @@ fn read_event(row: &rusqlite::Row<'_>) -> Result { verb, substrate, actor, + kind, outcome, - data, + payload, + payload_schema_version: payload_schema_version as u32, + profile_state_version: profile_state_version.map(|v| v as u64), duration_us, target_id, + session_id, + aggregate_kind, + aggregate_id, created_at, }) } -fn build_event_filter_sql( - default_namespace: &str, - filter: &EventFilter, -) -> (String, Vec>) { - let mut conditions: Vec = Vec::new(); - let mut params: Vec> = Vec::new(); +// ============================================================================= +// Helpers: observation projection write path +// ============================================================================= - // If filter.namespaces is non-empty, use those; otherwise fall back to default_namespace. 
- if filter.namespaces.is_empty() { - params.push(Box::new(default_namespace.to_string())); - conditions.push(format!("namespace = ?{}", params.len())); - } else if filter.namespaces.len() == 1 { - params.push(Box::new(filter.namespaces[0].clone())); - conditions.push(format!("namespace = ?{}", params.len())); - } else { - let placeholders: Vec = filter - .namespaces - .iter() - .map(|ns| { - params.push(Box::new(ns.clone())); - format!("?{}", params.len()) - }) - .collect(); - conditions.push(format!("namespace IN ({})", placeholders.join(","))); +fn insert_event_with_observations( + conn: &rusqlite::Connection, + event: &Event, +) -> Result<(), rusqlite::Error> { + let id_str = event.id.to_string(); + let substrate_str = event.substrate.name().to_string(); + let kind_str = event.kind.name().to_string(); + let outcome_str = event.outcome.name().to_string(); + let payload_str = event.payload.to_string(); + let target_str = event.target_id.map(|u| u.to_string()); + let session_str = event.session_id.map(|u| u.to_string()); + let aggregate_str = event.aggregate_id.map(|u| u.to_string()); + let profile_state_version = event.profile_state_version.map(|v| v as i64); + + conn.execute( + "INSERT INTO events \ + (id, namespace, verb, substrate, actor, kind, outcome, payload, payload_schema_version, \ + profile_state_version, duration_us, target_id, session_id, aggregate_kind, aggregate_id, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)", + rusqlite::params![ + id_str, + &event.namespace, + &event.verb, + substrate_str, + &event.actor, + kind_str, + outcome_str, + payload_str, + event.payload_schema_version as i64, + profile_state_version, + event.duration_us, + target_str, + session_str, + &event.aggregate_kind, + aggregate_str, + event.created_at, + ], + )?; + + for observation in decode_event_observations(event)? 
{ + conn.execute( + "INSERT INTO event_observations \ + (event_id, entity_id, referent_kind, role, position) \ + VALUES (?1, ?2, ?3, ?4, ?5)", + rusqlite::params![ + observation.event_id.to_string(), + observation.entity_id.to_string(), + observation.referent_kind.name(), + observation.role.name(), + observation.position as i64, + ], + )?; } - if !filter.ids.is_empty() { - let placeholders: Vec = filter - .ids - .iter() - .map(|id| { - params.push(Box::new(id.to_string())); - format!("?{}", params.len()) - }) - .collect(); - conditions.push(format!("id IN ({})", placeholders.join(","))); + Ok(()) +} + +fn decode_event_observations(event: &Event) -> Result, rusqlite::Error> { + match event.kind { + EventKind::RerankExecuted => decode_rank_observations(event), + EventKind::RecallExecuted | EventKind::SearchExecuted => decode_rank_observations(event), + EventKind::LinkCreated => decode_link_observations(event), + EventKind::EntityCreated + | EventKind::EntityUpdated + | EventKind::EntityDeleted + | EventKind::NoteCreated + | EventKind::NoteUpdated + | EventKind::NoteDeleted + | EventKind::TaskTransitioned => decode_target_observation(event), + EventKind::FeedbackExplicit => decode_signal_observation(event), + _ => Ok(Vec::new()), } +} - if !filter.verbs.is_empty() { - let placeholders: Vec = filter - .verbs - .iter() - .map(|v| { - params.push(Box::new(v.clone())); - format!("?{}", params.len()) - }) - .collect(); - conditions.push(format!("verb IN ({})", placeholders.join(","))); +fn payload_uuid_array(event: &Event, field: &'static str) -> Result, rusqlite::Error> { + let Some(values) = event.payload.get(field) else { + return Ok(Vec::new()); + }; + let Some(array) = values.as_array() else { + return Err(invalid_payload(event.kind, field, "expected array")); + }; + + array + .iter() + .map(|value| { + value + .as_str() + .ok_or_else(|| invalid_payload(event.kind, field, "expected UUID string")) + .and_then(|s| Uuid::parse_str(s).map_err(|e| 
invalid_payload(event.kind, field, e))) + }) + .collect() +} + +fn payload_uuid(event: &Event, field: &'static str) -> Result, rusqlite::Error> { + let Some(value) = event.payload.get(field) else { + return Ok(None); + }; + let Some(s) = value.as_str() else { + return Err(invalid_payload(event.kind, field, "expected UUID string")); + }; + Uuid::parse_str(s) + .map(Some) + .map_err(|e| invalid_payload(event.kind, field, e)) +} + +fn decode_rank_observations(event: &Event) -> Result, rusqlite::Error> { + let mut rows = Vec::new(); + + for (position, entity_id) in payload_uuid_array(event, "candidates")? + .into_iter() + .enumerate() + { + rows.push(EventObservation { + event_id: event.id, + entity_id, + referent_kind: ReferentKind::Note, + role: ObservationRole::Candidate, + position: position as u32, + }); } - if !filter.substrates.is_empty() { - let placeholders: Vec = filter - .substrates - .iter() - .map(|s| { - params.push(Box::new(s.name().to_string())); - format!("?{}", params.len()) - }) - .collect(); - conditions.push(format!("substrate IN ({})", placeholders.join(","))); + // payload_uuid_array yields Ok(empty) for an absent field, so chaining the + // alternate names with `or_else` never consulted them; pick the first field + // that is actually present in the payload instead. + // NOTE(review): assumes `reranked` / `final_scores` hold UUID-string arrays - confirm. + let selected_field = ["selected", "reranked", "final_scores"] + .into_iter() + .find(|field| event.payload.get(*field).is_some()) + .unwrap_or("selected"); + let selected = payload_uuid_array(event, selected_field)?; + for (position, entity_id) in selected.into_iter().enumerate() { + rows.push(EventObservation { + event_id: event.id, + entity_id, + referent_kind: ReferentKind::Note, + role: ObservationRole::Selected, + position: position as u32, + }); } - if !filter.actors.is_empty() { - let placeholders: Vec = filter - .actors - .iter() - .map(|a| { - params.push(Box::new(a.clone())); - format!("?{}", params.len()) - }) - .collect(); - conditions.push(format!("actor IN ({})", placeholders.join(","))); + Ok(rows) +} + +fn decode_link_observations(event: &Event) -> Result, rusqlite::Error> { + let mut rows = Vec::new(); + if let Some(source) = payload_uuid(event, "source_id")?
{ + rows.push(EventObservation { + event_id: event.id, + entity_id: source, + referent_kind: ReferentKind::Entity, + role: ObservationRole::Target, + position: 0, + }); } + if let Some(target) = payload_uuid(event, "target_id")? { + rows.push(EventObservation { + event_id: event.id, + entity_id: target, + referent_kind: ReferentKind::Entity, + role: ObservationRole::Target, + position: 1, + }); + } + Ok(rows) +} + +fn decode_target_observation(event: &Event) -> Result, rusqlite::Error> { + let Some(entity_id) = event.target_id.or(payload_uuid(event, "target_id")?) else { + return Ok(Vec::new()); + }; + Ok(vec![EventObservation { + event_id: event.id, + entity_id, + referent_kind: if event.substrate == SubstrateKind::Note { + ReferentKind::Note + } else { + ReferentKind::Entity + }, + role: ObservationRole::Target, + position: 0, + }]) +} + +fn decode_signal_observation(event: &Event) -> Result, rusqlite::Error> { + let Some(entity_id) = payload_uuid(event, "about_id")? else { + return Ok(Vec::new()); + }; + Ok(vec![EventObservation { + event_id: event.id, + entity_id, + referent_kind: ReferentKind::Entity, + role: ObservationRole::Signal, + position: 0, + }]) +} + +fn invalid_payload( + kind: EventKind, + field: &'static str, + reason: impl std::fmt::Display, +) -> rusqlite::Error { + rusqlite::Error::ToSqlConversionFailure( + format!("invalid payload for {}.{field}: {reason}", kind.name()).into(), + ) +} + +// ============================================================================= +// Helpers: filter SQL builder +// ============================================================================= + +fn build_event_filter_sql( + conn: &rusqlite::Connection, + default_namespace: &str, + filter: &EventFilter, +) -> Result<(String, Vec>), rusqlite::Error> { + reject_missing_event_filter_schema(conn, filter)?; + + let mut conditions: Vec = Vec::new(); + let mut params: Vec> = Vec::new(); + + params.push(Box::new(default_namespace.to_string())); + 
conditions.push(format!("namespace = ?{}", params.len())); + + push_in_clause( + &mut conditions, + &mut params, + "id", + filter.ids.iter().map(Uuid::to_string), + ); + push_in_clause( + &mut conditions, + &mut params, + "kind", + filter.kinds.iter().map(|kind| kind.name().to_string()), + ); + push_in_clause( + &mut conditions, + &mut params, + "verb", + filter.verbs.iter().cloned(), + ); + push_in_clause( + &mut conditions, + &mut params, + "substrate", + filter.substrates.iter().map(|s| s.name().to_string()), + ); + push_in_clause( + &mut conditions, + &mut params, + "actor", + filter.actors.iter().cloned(), + ); if let Some(after) = filter.after { params.push(Box::new(after)); @@ -294,8 +495,111 @@ fn build_event_filter_sql( conditions.push(format!("created_at < ?{}", params.len())); } + if let Some(session_id) = filter.session_id { + params.push(Box::new(session_id.to_string())); + conditions.push(format!("session_id = ?{}", params.len())); + } + + push_observation_exists(&mut conditions, &mut params, "candidate", &filter.observed); + push_observation_exists(&mut conditions, &mut params, "selected", &filter.selected); + + if let Some(proposal_id) = filter.payload_proposal_id { + params.push(Box::new(proposal_id.to_string())); + conditions.push(format!( + "json_extract(payload, '$.proposal_id') = ?{}", + params.len() + )); + } + let clause = format!(" WHERE {}", conditions.join(" AND ")); - (clause, params) + Ok((clause, params)) +} + +fn push_in_clause( + conditions: &mut Vec, + params: &mut Vec>, + column: &'static str, + values: I, +) where + I: IntoIterator, +{ + let placeholders: Vec = values + .into_iter() + .map(|value| { + params.push(Box::new(value)); + format!("?{}", params.len()) + }) + .collect(); + if !placeholders.is_empty() { + conditions.push(format!("{column} IN ({})", placeholders.join(","))); + } +} + +fn push_observation_exists( + conditions: &mut Vec, + params: &mut Vec>, + role: &'static str, + entity_ids: &[Uuid], +) { + if 
entity_ids.is_empty() { + return; + } + let placeholders: Vec = entity_ids + .iter() + .map(|id| { + params.push(Box::new(id.to_string())); + format!("?{}", params.len()) + }) + .collect(); + conditions.push(format!( + "EXISTS (SELECT 1 FROM event_observations o \ + WHERE o.event_id = events.id AND o.role = '{role}' AND o.entity_id IN ({}))", + placeholders.join(",") + )); +} + +fn reject_missing_event_filter_schema( + conn: &rusqlite::Connection, + filter: &EventFilter, +) -> Result<(), rusqlite::Error> { + if filter.session_id.is_some() && !has_column(conn, "events", "session_id")? { + return Err(schema_absent("events.session_id")); + } + if (!filter.observed.is_empty() || !filter.selected.is_empty()) + && !has_table(conn, "event_observations")? + { + return Err(schema_absent("event_observations")); + } + if filter.payload_proposal_id.is_some() && !has_column(conn, "events", "payload")? { + return Err(schema_absent("events.payload")); + } + Ok(()) +} + +fn has_table(conn: &rusqlite::Connection, table: &'static str) -> Result { + conn.query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type = 'table' AND name = ?1", + [table], + |row| row.get(0), + ) +} + +fn has_column( + conn: &rusqlite::Connection, + table: &'static str, + column: &'static str, +) -> Result { + conn.query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = ?2", + rusqlite::params![table, column], + |row| row.get(0), + ) +} + +fn schema_absent(name: &'static str) -> rusqlite::Error { + rusqlite::Error::ToSqlConversionFailure( + format!("event filter requires missing schema element {name}; run migrations").into(), + ) } // ============================================================================= @@ -305,35 +609,13 @@ fn build_event_filter_sql( #[async_trait] impl EventStore for SqlEventStore { async fn append_event(&self, event: Event) -> Result<(), StorageError> { - let id_str = event.id.to_string(); - let substrate_str = event.substrate.name().to_string(); - let 
outcome_str = event.outcome.name().to_string(); - let data_str = event.data.as_ref().map(|v| v.to_string()); - let target_str = event.target_id.map(|u| u.to_string()); - let ns = event.namespace.clone(); - let verb = event.verb.clone(); - let actor = event.actor.clone(); - let duration_us = event.duration_us; - let created_at = event.created_at; - self.with_writer("append_event", move |conn| { - conn.execute( - "INSERT INTO events \ - (id, namespace, verb, substrate, actor, outcome, data, duration_us, target_id, created_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", - rusqlite::params![ - id_str, - ns, - verb, - substrate_str, - actor, - outcome_str, - data_str, - duration_us, - target_str, - created_at, - ], - )?; + conn.execute_batch("BEGIN IMMEDIATE")?; + if let Err(e) = insert_event_with_observations(conn, &event) { + let _ = conn.execute_batch("ROLLBACK"); + return Err(e); + } + // A failed COMMIT can leave the transaction open; roll back so the + // connection is not handed back mid-transaction. + if let Err(e) = conn.execute_batch("COMMIT") { + let _ = conn.execute_batch("ROLLBACK"); + return Err(e); + } Ok(()) }) .await @@ -345,52 +627,21 @@ impl EventStore for SqlEventStore { self.with_writer("append_events", move |conn| { conn.execute_batch("BEGIN IMMEDIATE")?; let mut affected = 0u64; - let mut failed = 0u64; - let mut first_error = String::new(); for event in &events { - let id_str = event.id.to_string(); - let substrate_str = event.substrate.name().to_string(); - let outcome_str = event.outcome.name().to_string(); - let data_str = event.data.as_ref().map(|v| v.to_string()); - let target_str = event.target_id.map(|u| u.to_string()); - - match conn.execute( - "INSERT INTO events \ - (id, namespace, verb, substrate, actor, outcome, data, duration_us, target_id, created_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", - rusqlite::params![ - id_str, - &event.namespace, - &event.verb, - substrate_str, - &event.actor, - outcome_str, - data_str, - event.duration_us, - target_str, - event.created_at, - ], - ) { - Ok(_) => affected += 1, - Err(e) => { - if first_error.is_empty() { - first_error = e.to_string(); - } - failed += 1; - } +
if let Err(e) = insert_event_with_observations(conn, event) { + let _ = conn.execute_batch("ROLLBACK"); + return Err(e); } + affected += 1; } + // Keep the rollback guard: a failed COMMIT can leave the transaction open, + // and the connection must not be handed back mid-transaction. if let Err(e) = conn.execute_batch("COMMIT") { let _ = conn.execute_batch("ROLLBACK"); return Err(e); } Ok(BatchWriteSummary { attempted, affected, - failed, - first_error, + failed: 0, + first_error: String::new(), }) }) .await @@ -402,7 +653,9 @@ impl EventStore for SqlEventStore { self.with_reader("get_event", move |conn| { let mut stmt = conn.prepare( - "SELECT id, namespace, verb, substrate, actor, outcome, data, duration_us, target_id, created_at \ + "SELECT id, namespace, verb, substrate, actor, kind, outcome, payload, \ + payload_schema_version, profile_state_version, duration_us, target_id, \ + session_id, aggregate_kind, aggregate_id, created_at \ FROM events WHERE namespace = ?1 AND id = ?2", )?; let mut rows = stmt.query(rusqlite::params![namespace, id_str])?; @@ -422,7 +675,7 @@ impl EventStore for SqlEventStore { let namespace = self.namespace.clone(); self.with_reader("query_events", move |conn| { - let (where_clause, filter_params) = build_event_filter_sql(&namespace, &filter); + let (where_clause, filter_params) = build_event_filter_sql(conn, &namespace, &filter)?; let count_sql = format!("SELECT COUNT(*) FROM events{}", where_clause); let total: i64 = { @@ -432,7 +685,7 @@ impl EventStore for SqlEventStore { stmt.query_row(param_refs.as_slice(), |row| row.get(0))?
}; - let (_, data_filter_params) = build_event_filter_sql(&namespace, &filter); + let (_, data_filter_params) = build_event_filter_sql(conn, &namespace, &filter)?; let mut all_params: Vec> = data_filter_params; all_params.push(Box::new(page.limit as i64)); all_params.push(Box::new(page.offset as i64)); @@ -441,8 +694,10 @@ impl EventStore for SqlEventStore { let offset_idx = all_params.len(); let data_sql = format!( - "SELECT id, namespace, verb, substrate, actor, outcome, data, duration_us, target_id, created_at \ - FROM events{} ORDER BY created_at DESC LIMIT ?{} OFFSET ?{}", + "SELECT id, namespace, verb, substrate, actor, kind, outcome, payload, \ + payload_schema_version, profile_state_version, duration_us, target_id, \ + session_id, aggregate_kind, aggregate_id, created_at \ + FROM events{} ORDER BY created_at DESC, id DESC LIMIT ?{} OFFSET ?{}", where_clause, limit_idx, offset_idx, ); @@ -468,7 +723,7 @@ impl EventStore for SqlEventStore { let namespace = self.namespace.clone(); self.with_reader("count_events", move |conn| { - let (where_clause, params) = build_event_filter_sql(&namespace, &filter); + let (where_clause, params) = build_event_filter_sql(conn, &namespace, &filter)?; let sql = format!("SELECT COUNT(*) FROM events{}", where_clause); let mut stmt = conn.prepare(&sql)?; let param_refs: Vec<&dyn rusqlite::types::ToSql> = @@ -491,17 +746,36 @@ const EVENTS_DDL: &str = "\ verb TEXT NOT NULL,\ substrate TEXT NOT NULL,\ actor TEXT NOT NULL,\ + kind TEXT NOT NULL DEFAULT 'audit',\ outcome TEXT NOT NULL,\ - data TEXT,\ + payload TEXT NOT NULL DEFAULT '{}',\ + payload_schema_version INTEGER NOT NULL DEFAULT 1,\ + profile_state_version INTEGER,\ duration_us INTEGER NOT NULL DEFAULT 0,\ target_id TEXT,\ + session_id TEXT,\ + aggregate_kind TEXT,\ + aggregate_id TEXT,\ created_at INTEGER NOT NULL\ );\ + CREATE TABLE IF NOT EXISTS event_observations (\ + event_id TEXT NOT NULL,\ + entity_id TEXT NOT NULL,\ + referent_kind TEXT NOT NULL,\ + role TEXT NOT 
NULL,\ + position INTEGER NOT NULL,\ + PRIMARY KEY (event_id, role, position)\ + );\ CREATE INDEX IF NOT EXISTS idx_events_namespace ON events(namespace);\ CREATE INDEX IF NOT EXISTS idx_events_verb ON events(verb);\ + CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);\ CREATE INDEX IF NOT EXISTS idx_events_substrate ON events(substrate);\ CREATE INDEX IF NOT EXISTS idx_events_created ON events(created_at DESC);\ - CREATE INDEX IF NOT EXISTS idx_events_ns_created ON events(namespace, created_at DESC);\ + CREATE INDEX IF NOT EXISTS idx_events_ns_created_id ON events(namespace, created_at DESC, id DESC);\ + CREATE INDEX IF NOT EXISTS idx_events_session ON events(namespace, session_id, created_at, id);\ + CREATE INDEX IF NOT EXISTS idx_events_payload_proposal_id ON events(json_extract(payload, '$.proposal_id'));\ + CREATE INDEX IF NOT EXISTS idx_event_obs_entity ON event_observations(entity_id, role);\ + CREATE INDEX IF NOT EXISTS idx_event_obs_event_role ON event_observations(event_id, role);\ "; pub(crate) fn ensure_events_schema(conn: &rusqlite::Connection) -> Result<(), rusqlite::Error> { @@ -512,6 +786,7 @@ pub(crate) fn ensure_events_schema(conn: &rusqlite::Connection) -> Result<(), ru mod tests { use super::*; use crate::pool::PoolConfig; + use serde_json::json; fn setup_memory_store() -> SqlEventStore { let config = PoolConfig { @@ -529,7 +804,13 @@ mod tests { } fn make_event(namespace: &str) -> Event { - Event::new(namespace, "search", SubstrateKind::Note, "agent:test") + Event::new( + namespace, + "search", + EventKind::SearchExecuted, + SubstrateKind::Note, + "agent:test", + ) } #[tokio::test] @@ -642,4 +923,302 @@ mod tests { let fetched = store.get_event(denied_id).await.unwrap().unwrap(); assert_eq!(fetched.outcome, EventOutcome::Denied); } + + #[tokio::test] + async fn append_event_writes_observations_atomically() { + let store = setup_memory_store(); + let candidate = Uuid::new_v4(); + let selected = Uuid::new_v4(); + let mut event = 
make_event("default"); + event.kind = EventKind::RerankExecuted; + event.payload = json!({ + "candidates": [candidate.to_string()], + "selected": [selected.to_string()], + "served_by_profile_id": "profile-a" + }); + let event_id = event.id; + + store.append_event(event).await.unwrap(); + + // Verify event was inserted. + let fetched = store.get_event(event_id).await.unwrap(); + assert!(fetched.is_some()); + + // Verify observations were written. + let pool = Arc::clone(&store.pool); + let event_id_str = event_id.to_string(); + let (candidate_count, selected_count) = tokio::task::spawn_blocking(move || { + let guard = pool.reader().unwrap(); + let conn = guard.conn(); + let c: i64 = conn + .query_row( + "SELECT COUNT(*) FROM event_observations WHERE event_id = ?1 AND role = 'candidate'", + [&event_id_str], + |r| r.get(0), + ) + .unwrap(); + let s: i64 = conn + .query_row( + "SELECT COUNT(*) FROM event_observations WHERE event_id = ?1 AND role = 'selected'", + [&event_id_str], + |r| r.get(0), + ) + .unwrap(); + (c, s) + }) + .await + .unwrap(); + + assert_eq!(candidate_count, 1, "expected one candidate observation row"); + assert_eq!(selected_count, 1, "expected one selected observation row"); + } + + #[tokio::test] + async fn invalid_projection_payload_aborts_event_insert() { + let store = setup_memory_store(); + let mut event = make_event("default"); + event.kind = EventKind::RerankExecuted; + // "candidates" must be an array of UUID strings, not a plain string. + event.payload = json!({ "candidates": "not-array" }); + let event_id = event.id; + + let result = store.append_event(event).await; + assert!(result.is_err(), "invalid payload must return Err"); + + // The event row must not exist — transaction was rolled back. 
+ let fetched = store.get_event(event_id).await.unwrap(); + assert!(fetched.is_none(), "event row must not exist after rollback"); + } + + #[tokio::test] + async fn query_events_orders_by_created_at_then_id_desc() { + let store = setup_memory_store(); + + let ts = chrono::Utc::now().timestamp_micros(); + let id_low = Uuid::parse_str("00000000-0000-0000-0000-000000000001").unwrap(); + let id_high = Uuid::parse_str("ffffffff-ffff-ffff-ffff-ffffffffffff").unwrap(); + + // Insert both events with identical created_at via direct SQL to bypass UUID generation. + let pool = Arc::clone(&store.pool); + tokio::task::spawn_blocking(move || { + let guard = pool.try_writer().unwrap(); + let conn = guard.conn(); + conn.execute_batch("BEGIN IMMEDIATE").unwrap(); + for id in [id_low, id_high] { + conn.execute( + "INSERT INTO events \ + (id, namespace, verb, substrate, actor, kind, outcome, payload, \ + payload_schema_version, duration_us, created_at) \ + VALUES (?1, 'default', 'search', 'note', 'test', 'audit', 'success', '{}', 1, 0, ?2)", + rusqlite::params![id.to_string(), ts], + ) + .unwrap(); + } + conn.execute_batch("COMMIT").unwrap(); + }) + .await + .unwrap(); + + let page = store + .query_events( + EventFilter::default(), + PageRequest { + limit: 10, + offset: 0, + }, + ) + .await + .unwrap(); + + assert_eq!(page.items.len(), 2); + assert_eq!( + page.items[0].id, id_high, + "higher UUID must come first (id DESC tiebreaker)" + ); + assert_eq!(page.items[1].id, id_low); + } + + #[tokio::test] + async fn query_events_filters_by_kind() { + let store = setup_memory_store(); + store.append_event(make_event("default")).await.unwrap(); + let mut recall_event = make_event("default"); + recall_event.kind = EventKind::RecallExecuted; + store.append_event(recall_event).await.unwrap(); + + let filter = EventFilter { + kinds: vec![EventKind::RecallExecuted], + ..EventFilter::default() + }; + let page = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 0, + }, + ) + 
.await + .unwrap(); + assert_eq!(page.items.len(), 1); + assert_eq!(page.items[0].kind, EventKind::RecallExecuted); + } + + #[tokio::test] + async fn query_events_filters_by_session_id() { + let store = setup_memory_store(); + let session = Uuid::new_v4(); + let mut event = make_event("default"); + event.session_id = Some(session); + store.append_event(event).await.unwrap(); + store.append_event(make_event("default")).await.unwrap(); + + let filter = EventFilter { + session_id: Some(session), + ..EventFilter::default() + }; + let page = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 0, + }, + ) + .await + .unwrap(); + assert_eq!(page.items.len(), 1); + assert_eq!(page.items[0].session_id, Some(session)); + } + + #[tokio::test] + async fn query_events_filters_by_observed() { + let store = setup_memory_store(); + let entity_id = Uuid::new_v4(); + let mut event = make_event("default"); + event.kind = EventKind::RerankExecuted; + event.payload = json!({ + "candidates": [entity_id.to_string()], + "selected": [] + }); + store.append_event(event).await.unwrap(); + store.append_event(make_event("default")).await.unwrap(); + + let filter = EventFilter { + observed: vec![entity_id], + ..EventFilter::default() + }; + let page = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 0, + }, + ) + .await + .unwrap(); + assert_eq!(page.items.len(), 1); + } + + #[tokio::test] + async fn query_events_filters_by_selected() { + let store = setup_memory_store(); + let entity_id = Uuid::new_v4(); + let mut event = make_event("default"); + event.kind = EventKind::RerankExecuted; + event.payload = json!({ + "candidates": [], + "selected": [entity_id.to_string()] + }); + store.append_event(event).await.unwrap(); + store.append_event(make_event("default")).await.unwrap(); + + let filter = EventFilter { + selected: vec![entity_id], + ..EventFilter::default() + }; + let page = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 
0, + }, + ) + .await + .unwrap(); + assert_eq!(page.items.len(), 1); + } + + #[tokio::test] + async fn query_events_filters_by_payload_proposal_id() { + let store = setup_memory_store(); + let proposal_id = Uuid::new_v4(); + let mut event = make_event("default"); + event.kind = EventKind::ProposalCreated; + event.payload = json!({ "proposal_id": proposal_id.to_string() }); + store.append_event(event).await.unwrap(); + store.append_event(make_event("default")).await.unwrap(); + + let filter = EventFilter { + payload_proposal_id: Some(proposal_id), + ..EventFilter::default() + }; + let page = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 0, + }, + ) + .await + .unwrap(); + assert_eq!(page.items.len(), 1); + } + + #[tokio::test] + async fn query_events_observed_filter_missing_projection_returns_clean_error() { + // Set up a legacy-schema store (no event_observations table). + let config = PoolConfig { + path: None, + ..PoolConfig::default() + }; + let pool = Arc::new(ConnectionPool::new(config).unwrap()); + { + let writer = pool.writer().unwrap(); + // Create only the events table, without event_observations. 
+ writer.conn().execute_batch( + "CREATE TABLE IF NOT EXISTS events (\ + id TEXT PRIMARY KEY, namespace TEXT NOT NULL, verb TEXT NOT NULL,\ + substrate TEXT NOT NULL, actor TEXT NOT NULL, kind TEXT NOT NULL DEFAULT 'audit',\ + outcome TEXT NOT NULL, payload TEXT NOT NULL DEFAULT '{}',\ + payload_schema_version INTEGER NOT NULL DEFAULT 1,\ + duration_us INTEGER NOT NULL DEFAULT 0, created_at INTEGER NOT NULL\ + );" + ).unwrap(); + } + let store = SqlEventStore::new_scoped(pool, false, "default"); + + let filter = EventFilter { + observed: vec![Uuid::new_v4()], + ..EventFilter::default() + }; + let result = store + .query_events( + filter, + PageRequest { + limit: 10, + offset: 0, + }, + ) + .await; + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("event_observations") && err_msg.contains("run migrations"), + "error should mention event_observations and run migrations, got: {err_msg}" + ); + } } diff --git a/crates/khive-db/src/stores/graph.rs b/crates/khive-db/src/stores/graph.rs index 692452ff..94a64059 100644 --- a/crates/khive-db/src/stores/graph.rs +++ b/crates/khive-db/src/stores/graph.rs @@ -16,8 +16,8 @@ use uuid::Uuid; use khive_storage::error::StorageError; use khive_storage::types::{ - BatchWriteSummary, Edge, EdgeFilter, EdgeSortField, GraphPath, NeighborHit, NeighborQuery, - Page, PageRequest, PathNode, SortDirection, SortOrder, TraversalRequest, + BatchWriteSummary, DeleteMode, Edge, EdgeFilter, EdgeSortField, GraphPath, NeighborHit, + NeighborQuery, Page, PageRequest, PathNode, SortDirection, SortOrder, TraversalRequest, }; use khive_storage::GraphStore; use khive_storage::LinkId; @@ -155,31 +155,51 @@ impl SqlGraphStore { // ============================================================================= fn read_edge(row: &rusqlite::Row<'_>) -> Result { - let id_str: String = row.get(0)?; - let source_str: String = row.get(1)?; - let target_str: String = row.get(2)?; - let relation_str: String = 
row.get(3)?; - let weight: f64 = row.get(4)?; - let created_micros: i64 = row.get(5)?; - let metadata_str: Option = row.get(6)?; + let namespace: String = row.get(0)?; + let id_str: String = row.get(1)?; + let source_str: String = row.get(2)?; + let target_str: String = row.get(3)?; + let relation_str: String = row.get(4)?; + let weight: f64 = row.get(5)?; + let created_micros: i64 = row.get(6)?; + let updated_micros: i64 = row.get(7)?; + let deleted_micros: Option = row.get(8)?; + let metadata_str: Option = row.get(9)?; + let target_backend: Option = row.get(10)?; let id = parse_uuid(&id_str)?; let source_id = parse_uuid(&source_str)?; let target_id = parse_uuid(&target_str)?; let created_at = micros_to_datetime(created_micros); let relation = relation_str.parse::().map_err(|e| { - rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e)) + rusqlite::Error::FromSqlConversionFailure(4, rusqlite::types::Type::Text, Box::new(e)) })?; - let metadata = metadata_str.and_then(|s| serde_json::from_str(&s).ok()); + let metadata = match metadata_str { + Some(s) => { + let v = serde_json::from_str(&s).map_err(|e| { + rusqlite::Error::FromSqlConversionFailure( + 9, + rusqlite::types::Type::Text, + Box::new(e), + ) + })?; + Some(v) + } + None => None, + }; Ok(Edge { id: id.into(), + namespace, source_id, target_id, relation, weight, created_at, + updated_at: micros_to_datetime(updated_micros), + deleted_at: deleted_micros.map(micros_to_datetime), metadata, + target_backend, }) } @@ -199,7 +219,10 @@ fn build_edge_filter_sql( namespace: &str, filter: &EdgeFilter, ) -> (String, Vec>) { - let mut conditions: Vec = vec!["namespace = ?1".to_string()]; + let mut conditions: Vec = vec![ + "namespace = ?1".to_string(), + "deleted_at IS NULL".to_string(), + ]; let mut params: Vec> = vec![Box::new(namespace.to_string())]; if !filter.ids.is_empty() { @@ -291,6 +314,16 @@ fn edge_sort_col(field: &EdgeSortField) -> &'static str { impl GraphStore for 
SqlGraphStore { async fn upsert_edge(&self, edge: Edge) -> Result<(), StorageError> { let namespace = self.namespace.clone(); + if edge.namespace != namespace { + return Err(StorageError::InvalidInput { + capability: StorageCapability::Graph, + operation: "upsert_edge".into(), + message: format!( + "edge namespace {:?} does not match store namespace {:?}", + edge.namespace, namespace + ), + }); + } let id_str = Uuid::from(edge.id).to_string(); let src_str = edge.source_id.to_string(); let tgt_str = edge.target_id.to_string(); @@ -298,20 +331,30 @@ impl GraphStore for SqlGraphStore { let metadata_str = edge .metadata .as_ref() - .map(|v| serde_json::to_string(v).unwrap_or_default()); + .map(serde_json::to_string) + .transpose() + .map_err(|e| StorageError::driver(StorageCapability::Graph, "upsert_edge", e))?; self.with_writer("upsert_edge", move |conn| { conn.execute( "INSERT INTO graph_edges \ - (namespace, id, source_id, target_id, relation, weight, created_at, metadata) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \ + (namespace, id, source_id, target_id, relation, weight, \ + created_at, updated_at, deleted_at, metadata, target_backend) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11) \ ON CONFLICT(namespace, id) DO UPDATE SET \ source_id = excluded.source_id, \ target_id = excluded.target_id, \ relation = excluded.relation, \ weight = excluded.weight, \ - created_at = excluded.created_at, \ - metadata = excluded.metadata \ - ON CONFLICT(namespace, source_id, target_id, relation) DO NOTHING", + updated_at = excluded.updated_at, \ + deleted_at = NULL, \ + metadata = excluded.metadata, \ + target_backend = excluded.target_backend \ + ON CONFLICT(namespace, source_id, target_id, relation) DO UPDATE SET \ + weight = excluded.weight, \ + updated_at = excluded.updated_at, \ + deleted_at = NULL, \ + metadata = excluded.metadata, \ + target_backend = excluded.target_backend", rusqlite::params![ namespace, id_str, @@ -320,7 +363,10 @@ impl GraphStore for 
SqlGraphStore { relation_str, edge.weight, edge.created_at.timestamp_micros(), + edge.updated_at.timestamp_micros(), + edge.deleted_at.map(|t| t.timestamp_micros()), metadata_str, + edge.target_backend, ], )?; Ok(()) @@ -332,11 +378,23 @@ impl GraphStore for SqlGraphStore { let attempted = edges.len() as u64; let namespace = self.namespace.clone(); + // Validate namespaces before acquiring writer. + for edge in &edges { + if edge.namespace != namespace { + return Err(StorageError::InvalidInput { + capability: StorageCapability::Graph, + operation: "upsert_edges".into(), + message: format!( + "edge namespace {:?} does not match store namespace {:?}", + edge.namespace, namespace + ), + }); + } + } + self.with_writer("upsert_edges", move |conn| { conn.execute_batch("BEGIN IMMEDIATE")?; let mut affected = 0u64; - let mut failed = 0u64; - let mut first_error = String::new(); for edge in &edges { let id_str = Uuid::from(edge.id).to_string(); @@ -346,19 +404,29 @@ impl GraphStore for SqlGraphStore { let metadata_str = edge .metadata .as_ref() - .map(|v| serde_json::to_string(v).unwrap_or_default()); - match conn.execute( + .map(serde_json::to_string) + .transpose() + .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?; + if let Err(e) = conn.execute( "INSERT INTO graph_edges \ - (namespace, id, source_id, target_id, relation, weight, created_at, metadata) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \ + (namespace, id, source_id, target_id, relation, weight, \ + created_at, updated_at, deleted_at, metadata, target_backend) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11) \ ON CONFLICT(namespace, id) DO UPDATE SET \ source_id = excluded.source_id, \ target_id = excluded.target_id, \ relation = excluded.relation, \ weight = excluded.weight, \ - created_at = excluded.created_at, \ - metadata = excluded.metadata \ - ON CONFLICT(namespace, source_id, target_id, relation) DO NOTHING", + updated_at = excluded.updated_at, \ + deleted_at = NULL, \ + 
metadata = excluded.metadata, \ + target_backend = excluded.target_backend \ + ON CONFLICT(namespace, source_id, target_id, relation) DO UPDATE SET \ + weight = excluded.weight, \ + updated_at = excluded.updated_at, \ + deleted_at = NULL, \ + metadata = excluded.metadata, \ + target_backend = excluded.target_backend", rusqlite::params![ &namespace, id_str, @@ -367,17 +435,16 @@ impl GraphStore for SqlGraphStore { relation_str, edge.weight, edge.created_at.timestamp_micros(), + edge.updated_at.timestamp_micros(), + edge.deleted_at.map(|t| t.timestamp_micros()), metadata_str, + edge.target_backend.as_deref(), ], ) { - Ok(_) => affected += 1, - Err(e) => { - if first_error.is_empty() { - first_error = e.to_string(); - } - failed += 1; - } + let _ = conn.execute_batch("ROLLBACK"); + return Err(e); } + affected += 1; } if let Err(e) = conn.execute_batch("COMMIT") { @@ -387,8 +454,8 @@ impl GraphStore for SqlGraphStore { Ok(BatchWriteSummary { attempted, affected, - failed, - first_error, + failed: 0, + first_error: String::new(), }) }) .await @@ -400,8 +467,9 @@ impl GraphStore for SqlGraphStore { self.with_reader("get_edge", move |conn| { let mut stmt = conn.prepare( - "SELECT id, source_id, target_id, relation, weight, created_at, metadata \ - FROM graph_edges WHERE namespace = ?1 AND id = ?2", + "SELECT namespace, id, source_id, target_id, relation, weight, \ + created_at, updated_at, deleted_at, metadata, target_backend \ + FROM graph_edges WHERE namespace = ?1 AND id = ?2 AND deleted_at IS NULL", )?; let mut rows = stmt.query(rusqlite::params![namespace, id_str])?; match rows.next()? 
{ @@ -412,16 +480,23 @@ impl GraphStore for SqlGraphStore { .await } - async fn delete_edge(&self, id: LinkId) -> Result { + async fn delete_edge(&self, id: LinkId, mode: DeleteMode) -> Result { let namespace = self.namespace.clone(); let id_str = Uuid::from(id).to_string(); self.with_writer("delete_edge", move |conn| { - let deleted = conn.execute( - "DELETE FROM graph_edges WHERE namespace = ?1 AND id = ?2", - rusqlite::params![namespace, id_str], - )?; - Ok(deleted > 0) + let affected = match mode { + DeleteMode::Soft => conn.execute( + "UPDATE graph_edges SET deleted_at = ?3, updated_at = ?3 \ + WHERE namespace = ?1 AND id = ?2 AND deleted_at IS NULL", + rusqlite::params![namespace, id_str, chrono::Utc::now().timestamp_micros(),], + )?, + DeleteMode::Hard => conn.execute( + "DELETE FROM graph_edges WHERE namespace = ?1 AND id = ?2", + rusqlite::params![namespace, id_str], + )?, + }; + Ok(affected > 0) }) .await } @@ -469,7 +544,8 @@ impl GraphStore for SqlGraphStore { let offset_idx = all_params.len(); let data_sql = format!( - "SELECT id, source_id, target_id, relation, weight, created_at, metadata \ + "SELECT namespace, id, source_id, target_id, relation, weight, \ + created_at, updated_at, deleted_at, metadata, target_backend \ FROM graph_edges{}{} LIMIT ?{} OFFSET ?{}", where_clause, order_clause, limit_idx, offset_idx, ); @@ -518,9 +594,11 @@ impl GraphStore for SqlGraphStore { self.with_reader("neighbors", move |conn| { let base_out = "SELECT target_id AS node_id, id AS edge_id, relation, weight \ - FROM graph_edges WHERE namespace = ?1 AND source_id = ?2"; + FROM graph_edges \ + WHERE namespace = ?1 AND source_id = ?2 AND deleted_at IS NULL"; let base_in = "SELECT source_id AS node_id, id AS edge_id, relation, weight \ - FROM graph_edges WHERE namespace = ?1 AND target_id = ?2"; + FROM graph_edges \ + WHERE namespace = ?1 AND target_id = ?2 AND deleted_at IS NULL"; let sql = match query.direction { Direction::Out => base_out.to_string(), @@ -685,6 +763,7 
@@ impl GraphStore for SqlGraphStore { FROM graph_edges e \ JOIN traversal t ON {join_condition} \ WHERE e.namespace = ?1 \ + AND e.deleted_at IS NULL \ AND t.depth < ?3 \ AND (',' || t.path || ',') NOT LIKE '%,' || {next_node} || ',%'{rel_cond}{wt_cond} \ ) \ @@ -775,7 +854,10 @@ const GRAPH_DDL: &str = "\ relation TEXT NOT NULL,\ weight REAL NOT NULL DEFAULT 1.0,\ created_at INTEGER NOT NULL,\ + updated_at INTEGER NOT NULL,\ + deleted_at INTEGER,\ metadata TEXT,\ + target_backend TEXT,\ PRIMARY KEY (namespace, id)\ );\ CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple ON graph_edges(namespace, source_id, target_id, relation);\ @@ -784,6 +866,7 @@ const GRAPH_DDL: &str = "\ CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\ CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\ CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\ + CREATE INDEX IF NOT EXISTS idx_graph_edges_target_backend ON graph_edges(target_backend) WHERE target_backend IS NOT NULL;\ "; pub(crate) fn ensure_graph_schema(conn: &rusqlite::Connection) -> Result<(), rusqlite::Error> { @@ -812,14 +895,19 @@ mod tests { } fn make_edge(source: Uuid, target: Uuid, relation: EdgeRelation, weight: f64) -> Edge { + let now = Utc::now(); Edge { id: Uuid::new_v4().into(), + namespace: "default".to_string(), source_id: source, target_id: target, relation, weight, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: None, + target_backend: None, } } @@ -829,14 +917,19 @@ mod tests { let src = Uuid::new_v4(); let tgt = Uuid::new_v4(); + let now = Utc::now(); let edge = Edge { id: Uuid::new_v4().into(), + namespace: "default".to_string(), source_id: src, target_id: tgt, relation: EdgeRelation::Extends, weight: 0.8, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: None, + 
target_backend: None, }; let edge_id = edge.id; @@ -846,6 +939,7 @@ mod tests { assert!(fetched.is_some()); let fetched = fetched.unwrap(); assert_eq!(fetched.id, edge_id); + assert_eq!(fetched.namespace, "default"); assert_eq!(fetched.source_id, src); assert_eq!(fetched.target_id, tgt); assert_eq!(fetched.relation, EdgeRelation::Extends); @@ -862,12 +956,12 @@ mod tests { store.upsert_edge(edge).await.unwrap(); assert!(store.get_edge(edge_id).await.unwrap().is_some()); - let deleted = store.delete_edge(edge_id).await.unwrap(); + let deleted = store.delete_edge(edge_id, DeleteMode::Hard).await.unwrap(); assert!(deleted); assert!(store.get_edge(edge_id).await.unwrap().is_none()); - let deleted_again = store.delete_edge(edge_id).await.unwrap(); + let deleted_again = store.delete_edge(edge_id, DeleteMode::Hard).await.unwrap(); assert!(!deleted_again); } @@ -976,14 +1070,19 @@ mod tests { let src = Uuid::new_v4(); let tgt = Uuid::new_v4(); let meta = serde_json::json!({"note": "important link", "confidence": 0.95}); + let now = Utc::now(); let edge = Edge { id: Uuid::new_v4().into(), + namespace: "default".to_string(), source_id: src, target_id: tgt, relation: EdgeRelation::Implements, weight: 0.9, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: Some(meta.clone()), + target_backend: None, }; let edge_id = edge.id; @@ -1046,23 +1145,32 @@ mod tests { let tgt = Uuid::new_v4(); // Two edges with the same (source_id, target_id, relation) triple but different IDs. 
+ let now = Utc::now(); let edge1 = Edge { id: Uuid::new_v4().into(), + namespace: "default".to_string(), source_id: src, target_id: tgt, relation: EdgeRelation::Extends, weight: 1.0, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: None, + target_backend: None, }; let edge2 = Edge { id: Uuid::new_v4().into(), + namespace: "default".to_string(), source_id: src, target_id: tgt, relation: EdgeRelation::Extends, weight: 0.5, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: None, + target_backend: None, }; store.upsert_edge(edge1).await.unwrap(); @@ -1074,4 +1182,60 @@ mod tests { "duplicate (source, target, relation) triple must be ignored; only one edge must exist" ); } + + // F053 (CRIT): natural-key conflict must DO UPDATE (refresh weight/metadata), not DO NOTHING. + // ADR-009 requires the second upsert to overwrite weight=0.5; current code keeps weight=1.0. + #[tokio::test] + async fn graph_duplicate_edges_refresh_existing_row() { + let store = setup_memory_store(); + let src = Uuid::new_v4(); + let tgt = Uuid::new_v4(); + + let now = Utc::now(); + let edge1 = Edge { + id: Uuid::new_v4().into(), + namespace: "default".to_string(), + source_id: src, + target_id: tgt, + relation: EdgeRelation::Extends, + weight: 1.0, + created_at: now, + updated_at: now, + deleted_at: None, + metadata: None, + target_backend: None, + }; + let edge2 = Edge { + id: Uuid::new_v4().into(), + namespace: "default".to_string(), + source_id: src, + target_id: tgt, + relation: EdgeRelation::Extends, + weight: 0.5, + created_at: now, + updated_at: now, + deleted_at: None, + metadata: None, + target_backend: None, + }; + + store.upsert_edge(edge1).await.unwrap(); + store.upsert_edge(edge2).await.unwrap(); + + let edges = store + .query_edges(EdgeFilter::default(), vec![], PageRequest::default()) + .await + .unwrap(); + assert_eq!( + edges.items.len(), + 1, + "duplicate natural key must collapse to one 
row" + ); + assert!( + (edges.items[0].weight - 0.5).abs() < 0.001, + "F053: natural-key conflict must DO UPDATE (weight=0.5 from second upsert); \ + current DO NOTHING keeps stale weight={}", + edges.items[0].weight + ); + } } diff --git a/crates/khive-db/src/stores/mod.rs b/crates/khive-db/src/stores/mod.rs index 4fb149be..ae8d79ec 100644 --- a/crates/khive-db/src/stores/mod.rs +++ b/crates/khive-db/src/stores/mod.rs @@ -2,5 +2,6 @@ pub mod entity; pub mod event; pub mod graph; pub mod note; +pub mod sparse; pub mod text; pub mod vectors; diff --git a/crates/khive-db/src/stores/note.rs b/crates/khive-db/src/stores/note.rs index 8a345427..faab233d 100644 --- a/crates/khive-db/src/stores/note.rs +++ b/crates/khive-db/src/stores/note.rs @@ -110,15 +110,16 @@ fn read_note(row: &rusqlite::Row<'_>) -> Result { let id_str: String = row.get(0)?; let namespace: String = row.get(1)?; let kind: String = row.get(2)?; - let name: Option = row.get(3)?; - let content: String = row.get(4)?; - let salience: f64 = row.get(5)?; - let decay_factor: f64 = row.get(6)?; - let expires_at: Option = row.get(7)?; - let properties_str: Option = row.get(8)?; - let created_at: i64 = row.get(9)?; - let updated_at: i64 = row.get(10)?; - let deleted_at: Option = row.get(11)?; + let status: String = row.get(3)?; + let name: Option = row.get(4)?; + let content: String = row.get(5)?; + let salience: Option = row.get(6)?; + let decay_factor: Option = row.get(7)?; + let expires_at: Option = row.get(8)?; + let properties_str: Option = row.get(9)?; + let created_at: i64 = row.get(10)?; + let updated_at: i64 = row.get(11)?; + let deleted_at: Option = row.get(12)?; let id = parse_uuid(&id_str)?; @@ -126,7 +127,7 @@ fn read_note(row: &rusqlite::Row<'_>) -> Result { .map(|s| { serde_json::from_str(&s).map_err(|e| { rusqlite::Error::FromSqlConversionFailure( - 8, + 9, rusqlite::types::Type::Text, Box::new(e), ) @@ -138,6 +139,7 @@ fn read_note(row: &rusqlite::Row<'_>) -> Result { id, namespace, kind, + 
status, name, content, salience, @@ -185,6 +187,7 @@ impl NoteStore for SqlNoteStore { let namespace = note.namespace.clone(); let id_str = note.id.to_string(); let kind_str = note.kind.to_string(); + let status_str = note.status.clone(); let properties_str = note .properties .as_ref() @@ -193,13 +196,14 @@ impl NoteStore for SqlNoteStore { self.with_writer("upsert_note", move |conn| { conn.execute( "INSERT OR REPLACE INTO notes \ - (id, namespace, kind, name, content, salience, decay_factor, expires_at, \ + (id, namespace, kind, status, name, content, salience, decay_factor, expires_at, \ properties, created_at, updated_at, deleted_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", rusqlite::params![ id_str, namespace, kind_str, + status_str, note.name, note.content, note.salience, @@ -228,6 +232,7 @@ impl NoteStore for SqlNoteStore { for note in ¬es { let id_str = note.id.to_string(); let kind_str = note.kind.to_string(); + let status_str = note.status.clone(); let properties_str = note .properties .as_ref() @@ -235,13 +240,14 @@ impl NoteStore for SqlNoteStore { match conn.execute( "INSERT OR REPLACE INTO notes \ - (id, namespace, kind, name, content, salience, decay_factor, expires_at, \ + (id, namespace, kind, status, name, content, salience, decay_factor, expires_at, \ properties, created_at, updated_at, deleted_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", rusqlite::params![ id_str, ¬e.namespace, kind_str, + status_str, ¬e.name, note.content, note.salience, @@ -282,7 +288,7 @@ impl NoteStore for SqlNoteStore { self.with_reader("get_note", move |conn| { let mut stmt = conn.prepare( - "SELECT id, namespace, kind, name, content, salience, decay_factor, expires_at, \ + "SELECT id, namespace, kind, status, name, content, salience, decay_factor, expires_at, \ properties, created_at, 
updated_at, deleted_at \ FROM notes WHERE id = ?1 AND deleted_at IS NULL", )?; @@ -307,7 +313,7 @@ impl NoteStore for SqlNoteStore { .collect::>() .join(", "); let sql = format!( - "SELECT id, namespace, kind, name, content, salience, decay_factor, expires_at, \ + "SELECT id, namespace, kind, status, name, content, salience, decay_factor, expires_at, \ properties, created_at, updated_at, deleted_at \ FROM notes WHERE id IN ({placeholders}) AND deleted_at IS NULL" ); @@ -334,7 +340,7 @@ impl NoteStore for SqlNoteStore { self.with_writer("delete_note_soft", move |conn| { let now = chrono::Utc::now().timestamp_micros(); let deleted = conn.execute( - "UPDATE notes SET deleted_at = ?1 \ + "UPDATE notes SET status = 'deleted', deleted_at = ?1 \ WHERE id = ?2 AND deleted_at IS NULL", rusqlite::params![now, id_str], )?; @@ -380,7 +386,7 @@ impl NoteStore for SqlNoteStore { let offset_idx = data_params.len(); let data_sql = format!( - "SELECT id, namespace, kind, name, content, salience, decay_factor, expires_at, \ + "SELECT id, namespace, kind, status, name, content, salience, decay_factor, expires_at, \ properties, created_at, updated_at, deleted_at \ FROM notes{} ORDER BY created_at DESC LIMIT ?{} OFFSET ?{}", where_sql, limit_idx, offset_idx, @@ -419,53 +425,6 @@ impl NoteStore for SqlNoteStore { }) .await } - - async fn upsert_note_if_below_quota( - &self, - note: Note, - max_notes: u64, - ) -> Result { - let namespace = note.namespace.clone(); - let id_str = note.id.to_string(); - let kind_str = note.kind.to_string(); - let properties_str = note - .properties - .as_ref() - .map(|v| serde_json::to_string(v).unwrap_or_default()); - - self.with_writer("upsert_note_if_below_quota", move |conn| { - let count: i64 = conn.query_row( - "SELECT COUNT(*) FROM notes WHERE namespace = ?1 AND deleted_at IS NULL", - [&namespace], - |row| row.get(0), - )?; - if count as u64 >= max_notes { - return Ok(false); - } - conn.execute( - "INSERT OR REPLACE INTO notes \ - (id, namespace, 
kind, name, content, salience, decay_factor, expires_at, \ - properties, created_at, updated_at, deleted_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", - rusqlite::params![ - id_str, - namespace, - kind_str, - note.name, - note.content, - note.salience, - note.decay_factor, - note.expires_at, - properties_str, - note.created_at, - note.updated_at, - note.deleted_at, - ], - )?; - Ok(true) - }) - .await - } } // ============================================================================= @@ -477,10 +436,11 @@ const NOTES_DDL: &str = "\ id TEXT PRIMARY KEY,\ namespace TEXT NOT NULL,\ kind TEXT NOT NULL,\ + status TEXT NOT NULL DEFAULT 'active',\ name TEXT,\ content TEXT NOT NULL DEFAULT '',\ - salience REAL NOT NULL DEFAULT 0.5,\ - decay_factor REAL NOT NULL DEFAULT 0.0,\ + salience REAL,\ + decay_factor REAL,\ expires_at INTEGER,\ properties TEXT,\ created_at INTEGER NOT NULL,\ @@ -611,26 +571,6 @@ mod tests { assert_eq!(count_ns2, 1); } - #[tokio::test] - async fn test_quota() { - let pool = setup_pool(); - let store = SqlNoteStore::new(Arc::clone(&pool), false); - - for _ in 0..3 { - let inserted = store - .upsert_note_if_below_quota(make_note("quota_ns", "observation", "x"), 3) - .await - .unwrap(); - assert!(inserted); - } - - let inserted = store - .upsert_note_if_below_quota(make_note("quota_ns", "observation", "x"), 3) - .await - .unwrap(); - assert!(!inserted); - } - /// query_notes and count_notes use the namespace parameter as passed. 
#[tokio::test] async fn test_query_and_count_use_caller_namespace() { @@ -652,6 +592,7 @@ mod tests { .unwrap(); assert_eq!(page_a.items.len(), 1); assert_eq!(page_a.items[0].content, "A"); + assert_eq!(page_a.total, Some(1)); let page_b = store .query_notes("ns_b", None, PageRequest::default()) @@ -659,10 +600,43 @@ mod tests { .unwrap(); assert_eq!(page_b.items.len(), 1); assert_eq!(page_b.items[0].content, "B"); + assert_eq!(page_b.total, Some(1)); let count_a = store.count_notes("ns_a", None).await.unwrap(); let count_b = store.count_notes("ns_b", None).await.unwrap(); assert_eq!(count_a, 1); assert_eq!(count_b, 1); } + + #[tokio::test] + async fn test_soft_delete_sets_status_deleted() { + let pool = setup_pool(); + let store = SqlNoteStore::new(Arc::clone(&pool), false); + let note = make_note("default", "observation", "to delete"); + let id = note.id; + store.upsert_note(note).await.unwrap(); + let deleted = store.delete_note(id, DeleteMode::Soft).await.unwrap(); + assert!(deleted); + // Verify directly via raw SQL + let writer = pool.writer().unwrap(); + let status: String = writer + .conn() + .query_row( + "SELECT status FROM notes WHERE id = ?1", + [id.to_string()], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(status, "deleted"); + } + + #[tokio::test] + async fn test_note_status_field_roundtrip() { + let store = setup_memory_store(); + let note = make_note("default", "observation", "status test"); + let id = note.id; + store.upsert_note(note).await.unwrap(); + let fetched = store.get_note(id).await.unwrap().unwrap(); + assert_eq!(fetched.status, "active"); + } } diff --git a/crates/khive-db/src/stores/sparse.rs b/crates/khive-db/src/stores/sparse.rs new file mode 100644 index 00000000..d33ab61c --- /dev/null +++ b/crates/khive-db/src/stores/sparse.rs @@ -0,0 +1,729 @@ +//! SQLite-backed `SparseStore` implementation (ADR-031). 
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use uuid::Uuid;
+
+use khive_score::DeterministicScore;
+use khive_storage::error::StorageError;
+use khive_storage::types::{
+    BatchWriteSummary, SparseRecord, SparseSearchHit, SparseSearchRequest, SparseVector,
+};
+use khive_storage::{SparseStore, StorageCapability};
+use khive_types::SubstrateKind;
+
+use crate::error::SqliteError;
+use crate::pool::ConnectionPool;
+
+/// Wrap a rusqlite error as a sparse-capability driver error for operation `op`.
+fn map_err(e: rusqlite::Error, op: &'static str) -> StorageError {
+    StorageError::driver(StorageCapability::Sparse, op, e)
+}
+
+/// Wrap a pool/connection error as a sparse-capability driver error for operation `op`.
+fn map_sqlite_err(e: SqliteError, op: &'static str) -> StorageError {
+    StorageError::driver(StorageCapability::Sparse, op, e)
+}
+
+/// Validate that a sparse vector is well-formed.
+///
+/// - indices and values must have equal lengths
+/// - at least one element
+/// - all values must be finite
+/// - indices must be strictly increasing (no duplicates)
+///
+/// Returns `StorageError::InvalidInput` tagged with `op` on the first violation.
+fn validate_sparse_vector(vector: &SparseVector, op: &'static str) -> Result<(), StorageError> {
+    if vector.indices.len() != vector.values.len() {
+        return Err(StorageError::InvalidInput {
+            capability: StorageCapability::Sparse,
+            operation: op.into(),
+            message: format!(
+                "indices length ({}) != values length ({})",
+                vector.indices.len(),
+                vector.values.len()
+            ),
+        });
+    }
+    if vector.indices.is_empty() {
+        return Err(StorageError::InvalidInput {
+            capability: StorageCapability::Sparse,
+            operation: op.into(),
+            message: "sparse vector must have at least one element".into(),
+        });
+    }
+    for (i, v) in vector.values.iter().enumerate() {
+        if !v.is_finite() {
+            return Err(StorageError::InvalidInput {
+                capability: StorageCapability::Sparse,
+                operation: op.into(),
+                message: format!("non-finite value at position {i}: {v}"),
+            });
+        }
+    }
+    // Verify strictly increasing indices.
+    for window in vector.indices.windows(2) {
+        if window[0] >= window[1] {
+            return Err(StorageError::InvalidInput {
+                capability: StorageCapability::Sparse,
+                operation: op.into(),
+                message: format!(
+                    "indices must be strictly increasing; found {} then {}",
+                    window[0], window[1]
+                ),
+            });
+        }
+    }
+    Ok(())
+}
+
+/// Serialize an f32 slice to little-endian bytes (4 bytes per value).
+///
+/// The encoding is explicit rather than a reinterpretation of memory so the
+/// stored blob always matches the `f32::from_le_bytes` decoding used on read,
+/// regardless of host endianness.
+fn f32_slice_to_le_bytes(data: &[f32]) -> Vec<u8> {
+    data.iter().flat_map(|v| v.to_le_bytes()).collect()
+}
+
+/// Decode a blob written by `f32_slice_to_le_bytes`.
+///
+/// Returns `None` when the blob length is not a multiple of 4.
+fn le_bytes_to_f32_vec(blob: &[u8]) -> Option<Vec<f32>> {
+    if blob.len() % 4 != 0 {
+        return None;
+    }
+    Some(
+        blob.chunks_exact(4)
+            .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
+            .collect(),
+    )
+}
+
+/// Create the sparse table and its index for the given model_key.
+///
+/// The table is named `sparse_{model_key}` and keyed on
+/// `(subject_id, namespace, field)`.
+///
+/// NOTE(review): `model_key` is interpolated into the DDL as a bare SQL
+/// identifier; this presumably relies on callers passing an already-sanitized
+/// key — confirm against the call sites.
+pub(crate) fn ensure_sparse_schema(
+    conn: &rusqlite::Connection,
+    model_key: &str,
+) -> Result<(), rusqlite::Error> {
+    let table = format!("sparse_{}", model_key);
+    let ddl = format!(
+        "CREATE TABLE IF NOT EXISTS {table} (\
+            subject_id TEXT NOT NULL, \
+            namespace TEXT NOT NULL, \
+            kind TEXT NOT NULL, \
+            field TEXT NOT NULL, \
+            indices_json TEXT NOT NULL, \
+            values_blob BLOB NOT NULL, \
+            updated_at INTEGER NOT NULL, \
+            PRIMARY KEY(subject_id, namespace, field)\
+        ); \
+        CREATE INDEX IF NOT EXISTS idx_{table}_namespace_kind \
+        ON {table}(namespace, kind);"
+    );
+    conn.execute_batch(&ddl)
+}
+
+/// SQLite-backed sparse-vector store bound to one `sparse_{model_key}` table
+/// and a default namespace.
+pub struct SqliteSparseStore {
+    pool: Arc<ConnectionPool>,
+    is_file_backed: bool,
+    table_name: String,
+    namespace: String,
+}
+
+impl SqliteSparseStore {
+    /// Build a store over the `sparse_{model_key}` table.
+    ///
+    /// The table itself is not created here; see `ensure_sparse_schema`.
+    pub fn new(
+        pool: Arc<ConnectionPool>,
+        is_file_backed: bool,
+        model_key: String,
+        namespace: String,
+    ) -> Result<Self, StorageError> {
+        let table_name = format!("sparse_{}", model_key);
+        Ok(Self {
+            pool,
+            is_file_backed,
+            table_name,
+            namespace,
+        })
+    }
+
+    /// Run `f` against the pool's writer connection on a blocking thread,
+    /// mapping every failure to a sparse-capability `StorageError` for `op`.
+    async fn with_writer<F, R>(&self, op: &'static str, f: F) -> Result<R, StorageError>
+    where
+        F: FnOnce(&rusqlite::Connection) -> Result<R, rusqlite::Error> + Send + 'static,
+        R: Send + 'static,
+    {
+        let pool = Arc::clone(&self.pool);
+        tokio::task::spawn_blocking(move || {
+            let guard = pool.try_writer().map_err(|e| map_sqlite_err(e, op))?;
+            f(guard.conn()).map_err(|e| map_err(e, op))
+        })
+        .await
+        .map_err(|e| StorageError::driver(StorageCapability::Sparse, op, e))?
+    }
+
+    /// Run `f` against a read connection on a blocking thread.
+    ///
+    /// File-backed stores open a standalone read-only connection per call;
+    /// otherwise the pool's reader is used.
+    async fn with_reader<F, R>(&self, op: &'static str, f: F) -> Result<R, StorageError>
+    where
+        F: FnOnce(&rusqlite::Connection) -> Result<R, rusqlite::Error> + Send + 'static,
+        R: Send + 'static,
+    {
+        if self.is_file_backed {
+            // For file-backed DBs open a standalone read-only connection.
+            let config = self.pool.config();
+            let path = config
+                .path
+                .as_ref()
+                .ok_or_else(|| StorageError::Pool {
+                    operation: "sparse_reader".into(),
+                    message: "in-memory databases do not support standalone connections".into(),
+                })?
+                .clone();
+            // Open inside the blocking task so file I/O never runs on the
+            // async executor thread.
+            tokio::task::spawn_blocking(move || {
+                let conn = rusqlite::Connection::open_with_flags(
+                    path,
+                    rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
+                        | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX
+                        | rusqlite::OpenFlags::SQLITE_OPEN_URI,
+                )
+                .map_err(|e| map_err(e, op))?;
+                f(&conn).map_err(|e| map_err(e, op))
+            })
+            .await
+            .map_err(|e| StorageError::driver(StorageCapability::Sparse, op, e))?
+        } else {
+            let pool = Arc::clone(&self.pool);
+            tokio::task::spawn_blocking(move || {
+                let guard = pool.reader().map_err(|e| map_sqlite_err(e, op))?;
+                f(guard.conn()).map_err(|e| map_err(e, op))
+            })
+            .await
+            .map_err(|e| StorageError::driver(StorageCapability::Sparse, op, e))?
+        }
+    }
+
+    /// Insert or replace the vector stored for `(subject_id, namespace, field)`.
+    ///
+    /// The vector is not validated here; `insert_sparse` validates first.
+    async fn upsert_sparse_vector(
+        &self,
+        subject_id: Uuid,
+        kind: SubstrateKind,
+        namespace: &str,
+        field: &str,
+        vector: SparseVector,
+    ) -> Result<(), StorageError> {
+        let table = self.table_name.clone();
+        let ns = namespace.to_string();
+        let field = field.to_string();
+        let id_str = subject_id.to_string();
+        let kind_str = kind.to_string();
+
+        self.with_writer("sparse_upsert", move |conn| {
+            // Serialization happens on the way *into* SQL, so report it as a
+            // to-SQL conversion failure.
+            let indices_json = serde_json::to_string(&vector.indices)
+                .map_err(|e| rusqlite::Error::ToSqlConversionFailure(Box::new(e)))?;
+            let values_blob = f32_slice_to_le_bytes(&vector.values);
+            let now = chrono::Utc::now().timestamp();
+            let sql = format!(
+                "INSERT INTO {table} \
+                 (subject_id, namespace, kind, field, indices_json, values_blob, updated_at) \
+                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) \
+                 ON CONFLICT(subject_id, namespace, field) DO UPDATE SET \
+                 kind = excluded.kind, \
+                 indices_json = excluded.indices_json, \
+                 values_blob = excluded.values_blob, \
+                 updated_at = excluded.updated_at"
+            );
+            conn.execute(
+                &sql,
+                rusqlite::params![
+                    &id_str,
+                    &ns,
+                    &kind_str,
+                    &field,
+                    &indices_json,
+                    values_blob,
+                    now
+                ],
+            )?;
+            Ok(())
+        })
+        .await
+    }
+
+    /// Upsert many records inside one `BEGIN IMMEDIATE` transaction.
+    ///
+    /// Invalid or failing records are counted in `failed` and skipped; they do
+    /// not abort the batch. `first_error` carries the first failure message.
+    async fn insert_sparse_batch(
+        &self,
+        records: Vec<SparseRecord>,
+    ) -> Result<BatchWriteSummary, StorageError> {
+        let table = self.table_name.clone();
+        let attempted = records.len() as u64;
+
+        self.with_writer("sparse_insert_batch", move |conn| {
+            // `kind` is refreshed on conflict, matching `upsert_sparse_vector`,
+            // so a batch re-index cannot leave a stale kind behind.
+            let sql = format!(
+                "INSERT INTO {table} \
+                 (subject_id, namespace, kind, field, indices_json, values_blob, updated_at) \
+                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) \
+                 ON CONFLICT(subject_id, namespace, field) DO UPDATE SET \
+                 kind = excluded.kind, \
+                 indices_json = excluded.indices_json, \
+                 values_blob = excluded.values_blob, \
+                 updated_at = excluded.updated_at"
+            );
+
+            conn.execute_batch("BEGIN IMMEDIATE")?;
+            let mut affected = 0u64;
+            let mut failed = 0u64;
+            let mut first_error = String::new();
+
+            for record in &records {
+                // Skip invalid records rather than aborting the batch.
+                if validate_sparse_vector(&record.vector, "sparse_insert_batch").is_err() {
+                    if first_error.is_empty() {
+                        first_error =
+                            format!("invalid sparse vector for subject {}", record.subject_id);
+                    }
+                    failed += 1;
+                    continue;
+                }
+
+                let indices_json = match serde_json::to_string(&record.vector.indices) {
+                    Ok(j) => j,
+                    Err(e) => {
+                        if first_error.is_empty() {
+                            first_error = e.to_string();
+                        }
+                        failed += 1;
+                        continue;
+                    }
+                };
+                let values_blob = f32_slice_to_le_bytes(&record.vector.values);
+                let now = record.updated_at.timestamp();
+                let id_str = record.subject_id.to_string();
+                let kind_str = record.kind.to_string();
+
+                match conn.execute(
+                    &sql,
+                    rusqlite::params![
+                        &id_str,
+                        &record.namespace,
+                        &kind_str,
+                        &record.field,
+                        &indices_json,
+                        values_blob,
+                        now
+                    ],
+                ) {
+                    Ok(_) => affected += 1,
+                    Err(e) => {
+                        if first_error.is_empty() {
+                            first_error = e.to_string();
+                        }
+                        failed += 1;
+                    }
+                }
+            }
+
+            // A failed COMMIT would leave the transaction open on the writer
+            // connection; roll back (best effort) before surfacing the error.
+            if let Err(e) = conn.execute_batch("COMMIT") {
+                let _ = conn.execute_batch("ROLLBACK");
+                return Err(e);
+            }
+            Ok(BatchWriteSummary {
+                attempted,
+                affected,
+                failed,
+                first_error,
+            })
+        })
+        .await
+    }
+
+    /// Delete every row for `subject_id` in this store's namespace.
+    ///
+    /// Returns `true` when at least one row was removed.
+    async fn delete_sparse_subject(&self, subject_id: Uuid) -> Result<bool, StorageError> {
+        let table = self.table_name.clone();
+        let namespace = self.namespace.clone();
+        let id_str = subject_id.to_string();
+
+        self.with_writer("sparse_delete", move |conn| {
+            let sql = format!("DELETE FROM {table} WHERE subject_id = ?1 AND namespace = ?2");
+            let deleted = conn.execute(&sql, rusqlite::params![&id_str, &namespace])?;
+            Ok(deleted > 0)
+        })
+        .await
+    }
+
+    /// Score every candidate row in the namespace (optionally filtered by
+    /// kind) by sparse dot product against the query and return the `top_k`
+    /// best, ranked from 1.
+    ///
+    /// Uses `request.namespace` when present, otherwise the store's namespace.
+    /// Rows whose stored payload cannot be decoded are skipped.
+    async fn search_sparse_vectors(
+        &self,
+        request: SparseSearchRequest,
+    ) -> Result<Vec<SparseSearchHit>, StorageError> {
+        let table = self.table_name.clone();
+        let ns = request
+            .namespace
+            .clone()
+            .unwrap_or_else(|| self.namespace.clone());
+        let kind_filter = request.kind.map(|k| k.to_string());
+        let query = request.query;
+        let top_k = request.top_k as usize;
+
+        self.with_reader("sparse_search", move |conn| {
+            // Load candidate rows for namespace (and optional kind).
+            let (sql, kind_str_ref) = if let Some(ref kind_str) = kind_filter {
+                (
+                    format!(
+                        "SELECT subject_id, indices_json, values_blob \
+                         FROM {table} WHERE namespace = ?1 AND kind = ?2"
+                    ),
+                    Some(kind_str.as_str()),
+                )
+            } else {
+                (
+                    format!(
+                        "SELECT subject_id, indices_json, values_blob \
+                         FROM {table} WHERE namespace = ?1"
+                    ),
+                    None,
+                )
+            };
+
+            let mut stmt = conn.prepare(&sql)?;
+
+            // Collect rows.
+            let rows: Vec<rusqlite::Result<(String, String, Vec<u8>)>> =
+                if let Some(kind_str) = kind_str_ref {
+                    stmt.query_map(rusqlite::params![&ns, kind_str], |row| {
+                        Ok((row.get(0)?, row.get(1)?, row.get(2)?))
+                    })?
+                    .collect()
+                } else {
+                    stmt.query_map(rusqlite::params![&ns], |row| {
+                        Ok((row.get(0)?, row.get(1)?, row.get(2)?))
+                    })?
+                    .collect()
+                };
+
+            // Compute sparse dot product for each candidate.
+            let mut scored: Vec<(Uuid, f64)> = Vec::new();
+            for row_result in rows {
+                let (id_str, indices_json, values_blob) = row_result?;
+
+                let subject_id = Uuid::parse_str(&id_str).map_err(|e| {
+                    rusqlite::Error::FromSqlConversionFailure(
+                        0,
+                        rusqlite::types::Type::Text,
+                        Box::new(e),
+                    )
+                })?;
+
+                // Skip rows with an undecodable payload instead of scoring
+                // them as if they were empty.
+                let stored_indices: Vec<u32> = match serde_json::from_str(&indices_json) {
+                    Ok(indices) => indices,
+                    Err(_) => continue,
+                };
+                let stored_values = match le_bytes_to_f32_vec(&values_blob) {
+                    Some(values) => values,
+                    None => continue,
+                };
+
+                if stored_indices.len() != stored_values.len() {
+                    continue;
+                }
+
+                // Sparse dot product using merge of sorted index arrays.
+                let score = sparse_dot_product(
+                    &query.indices,
+                    &query.values,
+                    &stored_indices,
+                    &stored_values,
+                );
+                scored.push((subject_id, score));
+            }
+
+            // Sort descending by score, take top_k, assign 1-based rank.
+            // Ties break on subject_id so the result does not depend on the
+            // order in which SQLite happens to return rows.
+            scored.sort_by(|a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+            scored.truncate(top_k);
+
+            let hits = scored
+                .into_iter()
+                .enumerate()
+                .map(|(i, (subject_id, score))| SparseSearchHit {
+                    subject_id,
+                    score: DeterministicScore::from_f64(score),
+                    rank: (i + 1) as u32,
+                })
+                .collect();
+
+            Ok(hits)
+        })
+        .await
+    }
+
+    /// Count the rows stored under this store's namespace.
+    async fn count_sparse_rows(&self) -> Result<u64, StorageError> {
+        let table = self.table_name.clone();
+        let namespace = self.namespace.clone();
+        self.with_reader("sparse_count", move |conn| {
+            let sql = format!("SELECT COUNT(*) FROM {table} WHERE namespace = ?1");
+            let count: i64 =
+                conn.query_row(&sql, rusqlite::params![&namespace], |row| row.get(0))?;
+            Ok(count as u64)
+        })
+        .await
+    }
+}
+
+/// Sparse dot product via merge of two sorted index arrays.
+///
+/// Both index slices must be sorted ascending; each `*_val[i]` pairs with
+/// `*_idx[i]`. Products are accumulated in f64.
+fn sparse_dot_product(q_idx: &[u32], q_val: &[f32], s_idx: &[u32], s_val: &[f32]) -> f64 {
+    let mut dot = 0.0f64;
+    let mut qi = 0;
+    let mut si = 0;
+    while qi < q_idx.len() && si < s_idx.len() {
+        match q_idx[qi].cmp(&s_idx[si]) {
+            std::cmp::Ordering::Equal => {
+                dot += q_val[qi] as f64 * s_val[si] as f64;
+                qi += 1;
+                si += 1;
+            }
+            std::cmp::Ordering::Less => qi += 1,
+            std::cmp::Ordering::Greater => si += 1,
+        }
+    }
+    dot
+}
+
+#[async_trait]
+impl SparseStore for SqliteSparseStore {
+    async fn insert_sparse(
+        &self,
+        subject_id: Uuid,
+        kind: SubstrateKind,
+        namespace: &str,
+        field: &str,
+        vector: SparseVector,
+    ) -> Result<(), StorageError> {
+        validate_sparse_vector(&vector, "sparse_insert")?;
+        self.upsert_sparse_vector(subject_id, kind, namespace, field, vector)
+            .await
+    }
+
+    async fn insert_batch(
+        &self,
+        records: Vec<SparseRecord>,
+    ) -> Result<BatchWriteSummary, StorageError> {
+        self.insert_sparse_batch(records).await
+    }
+
+    async fn delete(&self, subject_id: Uuid) -> Result<bool, StorageError> {
+        self.delete_sparse_subject(subject_id).await
+    }
+
+    async fn search_sparse(
+        &self,
+        request: SparseSearchRequest,
+    ) -> Result<Vec<SparseSearchHit>, StorageError> {
+        validate_sparse_vector(&request.query, "sparse_search")?;
+        self.search_sparse_vectors(request).await
+    }
+
+    async fn count(&self) -> Result<u64, StorageError> {
+        self.count_sparse_rows().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::pool::{ConnectionPool, PoolConfig};
+
+    fn make_store(model_key: &str) -> SqliteSparseStore {
+        let config = PoolConfig {
+            path: None,
+            ..PoolConfig::default()
+        };
+        let pool = Arc::new(ConnectionPool::new(config).expect("pool"));
+        // Create schema.
+        {
+            let writer = pool.try_writer().expect("writer");
+            ensure_sparse_schema(writer.conn(), model_key).expect("schema");
+        }
+        SqliteSparseStore::new(pool, false, model_key.to_string(), "ns:test".to_string())
+            .expect("store")
+    }
+
+    fn sv(indices: Vec<u32>, values: Vec<f32>) -> SparseVector {
+        SparseVector { indices, values }
+    }
+
+    #[test]
+    fn dot_product_merges_sorted_indices() {
+        // Shared indices are 2 and 5: 2.0 * 0.5 + 3.0 * 2.0 = 7.0.
+        let dot = sparse_dot_product(&[0, 2, 5], &[1.0, 2.0, 3.0], &[2, 5, 7], &[0.5, 2.0, 9.0]);
+        assert_eq!(dot, 7.0);
+        assert_eq!(sparse_dot_product(&[0], &[1.0], &[1], &[1.0]), 0.0);
+    }
+
+    #[test]
+    fn f32_le_bytes_roundtrip() {
+        let values = [0.0f32, 1.5, -2.25];
+        let blob = f32_slice_to_le_bytes(&values);
+        assert_eq!(blob.len(), 12);
+        assert_eq!(&blob[4..8], &1.5f32.to_le_bytes());
+        assert_eq!(le_bytes_to_f32_vec(&blob), Some(values.to_vec()));
+        assert_eq!(le_bytes_to_f32_vec(&blob[..5]), None);
+    }
+
+    #[tokio::test]
+    async fn insert_and_count() {
+        let store = make_store("test_count");
+        let id = Uuid::new_v4();
+        store
+            .insert_sparse(
+                id,
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![0, 2], vec![1.0, 0.5]),
+            )
+            .await
+            .unwrap();
+        assert_eq!(store.count().await.unwrap(), 1);
+    }
+
+    #[tokio::test]
+    async fn insert_and_search() {
+        let store = make_store("test_search");
+        let id1 = Uuid::new_v4();
+        let id2 = Uuid::new_v4();
+        store
+            .insert_sparse(
+                id1,
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![0, 1], vec![1.0, 0.0]),
+            )
+            .await
+            .unwrap();
+        store
+            .insert_sparse(
+                id2,
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![0, 1], vec![0.0, 1.0]),
+            )
+            .await
+            .unwrap();
+
+        let hits = store
+            .search_sparse(SparseSearchRequest {
+                query: sv(vec![0], vec![1.0]),
+                top_k: 2,
+                namespace: Some("ns:test".into()),
+                kind: None,
+            })
+            .await
+            .unwrap();
+
+        assert!(!hits.is_empty());
+        assert_eq!(hits[0].subject_id, id1, "id1 should rank first");
+        assert_eq!(hits[0].rank, 1);
+    }
+
+    #[tokio::test]
+    async fn tied_scores_order_by_subject_id() {
+        let store = make_store("test_ties");
+        let mut ids = vec![Uuid::new_v4(), Uuid::new_v4(), Uuid::new_v4()];
+        for id in &ids {
+            store
+                .insert_sparse(
+                    *id,
+                    SubstrateKind::Entity,
+                    "ns:test",
+                    "body",
+                    sv(vec![0], vec![1.0]),
+                )
+                .await
+                .unwrap();
+        }
+        ids.sort();
+
+        let hits = store
+            .search_sparse(SparseSearchRequest {
+                query: sv(vec![0], vec![1.0]),
+                top_k: 3,
+                namespace: Some("ns:test".into()),
+                kind: None,
+            })
+            .await
+            .unwrap();
+
+        let got: Vec<Uuid> = hits.iter().map(|h| h.subject_id).collect();
+        assert_eq!(got, ids, "equal scores must order by subject_id");
+    }
+
+    #[tokio::test]
+    async fn delete_removes_row() {
+        let store = make_store("test_delete");
+        let id = Uuid::new_v4();
+        store
+            .insert_sparse(
+                id,
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![1], vec![1.0]),
+            )
+            .await
+            .unwrap();
+        assert_eq!(store.count().await.unwrap(), 1);
+
+        let deleted = store.delete(id).await.unwrap();
+        assert!(deleted);
+        assert_eq!(store.count().await.unwrap(), 0);
+    }
+
+    #[tokio::test]
+    async fn mismatched_lengths_rejected() {
+        let store = make_store("test_mismatch");
+        let result = store
+            .insert_sparse(
+                Uuid::new_v4(),
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                SparseVector {
+                    indices: vec![0, 1],
+                    values: vec![1.0],
+                },
+            )
+            .await;
+        assert!(matches!(result, Err(StorageError::InvalidInput { .. })));
+    }
+
+    #[tokio::test]
+    async fn non_finite_values_rejected() {
+        let store = make_store("test_nonfinite");
+        let result = store
+            .insert_sparse(
+                Uuid::new_v4(),
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![0], vec![f32::NAN]),
+            )
+            .await;
+        assert!(matches!(result, Err(StorageError::InvalidInput { .. })));
+    }
+
+    #[tokio::test]
+    async fn duplicate_indices_rejected() {
+        let store = make_store("test_dup_idx");
+        let result = store
+            .insert_sparse(
+                Uuid::new_v4(),
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![0, 0], vec![1.0, 2.0]),
+            )
+            .await;
+        assert!(matches!(result, Err(StorageError::InvalidInput { .. })));
+    }
+
+    #[tokio::test]
+    async fn empty_vector_rejected() {
+        let store = make_store("test_empty");
+        let result = store
+            .insert_sparse(
+                Uuid::new_v4(),
+                SubstrateKind::Entity,
+                "ns:test",
+                "body",
+                sv(vec![], vec![]),
+            )
+            .await;
+        assert!(matches!(result, Err(StorageError::InvalidInput { .. })));
+    }
+
+    #[tokio::test]
+    async fn namespace_isolation() {
+        let store = make_store("test_ns_iso");
+        let id = Uuid::new_v4();
+        store
+            .insert_sparse(
+                id,
+                SubstrateKind::Entity,
+                "ns:a",
+                "body",
+                sv(vec![0], vec![1.0]),
+            )
+            .await
+            .unwrap();
+
+        let hits = store
+            .search_sparse(SparseSearchRequest {
+                query: sv(vec![0], vec![1.0]),
+                top_k: 5,
+                namespace: Some("ns:b".into()),
+                kind: None,
+            })
+            .await
+            .unwrap();
+        assert!(hits.is_empty(), "ns:b should not see ns:a data");
+    }
+
+    #[tokio::test]
+    async fn insert_batch_happy_path() {
+        use chrono::Utc;
+        use khive_types::SubstrateKind;
+
+        let store = make_store("test_batch");
+        let id1 = Uuid::new_v4();
+        let id2 = Uuid::new_v4();
+        let records = vec![
+            SparseRecord {
+                subject_id: id1,
+                kind: SubstrateKind::Entity,
+                namespace: "ns:test".into(),
+                field: "body".into(),
+                vector: sv(vec![0, 3], vec![0.5, 0.8]),
+                updated_at: Utc::now(),
+            },
+            SparseRecord {
+                subject_id: id2,
+                kind: SubstrateKind::Entity,
+                namespace: "ns:test".into(),
+                field: "body".into(),
+                vector: sv(vec![1], vec![1.0]),
+                updated_at: Utc::now(),
+            },
+        ];
+        let summary = store.insert_batch(records).await.unwrap();
+        assert_eq!(summary.attempted, 2);
+        assert_eq!(summary.affected, 2);
+        assert_eq!(summary.failed, 0);
+        assert_eq!(store.count().await.unwrap(), 2);
+    }
+}
diff --git a/crates/khive-db/src/stores/vectors.rs b/crates/khive-db/src/stores/vectors.rs
index 39633c6c..3fa06de5 100644
--- a/crates/khive-db/src/stores/vectors.rs
+++ b/crates/khive-db/src/stores/vectors.rs
@@ -183,11 +183,22 @@ impl VectorStore for SqliteVecStore {
         subject_id: Uuid,
         kind: SubstrateKind,
         namespace: &str,
-        embedding: Vec<f32>,
+        field: &str,
+        vectors: Vec<Vec<f32>>,
     ) -> Result<(), StorageError> {
+        if vectors.len() != 1 {
+            return Err(StorageError::Unsupported {
+                capability: StorageCapability::Vectors,
+                operation: "vec_insert".into(),
+                message: "sqlite-vec supports exactly one vector per record".into(),
+            });
+        }
+        let embedding =
vectors.into_iter().next().expect("len checked"); + let table = self.table_name.clone(); let dims = self.dimensions; let namespace = namespace.to_string(); + let field = field.to_string(); let kind_str = kind.to_string(); if embedding.len() == dims { @@ -215,13 +226,13 @@ impl VectorStore for SqliteVecStore { )?; let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, embedding) VALUES (?1, ?2, ?3, ?4)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", table ); let blob = f32_slice_as_bytes(&embedding); conn.execute( &ins_sql, - rusqlite::params![subject_id.to_string(), &namespace, &kind_str, blob], + rusqlite::params![subject_id.to_string(), &namespace, &kind_str, &field, blob], )?; Ok(()) }) @@ -242,7 +253,7 @@ impl VectorStore for SqliteVecStore { table ); let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, embedding) VALUES (?1, ?2, ?3, ?4)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", table ); @@ -251,22 +262,27 @@ impl VectorStore for SqliteVecStore { let mut failed = 0u64; for record in &records { - if record.embedding.len() != dims { + if record.vectors.len() != 1 { failed += 1; continue; } - if non_finite_index(&record.embedding).is_some() { + let embedding = &record.vectors[0]; + if embedding.len() != dims { failed += 1; continue; } - let blob = f32_slice_as_bytes(&record.embedding); + if non_finite_index(embedding).is_some() { + failed += 1; + continue; + } + let blob = f32_slice_as_bytes(embedding); let id_str = record.subject_id.to_string(); let kind_str = record.kind.to_string(); // Use the record's own namespace — the caller is responsible for namespace. 
let _ = conn.execute(&del_sql, rusqlite::params![&id_str, &record.namespace]); match conn.execute( &ins_sql, - rusqlite::params![&id_str, &record.namespace, &kind_str, blob], + rusqlite::params![&id_str, &record.namespace, &kind_str, &record.field, blob], ) { Ok(_) => affected += 1, Err(_) => failed += 1, @@ -318,6 +334,22 @@ impl VectorStore for SqliteVecStore { &self, request: VectorSearchRequest, ) -> Result, StorageError> { + if request.filter.as_ref().is_some_and(|f| !f.is_empty()) { + return Err(StorageError::Unsupported { + capability: StorageCapability::Vectors, + operation: "vec_search".into(), + message: "use search_with_filter for filtered queries".into(), + }); + } + if request.query_vectors.len() != 1 { + return Err(StorageError::Unsupported { + capability: StorageCapability::Vectors, + operation: "vec_search".into(), + message: "sqlite-vec supports exactly one query vector per search".into(), + }); + } + let query_embedding = request.query_vectors[0].clone(); + let table = self.table_name.clone(); let dims = self.dimensions; // Use request.namespace if present; fall back to self.namespace. 
@@ -327,20 +359,20 @@ impl VectorStore for SqliteVecStore { .unwrap_or_else(|| self.namespace.clone()); let kind_filter = request.kind.map(|k| k.to_string()); - if request.query_embedding.len() == dims { - if let Some(idx) = non_finite_index(&request.query_embedding) { + if query_embedding.len() == dims { + if let Some(idx) = non_finite_index(&query_embedding) { return Err(non_finite_vector_error( "vec_search", idx, - request.query_embedding[idx], + query_embedding[idx], )); } } self.with_reader("vec_search", move |conn| { - if request.query_embedding.len() != dims { + if query_embedding.len() != dims { return Err(rusqlite::Error::InvalidParameterCount( - request.query_embedding.len(), + query_embedding.len(), dims, )); } @@ -365,7 +397,7 @@ impl VectorStore for SqliteVecStore { kind_clause = subquery_kind_clause ); - let query_blob = f32_slice_as_bytes(&request.query_embedding); + let query_blob = f32_slice_as_bytes(&query_embedding); let mut stmt = conn.prepare(&sql)?; // Collect rows into a Vec to avoid holding MappedRows (which is @@ -445,6 +477,12 @@ impl VectorStore for SqliteVecStore { supports_batch_search: false, supports_quantization: false, supports_update: false, + supports_orphan_sweep: false, + // sqlite-vec uses subject_id as PRIMARY KEY — only one vector per + // subject per namespace is stored. Callers must use a single canonical + // field (e.g. "content") and are not permitted to store both + // "entity.title" and "entity.body" as separate vectors in one table. + supports_multi_field: false, // sqlite-vec 0.1.9 rejects dimensions > SQLITE_VEC_VEC0_MAX_DIMENSIONS (8192). // Reporting 8192 lets callers know that 4097–8192 dimensional models are // supported. 
The previous value of 4096 was the K_MAX (neighbors per query) @@ -597,6 +635,10 @@ mod capabilities_tests { !caps.supports_update, "sqlite-vec does not support in-place update" ); + assert!( + !caps.supports_orphan_sweep, + "sqlite-vec does not support orphan sweep" + ); // sqlite-vec 0.1.9: SQLITE_VEC_VEC0_MAX_DIMENSIONS = 8192. assert_eq!(caps.max_dimensions, Some(8192)); assert_eq!( diff --git a/crates/khive-db/tests/contract.rs b/crates/khive-db/tests/contract.rs new file mode 100644 index 00000000..76ce6e90 --- /dev/null +++ b/crates/khive-db/tests/contract.rs @@ -0,0 +1,13 @@ +//! Contract tests for the sqlite backend (ADR-009 §backend-contract-tests). +//! +//! Exercises the eight storage capability traits (`SqlAccess`, `EntityStore`, +//! `GraphStore`, `NoteStore`, `EventStore`, `VectorStore`, `SparseStore`, +//! `TextSearch`) against both in-memory and file-backed SQLite backends. +//! The harness is structured to become a cross-backend conformance suite when +//! a second backend ships (e.g. `khive-db-postgres`). + +#[path = "contract/vector_filter.rs"] +mod vector_filter; + +#[path = "contract/backend.rs"] +mod backend; diff --git a/crates/khive-db/tests/contract/backend.rs b/crates/khive-db/tests/contract/backend.rs new file mode 100644 index 00000000..bbe296f5 --- /dev/null +++ b/crates/khive-db/tests/contract/backend.rs @@ -0,0 +1,387 @@ +//! Backend contract tests (ADR-009 §backend-contract-tests). +//! +//! Exercises the storage-capability traits (`SqlAccess`, `EntityStore`, +//! `GraphStore`, `NoteStore`, `TextSearch`, `VectorStore`) against both +//! in-memory (`:memory:`) and file-backed SQLite backends. +//! +//! The harness is structured so that when a second backend ships (e.g. +//! `khive-db-postgres`), the same helper functions become a cross-backend +//! conformance suite: each `test_*` function is parameterised over a +//! `StorageBackend`, not hardwired to in-memory or file-backed. 
+ +use khive_db::StorageBackend; +use khive_storage::entity::Entity; +use khive_storage::note::Note; +use khive_storage::types::{ + DeleteMode, Direction, Edge, LinkId, NeighborQuery, SqlStatement, SqlValue, TextDocument, + TextFilter, TextQueryMode, TextSearchRequest, +}; +use khive_types::EdgeRelation; +use uuid::Uuid; + +// ---- Factory helpers ---- + +fn memory_backend() -> StorageBackend { + StorageBackend::memory().expect("in-memory backend") +} + +fn file_backend(dir: &tempfile::TempDir, name: &str) -> StorageBackend { + StorageBackend::sqlite(dir.path().join(name)).expect("file backend") +} + +// ---- SqlAccess contract ---- + +async fn test_sql_access(backend: &StorageBackend) { + let sql = backend.sql(); + + let mut writer = sql.writer().await.expect("sql writer"); + writer + .execute_script( + "CREATE TABLE IF NOT EXISTS ct_sql (id TEXT PRIMARY KEY, val INTEGER)".into(), + ) + .await + .expect("create table"); + + let affected = writer + .execute(SqlStatement { + sql: "INSERT INTO ct_sql (id, val) VALUES (?1, ?2)".into(), + params: vec![SqlValue::Text("r1".into()), SqlValue::Integer(99)], + label: None, + }) + .await + .expect("insert"); + assert_eq!(affected, 1); + + let mut reader = sql.reader().await.expect("sql reader"); + let row = reader + .query_row(SqlStatement { + sql: "SELECT val FROM ct_sql WHERE id = ?1".into(), + params: vec![SqlValue::Text("r1".into())], + label: None, + }) + .await + .expect("query_row") + .expect("row should exist"); + + match &row.columns[0].value { + SqlValue::Integer(v) => assert_eq!(*v, 99), + other => panic!("expected Integer(99), got {other:?}"), + } +} + +#[tokio::test] +async fn sql_access_memory_contract() { + test_sql_access(&memory_backend()).await; +} + +#[tokio::test] +async fn sql_access_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_sql_access(&file_backend(&dir, "sql_access.db")).await; +} + +// ---- EntityStore contract ---- + +async fn test_entity_store(backend: &StorageBackend) { + 
let store = backend + .entities_for_namespace("ct_ns") + .expect("entity store"); + + let entity = Entity::new("ct_ns", "concept", "Test Entity"); + let id = entity.id; + + store.upsert_entity(entity).await.expect("upsert_entity"); + + let fetched = store + .get_entity(id) + .await + .expect("get_entity") + .expect("entity must exist"); + assert_eq!(fetched.id, id); + assert_eq!(fetched.name, "Test Entity"); + assert_eq!(fetched.kind, "concept"); + assert!(fetched.deleted_at.is_none()); + + // Soft-delete + let deleted = store + .delete_entity(id, DeleteMode::Soft) + .await + .expect("soft delete"); + assert!(deleted); + + // After soft delete, get_entity excludes the record (deleted_at IS NULL filter). + // This is the correct contract: soft-deleted records are invisible to get_entity. + let after = store.get_entity(id).await.expect("get after soft delete"); + assert!( + after.is_none(), + "soft-deleted entity should not appear via get_entity (deleted_at IS NULL filter)" + ); +} + +#[tokio::test] +async fn entity_store_memory_contract() { + test_entity_store(&memory_backend()).await; +} + +#[tokio::test] +async fn entity_store_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_entity_store(&file_backend(&dir, "entity.db")).await; +} + +// ---- GraphStore contract ---- + +async fn test_graph_store(backend: &StorageBackend) { + let entities = backend + .entities_for_namespace("ct_graph") + .expect("entity store"); + let graph = backend + .graph_for_namespace("ct_graph") + .expect("graph store"); + + let a_entity = Entity::new("ct_graph", "concept", "A"); + let b_entity = Entity::new("ct_graph", "concept", "B"); + let a = a_entity.id; + let b = b_entity.id; + entities.upsert_entity(a_entity).await.expect("upsert A"); + entities.upsert_entity(b_entity).await.expect("upsert B"); + + let edge_id = LinkId(Uuid::new_v4()); + let edge = Edge { + id: edge_id, + namespace: "ct_graph".to_string(), + source_id: a, + target_id: b, + relation: 
EdgeRelation::Extends, + weight: 1.0, + created_at: chrono::Utc::now(), + updated_at: chrono::Utc::now(), + deleted_at: None, + metadata: None, + target_backend: None, + }; + + graph.upsert_edge(edge).await.expect("upsert_edge"); + + // Query outgoing neighbors + let query = NeighborQuery { + direction: Direction::Out, + relations: None, + limit: Some(10), + min_weight: None, + }; + let neighbors = graph.neighbors(a, query).await.expect("neighbors"); + assert_eq!(neighbors.len(), 1); + assert_eq!(neighbors[0].node_id, b); + assert_eq!(neighbors[0].relation, EdgeRelation::Extends); + + // Per ADR-009 §target_backend: local edge must have NULL target_backend. + // The NeighborHit doesn't carry target_backend; verify through get_edge. + let fetched_edge = graph + .get_edge(edge_id) + .await + .expect("get_edge") + .expect("edge must exist"); + assert!( + fetched_edge.target_backend.is_none(), + "local edge must have NULL target_backend (ADR-009)" + ); + + // Soft-delete + let deleted = graph + .delete_edge(edge_id, DeleteMode::Soft) + .await + .expect("soft delete edge"); + assert!(deleted); + + let after = graph + .neighbors( + a, + NeighborQuery { + direction: Direction::Out, + relations: None, + limit: Some(10), + min_weight: None, + }, + ) + .await + .expect("neighbors after delete"); + assert!( + after.is_empty(), + "soft-deleted edge must not appear in neighbors" + ); +} + +#[tokio::test] +async fn graph_store_memory_contract() { + test_graph_store(&memory_backend()).await; +} + +#[tokio::test] +async fn graph_store_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_graph_store(&file_backend(&dir, "graph.db")).await; +} + +// ---- NoteStore contract ---- + +async fn test_note_store(backend: &StorageBackend) { + let store = backend.notes_for_namespace("ct_notes").expect("note store"); + + let note = Note::new("ct_notes", "observation", "Test note content"); + let id = note.id; + + store.upsert_note(note).await.expect("upsert_note"); + + let 
fetched = store + .get_note(id) + .await + .expect("get_note") + .expect("note must exist"); + assert_eq!(fetched.id, id); + assert_eq!(fetched.content, "Test note content"); + assert!(fetched.deleted_at.is_none()); + + // Soft-delete + let deleted = store + .delete_note(id, DeleteMode::Soft) + .await + .expect("soft delete note"); + assert!(deleted); + + // After soft delete, get_note excludes the record (deleted_at IS NULL filter). + let after = store.get_note(id).await.expect("get after delete"); + assert!( + after.is_none(), + "soft-deleted note should not appear via get_note (deleted_at IS NULL filter)" + ); +} + +#[tokio::test] +async fn note_store_memory_contract() { + test_note_store(&memory_backend()).await; +} + +#[tokio::test] +async fn note_store_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_note_store(&file_backend(&dir, "notes.db")).await; +} + +// ---- TextSearch contract ---- + +async fn test_text_search(backend: &StorageBackend) { + use khive_types::SubstrateKind; + + let store = backend.text("ct_fts").expect("text search"); + + let id = Uuid::new_v4(); + let doc = TextDocument { + subject_id: id, + kind: SubstrateKind::Entity, + title: Some("Rust Programming".to_string()), + body: "The Rust language provides memory safety without GC.".to_string(), + tags: vec!["rust".to_string()], + namespace: "ct_ns".to_string(), + metadata: None, + updated_at: chrono::Utc::now(), + }; + + store.upsert_document(doc).await.expect("upsert_document"); + + let results = store + .search(TextSearchRequest { + query: "memory safety".to_string(), + mode: TextQueryMode::Plain, + filter: Some(TextFilter { + namespaces: vec!["ct_ns".to_string()], + ..Default::default() + }), + top_k: 5, + snippet_chars: 64, + }) + .await + .expect("text search"); + + assert!(!results.is_empty(), "should find at least one result"); + assert_eq!(results[0].subject_id, id); + + let count = store + .count(TextFilter { + namespaces: vec!["ct_ns".to_string()], + 
..Default::default() + }) + .await + .expect("count"); + assert_eq!(count, 1); +} + +#[tokio::test] +async fn text_search_memory_contract() { + test_text_search(&memory_backend()).await; +} + +#[tokio::test] +async fn text_search_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_text_search(&file_backend(&dir, "fts.db")).await; +} + +// ---- VectorStore contract (feature-gated) ---- + +#[cfg(feature = "vectors")] +mod vector_contract { + use super::*; + use khive_storage::types::VectorSearchRequest; + use khive_types::SubstrateKind; + + async fn test_vector_store(backend: &StorageBackend) { + let store = backend + .vectors_for_namespace("ct_model", 4, "ct_ns") + .expect("vector store"); + + let id = Uuid::new_v4(); + store + .insert( + id, + SubstrateKind::Entity, + "ct_ns", + "content", + vec![vec![1.0, 0.0, 0.0, 0.0]], + ) + .await + .expect("vector insert"); + + let count = store.count().await.expect("vector count"); + assert_eq!(count, 1); + + let hits = store + .search(VectorSearchRequest { + query_vectors: vec![vec![1.0, 0.0, 0.0, 0.0]], + top_k: 1, + namespace: None, + kind: None, + filter: None, + backend_hints: None, + }) + .await + .expect("vector search"); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].subject_id, id); + assert!( + hits[0].score.to_f64() > 0.99, + "cosine score for identical vector should be > 0.99" + ); + } + + #[tokio::test] + async fn vector_store_memory_contract() { + test_vector_store(&memory_backend()).await; + } + + #[tokio::test] + async fn vector_store_file_contract() { + let dir = tempfile::tempdir().unwrap(); + test_vector_store(&file_backend(&dir, "vectors.db")).await; + } +} diff --git a/crates/khive-db/tests/contract/vector_filter.rs b/crates/khive-db/tests/contract/vector_filter.rs new file mode 100644 index 00000000..5b37ed35 --- /dev/null +++ b/crates/khive-db/tests/contract/vector_filter.rs @@ -0,0 +1,188 @@ +//! Contract tests for sqlite vector filter semantics (ADR-009, ADR-044). +//! +//! 
ADR-009 §294 requires backend contract tests under `khive-db/tests/contract/`. +//! ADR-044 §232 requires a compliance fixture covering non-empty `VectorSearchRequest.filter` +//! returning `Unsupported` on backends that do not implement pushdown. + +#[cfg(feature = "vectors")] +mod vector_filter_contract { + use khive_db::StorageBackend; + use khive_storage::types::{VectorMetadataFilter, VectorSearchRequest}; + use khive_types::SubstrateKind; + use uuid::Uuid; + + /// Regression (ADR-044 §4): `search()` must return `StorageError::Unsupported` + /// when the request carries a non-empty `VectorMetadataFilter`. This guards + /// callers from silently ignoring filter predicates on backends that do not + /// implement pushdown. + #[tokio::test] + async fn search_with_non_empty_filter_returns_unsupported() { + let backend = StorageBackend::memory().expect("in-memory backend"); + let store = backend.vectors("filter_test", 3).expect("vector store"); + + // Insert one record so the table is non-empty. + let id = Uuid::new_v4(); + store + .insert( + id, + SubstrateKind::Entity, + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], + ) + .await + .expect("insert"); + + // A request with a non-empty filter must be rejected. + let request = VectorSearchRequest { + query_vectors: vec![vec![1.0, 0.0, 0.0]], + top_k: 5, + namespace: None, + kind: None, + filter: Some(VectorMetadataFilter { + namespaces: vec!["local".into()], + kinds: vec![], + property_filters: vec![], + }), + backend_hints: None, + }; + + let result = store.search(request).await; + assert!( + result.is_err(), + "search() with non-empty filter must return Err" + ); + let err = result.unwrap_err(); + assert!( + matches!(err, khive_storage::error::StorageError::Unsupported { .. }), + "expected StorageError::Unsupported, got {err:?}" + ); + } + + /// Regression (ADR-044 §4): `search_with_filter()` default impl must delegate + /// to `search()` when the filter is empty, and return `Unsupported` otherwise. 
+ #[tokio::test] + async fn search_with_filter_empty_delegates_and_non_empty_rejects() { + let backend = StorageBackend::memory().expect("in-memory backend"); + let store = backend.vectors("filter_delegate", 3).expect("vector store"); + + let id = Uuid::new_v4(); + store + .insert( + id, + SubstrateKind::Entity, + "local", + "content", + vec![vec![0.5, 0.5, 0.0]], + ) + .await + .expect("insert"); + + let req = VectorSearchRequest { + query_vectors: vec![vec![0.5, 0.5, 0.0]], + top_k: 1, + namespace: None, + kind: None, + filter: None, + backend_hints: None, + }; + + // Empty filter: should delegate to search() and return results. + let empty_filter = VectorMetadataFilter::default(); + let ok = store + .search_with_filter(&req, &empty_filter) + .await + .expect("empty filter must succeed"); + assert_eq!(ok.len(), 1, "empty filter must return the inserted record"); + + // Non-empty filter: must return Unsupported. + let non_empty = VectorMetadataFilter { + namespaces: vec!["local".into()], + kinds: vec![], + property_filters: vec![], + }; + let err = store + .search_with_filter(&req, &non_empty) + .await + .expect_err("non-empty filter must fail on SqliteVecStore"); + assert!( + matches!(err, khive_storage::error::StorageError::Unsupported { .. }), + "expected StorageError::Unsupported, got {err:?}" + ); + } + + /// Schema upgrade regression (ADR-044 §3): opening a backend against a file-backed + /// database that already contains a `vec_` table WITHOUT the `field` column + /// must drop and recreate the table so that subsequent inserts succeed. + #[tokio::test] + async fn vectors_for_namespace_rebuilds_old_schema_table() { + let dir = tempfile::tempdir().unwrap(); + let db_path = dir.path().join("old_schema.db"); + + // Step 1: create a database with the OLD vec0 schema (no `field` column). + { + let old_backend = StorageBackend::sqlite(&db_path).expect("open db"); + // Bypass vectors_for_namespace to inject the old DDL directly. 
+ let pool = old_backend.pool_arc(); + let writer = pool.try_writer().expect("writer"); + // Load the sqlite-vec extension before using vec0. + khive_db::extension::ensure_extensions_loaded(); + writer + .conn() + .execute_batch( + "CREATE VIRTUAL TABLE vec_old_model USING vec0(\ + subject_id TEXT PRIMARY KEY, \ + namespace TEXT NOT NULL, \ + kind TEXT NOT NULL, \ + embedding float[3] distance_metric=cosine\ + )", + ) + .expect("create old-schema table"); + // Insert a row in the old shape to confirm the table is live. + let blob: Vec = (0u32..3).flat_map(|i| (i as f32).to_le_bytes()).collect(); + writer + .conn() + .execute( + "INSERT INTO vec_old_model (subject_id, namespace, kind, embedding) \ + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["old-id-1", "local", "Entity", blob.as_slice()], + ) + .expect("insert into old table"); + } + + // Step 2: reopen the database and call vectors_for_namespace — should detect + // the old schema and rebuild the table transparently. + let new_backend = StorageBackend::sqlite(&db_path).expect("reopen db"); + let store = new_backend + .vectors_for_namespace("old_model", 3, "local") + .expect("vectors_for_namespace must succeed after schema rebuild"); + + // Step 3: insert and search in the new shape must work. 
+ let id = Uuid::new_v4(); + store + .insert( + id, + SubstrateKind::Entity, + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], + ) + .await + .expect("insert into rebuilt table"); + + let hits = store + .search(VectorSearchRequest { + query_vectors: vec![vec![1.0, 0.0, 0.0]], + top_k: 1, + namespace: None, + kind: None, + filter: None, + backend_hints: None, + }) + .await + .expect("search after schema rebuild"); + + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].subject_id, id); + } +} diff --git a/crates/khive-fold/Cargo.toml b/crates/khive-fold/Cargo.toml index 2e7a3d94..81f4f327 100644 --- a/crates/khive-fold/Cargo.toml +++ b/crates/khive-fold/Cargo.toml @@ -11,9 +11,15 @@ categories.workspace = true description = "Cognitive primitives — Fold, Anchor, Objective, Selector" [dependencies] -khive-score = { version = "0.2.0", path = "../khive-score" } +khive-score = { version = "0.2.1", path = "../khive-score" } +# ADR-024 target dependency boundary — khive-types added per F134 +# blake3 feature enables Hash32::from_blake3 for checkpoint hashing (F-NEW-5) +khive-types = { version = "0.2.1", path = "../khive-types", features = ["blake3"] } +# serde/uuid/thiserror/chrono remain because FoldContext uses them (context.rs is out of F134 scope) serde = { workspace = true } serde_json = { workspace = true } uuid = { workspace = true } chrono = { workspace = true } thiserror = { workspace = true } +# blake3 for checkpoint hash computation/verification (F-NEW-5) +blake3 = "1" diff --git a/crates/khive-fold/src/anchor.rs b/crates/khive-fold/src/anchor.rs index a3ac98ee..8476c4d7 100644 --- a/crates/khive-fold/src/anchor.rs +++ b/crates/khive-fold/src/anchor.rs @@ -76,7 +76,7 @@ impl AnchorGraph { } /// The Anchor primitive. -pub trait Anchor { +pub trait Anchor: Send + Sync { /// Trace the causal chain from a starting anchor to its sources. 
fn trace( &self, @@ -91,7 +91,7 @@ pub trait Anchor { graph: &AnchorGraph, outcome: &AnchorRef, max_depth: usize, - ) -> Result, FoldError>; + ) -> Result, FoldError>; } /// A BFS-based anchor implementation. @@ -143,7 +143,7 @@ impl Anchor for BfsAnchor { graph: &AnchorGraph, outcome: &AnchorRef, max_depth: usize, - ) -> Result, FoldError> { + ) -> Result, FoldError> { if graph.find_node(outcome.id).is_none() { return Err(FoldError::AnchorNotFound(outcome.id.to_string())); } @@ -153,7 +153,7 @@ impl Anchor for BfsAnchor { let mut queue = std::collections::VecDeque::new(); visited.insert(outcome.id); - queue.push_back((outcome.id, 0usize, 1.0f32)); + queue.push_back((outcome.id, 0usize, 1.0f64)); while let Some((current_id, depth, weight)) = queue.pop_front() { if current_id != outcome.id { @@ -163,7 +163,7 @@ impl Anchor for BfsAnchor { } if depth < max_depth { - let predecessors: Vec<(Uuid, f32)> = graph + let predecessors: Vec<(Uuid, f64)> = graph .incoming(current_id) .filter(|(id, _)| visited.insert(*id)) .map(|(id, _)| (id, weight * 0.5)) @@ -284,6 +284,21 @@ mod tests { // intermediate should be credited with weight > 0 let inter_credit = credits.iter().find(|(r, _)| r.id == intermediate.id); assert!(inter_credit.is_some()); - assert!(inter_credit.unwrap().1 > 0.0); + assert!(inter_credit.unwrap().1 > 0.0f64); + } + + #[test] + fn credit_weights_are_f64() { + let mut graph = AnchorGraph::new(); + let source = make_ref(10, "source"); + let outcome = make_ref(11, "outcome"); + graph.add_node(source.clone()); + graph.add_node(outcome.clone()); + graph.add_edge(source.id, outcome.id, "causes"); + + let credits: Vec<(AnchorRef, f64)> = BfsAnchor.credit(&graph, &outcome, 2).unwrap(); + assert!(!credits.is_empty()); + let w: f64 = credits[0].1; + assert!(w > 0.0f64 && w <= 1.0f64); } } diff --git a/crates/khive-fold/src/checkpoint.rs b/crates/khive-fold/src/checkpoint.rs new file mode 100644 index 00000000..ad59bd38 --- /dev/null +++ 
b/crates/khive-fold/src/checkpoint.rs @@ -0,0 +1,480 @@ +//! Checkpoint protocol for fold-based index persistence. +//! +//! Provides generic snapshot envelopes and in-memory storage for use +//! by HNSW and other fold-managed indexes. +//! +//! # Formal proof reference +//! +//! `proofs/Retrieval/HNSW.lean` — checkpoint correctness guarantees +//! used in HNSW snapshot/restore cycles +//! (khive.Retrieval.HNSW.checkpoint_correctness). +//! +//! # Architecture +//! +//! ```text +//! HnswIndex ──snapshot──> HnswSnapshot ──wrap──> Checkpoint +//! │ +//! CheckpointStore::save(...) +//! ``` +//! +//! The snapshot types and this checkpoint envelope are always available; +//! the fold feature flag in consuming crates gates whether they are exposed +//! to callers. +//! +//! # Integrity model +//! +//! `save` serializes `state` to canonical JSON, computes a BLAKE3 hash, and +//! stores it in `Checkpoint.hash`. `load` recomputes the hash from the stored +//! bytes and returns `FoldError::IntegrityMismatch` if they disagree. The hash +//! field is therefore always meaningful — `Hash32::ZERO` is only valid if the +//! canonical serialization of `state` actually hashes to zero (practically +//! impossible). + +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use khive_types::Hash32; + +use crate::context::FoldContext; +use crate::error::FoldError; + +/// Generic checkpoint envelope wrapping an arbitrary fold state snapshot. +/// +/// Carries metadata (ID, timestamp, hash, fold version) alongside the +/// serializable state so consumers can verify and load the correct snapshot. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Checkpoint { + /// Human-readable checkpoint identifier (e.g. `"hnsw_idx:ckpt-1"`). + pub id: String, + + /// The snapshot state captured at this checkpoint. + pub state: S, + + /// Unique identifier for this checkpoint instance. 
+ pub uuid: Uuid, + + /// BLAKE3 content hash of the canonical JSON serialization of `state`. + /// + /// Computed by [`CheckpointStore::save`] and verified by + /// [`CheckpointStore::load`]. A mismatch returns + /// [`FoldError::IntegrityMismatch`]. + pub hash: Hash32, + + /// Number of entries processed when this checkpoint was taken. + pub entries_processed: usize, + + /// Fold context at checkpoint time. + pub context: FoldContext, + + /// Monotonically increasing fold schema version. + pub fold_version: usize, + + /// Wall-clock time when this checkpoint was created. + pub created_at: DateTime, +} + +impl Checkpoint { + /// Create a new checkpoint, computing the BLAKE3 hash of the state. + /// + /// Returns `FoldError::Serialization` if `state` cannot be serialized to JSON. + #[allow(clippy::too_many_arguments)] + pub fn new( + id: impl Into, + state: S, + uuid: Uuid, + entries_processed: usize, + context: FoldContext, + fold_version: usize, + ) -> Result { + let bytes = serde_json::to_vec(&state)?; + let hash = Hash32::from_blake3(&bytes); + Ok(Self { + id: id.into(), + state, + uuid, + hash, + entries_processed, + context, + fold_version, + created_at: Utc::now(), + }) + } + + /// Create a checkpoint with a pre-computed hash (for deserialization / testing). + /// + /// Callers are responsible for ensuring `hash` is consistent with `state`. + /// Prefer [`Checkpoint::new`] for production use. + #[allow(clippy::too_many_arguments)] + pub fn with_hash( + id: impl Into, + state: S, + uuid: Uuid, + hash: Hash32, + entries_processed: usize, + context: FoldContext, + fold_version: usize, + ) -> Self { + Self { + id: id.into(), + state, + uuid, + hash, + entries_processed, + context, + fold_version, + created_at: Utc::now(), + } + } +} + +/// Trait for checkpoint persistence backends. +/// +/// The key is the checkpoint `id` string. 
`load_latest` returns the +/// checkpoint whose prefix matches — defined as all checkpoints whose +/// `id` starts with the given prefix, selecting the most recently created. +/// Ties on `created_at` are broken by `uuid` (lexicographic) for determinism. +pub trait CheckpointStore { + /// Persist a checkpoint, computing and storing an integrity hash. + fn save(&self, checkpoint: Checkpoint) -> Result<(), FoldError> + where + S: Clone + Serialize; + + /// Load a checkpoint by its exact `id`, verifying the integrity hash. + /// + /// Returns `Ok(None)` when no checkpoint with that `id` exists. + /// Returns `Err(FoldError::IntegrityMismatch)` if the stored hash does not + /// match the recomputed hash of the loaded state. + fn load(&self, id: &str) -> Result>, FoldError> + where + S: Clone + Serialize; + + /// Load the most recently created checkpoint whose `id` starts with `prefix`. + /// + /// Ties on `created_at` are broken by `uuid` for determinism. + /// Returns `None` when no checkpoints match the prefix. + fn load_latest(&self, prefix: &str) -> Result>, FoldError> + where + S: Clone + Serialize; + + /// Delete the checkpoint with the given `id`. + /// + /// Returns `Err(FoldError::CheckpointNotFound)` if no checkpoint with that + /// `id` exists. + fn delete(&self, id: &str) -> Result<(), FoldError>; + + /// List all checkpoint `id` strings currently stored. + /// + /// The order is unspecified; callers should sort if a stable order is needed. + fn list(&self) -> Result, FoldError>; +} + +/// In-memory checkpoint store backed by a `RwLock`. +/// +/// Suitable for tests and single-process usage where durability is not +/// required. Production deployments should implement [`CheckpointStore`] +/// with durable storage (e.g. SQLite via `khive-db`). +pub struct InMemoryCheckpointStore { + inner: Arc>>>, +} + +impl InMemoryCheckpointStore { + /// Create a new empty in-memory store. 
+ pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(HashMap::new())), + } + } +} + +impl Default for InMemoryCheckpointStore { + fn default() -> Self { + Self::new() + } +} + +impl CheckpointStore + for InMemoryCheckpointStore +{ + fn save(&self, checkpoint: Checkpoint) -> Result<(), FoldError> + where + S: Clone + Serialize, + { + // Recompute the hash from the state to ensure the stored hash is canonical. + let bytes = serde_json::to_vec(&checkpoint.state)?; + let computed = Hash32::from_blake3(&bytes); + let mut stored = checkpoint; + stored.hash = computed; + + let mut guard = self + .inner + .write() + .map_err(|e| FoldError::LockPoisoned(e.to_string()))?; + guard.insert(stored.id.clone(), stored); + Ok(()) + } + + fn load(&self, id: &str) -> Result>, FoldError> + where + S: Clone + Serialize, + { + let guard = self + .inner + .read() + .map_err(|e| FoldError::LockPoisoned(e.to_string()))?; + let Some(checkpoint) = guard.get(id).cloned() else { + return Ok(None); + }; + + // Verify integrity: recompute hash from state and compare. + let bytes = serde_json::to_vec(&checkpoint.state)?; + let computed = Hash32::from_blake3(&bytes); + if !checkpoint.hash.eq_ct(&computed) { + return Err(FoldError::IntegrityMismatch { + id: id.to_owned(), + stored: checkpoint.hash.to_string(), + computed: computed.to_string(), + }); + } + + Ok(Some(checkpoint)) + } + + fn load_latest(&self, prefix: &str) -> Result>, FoldError> + where + S: Clone + Serialize, + { + let guard = self + .inner + .read() + .map_err(|e| FoldError::LockPoisoned(e.to_string()))?; + + let latest = guard + .values() + .filter(|c| c.id.starts_with(prefix)) + // Tiebreak on uuid for determinism when created_at is equal. 
+ .max_by_key(|c| (c.created_at, c.uuid)); + + Ok(latest.cloned()) + } + + fn delete(&self, id: &str) -> Result<(), FoldError> { + let mut guard = self + .inner + .write() + .map_err(|e| FoldError::LockPoisoned(e.to_string()))?; + if guard.remove(id).is_none() { + return Err(FoldError::CheckpointNotFound(id.to_owned())); + } + Ok(()) + } + + fn list(&self) -> Result, FoldError> { + let guard = self + .inner + .read() + .map_err(|e| FoldError::LockPoisoned(e.to_string()))?; + Ok(guard.keys().cloned().collect()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_checkpoint(id: &str, entries: usize) -> Checkpoint { + Checkpoint::new( + id, + format!("state-{entries}"), + Uuid::new_v4(), + entries, + FoldContext::new(), + 1, + ) + .expect("sample_checkpoint should not fail serialization") + } + + #[test] + fn save_and_load_roundtrip() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + let ckpt = sample_checkpoint("my-index:ckpt-1", 100); + store.save(ckpt).unwrap(); + let loaded = store.load("my-index:ckpt-1").unwrap().unwrap(); + assert_eq!(loaded.state, "state-100"); + assert_eq!(loaded.entries_processed, 100); + } + + #[test] + fn load_missing_returns_none() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + assert!(store.load("nonexistent").unwrap().is_none()); + } + + #[test] + fn load_latest_returns_most_recent() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + + let ckpt1 = sample_checkpoint("idx:ckpt-1", 10); + store.save(ckpt1).unwrap(); + // small sleep so created_at differs + std::thread::sleep(std::time::Duration::from_millis(5)); + let ckpt2 = sample_checkpoint("idx:ckpt-2", 20); + store.save(ckpt2).unwrap(); + std::thread::sleep(std::time::Duration::from_millis(5)); + let ckpt3 = sample_checkpoint("idx:ckpt-3", 30); + store.save(ckpt3).unwrap(); + + let latest = store.load_latest("idx").unwrap().unwrap(); + assert_eq!(latest.entries_processed, 30); + } + + #[test] + 
fn load_latest_no_match_returns_none() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + store.save(sample_checkpoint("other:ckpt-1", 5)).unwrap(); + assert!(store.load_latest("my-index").unwrap().is_none()); + } + + #[test] + fn load_latest_prefix_isolation() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + store.save(sample_checkpoint("alpha:ckpt-1", 10)).unwrap(); + store.save(sample_checkpoint("beta:ckpt-1", 999)).unwrap(); + + let latest_alpha = store.load_latest("alpha").unwrap().unwrap(); + assert_eq!(latest_alpha.entries_processed, 10); + } + + #[test] + fn checkpoint_fields_accessible() { + let ckpt: Checkpoint = + Checkpoint::new("test:ckpt", 42u32, Uuid::new_v4(), 7, FoldContext::new(), 3).unwrap(); + assert_eq!(ckpt.state, 42); + assert_eq!(ckpt.entries_processed, 7); + assert_eq!(ckpt.fold_version, 3); + } + + // --- Additional tests (F-NEW-8) --- + + #[test] + fn serde_roundtrip() { + let ckpt = sample_checkpoint("serde:test", 42); + let json = serde_json::to_string(&ckpt).expect("serialize"); + let restored: Checkpoint = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(ckpt.id, restored.id); + assert_eq!(ckpt.state, restored.state); + assert_eq!(ckpt.entries_processed, restored.entries_processed); + assert_eq!(ckpt.fold_version, restored.fold_version); + assert_eq!(ckpt.uuid, restored.uuid); + // Hash bytes should survive the roundtrip unchanged. 
+ assert_eq!(ckpt.hash.as_bytes(), restored.hash.as_bytes()); + } + + #[test] + fn delete_existing_succeeds() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + store.save(sample_checkpoint("del:ckpt-1", 1)).unwrap(); + store.delete("del:ckpt-1").unwrap(); + assert!(store.load("del:ckpt-1").unwrap().is_none()); + } + + #[test] + fn delete_nonexistent_returns_not_found() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + let err = store.delete("nope").unwrap_err(); + assert!( + matches!(err, FoldError::CheckpointNotFound(ref id) if id == "nope"), + "expected CheckpointNotFound, got {err:?}" + ); + } + + #[test] + fn list_returns_all_ids() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + store.save(sample_checkpoint("a:ckpt-1", 1)).unwrap(); + store.save(sample_checkpoint("b:ckpt-1", 2)).unwrap(); + store.save(sample_checkpoint("c:ckpt-1", 3)).unwrap(); + let mut ids = store.list().unwrap(); + ids.sort(); + assert_eq!(ids, vec!["a:ckpt-1", "b:ckpt-1", "c:ckpt-1"]); + } + + #[test] + fn list_empty_store() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + assert!(store.list().unwrap().is_empty()); + } + + #[test] + fn save_overwrite_replaces_previous() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + let ckpt1 = sample_checkpoint("overwrite:ckpt-1", 10); + store.save(ckpt1).unwrap(); + + // Save again with the same id but different state. + let ckpt2 = Checkpoint::new( + "overwrite:ckpt-1", + "new-state".to_string(), + Uuid::new_v4(), + 99, + FoldContext::new(), + 2, + ) + .unwrap(); + store.save(ckpt2).unwrap(); + + let loaded = store.load("overwrite:ckpt-1").unwrap().unwrap(); + assert_eq!(loaded.state, "new-state"); + assert_eq!(loaded.entries_processed, 99); + // Only one entry with that id. 
+ let ids = store.list().unwrap(); + assert_eq!(ids.iter().filter(|id| *id == "overwrite:ckpt-1").count(), 1); + } + + #[test] + fn integrity_mismatch_on_corrupted_hash() { + let store: InMemoryCheckpointStore = InMemoryCheckpointStore::new(); + let ckpt = sample_checkpoint("integrity:ckpt-1", 5); + store.save(ckpt).unwrap(); + + // Directly corrupt the stored hash by replacing it with ZERO. + { + let mut guard = store.inner.write().unwrap(); + if let Some(c) = guard.get_mut("integrity:ckpt-1") { + c.hash = Hash32::ZERO; + } + } + + let err = store.load("integrity:ckpt-1").unwrap_err(); + assert!( + matches!(err, FoldError::IntegrityMismatch { .. }), + "expected IntegrityMismatch, got {err:?}" + ); + } + + #[test] + fn concurrent_saves_all_land() { + use std::sync::Arc; + use std::thread; + + let store = Arc::new(InMemoryCheckpointStore::::new()); + let n = 20usize; + let handles: Vec<_> = (0..n) + .map(|i| { + let s = Arc::clone(&store); + thread::spawn(move || { + s.save(sample_checkpoint(&format!("concurrent:ckpt-{i}"), i)) + .unwrap(); + }) + }) + .collect(); + for h in handles { + h.join().expect("thread panicked"); + } + let ids = store.list().unwrap(); + assert_eq!(ids.len(), n, "expected {n} checkpoints, got {}", ids.len()); + } +} diff --git a/crates/khive-fold/src/compose.rs b/crates/khive-fold/src/compose.rs index 8b632374..17b4046a 100644 --- a/crates/khive-fold/src/compose.rs +++ b/crates/khive-fold/src/compose.rs @@ -126,16 +126,18 @@ where impl Fold for FilterFold where + L: Send + Sync, + S: Send + Sync, F: Fold, - P: Fn(&L) -> bool, + P: Fn(&L) -> bool + Send + Sync, { - fn initial(&self, context: &FoldContext) -> S { - self.inner.initial(context) + fn init(&self, context: &FoldContext) -> S { + self.inner.init(context) } - fn step(&self, state: S, entry: &L, context: &FoldContext) -> S { + fn reduce(&self, state: S, entry: &L, context: &FoldContext) -> S { if (self.predicate)(entry) { - self.inner.step(state, entry, context) + 
self.inner.reduce(state, entry, context) } else { state } @@ -174,16 +176,19 @@ where impl Fold for MapFold where + L1: Send + Sync, + L2: Send + Sync, + S: Send + Sync, F: Fold, - M: Fn(&L1) -> L2, + M: Fn(&L1) -> L2 + Send + Sync, { - fn initial(&self, context: &FoldContext) -> S { - self.inner.initial(context) + fn init(&self, context: &FoldContext) -> S { + self.inner.init(context) } - fn step(&self, state: S, entry: &L1, context: &FoldContext) -> S { + fn reduce(&self, state: S, entry: &L1, context: &FoldContext) -> S { let mapped = (self.mapper)(entry); - self.inner.step(state, &mapped, context) + self.inner.reduce(state, &mapped, context) } fn finalize(&self, state: S, context: &FoldContext) -> S { diff --git a/crates/khive-fold/src/error.rs b/crates/khive-fold/src/error.rs index 92bbade5..78d6de11 100644 --- a/crates/khive-fold/src/error.rs +++ b/crates/khive-fold/src/error.rs @@ -58,6 +58,21 @@ pub enum FoldError { /// Required component not configured. #[error("required component not configured: {0}")] ComponentMissing(String), + + /// Checkpoint integrity check failed: stored hash does not match recomputed hash. + #[error("checkpoint integrity mismatch for '{id}': stored {stored}, computed {computed}")] + IntegrityMismatch { + /// Checkpoint id that failed verification. + id: String, + /// The hash stored in the checkpoint. + stored: String, + /// The hash recomputed from the loaded state. + computed: String, + }, + + /// A checkpoint with the given id was not found. + #[error("checkpoint not found: {0}")] + CheckpointNotFound(String), } #[cfg(test)] diff --git a/crates/khive-fold/src/fold.rs b/crates/khive-fold/src/fold.rs index ca3ec44e..8990de5e 100644 --- a/crates/khive-fold/src/fold.rs +++ b/crates/khive-fold/src/fold.rs @@ -13,14 +13,12 @@ use crate::{FoldContext, FoldOutcome}; /// - S: The derived state type /// /// Folds are deterministic: same entries + same context = same state. 
-pub trait Fold { +pub trait Fold: Send + Sync { /// Get the initial state before any entries are processed. - fn initial(&self, context: &FoldContext) -> S; + fn init(&self, context: &FoldContext) -> S; /// Process a single entry and return the new state. - /// - /// This is the core step function: state' = step(state, entry, context) - fn step(&self, state: S, entry: &L, context: &FoldContext) -> S; + fn reduce(&self, state: S, entry: &L, context: &FoldContext) -> S; /// Finalize the state after all entries are processed. /// @@ -39,18 +37,15 @@ pub trait Fold { I: IntoIterator, L: 'a, { - let started_at = chrono::Utc::now(); - let mut state = self.initial(context); + let mut state = self.init(context); let mut count = 0; for entry in entries { - state = self.step(state, entry, context); + state = self.reduce(state, entry, context); count += 1; } - state = self.finalize(state, context); - - FoldOutcome::with_timing(state, count, context.clone(), started_at) + FoldOutcome::new(self.finalize(state, context), count) } /// Derive state with a filter. 
@@ -66,20 +61,17 @@ pub trait Fold { L: 'a, F: Fn(&L) -> bool, { - let started_at = chrono::Utc::now(); - let mut state = self.initial(context); + let mut state = self.init(context); let mut count = 0; for entry in entries { if filter(entry) { - state = self.step(state, entry, context); + state = self.reduce(state, entry, context); count += 1; } } - state = self.finalize(state, context); - - FoldOutcome::with_timing(state, count, context.clone(), started_at) + FoldOutcome::new(self.finalize(state, context), count) } } @@ -107,13 +99,13 @@ where T: Fold + ?Sized, { #[inline] - fn initial(&self, context: &FoldContext) -> S { - (**self).initial(context) + fn init(&self, context: &FoldContext) -> S { + (**self).init(context) } #[inline] - fn step(&self, state: S, entry: &L, context: &FoldContext) -> S { - (**self).step(state, entry, context) + fn reduce(&self, state: S, entry: &L, context: &FoldContext) -> S { + (**self).reduce(state, entry, context) } #[inline] @@ -137,13 +129,13 @@ where T: Fold + ?Sized, { #[inline] - fn initial(&self, context: &FoldContext) -> S { - (**self).initial(context) + fn init(&self, context: &FoldContext) -> S { + (**self).init(context) } #[inline] - fn step(&self, state: S, entry: &L, context: &FoldContext) -> S { - (**self).step(state, entry, context) + fn reduce(&self, state: S, entry: &L, context: &FoldContext) -> S { + (**self).reduce(state, entry, context) } #[inline] @@ -197,17 +189,19 @@ where impl Fold for FnFold where - I: Fn(&FoldContext) -> S, - St: Fn(S, &L, &FoldContext) -> S, - F: Fn(S, &FoldContext) -> S, + L: Send + Sync, + S: Send + Sync, + I: Fn(&FoldContext) -> S + Send + Sync, + St: Fn(S, &L, &FoldContext) -> S + Send + Sync, + F: Fn(S, &FoldContext) -> S + Send + Sync, { #[inline] - fn initial(&self, context: &FoldContext) -> S { + fn init(&self, context: &FoldContext) -> S { (self.initial_fn)(context) } #[inline] - fn step(&self, state: S, entry: &L, context: &FoldContext) -> S { + fn reduce(&self, state: S, entry: 
&L, context: &FoldContext) -> S { (self.step_fn)(state, entry, context) } @@ -219,9 +213,11 @@ where impl TryFold for FnFold where - I: Fn(&FoldContext) -> S, - St: Fn(S, &L, &FoldContext) -> S, - F: Fn(S, &FoldContext) -> S, + L: Send + Sync, + S: Send + Sync, + I: Fn(&FoldContext) -> S + Send + Sync, + St: Fn(S, &L, &FoldContext) -> S + Send + Sync, + F: Fn(S, &FoldContext) -> S + Send + Sync, { #[inline] fn try_step(&self, state: S, entry: &L, context: &FoldContext) -> Result { @@ -232,8 +228,10 @@ where /// Create a fold from just initial and step functions (no finalize). pub fn fold_fn(initial: I, step: St) -> impl Fold where - I: Fn(&FoldContext) -> S, - St: Fn(S, &L, &FoldContext) -> S, + L: Send + Sync, + S: Send + Sync, + I: Fn(&FoldContext) -> S + Send + Sync, + St: Fn(S, &L, &FoldContext) -> S + Send + Sync, { FnFold::new(initial, step, |s, _| s) } @@ -262,12 +260,12 @@ impl Default for CountFold { impl Fold for CountFold { #[inline] - fn initial(&self, _context: &FoldContext) -> usize { + fn init(&self, _context: &FoldContext) -> usize { 0 } #[inline] - fn step(&self, state: usize, _entry: &L, _context: &FoldContext) -> usize { + fn reduce(&self, state: usize, _entry: &L, _context: &FoldContext) -> usize { state.saturating_add(1) } } @@ -280,7 +278,7 @@ impl TryFold for CountFold { entry: &L, context: &FoldContext, ) -> Result { - Ok(self.step(state, entry, context)) + Ok(self.reduce(state, entry, context)) } } @@ -306,12 +304,12 @@ impl FilterCountFold { impl Fold for FilterCountFold { #[inline] - fn initial(&self, _context: &FoldContext) -> usize { + fn init(&self, _context: &FoldContext) -> usize { 0 } #[inline] - fn step(&self, state: usize, entry: &L, _context: &FoldContext) -> usize { + fn reduce(&self, state: usize, entry: &L, _context: &FoldContext) -> usize { if (self.predicate)(entry) { state.saturating_add(1) } else { @@ -328,7 +326,7 @@ impl TryFold for FilterCountFold { entry: &L, context: &FoldContext, ) -> Result { - Ok(self.step(state, 
entry, context)) + Ok(self.reduce(state, entry, context)) } } @@ -354,12 +352,12 @@ impl SumI64Fold { impl Fold for SumI64Fold { #[inline] - fn initial(&self, _context: &FoldContext) -> i64 { + fn init(&self, _context: &FoldContext) -> i64 { 0 } #[inline] - fn step(&self, state: i64, entry: &L, _context: &FoldContext) -> i64 { + fn reduce(&self, state: i64, entry: &L, _context: &FoldContext) -> i64 { state.saturating_add((self.project)(entry)) } } @@ -367,7 +365,7 @@ impl Fold for SumI64Fold { impl TryFold for SumI64Fold { #[inline] fn try_step(&self, state: i64, entry: &L, context: &FoldContext) -> Result { - Ok(self.step(state, entry, context)) + Ok(self.reduce(state, entry, context)) } } @@ -393,12 +391,12 @@ impl AnyFold { impl Fold for AnyFold { #[inline] - fn initial(&self, _context: &FoldContext) -> bool { + fn init(&self, _context: &FoldContext) -> bool { false } #[inline] - fn step(&self, state: bool, entry: &L, _context: &FoldContext) -> bool { + fn reduce(&self, state: bool, entry: &L, _context: &FoldContext) -> bool { state || (self.predicate)(entry) } } @@ -406,7 +404,7 @@ impl Fold for AnyFold { impl TryFold for AnyFold { #[inline] fn try_step(&self, state: bool, entry: &L, context: &FoldContext) -> Result { - Ok(self.step(state, entry, context)) + Ok(self.reduce(state, entry, context)) } } @@ -502,16 +500,16 @@ impl CommonFold { ) -> Result { match (self, state) { (Self::Count(inner), CommonFoldState::Count(count)) => { - Ok(CommonFoldState::Count(inner.step(count, entry, context))) + Ok(CommonFoldState::Count(inner.reduce(count, entry, context))) } (Self::FilterCount(inner), CommonFoldState::Count(count)) => { - Ok(CommonFoldState::Count(inner.step(count, entry, context))) + Ok(CommonFoldState::Count(inner.reduce(count, entry, context))) } (Self::SumI64(inner), CommonFoldState::SumI64(sum)) => { - Ok(CommonFoldState::SumI64(inner.step(sum, entry, context))) + Ok(CommonFoldState::SumI64(inner.reduce(sum, entry, context))) } (Self::Any(inner), 
CommonFoldState::Any(any)) => { - Ok(CommonFoldState::Any(inner.step(any, entry, context))) + Ok(CommonFoldState::Any(inner.reduce(any, entry, context))) } (kind, state) => Err(FoldFailure::StateMismatch { expected: kind.expected_state_kind(), @@ -523,7 +521,7 @@ impl CommonFold { impl Fold for CommonFold { #[inline] - fn initial(&self, _context: &FoldContext) -> CommonFoldState { + fn init(&self, _context: &FoldContext) -> CommonFoldState { match self { Self::Count(_) | Self::FilterCount(_) => CommonFoldState::Count(0), Self::SumI64(_) => CommonFoldState::SumI64(0), @@ -536,7 +534,7 @@ impl Fold for CommonFold { /// Panics if `state` does not match the variant expected by `self`. /// Use [`TryFold::try_step`] to handle the mismatch as an error instead. #[inline] - fn step(&self, state: CommonFoldState, entry: &L, context: &FoldContext) -> CommonFoldState { + fn reduce(&self, state: CommonFoldState, entry: &L, context: &FoldContext) -> CommonFoldState { self.try_step(state, entry, context) .unwrap_or_else(|err| panic!("{err}")) } @@ -615,17 +613,17 @@ mod tests { let entry = 1; let count = CountFold::new(); - assert_eq!(count.step(usize::MAX, &entry, &context), usize::MAX); + assert_eq!(count.reduce(usize::MAX, &entry, &context), usize::MAX); let filtered = FilterCountFold::new(|_: &i32| true); - assert_eq!(filtered.step(usize::MAX, &entry, &context), usize::MAX); + assert_eq!(filtered.reduce(usize::MAX, &entry, &context), usize::MAX); } #[test] fn sum_i64_fold_saturates_on_overflow() { let context = FoldContext::new(); let fold = SumI64Fold::new(|value: &i64| *value); - assert_eq!(fold.step(i64::MAX, &1, &context), i64::MAX); + assert_eq!(fold.reduce(i64::MAX, &1, &context), i64::MAX); } #[test] @@ -649,4 +647,15 @@ mod tests { let result = fold.derive(entries.iter(), &FoldContext::new()); assert!(result.state); } + + #[test] + fn fold_is_deterministic_no_timing() { + // Same inputs must produce equal FoldOutcome (PartialEq holds). 
+ let fold = fold_fn(|_ctx| 0usize, |c, _: &i32, _ctx| c + 1); + let entries = [1, 2, 3]; + let ctx = FoldContext::new(); + let a = fold.derive(entries.iter(), &ctx); + let b = fold.derive(entries.iter(), &ctx); + assert_eq!(a, b); + } } diff --git a/crates/khive-fold/src/lib.rs b/crates/khive-fold/src/lib.rs index 23a6b8d7..8c515200 100644 --- a/crates/khive-fold/src/lib.rs +++ b/crates/khive-fold/src/lib.rs @@ -33,6 +33,10 @@ mod error; mod fold; mod result; +// ── Checkpoint protocol ───────────────────────────────────────────────── + +pub mod checkpoint; + pub use compose::{filter, map, DualFold, FilterFold, MapFold, SequentialFold}; pub use context::{FoldContext, SharedJson}; pub use error::{FoldError, FoldResult, FoldResult as FoldResultType}; @@ -42,6 +46,10 @@ pub use fold::{ }; pub use result::FoldOutcome; +// ── Checkpoint re-exports ──────────────────────────────────────────────── + +pub use checkpoint::{Checkpoint, CheckpointStore, InMemoryCheckpointStore}; + // ── Anchor primitive ──────────────────────────────────────────────────── pub mod anchor; @@ -71,6 +79,36 @@ pub use objective::compose::{ pub use objective::error::{ObjectiveError, ObjectiveResult}; pub use objective::{objective_fn, DeterministicObjective, Objective, ObjectiveContext, Selection}; pub use ordering::{ - canonical_f32, canonical_f64, cmp_asc_score_then_id, cmp_desc_score_then_id, HasId, QuantKey, - Ranked, ScoredEntry, + canonical_f32, canonical_f64, cmp_asc_score_then_id, cmp_desc_score_then_id, HasId, Ranked, + ScoredEntry, }; + +// ── ComposePipeline ───────────────────────────────────────────────────── + +/// Pipeline that scores candidates with an objective then packs to budget via a selector. +pub struct ComposePipeline { + pub anchor: Box, + pub objective: Box>, + pub selector: Box>, +} + +impl ComposePipeline { + /// Score candidates with the objective, then pack under budget with the selector. 
+ pub fn execute( + &self, + _graph: &AnchorGraph, + candidates: Vec>, + budget: usize, + weights: &SelectorWeights, + context: &ObjectiveContext, + ) -> Result, FoldError> { + let scored = candidates + .into_iter() + .map(|mut candidate| { + candidate.score = self.objective.score(&candidate.content, context) as f32; + candidate + }) + .collect(); + self.selector.select(scored, budget, weights) + } +} diff --git a/crates/khive-fold/src/objective/builtin.rs b/crates/khive-fold/src/objective/builtin.rs index 3525f8a0..4d605e73 100644 --- a/crates/khive-fold/src/objective/builtin.rs +++ b/crates/khive-fold/src/objective/builtin.rs @@ -1,6 +1,6 @@ //! Built-in objective functions -use crate::{Objective, ObjectiveContext, ObjectiveError, ObjectiveResult, Selection}; +use crate::{Objective, ObjectiveContext, Selection}; /// Selects candidate with highest score. pub struct MaxScoreObjective @@ -122,13 +122,9 @@ where } } - fn select<'a>( - &self, - candidates: &'a [T], - context: &ObjectiveContext, - ) -> ObjectiveResult> { + fn select<'a>(&self, candidates: &'a [T], context: &ObjectiveContext) -> Vec> { if candidates.is_empty() { - return Err(ObjectiveError::NoCandidates); + return Vec::new(); } let limit = context @@ -138,15 +134,13 @@ where for (i, candidate) in candidates.iter().take(limit).enumerate() { if (self.predicate)(candidate) { - return Ok(Selection::new(candidate, 1.0, i) + return vec![Selection::new(candidate, 1.0, i) .with_considered(i + 1) - .with_passed(1)); + .with_passed(1)]; } } - Err(ObjectiveError::NoMatch( - "No candidate matched predicate".into(), - )) + Vec::new() } fn name(&self) -> &str { @@ -324,6 +318,8 @@ mod tests { let candidates = vec![1, 5, 3, 8, 2]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 8); @@ -351,6 +347,8 @@ mod tests { let candidates = vec![1, 3, 7, 9, 2]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + 
.into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 7); @@ -366,7 +364,7 @@ mod tests { let context = ObjectiveContext::new().with_max_candidates(2); let result = objective.select(&candidates, &context); - assert!(matches!(result, Err(ObjectiveError::NoMatch(_)))); + assert!(result.is_empty()); } #[derive(Clone)] @@ -487,7 +485,7 @@ mod tests { let candidates = vec![1, 5, 3]; let result = objective.select(&candidates, &ObjectiveContext::new()); - assert!(matches!(result, Err(ObjectiveError::NoMatch(_)))); + assert!(result.is_empty()); } #[test] @@ -497,6 +495,8 @@ mod tests { let candidates = vec![1, 10, 3, 15]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 15); diff --git a/crates/khive-fold/src/objective/mod.rs b/crates/khive-fold/src/objective/mod.rs index c4504982..fef2a1a1 100644 --- a/crates/khive-fold/src/objective/mod.rs +++ b/crates/khive-fold/src/objective/mod.rs @@ -16,7 +16,6 @@ pub use traits::{objective_fn, DeterministicObjective, Objective}; mod tests { use super::*; use crate::ordering::HasId; - use crate::ObjectiveError; use uuid::Uuid; #[test] @@ -26,6 +25,8 @@ mod tests { let candidates = vec![1, 5, 3, 8, 2]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 8); @@ -39,7 +40,11 @@ mod tests { let candidates = vec![1, 5, 3, 8, 2]; let context = ObjectiveContext::new().with_min_score(4.0); - let selection = objective.select(&candidates, &context).unwrap(); + let selection = objective + .select(&candidates, &context) + .into_iter() + .next() + .unwrap(); assert_eq!(*selection.item, 8); assert_eq!(selection.passed, 2); @@ -52,7 +57,7 @@ mod tests { let candidates: Vec = vec![]; let result = objective.select(&candidates, &ObjectiveContext::new()); - assert!(matches!(result, Err(ObjectiveError::NoCandidates))); + assert!(result.is_empty()); } #[test] @@ -63,7 +68,7 @@ 
mod tests { let context = ObjectiveContext::new().with_min_score(10.0); let result = objective.select(&candidates, &context); - assert!(matches!(result, Err(ObjectiveError::NoMatch(_)))); + assert!(result.is_empty()); } #[test] @@ -94,6 +99,8 @@ mod tests { let candidates = vec![1, 5, 3]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 3); @@ -116,6 +123,8 @@ mod tests { let candidates = vec![1, 5, 3]; let selection = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*selection.item, 3); @@ -129,7 +138,11 @@ mod tests { let candidates = vec![1, 5, 3, 8, 2]; let context = ObjectiveContext::new().with_max_candidates(2); - let selection = objective.select(&candidates, &context).unwrap(); + let selection = objective + .select(&candidates, &context) + .into_iter() + .next() + .unwrap(); assert_eq!(*selection.item, 5); assert_eq!(selection.considered, 2); @@ -272,6 +285,8 @@ mod tests { let candidates = vec![1, 5, 3, 8, 2]; let sel = objective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); assert_eq!(*sel.item, 8); assert_eq!(sel.precision, 1.0); @@ -295,6 +310,8 @@ mod tests { let candidates = vec![(10.0f64, 0.1f64), (3.0f64, 1.0f64)]; let sel = PrecisionObjective .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); // 3.0 * 1.0 = 3.0 > 10.0 * 0.1 = 1.0 assert_eq!(sel.item.0, 3.0); @@ -303,6 +320,8 @@ mod tests { #[test] fn selection_stores_precision_from_winning_candidate() { + // After F130: select delegates to select_top which scores by effective (score*precision) + // but stores effective in selection.score; precision field defaults to 1.0. 
struct HalfPrecision; impl Objective for HalfPrecision { fn score(&self, n: &i32, _ctx: &ObjectiveContext) -> f64 { @@ -315,8 +334,13 @@ mod tests { let candidates = vec![1, 2, 3]; let sel = HalfPrecision .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); - assert_eq!(sel.precision, 0.5); + // Best by effective score (3 * 0.5 = 1.5). + assert_eq!(*sel.item, 3); + // select_top stores effective score, not raw score. + assert!((sel.score - 1.5).abs() < 1e-10); } #[test] @@ -334,6 +358,8 @@ mod tests { let candidates = vec![1, 5, 3]; let sel = NanPrecision .select(&candidates, &ObjectiveContext::new()) + .into_iter() + .next() .unwrap(); // NaN precision → treat as 1.0 → raw score ordering → 5 wins. assert_eq!(*sel.item, 5); diff --git a/crates/khive-fold/src/objective/registry.rs b/crates/khive-fold/src/objective/registry.rs index 4ce97815..51647e95 100644 --- a/crates/khive-fold/src/objective/registry.rs +++ b/crates/khive-fold/src/objective/registry.rs @@ -38,13 +38,17 @@ impl RegisteredObjective { self.objective.score(candidate, context) } - /// Select from candidates + /// Select from candidates, returning the best match or an error. pub fn select<'a>( &self, candidates: &'a [T], context: &ObjectiveContext, ) -> ObjectiveResult> { - self.objective.select(candidates, context) + self.objective + .select(candidates, context) + .into_iter() + .next() + .ok_or_else(|| ObjectiveError::NoMatch("No candidate selected".into())) } } @@ -166,7 +170,7 @@ impl ObjectiveRegistry { Ok(objective.score(candidate, context)) } - /// Select using a named objective + /// Select using a named objective, returning the best match or an error. pub fn select<'a>( &self, name: &str, @@ -177,7 +181,7 @@ impl ObjectiveRegistry { objective.select(candidates, context) } - /// Select using the default objective + /// Select using the default objective, returning the best match or an error. 
pub fn select_default<'a>( &self, candidates: &'a [T], diff --git a/crates/khive-fold/src/objective/traits.rs b/crates/khive-fold/src/objective/traits.rs index 18960796..7ad6bfc8 100644 --- a/crates/khive-fold/src/objective/traits.rs +++ b/crates/khive-fold/src/objective/traits.rs @@ -168,67 +168,17 @@ pub trait Objective: Send + Sync { scored } - /// Select the best candidate from a list. + /// Select candidates from a list, returning all that pass in score-descending order. /// - /// Ranking uses `score * precision` so that unreliable high-scores do not - /// dominate over lower-scoring but precise candidates (ADR-059). When all - /// precisions are 1.0 (the default), ranking is identical to raw score order. - fn select<'a>( - &self, - candidates: &'a [T], - context: &ObjectiveContext, - ) -> ObjectiveResult> { + /// Returns an empty vector when no candidates pass the threshold or the input is empty. + /// Delegates to `select_top` using the full considered limit so callers get a ranked + /// list rather than a single item. Use `.into_iter().next()` for single-best access. 
+ fn select<'a>(&self, candidates: &'a [T], context: &ObjectiveContext) -> Vec> { if candidates.is_empty() { - return Err(ObjectiveError::NoCandidates); - } - - let considered_limit = considered_limit(candidates.len(), context); - - let mut considered = 0usize; - let mut passed = 0usize; - let mut has_best = false; - let mut best_index = 0usize; - let mut best_score = 0.0f64; - let mut best_precision = 1.0f64; - let mut best_det = DeterministicScore::ZERO; - - for (index, candidate) in candidates.iter().take(considered_limit).enumerate() { - considered += 1; - - let score = self.score(candidate, context); - if !self.passes_score(score, context) { - continue; - } - - passed += 1; - - let precision = self.precision(candidate, context); - let effective = score - * if precision.is_finite() { - precision - } else { - 1.0 - }; - let det = DeterministicScore::from_f64(effective); - if !has_best || det > best_det { - has_best = true; - best_index = index; - best_score = score; - best_precision = precision; - best_det = det; - } - } - - if has_best { - Ok( - Selection::new(&candidates[best_index], best_score, best_index) - .with_precision(best_precision) - .with_considered(considered) - .with_passed(passed), - ) - } else { - Err(ObjectiveError::NoMatch("No candidate passed".into())) + return Vec::new(); } + let n = considered_limit(candidates.len(), context); + self.select_top(candidates, n, context) } /// Select the top N candidates. @@ -245,10 +195,6 @@ pub trait Objective: Send + Sync { return Vec::new(); } - if n == 1 { - return self.select(candidates, context).ok().into_iter().collect(); - } - let considered_limit = considered_limit(candidates.len(), context); let mut considered = 0usize; diff --git a/crates/khive-fold/src/ordering/mod.rs b/crates/khive-fold/src/ordering/mod.rs index 8fb9f4c5..2d07f63f 100644 --- a/crates/khive-fold/src/ordering/mod.rs +++ b/crates/khive-fold/src/ordering/mod.rs @@ -9,7 +9,6 @@ //! 
- [`canonical_f64`]/[`canonical_f32`]: Normalize floating-point values for comparison //! - [`cmp_desc_score_then_id`]: Deterministic comparator (f64 + Uuid) with UUID tie-breaking //! - [`ScoredEntry`]: Ord-implementing wrapper for heap operations, backed by [`DeterministicScore`] -//! - [`QuantKey`]: Re-exported from `khive-score` — 8-byte packed sort key (i32 score + u32 ID prefix) //! - [`DeterministicScore`]: Re-exported from `khive-score` — i64 fixed-point score //! - [`Ranked`]: Re-exported from `khive-score` — score + generic `Ord` ID pair for heaps @@ -24,7 +23,6 @@ pub use has_id::HasId; pub use scored_entry::ScoredEntry; // Re-exports from khive-score -pub use khive_score::QuantKey; pub use khive_score::{cmp_asc_then_id, cmp_desc_then_id, DeterministicScore, Ranked}; #[cfg(test)] @@ -301,59 +299,6 @@ mod tests { assert!(set.contains(&entry2)); } - // ------------------------------------------------------------------------ - // QuantKey Tests (score's QuantKey: i32+u32 packed, NaN→0) - // ------------------------------------------------------------------------ - - #[test] - fn test_quant_key_precision() { - let a = QuantKey::new(0.123456, 1); - let b = QuantKey::new(0.123457, 2); - assert_ne!( - a.quantized_score(), - b.quantized_score(), - "1e-6 difference should be distinguishable" - ); - } - - #[test] - fn test_quant_key_rounding() { - let a = QuantKey::new(0.12345642, 1); - let b = QuantKey::new(0.12345647, 2); - assert_eq!( - a.quantized_score(), - b.quantized_score(), - "Sub-1e-6 differences should round same" - ); - } - - #[test] - fn test_quant_key_nan_maps_to_zero() { - let nan = QuantKey::new(f32::NAN, 1); - let zero = QuantKey::new(0.0, 1); - assert_eq!( - nan.quantized_score(), - zero.quantized_score(), - "NaN maps to 0 in score's QuantKey" - ); - } - - #[test] - fn test_quant_key_heap_order() { - use std::collections::BinaryHeap; - - let mut heap: BinaryHeap = BinaryHeap::new(); - heap.push(QuantKey::new(0.95, 3)); - 
heap.push(QuantKey::new(0.95, 1)); - heap.push(QuantKey::new(0.95, 2)); - heap.push(QuantKey::new(0.87, 4)); - - assert_eq!(heap.pop().unwrap().id_prefix(), 1); - assert_eq!(heap.pop().unwrap().id_prefix(), 2); - assert_eq!(heap.pop().unwrap().id_prefix(), 3); - assert_eq!(heap.pop().unwrap().id_prefix(), 4); - } - // ------------------------------------------------------------------------ // DeterministicScore Integration Tests // ------------------------------------------------------------------------ diff --git a/crates/khive-fold/src/result.rs b/crates/khive-fold/src/result.rs index ed36fde6..cd025d56 100644 --- a/crates/khive-fold/src/result.rs +++ b/crates/khive-fold/src/result.rs @@ -1,117 +1,35 @@ //! Fold outcome type -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; - -use crate::FoldContext; - /// Outcome of a fold operation. /// -/// Contains the derived state along with metadata about the fold execution. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// Deterministic: contains only derived state and entry count. No wall-clock timing. +#[derive(Debug, Clone, PartialEq, Eq)] pub struct FoldOutcome { - /// The derived state + /// The derived state. pub state: S, - /// Number of entries processed + /// Number of entries processed. pub entries_processed: usize, - - /// When the fold started - pub started_at: DateTime, - - /// When the fold completed - pub completed_at: DateTime, - - /// Context used for the fold - pub context: FoldContext, - - /// Optional metadata - #[serde(default)] - pub metadata: serde_json::Value, } impl FoldOutcome { - /// Create a new fold result with identical start and completion timestamps. - pub fn new(state: S, entries_processed: usize, context: FoldContext) -> Self { - let now = Utc::now(); - Self { - state, - entries_processed, - started_at: now, - completed_at: now, - context, - metadata: serde_json::Value::Null, - } - } - - /// Create with timing information. 
- pub fn with_timing( - state: S, - entries_processed: usize, - context: FoldContext, - started_at: DateTime, - ) -> Self { - Self { - state, - entries_processed, - started_at, - completed_at: Utc::now(), - context, - metadata: serde_json::Value::Null, - } - } - - /// Create with timing information derived from a monotonic elapsed duration. - /// - /// Avoids a second `Utc::now()` call by computing `completed_at` from - /// `started_at + elapsed`. - pub fn with_elapsed( - state: S, - entries_processed: usize, - context: FoldContext, - started_at: DateTime, - elapsed: std::time::Duration, - ) -> Self { - let completed_at = started_at - + chrono::Duration::from_std(elapsed).unwrap_or_else(|_| chrono::Duration::zero()); - + /// Create a new fold outcome. + pub fn new(state: S, entries_processed: usize) -> Self { Self { state, entries_processed, - started_at, - completed_at, - context, - metadata: serde_json::Value::Null, } } - /// Set metadata. - pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { - self.metadata = metadata; - self - } - - /// Get duration of the fold. - pub fn duration(&self) -> chrono::Duration { - self.completed_at - self.started_at - } - /// Map the state to a different type. 
pub fn map T>(self, f: F) -> FoldOutcome { - FoldOutcome { - state: f(self.state), - entries_processed: self.entries_processed, - started_at: self.started_at, - completed_at: self.completed_at, - context: self.context, - metadata: self.metadata, - } + FoldOutcome::new(f(self.state), self.entries_processed) } } impl Default for FoldOutcome { fn default() -> Self { - Self::new(S::default(), 0, FoldContext::default()) + Self::new(S::default(), 0) } } @@ -121,64 +39,30 @@ mod tests { #[test] fn test_fold_outcome_creation() { - let result = FoldOutcome::new(42, 10, FoldContext::new()); + let result = FoldOutcome::new(42, 10); assert_eq!(result.state, 42); assert_eq!(result.entries_processed, 10); } #[test] fn test_fold_outcome_map() { - let result = FoldOutcome::new(42, 10, FoldContext::new()); + let result = FoldOutcome::new(42, 10); let mapped = result.map(|x| x.to_string()); assert_eq!(mapped.state, "42"); assert_eq!(mapped.entries_processed, 10); } #[test] - fn test_fold_outcome_with_elapsed() { - let started_at = Utc::now(); - let outcome = FoldOutcome::with_elapsed( - 7usize, - 2, - FoldContext::new(), - started_at, - std::time::Duration::from_millis(5), - ); - assert!(outcome.completed_at >= outcome.started_at); - } - - #[test] - fn test_fold_outcome_with_elapsed_exact_arithmetic() { - let started_at = Utc::now(); - let elapsed = std::time::Duration::from_millis(123); - let outcome = - FoldOutcome::with_elapsed("state", 5, FoldContext::new(), started_at, elapsed); - let expected_completed = started_at + chrono::Duration::from_std(elapsed).unwrap(); - assert_eq!(outcome.completed_at, expected_completed); - assert_eq!(outcome.started_at, started_at); - } - - #[test] - fn test_fold_outcome_with_elapsed_zero_duration() { - let started_at = Utc::now(); - let outcome = FoldOutcome::with_elapsed( - 0u32, - 0, - FoldContext::new(), - started_at, - std::time::Duration::ZERO, - ); - assert_eq!(outcome.completed_at, outcome.started_at); + fn deterministic_no_timing_fields() 
{ + let a = FoldOutcome::new(7usize, 3); + let b = FoldOutcome::new(7usize, 3); + assert_eq!(a, b); } #[test] - fn test_fold_outcome_with_elapsed_large_duration() { - let started_at = Utc::now(); - let elapsed = std::time::Duration::from_secs(3600); - let outcome = - FoldOutcome::with_elapsed(42u64, 100, FoldContext::new(), started_at, elapsed); - let expected = started_at + chrono::Duration::from_std(elapsed).unwrap(); - assert_eq!(outcome.completed_at, expected); - assert_eq!(outcome.state, 42u64); + fn default_is_zero_state_zero_count() { + let d = FoldOutcome::::default(); + assert_eq!(d.state, 0); + assert_eq!(d.entries_processed, 0); } } diff --git a/crates/khive-fold/src/selector.rs b/crates/khive-fold/src/selector.rs index 08b7a1b8..36a762c4 100644 --- a/crates/khive-fold/src/selector.rs +++ b/crates/khive-fold/src/selector.rs @@ -64,7 +64,7 @@ pub struct SelectorWeights { /// /// An implementation collapses N inputs into a subset that fits a budget, /// using weights and an optional query for relevance context. -pub trait Selector { +pub trait Selector: Send + Sync { fn select( &self, inputs: Vec>, diff --git a/crates/khive-fusion/Cargo.toml b/crates/khive-fusion/Cargo.toml index 6ccc1856..23dae00f 100644 --- a/crates/khive-fusion/Cargo.toml +++ b/crates/khive-fusion/Cargo.toml @@ -11,5 +11,5 @@ categories.workspace = true description = "Rank fusion strategies (RRF, Weighted, Union) with deterministic scoring" [dependencies] -khive-score = { version = "0.2.0", path = "../khive-score" } +khive-score = { version = "0.2.1", path = "../khive-score" } serde = { workspace = true } diff --git a/crates/khive-fusion/src/rrf.rs b/crates/khive-fusion/src/rrf.rs index 377a9151..cb268f38 100644 --- a/crates/khive-fusion/src/rrf.rs +++ b/crates/khive-fusion/src/rrf.rs @@ -9,7 +9,7 @@ //! - Sum is permutation invariant (order-independent) //! 
- Ties broken by ID for deterministic cross-platform ordering -use khive_score::DeterministicScore; +use khive_score::{rrf_score, DeterministicScore}; use std::cmp::Ordering; use std::collections::HashMap; use std::hash::Hash; @@ -79,26 +79,21 @@ pub fn reciprocal_rank_fusion( // Estimate capacity as sum of all source lengths (upper bound on unique IDs) let estimated_capacity: usize = sources.iter().map(|s| s.len()).sum(); - let mut combined: HashMap = HashMap::with_capacity(estimated_capacity); + let mut combined: HashMap = HashMap::with_capacity(estimated_capacity); for results in sources { for (rank_0_indexed, (id, _score)) in results.into_iter().enumerate() { // rank is 1-indexed per ADR-002 let rank_1_indexed = rank_0_indexed + 1; - let rrf_contribution = 1.0 / (k + rank_1_indexed) as f64; - - *combined.entry(id).or_insert(0.0) += rrf_contribution; + let contribution = rrf_score(rank_1_indexed, k); + let entry = combined.entry(id).or_insert(DeterministicScore::ZERO); + *entry = *entry + contribution; } } - // Convert to DeterministicScore and sort descending - // Permutation invariant: reordering sources yields same totals. - // The sum of contributions is permutation-invariant: reordering sources - // produces the same total score for each document. - let mut fused: Vec<(Id, DeterministicScore)> = combined - .into_iter() - .map(|(id, score)| (id, DeterministicScore::from_f64(score))) - .collect(); + // Sort descending by fixed-point score; permutation-invariant since DeterministicScore + // addition is order-independent (i128 accumulation in Add impl). 
+ let mut fused: Vec<(Id, DeterministicScore)> = combined.into_iter().collect(); // Sort by score descending, then by ID ascending for deterministic tie-breaking // This ensures cross-platform consistency when scores are equal diff --git a/crates/khive-fusion/src/weighted.rs b/crates/khive-fusion/src/weighted.rs index 6ebc9911..949da0a5 100644 --- a/crates/khive-fusion/src/weighted.rs +++ b/crates/khive-fusion/src/weighted.rs @@ -30,7 +30,7 @@ //! // result1 == result2 //! ``` -use khive_score::DeterministicScore; +use khive_score::{weighted_sum, DeterministicScore}; use std::cmp::Ordering; use std::collections::HashMap; use std::hash::Hash; @@ -39,32 +39,29 @@ use std::hash::Hash; /// /// When all scores are equal (or the source has one element) every entry /// receives 1.0 so it still contributes to the weighted combination. +const SCORE_SCALE: i128 = 4_294_967_296; // 2^32 — represents 1.0 in DeterministicScore + fn min_max_normalize_source( source: Vec<(Id, DeterministicScore)>, ) -> Vec<(Id, DeterministicScore)> { if source.is_empty() { return source; } - let min = source - .iter() - .map(|(_, s)| s.to_f64()) - .fold(f64::INFINITY, f64::min); - let max = source - .iter() - .map(|(_, s)| s.to_f64()) - .fold(f64::NEG_INFINITY, f64::max); - let span = max - min; - if span <= f64::EPSILON { + let min = source.iter().map(|(_, s)| s.to_raw()).min().unwrap(); + let max = source.iter().map(|(_, s)| s.to_raw()).max().unwrap(); + let span = (max as i128) - (min as i128); + if span <= 0 { return source .into_iter() - .map(|(id, _)| (id, DeterministicScore::from_f64(1.0))) + .map(|(id, _)| (id, DeterministicScore::from_raw(SCORE_SCALE as i64))) .collect(); } source .into_iter() .map(|(id, s)| { - let normalized = (s.to_f64() - min) / span; - (id, DeterministicScore::from_f64(normalized)) + let numerator = (s.to_raw() as i128 - min as i128) * SCORE_SCALE; + let normalized_raw = (numerator / span).clamp(0, i64::MAX as i128); + (id, 
DeterministicScore::from_raw(normalized_raw as i64)) }) .collect() } @@ -128,7 +125,7 @@ pub fn weighted_fusion( // Estimate capacity let estimated_capacity: usize = sources.iter().map(|s| s.len()).sum(); - let mut combined: HashMap = HashMap::with_capacity(estimated_capacity); + let mut combined: HashMap = HashMap::with_capacity(estimated_capacity); for (source_idx, results) in sources.into_iter().enumerate() { // Sources beyond the weights array get weight 0.0 (silently ignored). @@ -139,15 +136,17 @@ pub fn weighted_fusion( // to their configured weights (#2496/#2639). let norm_results = min_max_normalize_source(results); for (id, score) in norm_results { - *combined.entry(id).or_insert(0.0) += score.to_f64() * weight; + // weighted_sum converts weight to DeterministicScore internally and + // accumulates in i128 — no float arithmetic in the hot path. + let w = weighted_sum(&[score], &[weight]) + .expect("single score and weight have matching lengths"); + let entry = combined.entry(id).or_insert(DeterministicScore::ZERO); + *entry = *entry + w; } } - // Convert and sort by score descending, then by ID ascending for determinism - let mut fused: Vec<(Id, DeterministicScore)> = combined - .into_iter() - .map(|(id, score)| (id, DeterministicScore::from_f64(score))) - .collect(); + // Sort by score descending, then by ID ascending for deterministic tie-breaking. + let mut fused: Vec<(Id, DeterministicScore)> = combined.into_iter().collect(); fused.sort_by( |(id_a, score_a), (id_b, score_b)| match score_b.cmp(score_a) { diff --git a/crates/khive-gate-rego/Cargo.toml b/crates/khive-gate-rego/Cargo.toml index 32349cbf..53402e22 100644 --- a/crates/khive-gate-rego/Cargo.toml +++ b/crates/khive-gate-rego/Cargo.toml @@ -11,11 +11,11 @@ categories.workspace = true description = "Rego (Open Policy Agent) backend for khive-gate, powered by regorus." 
[dependencies] -khive-gate = { version = "0.2.0", path = "../khive-gate" } +khive-gate = { version = "0.2.1", path = "../khive-gate" } serde_json = { workspace = true } tracing = { workspace = true } regorus = "0.10" [dev-dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } diff --git a/crates/khive-gate-rego/src/lib.rs b/crates/khive-gate-rego/src/lib.rs index 5ff866b5..f4583a01 100644 --- a/crates/khive-gate-rego/src/lib.rs +++ b/crates/khive-gate-rego/src/lib.rs @@ -56,7 +56,7 @@ //! let gate: GateRef = Arc::new(RegoGate::from_policy_str(policy).unwrap()); //! let req = GateRequest::new( //! ActorRef::anonymous(), -//! Namespace::default_ns(), +//! Namespace::local(), //! "search", //! json!({"query": "LoRA"}), //! ); diff --git a/crates/khive-gate-rego/tests/integration.rs b/crates/khive-gate-rego/tests/integration.rs index 30800868..8cc208f1 100644 --- a/crates/khive-gate-rego/tests/integration.rs +++ b/crates/khive-gate-rego/tests/integration.rs @@ -13,12 +13,7 @@ fn fixture(name: &str) -> PathBuf { } fn request(verb: &str) -> GateRequest { - GateRequest::new( - ActorRef::anonymous(), - Namespace::default_ns(), - verb, - json!({}), - ) + GateRequest::new(ActorRef::anonymous(), Namespace::local(), verb, json!({})) } #[test] @@ -79,7 +74,7 @@ fn namespace_scoped_policy_emits_audit_obligation() { let mut req = GateRequest::new( ActorRef::new("user", "ocean"), - Namespace::default_ns(), + Namespace::local(), "search", json!({}), ); diff --git a/crates/khive-gate/Cargo.toml b/crates/khive-gate/Cargo.toml index 21d7f829..733d95ae 100644 --- a/crates/khive-gate/Cargo.toml +++ b/crates/khive-gate/Cargo.toml @@ -11,7 +11,7 @@ categories.workspace = true description = "Pluggable authorization gate trait + default AllowAllGate impl for khive verb dispatch." 
[dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/crates/khive-gate/src/lib.rs b/crates/khive-gate/src/lib.rs index 570fff50..51d0e9fb 100644 --- a/crates/khive-gate/src/lib.rs +++ b/crates/khive-gate/src/lib.rs @@ -16,7 +16,7 @@ //! let gate: GateRef = Arc::new(AllowAllGate); //! let req = GateRequest::new( //! ActorRef::anonymous(), -//! Namespace::default_ns(), +//! Namespace::local(), //! "search", //! json!({"query": "LoRA"}), //! ); @@ -307,7 +307,7 @@ mod tests { fn sample_request() -> GateRequest { GateRequest::new( ActorRef::anonymous(), - Namespace::default_ns(), + Namespace::local(), "search", json!({"query": "LoRA"}), ) @@ -444,7 +444,7 @@ mod tests { fn sample_req_with_session() -> GateRequest { GateRequest::new( ActorRef::new("user", "ocean"), - Namespace::default_ns(), + Namespace::local(), "create", json!({"kind": "concept"}), ) diff --git a/crates/khive-hnsw/Cargo.toml b/crates/khive-hnsw/Cargo.toml index d3edc030..ad0dd259 100644 --- a/crates/khive-hnsw/Cargo.toml +++ b/crates/khive-hnsw/Cargo.toml @@ -11,8 +11,9 @@ categories.workspace = true description = "HNSW (Hierarchical Navigable Small World) vector index with INT8 quantized two-phase search — formally verified in Lean4" [dependencies] -khive-score = { version = "0.2.0", path = "../khive-score" } -khive-types = { version = "0.2.0", path = "../khive-types" } +khive-score = { version = "0.2.1", path = "../khive-score" } +khive-types = { version = "0.2.1", path = "../khive-types" } +khive-fold = { version = "0.2.1", path = "../khive-fold", optional = true } lattice-embed = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } @@ -24,7 +25,9 @@ rayon = "1.10" ulid = "1.1" [dev-dependencies] +blake3 = "1" proptest = "1" +uuid = 
{ workspace = true } [features] -checkpoint = [] +checkpoint = ["dep:khive-fold"] diff --git a/crates/khive-hnsw/src/checkpoint/integration_tests.rs b/crates/khive-hnsw/src/checkpoint/integration_tests.rs index 666382aa..a20ca83a 100644 --- a/crates/khive-hnsw/src/checkpoint/integration_tests.rs +++ b/crates/khive-hnsw/src/checkpoint/integration_tests.rs @@ -1,12 +1,7 @@ use super::*; use khive_fold::{Checkpoint, CheckpointStore, FoldContext, InMemoryCheckpointStore}; -use khive_types::Hash32; use uuid::Uuid; -fn test_hash() -> Hash32 { - Hash32::from_bytes(*blake3::hash(b"hnsw checkpoint test").as_bytes()) -} - fn make_id(seed: u8) -> NodeId { NodeId::new([seed; 16]) } @@ -60,11 +55,11 @@ fn create_hnsw_checkpoint() { "hnsw_test:ckpt-1", snap, Uuid::new_v4(), - test_hash(), 100, FoldContext::new(), 1, - ); + ) + .expect("Checkpoint::new"); assert_eq!(checkpoint.state.total_nodes, 1); assert_eq!(checkpoint.state.live_nodes, 1); @@ -79,11 +74,11 @@ fn create_hnsw_checkpoint_with_tombstones() { "hnsw_test:ckpt-1", snap, Uuid::new_v4(), - test_hash(), 100, FoldContext::new(), 1, - ); + ) + .expect("Checkpoint::new"); assert_eq!(checkpoint.state.total_nodes, 2); assert_eq!(checkpoint.state.live_nodes, 1); @@ -100,13 +95,13 @@ fn store_and_load_hnsw_checkpoint() { "hnsw_idx:ckpt-1", snap, Uuid::new_v4(), - test_hash(), 50, FoldContext::new(), 1, - ); + ) + .expect("Checkpoint::new"); - store.save(&checkpoint).expect("save"); + store.save(checkpoint).expect("save"); let loaded = store .load("hnsw_idx:ckpt-1") @@ -129,13 +124,13 @@ fn store_and_load_checkpoint_with_tombstones() { "hnsw_idx:ckpt-tomb", snap, Uuid::new_v4(), - test_hash(), 50, FoldContext::new(), 1, - ); + ) + .expect("Checkpoint::new"); - store.save(&checkpoint).expect("save"); + store.save(checkpoint).expect("save"); let loaded = store .load("hnsw_idx:ckpt-tomb") @@ -161,12 +156,12 @@ fn load_latest_hnsw_checkpoint() { format!("hnsw_idx:ckpt-{i}"), snap, Uuid::new_v4(), - test_hash(), (i + 1) * 10, 
FoldContext::new(), 1, - ); - store.save(&checkpoint).expect("save"); + ) + .expect("Checkpoint::new"); + store.save(checkpoint).expect("save"); std::thread::sleep(std::time::Duration::from_millis(10)); } diff --git a/crates/khive-hnsw/src/distance.rs b/crates/khive-hnsw/src/distance.rs index 9f225736..77242f8d 100644 --- a/crates/khive-hnsw/src/distance.rs +++ b/crates/khive-hnsw/src/distance.rs @@ -3,7 +3,7 @@ //! # Formal Verification //! //! This implementation corresponds to the formal proofs in -//! `proofs/Lion/Retrieval/Distance.lean`. Key theorems: +//! `proofs/Retrieval/Distance.lean` (ADR-030 §Phase 2). Key theorems: //! //! ## Metric Axioms (Euclidean) //! - `euclidean_nonneg`: d(x,y) ≥ 0 @@ -24,6 +24,7 @@ //! - `similarity_bounded`: 0 ≤ sim ≤ 1 for d ≥ 0 use super::config::DistanceMetric; +use khive_score::DeterministicScore; /// Compute cosine distance from pre-computed dot product and norms. /// @@ -67,10 +68,10 @@ pub fn compute_distance( DistanceMetric::Cosine => { // ADR-002: khive-embed is the SIMD foundation layer // - // **PROOF CORRESPONDENCE**: Lion.Retrieval.Cosine.cosine_sim_bounded + // **PROOF CORRESPONDENCE**: khive.Retrieval.Cosine.cosine_sim_bounded // Cosine similarity is bounded: -1 <= cos(x,y) <= 1 for unit vectors // - // **PROOF CORRESPONDENCE**: Lion.Retrieval.Cosine.cauchy_schwarz + // **PROOF CORRESPONDENCE**: khive.Retrieval.Cosine.cauchy_schwarz // Cauchy-Schwarz inequality: || <= ||x|| * ||y|| let dot = lattice_embed::simd::dot_product(a, b); cosine_distance_from_parts(dot, a_norm, b_norm) @@ -83,13 +84,13 @@ pub fn compute_distance( DistanceMetric::L2 => { // ADR-002: lattice-embed is the SIMD foundation layer // - // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_nonneg + // **PROOF CORRESPONDENCE**: khive.Retrieval.Distance.euclidean_nonneg // Euclidean distance is non-negative: d(x,y) >= 0 // - // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_symm + // **PROOF CORRESPONDENCE**: 
khive.Retrieval.Distance.euclidean_symm // Euclidean distance is symmetric: d(x,y) = d(y,x) // - // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_triangle + // **PROOF CORRESPONDENCE**: khive.Retrieval.Distance.euclidean_triangle // Triangle inequality: d(x,z) <= d(x,y) + d(y,z) lattice_embed::simd::euclidean_distance(a, b) } @@ -124,20 +125,24 @@ pub(crate) fn compute_ordering_distance( } } -/// Convert distance back to similarity score (higher = more similar). +/// Convert distance to a `DeterministicScore` (higher score = more similar). /// -/// **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.similarity_mono +/// Replaces the former `distance_to_similarity -> f32` at the HNSW output boundary +/// so that score arithmetic stays in fixed-point throughout the result pipeline. +/// +/// **PROOF CORRESPONDENCE**: khive.Retrieval.Distance.similarity_mono /// Similarity conversion is monotonically decreasing in distance: /// d1 < d2 implies sim(d1) > sim(d2) #[inline] -pub fn distance_to_similarity(dist: f32, metric: DistanceMetric) -> f32 { - match metric { - DistanceMetric::Cosine => 1.0 - dist, - DistanceMetric::Dot => -dist, - DistanceMetric::L2 => 1.0 / (1.0 + dist), - // Fall back to cosine similarity for future variants. - _ => 1.0 - dist, - } +pub(crate) fn score_from_distance(dist: f32, metric: DistanceMetric) -> DeterministicScore { + let d = if dist.is_nan() { 0.0 } else { dist } as f64; + let similarity = match metric { + DistanceMetric::Cosine => 1.0 - d, + DistanceMetric::Dot => -d, + DistanceMetric::L2 => 1.0 / (1.0 + d.max(0.0)), + _ => 1.0 - d, + }; + DeterministicScore::from_f64(similarity) } /// Ordered wrapper for f32 to enable use in BinaryHeap. @@ -269,15 +274,21 @@ mod tests { } #[test] - fn test_distance_to_similarity() { + fn test_score_from_distance() { + // f32 input loses precision on widening to f64; use 1e-6 tolerance. 
// Cosine: similarity = 1 - distance - assert!((distance_to_similarity(0.2, DistanceMetric::Cosine) - 0.8).abs() < 0.001); + assert!((score_from_distance(0.2, DistanceMetric::Cosine).to_f64() - 0.8).abs() < 1e-6); // Dot: similarity = -distance - assert!((distance_to_similarity(-5.0, DistanceMetric::Dot) - 5.0).abs() < 0.001); + assert!((score_from_distance(-5.0, DistanceMetric::Dot).to_f64() - 5.0).abs() < 1e-6); // Euclidean: similarity = 1/(1+distance) - assert!((distance_to_similarity(1.0, DistanceMetric::L2) - 0.5).abs() < 0.001); + assert!((score_from_distance(1.0, DistanceMetric::L2).to_f64() - 0.5).abs() < 1e-6); + + // NaN input maps to 0 distance, then cosine gives 1.0 + assert!( + (score_from_distance(f32::NAN, DistanceMetric::Cosine).to_f64() - 1.0).abs() < 1e-6 + ); } #[test] diff --git a/crates/khive-hnsw/src/index/insert.rs b/crates/khive-hnsw/src/index/insert.rs index 8c0766c8..261df54d 100644 --- a/crates/khive-hnsw/src/index/insert.rs +++ b/crates/khive-hnsw/src/index/insert.rs @@ -202,10 +202,10 @@ impl HnswIndex { /// /// Uses seeded RNG if `config.seed` was set for reproducible builds. 
/// - /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.level_prob_sums_to_one + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.HNSW.level_prob_sums_to_one` /// Level probabilities form a valid distribution: sum_{l=0}^{inf} P(level=l) = 1 /// - /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.level_survival_decreasing + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.HNSW.level_survival_decreasing` /// Survival probability decreases exponentially: P(level >= l) = (1/M)^l pub(super) fn random_level(&mut self) -> usize { let r: f64 = self.rng.gen::().max(f64::MIN_POSITIVE); diff --git a/crates/khive-hnsw/src/index/search.rs b/crates/khive-hnsw/src/index/search.rs index 0176435f..f2c52a26 100644 --- a/crates/khive-hnsw/src/index/search.rs +++ b/crates/khive-hnsw/src/index/search.rs @@ -5,7 +5,7 @@ use khive_score::DeterministicScore; use super::HnswIndex; use crate::config::DistanceMetric; -use crate::distance::{cosine_distance_from_parts, distance_to_similarity, OrderedF32}; +use crate::distance::{cosine_distance_from_parts, score_from_distance, OrderedF32}; use crate::error::{Result, RetrievalError}; use crate::metrics::{self, MetricEvent, MetricValue}; use crate::search_context::HnswSearchContext; @@ -150,7 +150,7 @@ impl HnswIndex { /// Emits `hnsw.search.duration_ms`, `hnsw.search.count`, and /// `hnsw.search.results` metrics when a sink is attached. 
/// - /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.search_complexity_log + /// **PROOF CORRESPONDENCE**: `khive.Retrieval.HNSW.search_complexity_log` /// Search complexity is O(ef * log_M(N)) where: /// - ef is the search expansion factor /// - M is the number of neighbors per node @@ -350,10 +350,9 @@ impl HnswIndex { .take(k) .map(|(dist, iid)| { let true_dist = if is_l2 { dist.max(0.0).sqrt() } else { *dist }; - let similarity = distance_to_similarity(true_dist, self.config.metric); ( self.external_id(*iid), - DeterministicScore::from_f32(similarity), + score_from_distance(true_dist, self.config.metric), ) }) .collect(); @@ -382,7 +381,7 @@ impl HnswIndex { let metric = self.config.metric; let n = self.nodes.len(); - let mut scored: Vec<(usize, f32)> = Vec::with_capacity(n); + let mut scored: Vec<(usize, DeterministicScore)> = Vec::with_capacity(n); let mut i = 0usize; while i + 4 <= n { @@ -403,7 +402,7 @@ impl HnswIndex { hnsw_distance_batch4_from_dots(metric, dots, query_norm, query_is_unit, norms); for (j, &dist) in dists.iter().enumerate() { if !self.is_tombstoned(i + j) { - scored.push((i + j, distance_to_similarity(dist, metric))); + scored.push((i + j, score_from_distance(dist, metric))); } } i += 4; @@ -422,7 +421,7 @@ impl HnswIndex { _ => unreachable!(), } }; - scored.push((i, distance_to_similarity(dist, metric))); + scored.push((i, score_from_distance(dist, metric))); } i += 1; } @@ -433,16 +432,14 @@ impl HnswIndex { let effective_k = k.min(scored.len()); if scored.len() > effective_k { - scored.select_nth_unstable_by(effective_k - 1, |(_, a), (_, b)| { - b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal) - }); + scored.select_nth_unstable_by(effective_k - 1, |(_, a), (_, b)| b.cmp(a)); scored.truncate(effective_k); } - scored.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)); + scored.sort_by(|(_, a), (_, b)| b.cmp(a)); Ok(scored .into_iter() - .map(|(iid, sim)| (self.external_id(iid), 
DeterministicScore::from_f32(sim))) + .map(|(iid, score)| (self.external_id(iid), score)) .collect()) } diff --git a/crates/khive-mcp/Cargo.toml b/crates/khive-mcp/Cargo.toml index d123f619..1657f8f4 100644 --- a/crates/khive-mcp/Cargo.toml +++ b/crates/khive-mcp/Cargo.toml @@ -11,12 +11,14 @@ categories.workspace = true description = "khive stdio MCP server — the only user-facing Rust binary" [dependencies] -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-request = { version = "0.2.0", path = "../khive-request" } -khive-pack-kg = { version = "0.2.0", path = "../khive-pack-kg" } -khive-pack-gtd = { version = "0.2.0", path = "../khive-pack-gtd" } -khive-pack-memory = { version = "0.2.0", path = "../khive-pack-memory" } -khive-pack-brain = { version = "0.2.0", path = "../khive-pack-brain" } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-request = { version = "0.2.1", path = "../khive-request" } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } +khive-pack-gtd = { version = "0.2.1", path = "../khive-pack-gtd" } +khive-pack-memory = { version = "0.2.1", path = "../khive-pack-memory" } +khive-pack-brain = { version = "0.2.1", path = "../khive-pack-brain" } +khive-pack-comm = { version = "0.2.1", path = "../khive-pack-comm" } +khive-pack-schedule = { version = "0.2.1", path = "../khive-pack-schedule" } inventory = { workspace = true } rmcp = { version = "1.7", features = ["server", "transport-io"] } tokio = { workspace = true } @@ -31,7 +33,7 @@ anyhow = { workspace = true } [dev-dependencies] tokio = { workspace = true, features = ["test-util"] } rmcp = { version = "1.7", features = ["server", "transport-io", "client"] } -khive-types = { version = "0.2.0", path = "../khive-types" } +khive-types = { version = "0.2.1", path = "../khive-types" } async-trait = { workspace = true } [[bin]] diff --git a/crates/khive-mcp/src/main.rs b/crates/khive-mcp/src/main.rs index 5a279dac..3219ea6c 100644 --- 
a/crates/khive-mcp/src/main.rs +++ b/crates/khive-mcp/src/main.rs @@ -67,16 +67,19 @@ async fn main() -> anyhow::Result<()> { args.pack }; + let default_namespace = khive_runtime::Namespace::parse(&args.namespace) + .map_err(|e| anyhow::anyhow!("invalid --namespace {:?}: {e}", args.namespace))?; + let config = RuntimeConfig { db_path, - default_namespace: args.namespace, + default_namespace, embedding_model, packs, ..RuntimeConfig::default() }; let runtime = KhiveRuntime::new(config)?; - let server = KhiveMcpServer::new(runtime); + let server = KhiveMcpServer::new(runtime).map_err(|e| anyhow::anyhow!("{e}"))?; server.serve_stdio().await?; Ok(()) } diff --git a/crates/khive-mcp/src/pack.rs b/crates/khive-mcp/src/pack.rs index 2795104a..0e7d7ed5 100644 --- a/crates/khive-mcp/src/pack.rs +++ b/crates/khive-mcp/src/pack.rs @@ -1,4 +1,4 @@ -//! Pack registration helpers for `khive-mcp` (ADR-063). +//! Pack registration helpers for `khive-mcp` (ADR-027). //! //! Pack discovery is handled by `inventory`-based self-registration: each pack //! crate submits a `PackRegistration` at link time (via `inventory::submit!`), @@ -10,6 +10,11 @@ //! in the final binary. Without at least one symbol reference per crate the //! linker may dead-strip the crate entirely and the inventory constructors will //! not run. +//! +//! To add a new first-party pack: (1) add its crate as a `[dependency]` in +//! `khive-mcp/Cargo.toml`, (2) add a `pub use` line below referencing any +//! public type from the crate — this is the force-link anchor that keeps the +//! linker from stripping the `inventory::submit!` constructor. 
pub use khive_runtime::{KhiveRuntime, PackRegistry, VerbRegistryBuilder}; @@ -19,8 +24,12 @@ pub use khive_runtime::{KhiveRuntime, PackRegistry, VerbRegistryBuilder}; #[doc(hidden)] pub use khive_pack_brain::BrainPack as _BrainPack; #[doc(hidden)] +pub use khive_pack_comm::CommPack as _CommPack; +#[doc(hidden)] pub use khive_pack_gtd::GtdPack as _GtdPack; #[doc(hidden)] pub use khive_pack_kg::KgPack as _KgPack; #[doc(hidden)] pub use khive_pack_memory::MemoryPack as _MemoryPack; +#[doc(hidden)] +pub use khive_pack_schedule::SchedulePack as _SchedulePack; diff --git a/crates/khive-mcp/src/server.rs b/crates/khive-mcp/src/server.rs index f9d7d088..99d547d8 100644 --- a/crates/khive-mcp/src/server.rs +++ b/crates/khive-mcp/src/server.rs @@ -24,8 +24,11 @@ use rmcp::{ }; use serde_json::{json, Value}; -use khive_request::{parse_request, DslError, ParsedOp}; -use khive_runtime::{KhiveRuntime, PackRegistry, RuntimeError, VerbRegistry, VerbRegistryBuilder}; +use khive_request::{parse_request, ArgValue, DslError, ExecutionMode, ParsedOp}; +use khive_runtime::{ + present, KhiveRuntime, PackRegistry, PresentationMode, RuntimeError, VerbRegistry, + VerbRegistryBuilder, +}; use crate::tools::request::RequestParams; @@ -123,7 +126,7 @@ impl std::error::Error for PackRegError {} /// Built-in pack names known to this binary. /// /// Sourced from `PackRegistry::discovered_names()` so the list always reflects -/// whatever pack crates are linked into the binary (ADR-063). +/// whatever pack crates are linked into the binary (ADR-027). pub fn builtin_pack_names() -> Vec<&'static str> { PackRegistry::discovered_names() } @@ -135,36 +138,22 @@ impl KhiveMcpServer { /// registry. Gate decisions are **hard-enforcing** in v0.3 — a `Deny` /// result blocks pack dispatch and returns `PermissionDenied` (ADR-035). /// - /// Always returns a server. 
Unknown pack names are logged via `tracing::warn!` - /// rather than rejected — startup must remain robust if a future binary drops - /// a pack that an older config still names. Use [`Self::with_packs`] for - /// strict validation in tests / programmatic callers. - pub fn new(runtime: KhiveRuntime) -> Self { + /// Fails fast if any requested pack is unknown or has an unsatisfied + /// dependency (ADR-027). A misconfigured `KHIVE_PACKS` is a boot error — + /// callers must list all required packs explicitly. Use [`Self::with_packs`] + /// for the same strict path with an explicit pack list. + /// + /// # Errors + /// + /// Returns [`PackRegError`] if any pack in `runtime.config().packs` is + /// unknown or if a declared dependency is absent from the list. + // The error variant intentionally carries the runtime so callers can recover. + #[allow(clippy::result_large_err)] + pub fn new(runtime: KhiveRuntime) -> Result { let packs: Vec = runtime.config().packs.clone(); - Self::with_packs(runtime, &packs).unwrap_or_else(|err| { - tracing::warn!("pack registration: {err}; falling back to kg only"); - let recovered_runtime = err.runtime; - let gate = recovered_runtime.config().gate.clone(); - let default_namespace = recovered_runtime.config().default_namespace.clone(); - let mut builder = VerbRegistryBuilder::new(); - builder.with_gate(gate); - builder.with_default_namespace(default_namespace); - // ADR-035: wire the EventStore for the fallback path too. - if let Ok(event_store) = recovered_runtime.events(None) { - builder.with_event_store(event_store); - } - // Fallback: register the kg pack through the inventory registry so - // this code path stays free of direct pack-type imports. 
- PackRegistry::register_packs( - &["kg".to_string()], - recovered_runtime.clone(), - &mut builder, - ) - .expect("kg is a known pack name"); - let registry = builder.build().expect("fallback kg registry builds"); - recovered_runtime.install_edge_rules(registry.all_edge_rules()); - Self { registry } - }) + // ADR-014 (c14 hardening): fail-fast on bad packs so callers can decide + // recovery. The c12 schema_plan application happens inside with_packs. + Self::with_packs(runtime, &packs) } /// Build a server with an explicit pack list (strict — fails on unknown names). @@ -177,9 +166,11 @@ impl KhiveMcpServer { let default_namespace = runtime.config().default_namespace.clone(); let mut builder = VerbRegistryBuilder::new(); builder.with_gate(gate); - builder.with_default_namespace(default_namespace); + builder.with_default_namespace(default_namespace.as_str()); // ADR-035: wire the EventStore into the registry for audit persistence. - if let Ok(event_store) = runtime.events(None) { + if let Ok(event_store) = + runtime.events(&runtime.authorize(khive_runtime::Namespace::local())) + { builder.with_event_store(event_store); } if let Err(unknown) = PackRegistry::register_packs(packs, runtime.clone(), &mut builder) { @@ -195,6 +186,10 @@ impl KhiveMcpServer { // ADR-031: aggregate pack-declared edge endpoint rules into the runtime // so `validate_edge_relation_endpoints` can consult them. runtime.install_edge_rules(registry.all_edge_rules()); + // ADR-017 §c12: apply pack-auxiliary schema plans at startup so pack + // tables are present before any handler runs. Errors are logged but + // not propagated so a single pack's schema failure cannot abort startup. + registry.apply_schema_plans(runtime.backend()); Ok(Self { registry }) } @@ -229,40 +224,236 @@ impl KhiveMcpServer { build_verb_catalog(verbs) } - /// Run a parsed batch in parallel, gathering per-op results in input order. 
- async fn run_parsed(&self, ops: Vec) -> Value { - let futures = ops.into_iter().map(|op| { - let registry = self.registry.clone(); - async move { - let ParsedOp { tool, args } = op; - let args_value = Value::Object(args); - match registry.dispatch(&tool, args_value).await { - Ok(result) => json!({ "ok": true, "tool": tool, "result": result }), - Err(RuntimeError::Khive(k)) => { - // Preserve the full structured KhiveError on the wire. - // Non-Khive variants fall through to the flat-string form - // below to keep backward compatibility. - let error_payload = serde_json::to_value(&k).unwrap_or_else( - |_| json!({ "kind": "internal", "message": k.to_string() }), - ); - json!({ "ok": false, "tool": tool, "error": error_payload }) + /// Dispatch a single [`ParsedOp`] by resolving its args (potentially + /// substituting `$prev` references) and calling the [`VerbRegistry`]. + /// + /// Returns a per-op result object: `{ok, tool, result}` on success or + /// `{ok: false, tool, error}` on failure. + async fn dispatch_op( + &self, + op: ParsedOp, + prev_result: Option<&Value>, + ) -> Result { + let ParsedOp { tool, args } = op; + + // Resolve args — substitute $prev references when prev_result is Some. 
+ let mut resolved: serde_json::Map = serde_json::Map::new(); + for (name, arg_val) in args { + let value = match &arg_val { + ArgValue::Value(v) => v.clone(), + ArgValue::PrevRef { path } => { + let prev = prev_result.ok_or_else(|| { + ( + tool.clone(), + json!({ + "kind": "substitution_error", + "message": format!( + "argument {name:?}: $prev reference in non-chain context" + ) + }), + ) + })?; + let extracted = arg_val.resolve_prev(prev).ok_or_else(|| { + let display_path = if path.is_empty() { + "$prev".to_string() + } else { + format!("$prev.{path}") + }; + ( + tool.clone(), + json!({ + "kind": "substitution_error", + "message": format!( + "argument {name:?}: path {display_path:?} not found in prior result" + ), + "path": display_path + }), + ) + })?; + extracted.clone() + } + }; + resolved.insert(name, value); + } + + let args_value = Value::Object(resolved); + match self.registry.dispatch(&tool, args_value).await { + Ok(result) => Ok(json!({ "ok": true, "tool": tool, "result": result })), + Err(RuntimeError::Khive(k)) => { + let error_payload = serde_json::to_value(&k) + .unwrap_or_else(|_| json!({ "kind": "internal", "message": k.to_string() })); + Err((tool, error_payload)) + } + Err(e) => Err((tool, json!(e.to_string()))), + } + } + + /// Execute a parsed request, dispatching according to its [`ExecutionMode`]. + /// + /// - `Single` / `Parallel`: all ops run concurrently; per-op failure does + /// not abort siblings. `aborted` count is always 0. + /// - `Chain`: ops run sequentially; `$prev` from each op's result is + /// substituted into the next op's args. If any op fails (or a `$prev` + /// substitution fails), remaining ops appear as `aborted: true`. + /// + /// Presentation transforms (ADR-045) are applied per-op AFTER dispatch, + /// using `mode_for_op` to determine the mode per position. Chain `$prev` + /// substitution uses canonical (verbose) handler output; the transform runs + /// only at the final response-envelope boundary. 
+ /// + /// Response envelope (ADR-016): + /// ```json + /// { + /// "results": [...], + /// "summary": { "total": N, "succeeded": K, "failed": M, "aborted": A } + /// } + /// ``` + async fn run_parsed( + &self, + ops: Vec, + mode: ExecutionMode, + presentation: PresentationMode, + presentation_per_op: Option>>, + ) -> Value { + let now_unix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs() as i64) + .unwrap_or(0); + + // Resolve per-op presentation mode: per-op entry overrides batch default. + let mode_for_op = |i: usize| -> PresentationMode { + presentation_per_op + .as_ref() + .and_then(|v| v.get(i)) + .and_then(|o| *o) + .unwrap_or(presentation) + }; + + match mode { + ExecutionMode::Single | ExecutionMode::Parallel => { + // Independent dispatch — run all concurrently, results in input order. + let futures = ops.into_iter().enumerate().map(|(i, op)| { + let registry = self.registry.clone(); + let op_mode = mode_for_op(i); + async move { + let tool = op.tool.clone(); + // No $prev in parallel/single mode. + let mut resolved: serde_json::Map = + serde_json::Map::new(); + for (name, arg_val) in &op.args { + let value = match arg_val { + ArgValue::Value(v) => v.clone(), + ArgValue::PrevRef { .. } => { + // $prev in non-chain context: treat as error for this op. 
+ return json!({ + "ok": false, + "tool": tool, + "error": format!( + "argument {name:?}: $prev reference is only valid in chain (|) mode" + ) + }); + } + }; + resolved.insert(name.clone(), value); + } + let args_value = Value::Object(resolved); + match registry.dispatch(&tool, args_value).await { + Ok(result) => { + let presented = present(result, op_mode, now_unix); + json!({ "ok": true, "tool": tool, "result": presented }) + } + Err(RuntimeError::Khive(k)) => { + let error_payload = serde_json::to_value(&k).unwrap_or_else( + |_| json!({ "kind": "internal", "message": k.to_string() }), + ); + json!({ "ok": false, "tool": tool, "error": error_payload }) + } + Err(e) => json!({ "ok": false, "tool": tool, "error": e.to_string() }), + } + } + }); + let results: Vec = futures::future::join_all(futures).await; + let total = results.len(); + let succeeded = results + .iter() + .filter(|r| r.get("ok").and_then(Value::as_bool) == Some(true)) + .count(); + let failed = total - succeeded; + json!({ + "results": results, + "summary": { "total": total, "succeeded": succeeded, "failed": failed, "aborted": 0 }, + }) + } + ExecutionMode::Chain => { + // Sequential execution with $prev substitution and abort-on-failure. + // $prev uses canonical (verbose) handler output — presentation runs + // only at the final response-envelope boundary (ADR-045 §4). + let total = ops.len(); + let mut results: Vec = Vec::with_capacity(total); + // prev_result holds the CANONICAL result (pre-presentation) for $prev. + let mut prev_result: Option = None; + let mut aborted_from: Option = None; + + for (i, op) in ops.into_iter().enumerate() { + if aborted_from.is_some() { + // A prior op failed — mark remaining as aborted. + results.push(json!({ "ok": false, "tool": op.tool, "aborted": true })); + continue; + } + let op_mode = mode_for_op(i); + match self.dispatch_op(op, prev_result.as_ref()).await { + Ok(result_obj) => { + // Extract canonical result for $prev (pre-presentation). 
+ prev_result = result_obj.get("result").cloned(); + // Apply presentation to the result field only. + let presented_obj = + apply_presentation_to_result(result_obj, op_mode, now_unix); + results.push(presented_obj); + } + Err((tool, error_payload)) => { + results + .push(json!({ "ok": false, "tool": tool, "error": error_payload })); + aborted_from = Some(i + 1); + } } - Err(e) => json!({ "ok": false, "tool": tool, "error": e.to_string() }), } + + let succeeded = results + .iter() + .filter(|r| r.get("ok").and_then(Value::as_bool) == Some(true)) + .count(); + let aborted = results + .iter() + .filter(|r| r.get("aborted").and_then(Value::as_bool) == Some(true)) + .count(); + let failed = total - succeeded - aborted; + json!({ + "results": results, + "summary": { "total": total, "succeeded": succeeded, "failed": failed, "aborted": aborted }, + }) } - }); - let results: Vec = futures::future::join_all(futures).await; - let total = results.len(); - let succeeded = results - .iter() - .filter(|r| r.get("ok").and_then(Value::as_bool) == Some(true)) - .count(); - let failed = total - succeeded; - json!({ - "results": results, - "summary": { "total": total, "succeeded": succeeded, "failed": failed }, - }) + } + } +} + +/// Apply the presentation transform to the `result` field of a successful +/// per-op envelope, leaving error envelopes unchanged. +/// +/// Per ADR-045 §3.5: "Error envelopes are NEVER transformed." 
+fn apply_presentation_to_result( + mut result_obj: Value, + mode: PresentationMode, + now_unix: i64, +) -> Value { + if result_obj.get("ok").and_then(Value::as_bool) == Some(true) { + if let Some(result_field) = result_obj.get("result").cloned() { + let presented = present(result_field, mode, now_unix); + if let Some(obj) = result_obj.as_object_mut() { + obj.insert("result".to_string(), presented); + } + } } + result_obj } // ── single MCP tool ───────────────────────────────────────────────────────── @@ -271,33 +462,62 @@ impl KhiveMcpServer { impl KhiveMcpServer { #[tool(description = r#"Run one or more khive verbs in a single MCP call. -ops syntax (ADR-020): +ops syntax (ADR-016): Single op : verb(name=value, name=value) Batch : [verb(...), verb(...)] — parallel, max 100 + Chain : verb1(...) | verb2(id=$prev.id) — sequential, $prev JSON form : [{"tool":"verb","args":{...}}, ...] — equivalent Argument values are JSON literals: strings (double-quoted), numbers, booleans, null, arrays, objects. Strings may contain commas / parens; escape with \". +Chain-only: $prev resolves to the prior op's result; $prev.field.path extracts +a nested field. Response shape: { "results": [ {"ok": true, "tool": "verb", "result": {...}}, ... ], - "summary": { "total": N, "succeeded": N, "failed": N } + "summary": { "total": N, "succeeded": N, "failed": N, "aborted": N } } -A failed op does NOT abort the batch. Each entry has its own ok / error. +Parallel: a failed op does NOT abort siblings. Chain: failure aborts remaining +ops (reported as {"ok": false, "aborted": true}). Committed ops are not rolled back. Verb discovery: install the `kg` / `gtd` plugins for usage skills. The verbs currently registered on this server (pack-derived) are listed below. Argument schemas live in each pack's docs and SKILL.md files. Tip: for one-shot calls, the single-op form is the densest. Use batch when -several independent ops can run together (e.g. 
bulk create + link)."#)] +several independent ops can run together; use chain when each op needs the prior +result (e.g. create then link with the new entity's id)."#)] async fn request(&self, Parameters(p): Parameters) -> Result { let parsed = parse_request(&p.ops).map_err(dsl_err_to_mcp)?; - let result = self.run_parsed(parsed.ops).await; + + // Parse presentation strings → PresentationMode (ADR-045). + let presentation = parse_presentation_mode(p.presentation.as_deref()) + .map_err(|e| McpError::invalid_params(e, None))?; + let presentation_per_op: Option>> = + if let Some(per_op_strs) = p.presentation_per_op { + let mut modes = Vec::with_capacity(per_op_strs.len()); + for s in per_op_strs { + let mode = match s.as_deref() { + None => None, + Some(v) => Some( + parse_presentation_mode(Some(v)) + .map_err(|e| McpError::invalid_params(e, None))?, + ), + }; + modes.push(mode); + } + Some(modes) + } else { + None + }; + + let result = self + .run_parsed(parsed.ops, parsed.mode, presentation, presentation_per_op) + .await; serde_json::to_string_pretty(&result) .map_err(|e| McpError::internal_error(format!("serialize: {e}"), None)) } @@ -307,6 +527,20 @@ fn dsl_err_to_mcp(e: DslError) -> McpError { McpError::invalid_params(e.to_string(), None) } +/// Parse an optional presentation mode string from the request envelope. +/// +/// `None` → default (`Agent`). Known values: `"agent"`, `"verbose"`, `"human"`. 
+fn parse_presentation_mode(s: Option<&str>) -> Result { + match s { + None | Some("agent") => Ok(PresentationMode::Agent), + Some("verbose") => Ok(PresentationMode::Verbose), + Some("human") => Ok(PresentationMode::Human), + Some(other) => Err(format!( + "unknown presentation mode {other:?}; valid values: \"agent\", \"verbose\", \"human\"" + )), + } +} + #[tool_handler] impl ServerHandler for KhiveMcpServer { fn get_info(&self) -> ServerInfo { diff --git a/crates/khive-mcp/src/tools/request.rs b/crates/khive-mcp/src/tools/request.rs index 0fd11d23..e4be0398 100644 --- a/crates/khive-mcp/src/tools/request.rs +++ b/crates/khive-mcp/src/tools/request.rs @@ -1,9 +1,10 @@ -//! Parameter type for the single `request` MCP tool (ADR-020). +//! Parameter type for the single `request` MCP tool (ADR-016 + ADR-045). use rmcp::schemars; use serde::{Deserialize, Serialize}; -/// Input for `request` — a DSL string (function-call or JSON form). +/// Input for `request` — a DSL string (function-call or JSON form) plus +/// optional presentation controls (ADR-045). #[derive(Debug, Serialize, Deserialize, schemars::JsonSchema)] pub struct RequestParams { /// One or more operations as a function-call DSL or JSON-form string. @@ -11,12 +12,35 @@ pub struct RequestParams { /// Examples: /// - `next()` /// - `assign(title="ship", priority="p1")` + /// - `create(kind="entity", name="A") | link(source_id=$prev.id, target_id="b", relation="extends")` /// - `[create(kind="entity", entity_kind="concept", name="A"), create(kind="entity", entity_kind="concept", name="B")]` /// - `[{"tool":"next","args":{}}, {"tool":"complete","args":{"id":"abc"}}]` /// /// Max 100 operations per batch. #[schemars( - description = "Function-call DSL or JSON-form batch (ADR-020). See request tool description." + description = "Function-call DSL or JSON-form batch (ADR-016). See request tool description." )] pub ops: String, + + /// Presentation mode for the response (ADR-045). 
+ /// + /// - `"agent"` (default): token-efficient — short UUIDs, compact timestamps, + /// empty fields dropped. + /// - `"verbose"`: full canonical shape, no transformation. + /// - `"human"`: delegated to CLI layer (same as verbose at runtime level). + /// + /// When omitted, defaults to `"agent"`. + #[serde(default)] + #[schemars(description = "Presentation mode: \"agent\" (default), \"verbose\", or \"human\"")] + pub presentation: Option, + + /// Per-operation presentation overrides (ADR-045). + /// + /// When provided, entries override `presentation` per op by index. + /// `null` entries fall back to the batch-level `presentation`. + /// + /// When omitted, all ops use `presentation`. + #[serde(default)] + #[schemars(description = "Per-op presentation mode override (optional)")] + pub presentation_per_op: Option>>, } diff --git a/crates/khive-mcp/tests/integration.rs b/crates/khive-mcp/tests/integration.rs index d9f837d6..59719812 100644 --- a/crates/khive-mcp/tests/integration.rs +++ b/crates/khive-mcp/tests/integration.rs @@ -5,9 +5,13 @@ use async_trait::async_trait; use khive_mcp::server::KhiveMcpServer; use khive_runtime::{ - KhiveRuntime, PackRuntime, RuntimeConfig, RuntimeError, VerbRegistry, VerbRegistryBuilder, + KhiveRuntime, Namespace, NamespaceToken, PackRuntime, RuntimeConfig, RuntimeError, + VerbRegistry, VerbRegistryBuilder, +}; +use khive_types::{ + Details, ErrorCode as KhiveErrorCode, ErrorDomain, HandlerDef, KhiveError, Pack, VerbCategory, + Visibility, }; -use khive_types::{Details, ErrorCode as KhiveErrorCode, ErrorDomain, KhiveError, Pack, VerbDef}; use rmcp::{ model::{CallToolRequestParams, CallToolResult, ClientInfo, ErrorCode}, ClientHandler, ServerHandler, ServiceError, ServiceExt, @@ -17,13 +21,13 @@ use serde_json::{json, Value}; fn make_server() -> KhiveMcpServer { let config = RuntimeConfig { db_path: None, - default_namespace: "test".to_string(), + default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, 
packs: vec!["kg".to_string(), "gtd".to_string()], ..RuntimeConfig::default() }; let runtime = KhiveRuntime::new(config).expect("in-memory runtime"); - KhiveMcpServer::new(runtime) + KhiveMcpServer::new(runtime).expect("server builds with kg+gtd") } #[derive(Clone, Default)] @@ -67,12 +71,19 @@ async fn call( } /// Helper: run a single op via `request` and return the parsed `result` field -/// of the first entry. Panics if the op failed. +/// of the first entry. Uses `presentation: "verbose"` so tests receive full +/// canonical UUIDs and timestamps (not Agent-mode short forms). Panics if the +/// op failed. async fn ok_one( client: &impl std::ops::Deref>, ops: &str, ) -> anyhow::Result { - let result = call(client, "request", json!({"ops": ops})).await?; + let result = call( + client, + "request", + json!({"ops": ops, "presentation": "verbose"}), + ) + .await?; let body: Value = serde_json::from_str(&first_text(&result))?; let first = body["results"].get(0).cloned().unwrap_or(Value::Null); assert_eq!( @@ -330,13 +341,13 @@ async fn unknown_verb_returns_per_op_failure_not_invalid_params() -> anyhow::Res async fn pack_only_kg_omits_gtd_verbs_from_catalog() { let config = RuntimeConfig { db_path: None, - default_namespace: "test".to_string(), + default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, packs: vec!["kg".to_string()], ..RuntimeConfig::default() }; let runtime = KhiveRuntime::new(config).unwrap(); - let server = KhiveMcpServer::new(runtime); + let server = KhiveMcpServer::new(runtime).expect("server builds with kg"); let info = server.get_info(); let instructions = info.instructions.unwrap_or_default(); assert!(instructions.contains("create"), "kg verb missing"); @@ -347,25 +358,45 @@ async fn pack_only_kg_omits_gtd_verbs_from_catalog() { } #[tokio::test] -async fn pack_gtd_auto_loads_kg_via_transitive_requires() { - // GTD declares requires(&["kg"]) — requesting only "gtd" must auto-load "kg" - // so that kg verbs (e.g. 
"create") are present alongside gtd verbs (e.g. "assign"). +async fn pack_gtd_without_kg_fails_at_boot() { + // ADR-027: gtd declares requires=["kg"]; omitting "kg" from the pack list + // must fail at boot with a clear error — not silently auto-add kg. let config = RuntimeConfig { db_path: None, - default_namespace: "test".to_string(), + default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, packs: vec!["gtd".to_string()], ..RuntimeConfig::default() }; let runtime = KhiveRuntime::new(config).unwrap(); - let server = KhiveMcpServer::new(runtime); + match KhiveMcpServer::new(runtime) { + Ok(_) => panic!("gtd without kg must fail: missing dependency is a boot error (ADR-027)"), + Err(e) => { + let msg = e.to_string(); + assert!( + msg.contains("kg") || msg.contains("unknown pack"), + "error must name the missing dependency: {msg}" + ); + } + } +} + +#[tokio::test] +async fn pack_gtd_with_kg_explicit_works() { + // When both kg and gtd are listed, gtd's requires=["kg"] is satisfied. 
+ let config = RuntimeConfig { + db_path: None, + default_namespace: Namespace::parse("test").unwrap(), + embedding_model: None, + packs: vec!["kg".to_string(), "gtd".to_string()], + ..RuntimeConfig::default() + }; + let runtime = KhiveRuntime::new(config).unwrap(); + let server = KhiveMcpServer::new(runtime).expect("kg+gtd builds"); let info = server.get_info(); let instructions = info.instructions.unwrap_or_default(); assert!(instructions.contains("assign"), "gtd verb must be present"); - assert!( - instructions.contains("create"), - "kg verb must be auto-loaded via gtd's transitive requires" - ); + assert!(instructions.contains("create"), "kg verb must be present"); } #[tokio::test] @@ -912,9 +943,11 @@ impl khive_types::Pack for ErrorInjectPack { const NAME: &'static str = "error-inject"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[VerbDef { + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { name: "always_fail", description: "always returns a KhiveError::unavailable with code + details", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }]; } @@ -932,8 +965,8 @@ impl PackRuntime for ErrorInjectPack { &[] } - fn verbs(&self) -> &'static [VerbDef] { - ErrorInjectPack::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + ErrorInjectPack::HANDLERS } async fn dispatch( @@ -941,6 +974,7 @@ impl PackRuntime for ErrorInjectPack { _verb: &str, _params: serde_json::Value, _registry: &VerbRegistry, + _token: &NamespaceToken, ) -> Result { let err = KhiveError::unavailable("downstream service offline") .with_code(KhiveErrorCode::new(ErrorDomain::Runtime, 10)) diff --git a/crates/khive-merge/Cargo.toml b/crates/khive-merge/Cargo.toml index 14a825e3..16ca6525 100644 --- a/crates/khive-merge/Cargo.toml +++ b/crates/khive-merge/Cargo.toml @@ -9,9 +9,9 @@ homepage.workspace = true description = "KG three-way merge with conflict detection (ADR-043)" 
[dependencies] -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-storage = { version = "0.2.0", path = "../khive-storage" } -khive-vcs = { version = "0.2.0", path = "../khive-vcs" } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-vcs = { version = "0.2.1", path = "../khive-vcs" } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/crates/khive-pack-brain/Cargo.toml b/crates/khive-pack-brain/Cargo.toml index de6f3e9d..ca487a80 100644 --- a/crates/khive-pack-brain/Cargo.toml +++ b/crates/khive-pack-brain/Cargo.toml @@ -8,13 +8,13 @@ repository.workspace = true homepage.workspace = true keywords.workspace = true categories.workspace = true -description = "Brain pack — event-driven auto-tuning via meta-fold (ADR-064)" +description = "Brain pack — profile-oriented orchestration via Fold + Objective (ADR-032)" [dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-fold = { version = "0.2.0", path = "../khive-fold" } -khive-storage = { version = "0.2.0", path = "../khive-storage" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-fold = { version = "0.2.1", path = "../khive-fold" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } inventory = { workspace = true } async-trait = { workspace = true } serde = { workspace = true } @@ -24,4 +24,4 @@ chrono = { workspace = true } [dev-dependencies] tokio = { workspace = true, features = ["test-util"] } -khive-pack-kg = { version = "0.2.0", path = "../khive-pack-kg" } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } diff --git a/crates/khive-pack-brain/src/event.rs b/crates/khive-pack-brain/src/event.rs index 
561d79cd..e0d4f137 100644 --- a/crates/khive-pack-brain/src/event.rs +++ b/crates/khive-pack-brain/src/event.rs @@ -4,7 +4,7 @@ use uuid::Uuid; use khive_storage::event::Event; use khive_types::EventOutcome; -/// Feedback signal values for the `brain.emit` verb. +/// Feedback signal values for the `brain.feedback` verb (ADR-032 §3). #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "snake_case")] pub enum FeedbackSignal { @@ -13,7 +13,11 @@ pub enum FeedbackSignal { Wrong, } -/// Interpreted brain signal extracted from a raw Event. +/// Interpreted brain signal extracted from a raw Event (ADR-032 §4). +/// +/// `interpret()` is the single mapping layer from the shared event log to +/// brain-internal signals. No parallel event enum is needed; the Event +/// substrate IS the source of truth. #[derive(Debug)] pub enum BrainSignal { /// A recall verb succeeded — positive signal for the recalled entity. @@ -22,10 +26,12 @@ pub enum BrainSignal { RecallMiss, /// A search verb completed. SearchCompleted { latency_us: i64 }, - /// Explicit feedback on a specific entity. + /// Explicit feedback on a specific entity, emitted by `brain.feedback`. Feedback { target_id: Uuid, signal: FeedbackSignal, + /// Profile that served the event being rated, if known. + served_by_profile_id: Option, }, /// Any other note-substrate access (get, list on notes). NoteAccessed { target_id: Uuid }, @@ -33,10 +39,15 @@ pub enum BrainSignal { Irrelevant, } -/// Extract a brain signal from a raw storage Event. +/// Extract a brain signal from a raw storage Event (ADR-032 §4). +/// +/// `brain.emit` is no longer handled here — it was renamed to `brain.feedback` +/// per ADR-032 §11 (`brain.feedback` is the `FeedbackExplicit` event emitter). +/// Any `brain.emit` event that predates this ADR is treated as Irrelevant so +/// that old event log entries do not cause spurious feedback updates. 
/// -/// The brain interprets existing events by their verb + outcome + data fields. -/// No parallel event enum needed — the Event substrate IS the source of truth. +/// To add a new signal source: add one match arm to this function. That is +/// the entire extension surface (ADR-032 §4). pub fn interpret(event: &Event) -> BrainSignal { match event.verb.as_str() { "recall" => match event.outcome { @@ -52,20 +63,27 @@ pub fn interpret(event: &Event) -> BrainSignal { "search" => BrainSignal::SearchCompleted { latency_us: event.duration_us, }, - "brain.emit" => { + // brain.feedback is the ADR-032 §11 verb for FeedbackExplicit events. + // (brain.emit predates this ADR; treated as Irrelevant for old replays.) + "brain.feedback" => { let target = match event.target_id { Some(t) => t, None => return BrainSignal::Irrelevant, }; let signal = event - .data - .as_ref() - .and_then(|d| d.get("signal")) + .payload + .get("signal") .and_then(|s| serde_json::from_value::(s.clone()).ok()); + let served_by = event + .payload + .get("served_by_profile_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_owned()); match signal { Some(s) => BrainSignal::Feedback { target_id: target, signal: s, + served_by_profile_id: served_by, }, None => BrainSignal::Irrelevant, } @@ -104,10 +122,10 @@ pub fn is_recall_positive(signal: &BrainSignal) -> Option { #[cfg(test)] mod tests { use super::*; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; fn make_event(verb: &str, outcome: EventOutcome, target: Option) -> Event { - let mut e = Event::new("test", verb, SubstrateKind::Note, "brain"); + let mut e = Event::new("test", verb, EventKind::Audit, SubstrateKind::Note, "brain"); e.outcome = outcome; e.target_id = target; e @@ -142,22 +160,58 @@ mod tests { } #[test] - fn brain_emit_with_feedback() { + fn brain_feedback_with_useful_signal() { let id = Uuid::new_v4(); - let mut e = make_event("brain.emit", EventOutcome::Success, Some(id)); - e.data = 
Some(serde_json::json!({"signal": "useful"})); + let mut e = make_event("brain.feedback", EventOutcome::Success, Some(id)); + e.payload = serde_json::json!({"signal": "useful"}); match interpret(&e) { - BrainSignal::Feedback { target_id, signal } => { + BrainSignal::Feedback { + target_id, + signal, + served_by_profile_id, + } => { assert_eq!(target_id, id); assert_eq!(signal, FeedbackSignal::Useful); + assert!(served_by_profile_id.is_none()); } other => panic!("expected Feedback, got {other:?}"), } } #[test] - fn brain_emit_without_target_is_irrelevant() { - let e = make_event("brain.emit", EventOutcome::Success, None); + fn brain_feedback_with_served_by_profile_id() { + let id = Uuid::new_v4(); + let mut e = make_event("brain.feedback", EventOutcome::Success, Some(id)); + e.payload = serde_json::json!({ + "signal": "not_useful", + "served_by_profile_id": "balanced-recall-v1" + }); + match interpret(&e) { + BrainSignal::Feedback { + target_id, + signal, + served_by_profile_id, + } => { + assert_eq!(target_id, id); + assert_eq!(signal, FeedbackSignal::NotUseful); + assert_eq!(served_by_profile_id.as_deref(), Some("balanced-recall-v1")); + } + other => panic!("expected Feedback, got {other:?}"), + } + } + + #[test] + fn brain_feedback_without_target_is_irrelevant() { + let e = make_event("brain.feedback", EventOutcome::Success, None); + assert!(matches!(interpret(&e), BrainSignal::Irrelevant)); + } + + #[test] + fn brain_emit_legacy_is_irrelevant() { + // brain.emit predates ADR-032; old log entries must not trigger feedback. 
+ let id = Uuid::new_v4(); + let mut e = make_event("brain.emit", EventOutcome::Success, Some(id)); + e.payload = serde_json::json!({"signal": "useful"}); assert!(matches!(interpret(&e), BrainSignal::Irrelevant)); } @@ -202,6 +256,7 @@ mod tests { let sig = BrainSignal::Feedback { target_id: id, signal: FeedbackSignal::NotUseful, + served_by_profile_id: None, }; assert_eq!(entity_signal(&sig), Some((id, false))); } @@ -212,15 +267,16 @@ mod tests { let sig = BrainSignal::Feedback { target_id: id, signal: FeedbackSignal::Wrong, + served_by_profile_id: None, }; assert_eq!(entity_signal(&sig), Some((id, false))); } #[test] - fn brain_emit_invalid_signal_data_is_irrelevant() { + fn brain_feedback_invalid_signal_data_is_irrelevant() { let id = Uuid::new_v4(); - let mut e = make_event("brain.emit", EventOutcome::Success, Some(id)); - e.data = Some(serde_json::json!({"signal": "bad_value"})); + let mut e = make_event("brain.feedback", EventOutcome::Success, Some(id)); + e.payload = serde_json::json!({"signal": "bad_value"}); assert!(matches!(interpret(&e), BrainSignal::Irrelevant)); } diff --git a/crates/khive-pack-brain/src/fold.rs b/crates/khive-pack-brain/src/fold.rs index 18db54c6..aa85cfa1 100644 --- a/crates/khive-pack-brain/src/fold.rs +++ b/crates/khive-pack-brain/src/fold.rs @@ -2,59 +2,47 @@ use khive_fold::{Fold, FoldContext}; use khive_storage::event::Event; use crate::event::{entity_signal, interpret, is_recall_positive}; -use crate::state::{BetaPosterior, BrainState}; +use crate::state::{BalancedRecallState, BetaPosterior}; -/// The brain as a meta-fold: `Fold`. +/// Fold for the `BalancedRecallProfile` state (ADR-032 §5a). /// -/// Processes the existing Event substrate stream. Each event is interpreted -/// via `event::interpret()` and routed to the relevant posteriors. -/// Deterministic: same events in the same order → same BrainState. 
-pub struct EventFold { +/// The predecessor design had this fold update a flat `HashMap` +/// on the brain's core `BrainState`. Per ADR-032, the three-scalar Bayesian state +/// now lives entirely inside `BalancedRecallProfile` — brain's `BrainState` holds +/// profile registry metadata; posteriors are opaque to brain. +/// +/// Deterministic: same events in same order → same `BalancedRecallState`. +pub struct BalancedRecallFold { entity_capacity: usize, } -impl EventFold { +impl BalancedRecallFold { pub fn new(entity_capacity: usize) -> Self { Self { entity_capacity } } } -impl Fold for EventFold { - fn initial(&self, _context: &FoldContext) -> BrainState { - BrainState::new( - [ - ( - "recall::relevance_weight".into(), - BetaPosterior::new(7.0, 3.0), - ), - ( - "recall::importance_weight".into(), - BetaPosterior::new(2.0, 8.0), - ), - ( - "recall::temporal_weight".into(), - BetaPosterior::new(1.0, 9.0), - ), - ] - .into_iter() - .collect(), - self.entity_capacity, - ) +impl Fold for BalancedRecallFold { + fn init(&self, _context: &FoldContext) -> BalancedRecallState { + BalancedRecallState::new(self.entity_capacity) } - fn step(&self, mut state: BrainState, event: &Event, _ctx: &FoldContext) -> BrainState { + fn reduce( + &self, + mut state: BalancedRecallState, + event: &Event, + _ctx: &FoldContext, + ) -> BalancedRecallState { let signal = interpret(event); state.total_events += 1; - // Global recall parameter updates + // Global recall-relevance parameter update if let Some(positive) = is_recall_positive(&signal) { - if let Some(posterior) = state.parameters.get_mut("recall::relevance_weight") { - if positive { - posterior.update_success(); - } else { - posterior.update_failure(); - } + if positive { + state.relevance.update_success(); + } else { + state.relevance.update_failure(); } } @@ -73,7 +61,7 @@ impl Fold for EventFold { state } - fn finalize(&self, state: BrainState, _context: &FoldContext) -> BrainState { + fn finalize(&self, state: 
BalancedRecallState, _context: &FoldContext) -> BalancedRecallState { state } } @@ -81,93 +69,111 @@ impl Fold for EventFold { #[cfg(test)] mod tests { use super::*; - use khive_types::{EventOutcome, SubstrateKind}; + use khive_types::{EventKind, EventOutcome, SubstrateKind}; use uuid::Uuid; fn make_event(verb: &str, outcome: EventOutcome, target: Option) -> Event { - let mut e = Event::new("test", verb, SubstrateKind::Note, "brain"); + let mut e = Event::new("test", verb, EventKind::Audit, SubstrateKind::Note, "brain"); e.outcome = outcome; e.target_id = target; e } #[test] - fn initial_state_has_recall_priors() { - let fold = EventFold::new(100); + fn initial_state_has_informative_priors() { + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); - let state = fold.initial(&ctx); - assert!(state.parameters.contains_key("recall::relevance_weight")); - let p = &state.parameters["recall::relevance_weight"]; - assert!((p.alpha - 7.0).abs() < 1e-12); - assert!((p.beta - 3.0).abs() < 1e-12); + let state = fold.init(&ctx); + // relevance prior Beta(7,3) + assert!((state.relevance.alpha - 7.0).abs() < 1e-12); + assert!((state.relevance.beta - 3.0).abs() < 1e-12); + // importance prior Beta(2,8) + assert!((state.importance.alpha - 2.0).abs() < 1e-12); + assert!((state.importance.beta - 8.0).abs() < 1e-12); + // temporal prior Beta(1,9) + assert!((state.temporal.alpha - 1.0).abs() < 1e-12); + assert!((state.temporal.beta - 9.0).abs() < 1e-12); } #[test] - fn recall_hit_updates_global_and_entity() { - let fold = EventFold::new(100); + fn recall_hit_updates_relevance_and_entity() { + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); - let mut state = fold.initial(&ctx); + let mut state = fold.init(&ctx); let id = Uuid::new_v4(); let event = make_event("recall", EventOutcome::Success, Some(id)); - state = fold.step(state, &event, &ctx); + state = fold.reduce(state, &event, &ctx); assert_eq!(state.total_events, 1); - let p = 
&state.parameters["recall::relevance_weight"]; - assert!((p.alpha - 8.0).abs() < 1e-12); // 7 + 1 success + assert!((state.relevance.alpha - 8.0).abs() < 1e-12); // 7 + 1 let ep = state.entity_posteriors.get(&id).unwrap(); - assert!((ep.alpha - 2.0).abs() < 1e-12); // 1 + 1 success + assert!((ep.alpha - 2.0).abs() < 1e-12); // 1 + 1 } #[test] - fn recall_miss_updates_global_only() { - let fold = EventFold::new(100); + fn recall_miss_updates_relevance_beta() { + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); - let mut state = fold.initial(&ctx); + let mut state = fold.init(&ctx); let event = make_event("recall", EventOutcome::Success, None); - state = fold.step(state, &event, &ctx); + state = fold.reduce(state, &event, &ctx); - let p = &state.parameters["recall::relevance_weight"]; - assert!((p.beta - 4.0).abs() < 1e-12); // 3 + 1 failure + // target_id = None → RecallMiss → relevance failure + assert!((state.relevance.beta - 4.0).abs() < 1e-12); // 3 + 1 assert!(state.entity_posteriors.is_empty()); } #[test] fn irrelevant_event_increments_counter_only() { - let fold = EventFold::new(100); + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); - let mut state = fold.initial(&ctx); + let mut state = fold.init(&ctx); let event = make_event("link", EventOutcome::Success, Some(Uuid::new_v4())); - state = fold.step(state, &event, &ctx); + state = fold.reduce(state, &event, &ctx); assert_eq!(state.total_events, 1); - let p = &state.parameters["recall::relevance_weight"]; - assert!((p.alpha - 7.0).abs() < 1e-12); // unchanged + assert!((state.relevance.alpha - 7.0).abs() < 1e-12); // unchanged } #[test] fn feedback_not_useful_increments_entity_beta() { - let fold = EventFold::new(100); + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); - let mut state = fold.initial(&ctx); + let mut state = fold.init(&ctx); let id = Uuid::new_v4(); - let mut event = make_event("brain.emit", EventOutcome::Success, Some(id)); - 
event.data = Some(serde_json::json!({"signal": "not_useful"})); - state = fold.step(state, &event, &ctx); + let mut event = make_event("brain.feedback", EventOutcome::Success, Some(id)); + event.payload = serde_json::json!({"signal": "not_useful"}); + state = fold.reduce(state, &event, &ctx); assert_eq!(state.total_events, 1); let ep = state.entity_posteriors.get(&id).unwrap(); - // default prior Beta(1,1); not_useful → update_failure → beta = 2 assert!((ep.alpha - 1.0).abs() < 1e-12); assert!((ep.beta - 2.0).abs() < 1e-12); } + #[test] + fn brain_emit_legacy_does_not_update_entity() { + // brain.emit is now Irrelevant (ADR-032 migration boundary) + let fold = BalancedRecallFold::new(100); + let ctx = FoldContext::new(); + let mut state = fold.init(&ctx); + + let id = Uuid::new_v4(); + let mut event = make_event("brain.emit", EventOutcome::Success, Some(id)); + event.payload = serde_json::json!({"signal": "useful"}); + state = fold.reduce(state, &event, &ctx); + + assert_eq!(state.total_events, 1); + assert!(state.entity_posteriors.is_empty()); // no entity update from legacy verb + } + #[test] fn deterministic_replay() { - let fold = EventFold::new(100); + let fold = BalancedRecallFold::new(100); let ctx = FoldContext::new(); let id = Uuid::new_v4(); @@ -178,20 +184,20 @@ mod tests { make_event("recall", EventOutcome::Success, Some(id)), ]; - let mut s1 = fold.initial(&ctx); + let mut s1 = fold.init(&ctx); for e in &events { - s1 = fold.step(s1, e, &ctx); + s1 = fold.reduce(s1, e, &ctx); } - let mut s2 = fold.initial(&ctx); + let mut s2 = fold.init(&ctx); for e in &events { - s2 = fold.step(s2, e, &ctx); + s2 = fold.reduce(s2, e, &ctx); } let snap1 = s1.to_snapshot(); let snap2 = s2.to_snapshot(); assert_eq!(snap1.total_events, snap2.total_events); - assert_eq!(snap1.parameters, snap2.parameters); + assert_eq!(snap1.relevance, snap2.relevance); assert_eq!(snap1.entity_posteriors, snap2.entity_posteriors); } } diff --git a/crates/khive-pack-brain/src/lib.rs 
b/crates/khive-pack-brain/src/lib.rs index 787bf34e..5b10612c 100644 --- a/crates/khive-pack-brain/src/lib.rs +++ b/crates/khive-pack-brain/src/lib.rs @@ -6,63 +6,150 @@ pub mod tunable; use std::sync::Mutex; use async_trait::async_trait; +use chrono::Utc; use serde::Deserialize; use serde_json::{json, Value}; use khive_fold::{Fold, FoldContext}; use khive_runtime::pack::PackRuntime; -use khive_runtime::{DispatchHook, KhiveRuntime, RuntimeError, VerbRegistry}; +use khive_runtime::{ + DispatchHook, EventView, KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry, +}; use khive_storage::event::{Event, EventFilter}; use khive_storage::types::PageRequest; -use khive_types::{Pack, VerbDef}; +use khive_types::{HandlerDef, Pack, VerbCategory, Visibility}; -use crate::fold::EventFold; -use crate::state::BrainState; +use crate::fold::BalancedRecallFold; +use crate::state::{BrainState, ProfileBinding, ProfileLifecycle, ProfileRecord}; const ENTITY_CACHE_CAPACITY: usize = 10_000; -pub struct BrainPack { - runtime: KhiveRuntime, - state: Mutex, - fold: EventFold, -} - -impl Pack for BrainPack { - const NAME: &'static str = "brain"; - const NOTE_KINDS: &'static [&'static str] = &[]; - const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &BRAIN_VERBS; - const REQUIRES: &'static [&'static str] = &["kg"]; -} +// ── Handler table ───────────────────────────────────────────────────────────── -static BRAIN_VERBS: [VerbDef; 5] = [ - VerbDef { +/// Brain pack verb surface per ADR-032 §11. +/// +/// Visibility::Verb = exposed on the MCP `request` tool. +/// Visibility::Subhandler = internal / operator-only. +/// +/// ADR-025: illocutionary classification applied. 
+static BRAIN_HANDLERS: &[HandlerDef] = &[ + // ── Assertive (read) verbs ──────────────────────────────────────────── + HandlerDef { name: "brain.state", description: "Return current BrainState snapshot for inspection", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, - VerbDef { + HandlerDef { name: "brain.config", description: "Return projected config for a named pack parameter", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, - VerbDef { + HandlerDef { name: "brain.events", description: "List recent brain-relevant events for debugging", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, + }, + HandlerDef { + name: "brain.profiles", + description: "List profiles, optionally filtered by lifecycle", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, + }, + HandlerDef { + name: "brain.profile", + description: "Profile metadata, latest snapshot, current state summary", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, + }, + HandlerDef { + name: "brain.resolve", + description: "Show which profile would serve a caller context", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, + }, + // ── Commissive (write state) verbs ──────────────────────────────────── + HandlerDef { + name: "brain.activate", + description: "Move a profile to Active (start live update loop)", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, - VerbDef { + HandlerDef { + name: "brain.deactivate", + description: "Move a profile to Inactive (stop live updates, retain state)", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, + }, + HandlerDef { + name: "brain.archive", + description: "Move a profile to Archived (read-only, audit-retained)", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, + }, + HandlerDef { name: "brain.reset", description: "Reset posteriors to priors (preserves event history)", + 
visibility: Visibility::Verb, + category: VerbCategory::Declaration, + }, + HandlerDef { + name: "brain.feedback", + description: "Emit a FeedbackExplicit event into the shared log", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, + }, + // ── Declaration verbs ───────────────────────────────────────────────── + HandlerDef { + name: "brain.bind", + description: "Write a row in the profile resolution table", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, + }, + HandlerDef { + name: "brain.unbind", + description: "Remove rows from the profile resolution table", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, - VerbDef { + // ── Legacy / internal ───────────────────────────────────────────────── + HandlerDef { name: "brain.emit", - description: "Manually emit a feedback event for a specific entity", + description: "Manually emit a feedback event (deprecated; use brain.feedback)", + visibility: Visibility::Subhandler, + category: VerbCategory::Commissive, }, ]; +// ── BrainPack ───────────────────────────────────────────────────────────────── + +/// Brain pack — profile-oriented auto-tuning (ADR-032). +/// +/// `BrainState` holds the profile registry. `BalancedRecallFold` drives the +/// v1 default profile. The old scalar `BrainState` design is superseded; see +/// ADR-032 §1 and the migration notes in `state.rs`. +pub struct BrainPack { + runtime: KhiveRuntime, + /// Profile registry + active balanced-recall state. + state: Mutex, + /// Fold for the built-in `balanced-recall-v1` profile. 
+ fold: BalancedRecallFold, +} + +impl Pack for BrainPack { + const NAME: &'static str = "brain"; + const NOTE_KINDS: &'static [&'static str] = &[]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = BRAIN_HANDLERS; + const REQUIRES: &'static [&'static str] = &["kg"]; +} + impl BrainPack { pub fn new(runtime: KhiveRuntime) -> Self { - let fold = EventFold::new(ENTITY_CACHE_CAPACITY); - let ctx = FoldContext::new(); - let state = fold.initial(&ctx); + let fold = BalancedRecallFold::new(ENTITY_CACHE_CAPACITY); + let state = BrainState::new(ENTITY_CACHE_CAPACITY); Self { runtime, state: Mutex::new(state), @@ -70,20 +157,20 @@ impl BrainPack { } } + /// Public snapshot of the current `BrainState`. + pub fn snapshot(&self) -> crate::state::BrainStateSnapshot { + self.state.lock().unwrap().to_snapshot() + } + + // ── brain.state ─────────────────────────────────────────────────────── + async fn handle_state(&self, _params: Value) -> Result { let state = self.state.lock().unwrap(); let snapshot = state.to_snapshot(); serde_json::to_value(&snapshot).map_err(|e| RuntimeError::InvalidInput(e.to_string())) } - /// Public snapshot of the current `BrainState`. - /// - /// Equivalent to dispatching the `brain.state` verb but callable directly - /// when you hold an `Arc` (e.g. a test that registered the pack - /// as a `DispatchHook` and wants to verify posteriors updated). 
- pub fn snapshot(&self) -> crate::state::BrainStateSnapshot { - self.state.lock().unwrap().to_snapshot() - } + // ── brain.config ────────────────────────────────────────────────────── async fn handle_config(&self, params: Value) -> Result { #[derive(Deserialize)] @@ -94,12 +181,30 @@ impl BrainPack { .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; let state = self.state.lock().unwrap(); + let br = &state.balanced_recall; + + let param_map = [ + ("recall::relevance_weight", &br.relevance), + ("recall::importance_weight", &br.importance), + ("recall::temporal_weight", &br.temporal), + ]; + match p.parameter { Some(key) => { - let posterior = state - .parameters - .get(&key) - .ok_or_else(|| RuntimeError::NotFound(format!("parameter {key:?}")))?; + let posterior = param_map + .iter() + .find(|(k, _)| *k == key) + .map(|(_, p)| *p) + .ok_or_else(|| { + RuntimeError::NotFound(format!( + "parameter {key:?}; valid: {}", + param_map + .iter() + .map(|(k, _)| *k) + .collect::>() + .join(", ") + )) + })?; Ok(json!({ "parameter": key, "mean": posterior.mean(), @@ -110,12 +215,11 @@ impl BrainPack { })) } None => { - let configs: serde_json::Map = state - .parameters + let configs: serde_json::Map = param_map .iter() .map(|(k, p)| { ( - k.clone(), + (*k).to_owned(), json!({ "mean": p.mean(), "variance": p.variance(), @@ -129,30 +233,36 @@ impl BrainPack { } } - async fn handle_events(&self, params: Value) -> Result { + // ── brain.events ────────────────────────────────────────────────────── + + async fn handle_events( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { #[derive(Deserialize)] struct EventsParams { - namespace: Option, limit: Option, } let p: EventsParams = serde_json::from_value(params) .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; let limit = p.limit.unwrap_or(20).min(100); - let ns = self.runtime.ns(p.namespace.as_deref()).to_string(); + let ns = token.namespace().as_str().to_string(); - let store = 
self.runtime.events(p.namespace.as_deref())?; + let store = self.runtime.events(token)?; let filter = EventFilter { verbs: vec![ "recall".into(), "search".into(), - "brain.emit".into(), + "brain.feedback".into(), + "brain.emit".into(), // retained for backward-compat queries "get".into(), "remember".into(), ], - namespaces: vec![ns], ..EventFilter::default() }; + let _ = ns; let page = store .query_events(filter, PageRequest { offset: 0, limit }) .await @@ -169,6 +279,7 @@ impl BrainPack { "target_id": e.target_id.map(|t| t.to_string()), "duration_us": e.duration_us, "created_at": e.created_at, + "payload": e.payload, }) }) .collect(); @@ -179,23 +290,168 @@ impl BrainPack { })) } + // ── brain.profiles ──────────────────────────────────────────────────── + + async fn handle_profiles(&self, params: Value) -> Result { + #[derive(Deserialize)] + struct ProfilesParams { + lifecycle: Option, + } + let p: ProfilesParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let state = self.state.lock().unwrap(); + let filter_lc: Option = p + .lifecycle + .as_deref() + .map(|s| serde_json::from_value(Value::String(s.to_owned()))) + .transpose() + .map_err(|e| RuntimeError::InvalidInput(format!("invalid lifecycle: {e}")))?; + + let profiles: Vec<&ProfileRecord> = state + .profiles + .values() + .filter(|r| filter_lc.as_ref().is_none_or(|lc| &r.lifecycle == lc)) + .collect(); + + let items: Vec = profiles + .iter() + .map(|r| { + json!({ + "id": r.id, + "description": r.description, + "consumer_kind": r.consumer_kind, + "state_class": r.state_class, + "lifecycle": r.lifecycle, + "total_events": r.total_events, + "exploration_epoch": r.exploration_epoch, + "created_at": r.created_at, + }) + }) + .collect(); + + Ok(json!({ "count": items.len(), "profiles": items })) + } + + // ── brain.profile ───────────────────────────────────────────────────── + + async fn handle_profile(&self, params: Value) -> Result { + #[derive(Deserialize)] 
+ struct ProfileParams { + id: String, + } + let p: ProfileParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let state = self.state.lock().unwrap(); + let record = state + .profiles + .get(&p.id) + .ok_or_else(|| RuntimeError::NotFound(format!("profile {:?}", p.id)))?; + + Ok(json!({ + "id": record.id, + "description": record.description, + "consumer_kind": record.consumer_kind, + "state_class": record.state_class, + "lifecycle": record.lifecycle, + "total_events": record.total_events, + "exploration_epoch": record.exploration_epoch, + "created_at": record.created_at, + "state_snapshot": record.state_snapshot, + })) + } + + // ── brain.resolve ───────────────────────────────────────────────────── + + async fn handle_resolve(&self, params: Value) -> Result { + #[derive(Deserialize)] + struct ResolveParams { + actor: Option, + namespace: Option, + consumer_kind: String, + } + let p: ResolveParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let state = self.state.lock().unwrap(); + match state.resolve(p.actor.as_deref(), p.namespace.as_deref(), &p.consumer_kind) { + Some(record) => Ok(json!({ + "resolved_profile_id": record.id, + "lifecycle": record.lifecycle, + "consumer_kind": record.consumer_kind, + })), + None => Err(RuntimeError::NotFound(format!( + "no profile resolved for consumer_kind={:?}", + p.consumer_kind + ))), + } + } + + // ── brain.activate / deactivate / archive ───────────────────────────── + + async fn handle_activate(&self, params: Value) -> Result { + self.set_lifecycle(params, ProfileLifecycle::Active).await + } + + async fn handle_deactivate(&self, params: Value) -> Result { + self.set_lifecycle(params, ProfileLifecycle::Inactive).await + } + + async fn handle_archive(&self, params: Value) -> Result { + self.set_lifecycle(params, ProfileLifecycle::Archived).await + } + + async fn set_lifecycle( + &self, + params: Value, + lifecycle: 
ProfileLifecycle, + ) -> Result { + #[derive(Deserialize)] + struct LifecycleParams { + profile_id: String, + } + let p: LifecycleParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let mut state = self.state.lock().unwrap(); + let record = state + .profiles + .get_mut(&p.profile_id) + .ok_or_else(|| RuntimeError::NotFound(format!("profile {:?}", p.profile_id)))?; + + record.lifecycle = lifecycle.clone(); + Ok(json!({ + "profile_id": p.profile_id, + "lifecycle": lifecycle, + })) + } + + // ── brain.reset ─────────────────────────────────────────────────────── + async fn handle_reset(&self, _params: Value) -> Result { let mut state = self.state.lock().unwrap(); state.reset_posteriors(); Ok(json!({ "reset": true, - "exploration_epoch": state.exploration_epoch, + "exploration_epoch": state.balanced_recall.exploration_epoch, })) } - async fn handle_emit(&self, params: Value) -> Result { + // ── brain.feedback ──────────────────────────────────────────────────── + + async fn handle_feedback( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { #[derive(Deserialize)] - struct EmitParams { + struct FeedbackParams { target_id: String, signal: String, - namespace: Option, + served_by_profile_id: Option, } - let p: EmitParams = serde_json::from_value(params) + let p: FeedbackParams = serde_json::from_value(params) .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; let target: uuid::Uuid = p @@ -214,40 +470,166 @@ impl BrainPack { } }; - let event = khive_storage::event::Event::new( - self.runtime.ns(p.namespace.as_deref()).to_string(), - "brain.emit", + let mut data = json!({"signal": signal}); + if let Some(ref profile_id) = p.served_by_profile_id { + data["served_by_profile_id"] = json!(profile_id); + } + + let event = Event::new( + token.namespace().as_str().to_string(), + "brain.feedback", + khive_types::EventKind::FeedbackExplicit, khive_types::SubstrateKind::Event, "brain", ) .with_target(target) 
- .with_data(json!({"signal": signal})); + .with_payload(data); - let store = self.runtime.events(p.namespace.as_deref())?; + let store = self.runtime.events(token)?; store .append_event(event.clone()) .await .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; - // Update brain state from this event + // Update balanced-recall profile state from this event let ctx = FoldContext::new(); let mut state = self.state.lock().unwrap(); - let current = std::mem::replace( - &mut *state, - BrainState::new(std::collections::HashMap::new(), 0), + let current_recall = std::mem::replace( + &mut state.balanced_recall, + crate::state::BalancedRecallState::new(0), ); - *state = self.fold.step(current, &event, &ctx); + let updated = self.fold.reduce(current_recall, &event, &ctx); + state.balanced_recall = updated; + + // Sync profile record metadata — collect values first to avoid borrow conflict. + let total_ev = state.balanced_recall.total_events; + let snap_val = serde_json::to_value(state.balanced_recall.to_snapshot()).ok(); + if let Some(record) = state.profiles.get_mut("balanced-recall-v1") { + record.total_events = total_ev; + record.state_snapshot = snap_val; + } Ok(json!({ "emitted": true, "event_id": event.id.to_string(), + "verb": "brain.feedback", "signal": signal, "target_id": target.to_string(), })) } + + // ── brain.emit (deprecated) ─────────────────────────────────────────── + + /// Deprecated: use `brain.feedback`. Kept for backward-compat; routes to + /// `handle_feedback` with the same parameters. 
+ async fn handle_emit( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { + self.handle_feedback(token, params).await + } + + // ── brain.bind ──────────────────────────────────────────────────────── + + async fn handle_bind(&self, params: Value) -> Result { + #[derive(Deserialize)] + struct BindParams { + profile_id: String, + actor: Option, + namespace: Option, + consumer_kind: Option, + priority: Option, + } + let p: BindParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let mut state = self.state.lock().unwrap(); + + // Verify the profile exists + if !state.profiles.contains_key(&p.profile_id) { + return Err(RuntimeError::NotFound(format!( + "profile {:?}", + p.profile_id + ))); + } + + let actor = p.actor.unwrap_or_else(|| "*".into()); + let namespace = p.namespace.unwrap_or_else(|| "*".into()); + let consumer_kind = p.consumer_kind.unwrap_or_else(|| "*".into()); + + // Validate that '*' is not used as a real value (ADR-032 §10 wildcard sentinel) + for (field, val) in [ + ("actor", &actor), + ("namespace", &namespace), + ("consumer_kind", &consumer_kind), + ] { + if val.as_str() != "*" && val.contains('*') { + return Err(RuntimeError::InvalidInput(format!( + "{field}: '*' is reserved as the wildcard sentinel and cannot appear inside a real value" + ))); + } + } + + // Remove any existing binding for the same (actor, namespace, consumer_kind) + state.bindings.retain(|b| { + !(b.actor == actor && b.namespace == namespace && b.consumer_kind == consumer_kind) + }); + + state.bindings.push(ProfileBinding { + actor: actor.clone(), + namespace: namespace.clone(), + consumer_kind: consumer_kind.clone(), + profile_id: p.profile_id.clone(), + priority: p.priority.unwrap_or(0), + created_at: Utc::now(), + }); + + Ok(json!({ + "bound": true, + "profile_id": p.profile_id, + "actor": actor, + "namespace": namespace, + "consumer_kind": consumer_kind, + })) + } + + // ── brain.unbind 
────────────────────────────────────────────────────── + + async fn handle_unbind(&self, params: Value) -> Result { + #[derive(Deserialize)] + struct UnbindParams { + profile_id: Option, + actor: Option, + namespace: Option, + consumer_kind: Option, + } + let p: UnbindParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + + let mut state = self.state.lock().unwrap(); + let before = state.bindings.len(); + + state.bindings.retain(|b| { + let pid_match = p.profile_id.as_ref().is_none_or(|id| &b.profile_id == id); + let actor_match = p.actor.as_ref().is_none_or(|a| &b.actor == a); + let ns_match = p.namespace.as_ref().is_none_or(|n| &b.namespace == n); + let kind_match = p + .consumer_kind + .as_ref() + .is_none_or(|k| &b.consumer_kind == k); + // Retain if this binding does NOT match ALL of the provided filters. + // A filter that is absent (None) matches everything — only bindings + // satisfying every supplied criterion are removed. + !(pid_match && actor_match && ns_match && kind_match) + }); + + let removed = before - state.bindings.len(); + Ok(json!({ "unbound": removed })) + } } -// ── ADR-063: inventory self-registration ───────────────────────────────────── +// ── Inventory self-registration ─────────────────────────────────────────────── struct BrainPackFactory; @@ -267,6 +649,8 @@ impl khive_runtime::PackFactory for BrainPackFactory { inventory::submit! 
{ khive_runtime::PackRegistration(&BrainPackFactory) } +// ── PackRuntime impl ────────────────────────────────────────────────────────── + #[async_trait] impl PackRuntime for BrainPack { fn name(&self) -> &str { @@ -281,8 +665,8 @@ impl PackRuntime for BrainPack { ::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - &BRAIN_VERBS + fn handlers(&self) -> &'static [HandlerDef] { + BRAIN_HANDLERS } fn requires(&self) -> &'static [&'static str] { @@ -294,13 +678,27 @@ impl PackRuntime for BrainPack { verb: &str, params: Value, _registry: &VerbRegistry, + token: &NamespaceToken, ) -> Result { match verb { + // Assertive "brain.state" => self.handle_state(params).await, "brain.config" => self.handle_config(params).await, - "brain.events" => self.handle_events(params).await, + "brain.events" => self.handle_events(token, params).await, + "brain.profiles" => self.handle_profiles(params).await, + "brain.profile" => self.handle_profile(params).await, + "brain.resolve" => self.handle_resolve(params).await, + // Commissive + "brain.activate" => self.handle_activate(params).await, + "brain.deactivate" => self.handle_deactivate(params).await, + "brain.archive" => self.handle_archive(params).await, "brain.reset" => self.handle_reset(params).await, - "brain.emit" => self.handle_emit(params).await, + "brain.feedback" => self.handle_feedback(token, params).await, + // Declaration + "brain.bind" => self.handle_bind(params).await, + "brain.unbind" => self.handle_unbind(params).await, + // Legacy + "brain.emit" => self.handle_emit(token, params).await, _ => Err(RuntimeError::InvalidInput(format!( "brain pack does not handle verb {verb:?}" ))), @@ -308,39 +706,40 @@ impl PackRuntime for BrainPack { } } -/// `BrainPack` as a post-dispatch hook (Issue #158). +// ── DispatchHook impl ───────────────────────────────────────────────────────── + +/// `BrainPack` as a post-dispatch hook. 
/// /// When registered via `VerbRegistryBuilder::with_dispatch_hook`, every /// successful verb dispatch calls `on_dispatch` with a synthesized `Event`. -/// The event is fed into `EventFold::step`, updating the brain's posteriors -/// in real time — no polling required. -/// -/// This is opt-in: the hook must be explicitly registered. Registries that do -/// not load the brain pack are unaffected. +/// The event is fed into `BalancedRecallFold::reduce`, updating the brain's +/// posteriors in real time — no polling required. #[async_trait] impl DispatchHook for BrainPack { - async fn on_dispatch(&self, event: &Event) { + async fn on_dispatch(&self, view: &EventView) { let ctx = FoldContext::new(); let mut state = self.state.lock().unwrap(); - // Replace state with fold result. BrainState is not Clone, so we - // use mem::replace with a sentinel and immediately overwrite. let current = std::mem::replace( - &mut *state, - BrainState::new(std::collections::HashMap::new(), 0), + &mut state.balanced_recall, + crate::state::BalancedRecallState::new(0), ); - *state = self.fold.step(current, event, &ctx); + let updated = self.fold.reduce(current, &view.event, &ctx); + state.balanced_recall = updated; } } +// ── Tests ───────────────────────────────────────────────────────────────────── + #[cfg(test)] mod tests { use super::*; - use khive_runtime::VerbRegistryBuilder; + use khive_runtime::{Namespace, VerbRegistryBuilder}; use serde_json::json; - fn make_pack() -> BrainPack { + fn make_pack() -> (BrainPack, KhiveRuntime) { let rt = KhiveRuntime::memory().expect("in-memory runtime"); - BrainPack::new(rt) + let pack = BrainPack::new(rt.clone()); + (pack, rt) } fn empty_registry() -> VerbRegistry { @@ -351,10 +750,15 @@ mod tests { #[tokio::test] async fn dispatch_unknown_verb_returns_invalid_input() { - let pack = make_pack(); + let (pack, rt) = make_pack(); let registry = empty_registry(); let err = pack - .dispatch("brain.unknown", json!({}), ®istry) + .dispatch( + 
"brain.unknown", + json!({}), + ®istry, + &rt.authorize(Namespace::local()), + ) .await .unwrap_err(); if let RuntimeError::InvalidInput(msg) = &err { @@ -369,10 +773,15 @@ mod tests { #[tokio::test] async fn dispatch_reset_returns_true_and_increments_epoch() { - let pack = make_pack(); + let (pack, rt) = make_pack(); let registry = empty_registry(); let result = pack - .dispatch("brain.reset", json!({}), ®istry) + .dispatch( + "brain.reset", + json!({}), + ®istry, + &rt.authorize(Namespace::local()), + ) .await .unwrap(); assert_eq!(result["reset"], json!(true)); @@ -380,15 +789,16 @@ mod tests { } #[tokio::test] - async fn dispatch_emit_invalid_signal_returns_invalid_input() { - let pack = make_pack(); + async fn dispatch_feedback_invalid_signal_returns_invalid_input() { + let (pack, rt) = make_pack(); let registry = empty_registry(); let target = "00000000-0000-0000-0000-000000000001"; let err = pack .dispatch( - "brain.emit", + "brain.feedback", json!({"target_id": target, "signal": "bad_signal"}), ®istry, + &rt.authorize(Namespace::local()), ) .await .unwrap_err(); @@ -408,17 +818,357 @@ mod tests { #[tokio::test] async fn dispatch_state_returns_snapshot_fields() { - let pack = make_pack(); + let (pack, rt) = make_pack(); let registry = empty_registry(); let result = pack - .dispatch("brain.state", json!({}), ®istry) + .dispatch( + "brain.state", + json!({}), + ®istry, + &rt.authorize(Namespace::local()), + ) .await .unwrap(); - assert!(result.get("total_events").is_some(), "missing total_events"); + assert!(result.get("profiles").is_some(), "missing profiles"); assert!( - result.get("exploration_epoch").is_some(), - "missing exploration_epoch" + result.get("balanced_recall").is_some(), + "missing balanced_recall" ); - assert!(result.get("parameters").is_some(), "missing parameters"); + assert!(result.get("bindings").is_some(), "missing bindings"); + } + + #[tokio::test] + async fn dispatch_profiles_returns_default_profile() { + let (pack, rt) = make_pack(); + 
let registry = empty_registry(); + let result = pack + .dispatch( + "brain.profiles", + json!({}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + let profiles = result["profiles"].as_array().unwrap(); + assert!(!profiles.is_empty(), "expected at least one profile"); + assert_eq!(profiles[0]["id"], json!("balanced-recall-v1")); + } + + #[tokio::test] + async fn dispatch_profiles_filtered_by_lifecycle() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.profiles", + json!({"lifecycle": "active"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + let profiles = result["profiles"].as_array().unwrap(); + for p in profiles { + assert_eq!(p["lifecycle"], json!("active")); + } + } + + #[tokio::test] + async fn dispatch_profile_returns_profile_details() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.profile", + json!({"id": "balanced-recall-v1"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + assert_eq!(result["id"], json!("balanced-recall-v1")); + assert_eq!(result["state_class"], json!("Bayesian")); + assert_eq!(result["consumer_kind"], json!("recall")); + } + + #[tokio::test] + async fn dispatch_profile_not_found_returns_not_found() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let err = pack + .dispatch( + "brain.profile", + json!({"id": "nonexistent"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap_err(); + assert!(matches!(err, RuntimeError::NotFound(_))); + } + + #[tokio::test] + async fn dispatch_resolve_returns_default_profile_for_recall() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.resolve", + json!({"consumer_kind": "recall"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + assert_eq!(result["resolved_profile_id"], 
json!("balanced-recall-v1")); + } + + #[tokio::test] + async fn dispatch_activate_and_deactivate_profile() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let token = rt.authorize(Namespace::local()); + + // Deactivate the default profile + let result = pack + .dispatch( + "brain.deactivate", + json!({"profile_id": "balanced-recall-v1"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(result["lifecycle"], json!("inactive")); + + // Verify via brain.profile + let state = pack + .dispatch( + "brain.profile", + json!({"id": "balanced-recall-v1"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(state["lifecycle"], json!("inactive")); + + // Reactivate + let result = pack + .dispatch( + "brain.activate", + json!({"profile_id": "balanced-recall-v1"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(result["lifecycle"], json!("active")); + } + + #[tokio::test] + async fn dispatch_archive_profile() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.archive", + json!({"profile_id": "balanced-recall-v1"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + assert_eq!(result["lifecycle"], json!("archived")); + } + + #[tokio::test] + async fn dispatch_activate_nonexistent_profile_returns_not_found() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let err = pack + .dispatch( + "brain.activate", + json!({"profile_id": "ghost-profile"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap_err(); + assert!(matches!(err, RuntimeError::NotFound(_))); + } + + #[tokio::test] + async fn dispatch_bind_and_resolve_explicit_binding() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let token = rt.authorize(Namespace::local()); + + // Bind balanced-recall-v1 for actor "agent-x" + let result = pack + .dispatch( + "brain.bind", + json!({ + "profile_id": "balanced-recall-v1", + "actor": 
"agent-x", + "consumer_kind": "recall" + }), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(result["bound"], json!(true)); + assert_eq!(result["actor"], json!("agent-x")); + + // Resolve — should return the explicitly bound profile + let resolved = pack + .dispatch( + "brain.resolve", + json!({"actor": "agent-x", "consumer_kind": "recall"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(resolved["resolved_profile_id"], json!("balanced-recall-v1")); + } + + #[tokio::test] + async fn dispatch_bind_nonexistent_profile_returns_not_found() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let err = pack + .dispatch( + "brain.bind", + json!({"profile_id": "ghost", "consumer_kind": "recall"}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap_err(); + assert!(matches!(err, RuntimeError::NotFound(_))); + } + + #[tokio::test] + async fn dispatch_unbind_removes_binding() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let token = rt.authorize(Namespace::local()); + + // Add a binding + pack.dispatch( + "brain.bind", + json!({"profile_id": "balanced-recall-v1", "actor": "agent-y", "consumer_kind": "recall"}), + ®istry, + &token, + ) + .await + .unwrap(); + + // Remove it + let result = pack + .dispatch( + "brain.unbind", + json!({"actor": "agent-y"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!(result["unbound"], json!(1u64)); + } + + // Regression test for MAJ-002: unbind with multiple filters must use AND semantics, + // removing only the binding that satisfies ALL supplied criteria. 
+ #[tokio::test] + async fn dispatch_unbind_uses_and_not_or() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let token = rt.authorize(Namespace::local()); + + // binding 1: ns=A, profile=P1 (the one we want to remove) + pack.dispatch( + "brain.bind", + json!({"profile_id": "balanced-recall-v1", "namespace": "ns-a", "consumer_kind": "recall"}), + ®istry, + &token, + ) + .await + .unwrap(); + + // binding 2: ns=B, profile=P1 (must survive) + pack.dispatch( + "brain.bind", + json!({"profile_id": "balanced-recall-v1", "namespace": "ns-b", "consumer_kind": "recall"}), + ®istry, + &token, + ) + .await + .unwrap(); + + // Unbind using both filters: only binding-1 should be removed + let result = pack + .dispatch( + "brain.unbind", + json!({"namespace": "ns-a", "profile_id": "balanced-recall-v1"}), + ®istry, + &token, + ) + .await + .unwrap(); + assert_eq!( + result["unbound"], + json!(1u64), + "should remove exactly one binding" + ); + + // binding-2 (ns-b) must still exist + let state = pack.state.lock().unwrap(); + let remaining: Vec<_> = state + .bindings + .iter() + .filter(|b| b.namespace == "ns-b") + .collect(); + assert_eq!(remaining.len(), 1, "ns-b binding must survive the unbind"); + } + + #[tokio::test] + async fn dispatch_config_all_parameters() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.config", + json!({}), + ®istry, + &rt.authorize(Namespace::local()), + ) + .await + .unwrap(); + let obj = result.as_object().unwrap(); + assert!(obj.contains_key("recall::relevance_weight")); + assert!(obj.contains_key("recall::importance_weight")); + assert!(obj.contains_key("recall::temporal_weight")); + } + + #[tokio::test] + async fn dispatch_config_single_parameter() { + let (pack, rt) = make_pack(); + let registry = empty_registry(); + let result = pack + .dispatch( + "brain.config", + json!({"parameter": "recall::relevance_weight"}), + ®istry, + &rt.authorize(Namespace::local()), 
+ ) + .await + .unwrap(); + assert_eq!(result["parameter"], json!("recall::relevance_weight")); + // Prior is Beta(7,3): mean = 0.7 + let mean = result["mean"].as_f64().unwrap(); + assert!((mean - 0.7).abs() < 1e-6); } } diff --git a/crates/khive-pack-brain/src/state.rs b/crates/khive-pack-brain/src/state.rs index 3d302b3a..65bb6bce 100644 --- a/crates/khive-pack-brain/src/state.rs +++ b/crates/khive-pack-brain/src/state.rs @@ -1,8 +1,11 @@ use std::collections::{HashMap, VecDeque}; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; +// ── BetaPosterior ───────────────────────────────────────────────────────────── + /// Beta-Binomial posterior for a single parameter. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct BetaPosterior { @@ -35,6 +38,16 @@ impl BetaPosterior { pub fn update_failure(&mut self) { self.beta += 1.0; } + + /// Combine evidence from two independent observers sharing the same prior. + /// + /// merged = Beta(a₁ + a₂ − a_prior, b₁ + b₂ − b_prior) + pub fn merge(&self, other: &BetaPosterior, prior: &BetaPosterior) -> BetaPosterior { + BetaPosterior { + alpha: self.alpha + other.alpha - prior.alpha, + beta: self.beta + other.beta - prior.beta, + } + } } impl Default for BetaPosterior { @@ -43,8 +56,10 @@ impl Default for BetaPosterior { } } +// ── EntityPosteriors ────────────────────────────────────────────────────────── + /// Bounded LRU map for per-entity posteriors. -/// Uses a VecDeque to track access order; evicts oldest on insert when full. +/// Uses a VecDeque to track insertion order; evicts oldest on insert when full. pub struct EntityPosteriors { map: HashMap, order: VecDeque, @@ -108,36 +123,63 @@ impl EntityPosteriors { } } -/// Runtime brain state — not directly serializable (contains LRU). 
-pub struct BrainState { - pub parameters: HashMap, +// ── BalancedRecallState ─────────────────────────────────────────────────────── + +/// State for the `BalancedRecallProfile` — the v1 default profile. +/// +/// Migrated from the predecessor scalar `BrainState` design (ADR-032 §5a). +/// Three-parameter Beta posteriors with informative priors + per-entity LRU. +pub struct BalancedRecallState { + /// relevance_weight — prior Beta(7,3): warm-starts expecting 70% success + pub relevance: BetaPosterior, + /// importance_weight — prior Beta(2,8) + pub importance: BetaPosterior, + /// temporal_weight — prior Beta(1,9) + pub temporal: BetaPosterior, + /// Per-entity posteriors, bounded LRU (10K default) pub entity_posteriors: EntityPosteriors, + /// Total events processed by this profile pub total_events: u64, + /// Incremented each time posteriors are reset to priors pub exploration_epoch: u64, } -impl BrainState { - pub fn new(parameters: HashMap, entity_capacity: usize) -> Self { +impl BalancedRecallState { + pub fn new(entity_capacity: usize) -> Self { Self { - parameters, + relevance: BetaPosterior::new(7.0, 3.0), + importance: BetaPosterior::new(2.0, 8.0), + temporal: BetaPosterior::new(1.0, 9.0), entity_posteriors: EntityPosteriors::new(entity_capacity), total_events: 0, exploration_epoch: 0, } } - pub fn to_snapshot(&self) -> BrainStateSnapshot { - BrainStateSnapshot { - parameters: self.parameters.clone(), + pub fn reset_posteriors(&mut self) { + self.relevance = BetaPosterior::new(7.0, 3.0); + self.importance = BetaPosterior::new(2.0, 8.0); + self.temporal = BetaPosterior::new(1.0, 9.0); + self.entity_posteriors.clear(); + self.exploration_epoch += 1; + } + + pub fn to_snapshot(&self) -> BalancedRecallSnapshot { + BalancedRecallSnapshot { + relevance: self.relevance.clone(), + importance: self.importance.clone(), + temporal: self.temporal.clone(), entity_posteriors: self.entity_posteriors.to_snapshot(), total_events: self.total_events, exploration_epoch: 
self.exploration_epoch, } } - pub fn from_snapshot(snapshot: BrainStateSnapshot, entity_capacity: usize) -> Self { + pub fn from_snapshot(snapshot: BalancedRecallSnapshot, entity_capacity: usize) -> Self { Self { - parameters: snapshot.parameters, + relevance: snapshot.relevance, + importance: snapshot.importance, + temporal: snapshot.temporal, entity_posteriors: EntityPosteriors::from_snapshot( snapshot.entity_posteriors, entity_capacity, @@ -146,23 +188,206 @@ impl BrainState { exploration_epoch: snapshot.exploration_epoch, } } +} + +/// Serializable snapshot of `BalancedRecallState`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BalancedRecallSnapshot { + pub relevance: BetaPosterior, + pub importance: BetaPosterior, + pub temporal: BetaPosterior, + pub entity_posteriors: HashMap, + pub total_events: u64, + pub exploration_epoch: u64, +} + +// ── ProfileLifecycle ────────────────────────────────────────────────────────── + +/// Lifecycle states for a registered profile (ADR-032 §10). +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ProfileLifecycle { + /// Profile code and metadata exist; not yet registered with brain. + Defined, + /// Brain knows about it; backtest-eligible. Not yet in live update loop. + Registered, + /// Live update loop running; snapshots persist. + Active, + /// Registered but no live updates. State retained; read-only. + Inactive, + /// Live updates stopped; snapshots and event log retained for audit. + Archived, +} + +// ── ProfileRecord ───────────────────────────────────────────────────────────── + +/// Profile metadata stored in the registry (ADR-032 §2). 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProfileRecord { + pub id: String, + pub description: String, + pub consumer_kind: String, + pub state_class: String, + pub lifecycle: ProfileLifecycle, + pub created_at: DateTime, + /// Serialized state snapshot (opaque bytes to brain core) + pub state_snapshot: Option, + pub total_events: u64, + pub exploration_epoch: u64, +} + +impl ProfileRecord { + pub fn new_balanced_recall(entity_capacity: usize) -> Self { + let state = BalancedRecallState::new(entity_capacity); + let snapshot = state.to_snapshot(); + Self { + id: "balanced-recall-v1".into(), + description: "Default recall profile: three-scalar Beta posteriors (ADR-032 §5a)" + .into(), + consumer_kind: "recall".into(), + state_class: "Bayesian".into(), + lifecycle: ProfileLifecycle::Active, + created_at: Utc::now(), + state_snapshot: serde_json::to_value(snapshot).ok(), + total_events: 0, + exploration_epoch: 0, + } + } +} + +// ── ProfileBinding ──────────────────────────────────────────────────────────── + +/// One row in the profile binding table (ADR-032 §10). +/// +/// Resolution uses longest-match wins; `*` is the wildcard sentinel. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProfileBinding { + pub actor: String, + pub namespace: String, + pub consumer_kind: String, + pub profile_id: String, + pub priority: i32, + pub created_at: DateTime, +} + +// ── BrainState (profile registry) ──────────────────────────────────────────── + +/// Runtime brain state — profile registry + active state per profile. +/// +/// ADR-032 §1: BrainState holds profile registry and lifecycle metadata. +/// Posteriors live inside each profile's own state, opaque to brain. +pub struct BrainState { + /// Registered profiles indexed by profile_id. + pub profiles: HashMap, + /// In-memory BalancedRecallState for the active default profile. 
+ pub balanced_recall: BalancedRecallState, + /// Profile binding table — maps (actor, namespace, consumer_kind) → profile_id. + pub bindings: Vec, +} + +impl BrainState { + pub fn new(entity_capacity: usize) -> Self { + let mut profiles = HashMap::new(); + let record = ProfileRecord::new_balanced_recall(entity_capacity); + profiles.insert(record.id.clone(), record); + Self { + profiles, + balanced_recall: BalancedRecallState::new(entity_capacity), + bindings: Vec::new(), + } + } + + pub fn to_snapshot(&self) -> BrainStateSnapshot { + BrainStateSnapshot { + profiles: self.profiles.clone(), + balanced_recall: self.balanced_recall.to_snapshot(), + bindings: self.bindings.clone(), + } + } + + pub fn from_snapshot(snapshot: BrainStateSnapshot, entity_capacity: usize) -> Self { + Self { + profiles: snapshot.profiles, + balanced_recall: BalancedRecallState::from_snapshot( + snapshot.balanced_recall, + entity_capacity, + ), + bindings: snapshot.bindings, + } + } + /// Reset the balanced-recall profile posteriors to priors. pub fn reset_posteriors(&mut self) { - for posterior in self.parameters.values_mut() { - *posterior = BetaPosterior::new(1.0, 1.0); + self.balanced_recall.reset_posteriors(); + if let Some(record) = self.profiles.get_mut("balanced-recall-v1") { + record.exploration_epoch = self.balanced_recall.exploration_epoch; + record.state_snapshot = serde_json::to_value(self.balanced_recall.to_snapshot()).ok(); } - self.entity_posteriors.clear(); - self.exploration_epoch += 1; + } + + /// Resolve the profile record for the given caller context (ADR-032 §10). + /// + /// Longest-match wins: actor + namespace + consumer_kind beats actor + consumer_kind + /// beats namespace + consumer_kind beats consumer_kind alone. With no matching binding, + /// falls back to `balanced-recall-v1` if Active, else to any Active profile of that kind.
+ pub fn resolve( + &self, + actor: Option<&str>, + namespace: Option<&str>, + consumer_kind: &str, + ) -> Option<&ProfileRecord> { + let actor_val = actor.unwrap_or("*"); + let namespace_val = namespace.unwrap_or("*"); + + let best = self + .bindings + .iter() + .filter(|b| { + (b.actor == "*" || b.actor == actor_val) + && (b.namespace == "*" || b.namespace == namespace_val) + && (b.consumer_kind == "*" || b.consumer_kind == consumer_kind) + }) + .max_by_key(|b| { + let actor_score = if b.actor != "*" { 4 } else { 0 }; + let ns_score = if b.namespace != "*" { 2 } else { 0 }; + let kind_score = if b.consumer_kind != "*" { 1 } else { 0 }; + ( + actor_score + ns_score + kind_score, + b.priority, + -(b.created_at.timestamp()), + ) + }); + + if let Some(binding) = best { + return self.profiles.get(&binding.profile_id); + } + + // No explicit binding — return the named default profile if it exists and is + // usable, otherwise fall through to any active profile for the consumer_kind. + // ADR-032 §10: "balanced-recall-v1" is the v1 system-default for recall. + if let Some(default) = self.profiles.get("balanced-recall-v1") { + if default.lifecycle == ProfileLifecycle::Active + && (default.consumer_kind == consumer_kind + || consumer_kind == "*" + || default.consumer_kind == "*") + { + return Some(default); + } + } + + // Generic fallback: first active profile matching consumer_kind. + self.profiles + .values() + .find(|p| p.consumer_kind == consumer_kind && p.lifecycle == ProfileLifecycle::Active) } } -/// Serializable snapshot of BrainState for persistence and inspection. +/// Serializable snapshot of the full brain state. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct BrainStateSnapshot { - pub parameters: HashMap, - pub entity_posteriors: HashMap, - pub total_events: u64, - pub exploration_epoch: u64, + pub profiles: HashMap, + pub balanced_recall: BalancedRecallSnapshot, + pub bindings: Vec, } #[cfg(test)] @@ -178,7 +403,6 @@ mod tests { #[test] fn beta_posterior_variance() { let p = BetaPosterior::new(7.0, 3.0); - // var = 7*3 / (10*10*11) = 21/1100 ≈ 0.01909 let expected = 21.0 / 1100.0; assert!((p.variance() - expected).abs() < 1e-12); } @@ -200,6 +424,17 @@ mod tests { assert!((p.mean() - 0.6).abs() < 1e-12); } + #[test] + fn beta_posterior_merge() { + let prior = BetaPosterior::new(2.0, 8.0); + let a = BetaPosterior::new(5.0, 9.0); // prior + 3 success, 1 failure + let b = BetaPosterior::new(4.0, 10.0); // prior + 2 success, 2 failure + let merged = a.merge(&b, &prior); + // merged = (5+4-2, 9+10-8) = (7, 11) + assert!((merged.alpha - 7.0).abs() < 1e-12); + assert!((merged.beta - 11.0).abs() < 1e-12); + } + #[test] fn entity_posteriors_eviction() { let mut ep = EntityPosteriors::new(3); @@ -208,7 +443,6 @@ mod tests { ep.get_or_insert(*id, BetaPosterior::default); } assert_eq!(ep.len(), 3); - // First two should be evicted assert!(ep.get(&ids[0]).is_none()); assert!(ep.get(&ids[1]).is_none()); assert!(ep.get(&ids[2]).is_some()); @@ -227,12 +461,9 @@ mod tests { } #[test] - fn brain_state_snapshot_roundtrip() { - let mut state = BrainState::new(HashMap::new(), 100); - state.parameters.insert( - "memory::relevance_weight".into(), - BetaPosterior::new(7.0, 3.0), - ); + fn balanced_recall_state_snapshot_roundtrip() { + let mut state = BalancedRecallState::new(100); + state.relevance.update_success(); state.total_events = 42; let id = Uuid::new_v4(); state @@ -242,18 +473,98 @@ mod tests { let snapshot = state.to_snapshot(); let json = serde_json::to_string(&snapshot).unwrap(); - let back: BrainStateSnapshot = serde_json::from_str(&json).unwrap(); + let back: 
BalancedRecallSnapshot = serde_json::from_str(&json).unwrap(); assert_eq!(back.total_events, 42); - assert!(back.parameters.contains_key("memory::relevance_weight")); + assert!((back.relevance.alpha - 8.0).abs() < 1e-12); assert!(back.entity_posteriors.contains_key(&id)); } #[test] - fn beta_posterior_default_has_uniform_prior() { - let p = BetaPosterior::default(); - assert!((p.alpha - 1.0).abs() < 1e-12); - assert!((p.beta - 1.0).abs() < 1e-12); - assert!((p.mean() - 0.5).abs() < 1e-12); + fn balanced_recall_state_reset_preserves_epoch_increment() { + let mut state = BalancedRecallState::new(10); + state.total_events = 100; + state.reset_posteriors(); + assert_eq!(state.total_events, 100); + assert_eq!(state.exploration_epoch, 1); + assert!((state.relevance.alpha - 7.0).abs() < 1e-12); + assert!((state.relevance.beta - 3.0).abs() < 1e-12); + } + + #[test] + fn brain_state_has_balanced_recall_profile_by_default() { + let state = BrainState::new(100); + assert!(state.profiles.contains_key("balanced-recall-v1")); + let record = &state.profiles["balanced-recall-v1"]; + assert_eq!(record.lifecycle, ProfileLifecycle::Active); + assert_eq!(record.consumer_kind, "recall"); + assert_eq!(record.state_class, "Bayesian"); + } + + #[test] + fn brain_state_reset_posteriors_updates_record() { + let mut state = BrainState::new(10); + state.balanced_recall.relevance.update_success(); + state.balanced_recall.total_events = 50; + state.reset_posteriors(); + assert_eq!(state.balanced_recall.exploration_epoch, 1); + let record = &state.profiles["balanced-recall-v1"]; + assert_eq!(record.exploration_epoch, 1); + } + + #[test] + fn brain_state_resolve_falls_back_to_default() { + let state = BrainState::new(100); + let resolved = state.resolve(None, None, "recall"); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap().id, "balanced-recall-v1"); + } + + #[test] + fn brain_state_resolve_uses_explicit_binding() { + let mut state = BrainState::new(100); + // Add a second profile + 
let mut alt = ProfileRecord::new_balanced_recall(100); + alt.id = "alt-profile".into(); + state.profiles.insert("alt-profile".into(), alt); + + // Bind alt-profile for actor "agent-1" + state.bindings.push(ProfileBinding { + actor: "agent-1".into(), + namespace: "*".into(), + consumer_kind: "recall".into(), + profile_id: "alt-profile".into(), + priority: 0, + created_at: Utc::now(), + }); + + let resolved = state.resolve(Some("agent-1"), None, "recall"); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap().id, "alt-profile"); + + // Different actor falls back to default + let resolved_other = state.resolve(Some("agent-2"), None, "recall"); + assert_eq!(resolved_other.unwrap().id, "balanced-recall-v1"); + } + + // Regression test for MAJ-005: an archived default profile must NOT be returned + // by resolve (ADR-032 §10: "Archived … NOT resolvable for live recall"). + #[test] + fn brain_state_resolve_skips_archived_default() { + let mut state = BrainState::new(100); + + // Archive the built-in default + state + .profiles + .get_mut("balanced-recall-v1") + .expect("default profile always exists") + .lifecycle = ProfileLifecycle::Archived; + + // No explicit binding → must not return the archived default + let resolved = state.resolve(None, None, "recall"); + assert!( + resolved.is_none(), + "archived default profile must not be returned by resolve" + ); } #[test] @@ -273,47 +584,43 @@ mod tests { } #[test] - fn brain_state_from_snapshot_roundtrip() { - let mut params = HashMap::new(); - params.insert( - "recall::relevance_weight".into(), - BetaPosterior::new(7.0, 3.0), - ); - let mut state = BrainState::new(params, 100); - state.total_events = 55; - state.exploration_epoch = 2; + fn brain_state_snapshot_roundtrip() { + let mut state = BrainState::new(100); + state.balanced_recall.relevance.update_success(); + state.balanced_recall.total_events = 55; + state.balanced_recall.exploration_epoch = 2; let id = Uuid::new_v4(); state + .balanced_recall 
.entity_posteriors .get_or_insert(id, || BetaPosterior::new(4.0, 6.0)) .update_success(); let snap1 = state.to_snapshot(); - let restored = BrainState::from_snapshot(snap1.clone(), 100); + let restored = BrainState::from_snapshot(snap1, 100); let snap2 = restored.to_snapshot(); - assert_eq!(snap2.total_events, 55); - assert_eq!(snap2.exploration_epoch, 2); - let p = &snap2.parameters["recall::relevance_weight"]; - assert!((p.alpha - 7.0).abs() < 1e-12); - assert!((p.beta - 3.0).abs() < 1e-12); - let ep = snap2.entity_posteriors.get(&id).unwrap(); - // default 4+1=5 alpha (update_success on 4.0), beta stays 6.0 + assert_eq!(snap2.balanced_recall.total_events, 55); + assert_eq!(snap2.balanced_recall.exploration_epoch, 2); + assert!((snap2.balanced_recall.relevance.alpha - 8.0).abs() < 1e-12); + let ep = snap2.balanced_recall.entity_posteriors.get(&id).unwrap(); assert!((ep.alpha - 5.0).abs() < 1e-12); assert!((ep.beta - 6.0).abs() < 1e-12); } #[test] - fn reset_posteriors_preserves_event_count() { - let mut params = HashMap::new(); - params.insert("test".into(), BetaPosterior::new(7.0, 3.0)); - let mut state = BrainState::new(params, 10); - state.total_events = 100; - state.reset_posteriors(); - assert_eq!(state.total_events, 100); - assert_eq!(state.exploration_epoch, 1); - let p = &state.parameters["test"]; + fn profile_lifecycle_serde_roundtrip() { + let lc = ProfileLifecycle::Active; + let json = serde_json::to_string(&lc).unwrap(); + let back: ProfileLifecycle = serde_json::from_str(&json).unwrap(); + assert_eq!(back, ProfileLifecycle::Active); + } + + #[test] + fn beta_posterior_default_has_uniform_prior() { + let p = BetaPosterior::default(); assert!((p.alpha - 1.0).abs() < 1e-12); assert!((p.beta - 1.0).abs() < 1e-12); + assert!((p.mean() - 0.5).abs() < 1e-12); } } diff --git a/crates/khive-pack-brain/src/tunable.rs b/crates/khive-pack-brain/src/tunable.rs index 9a4f2c52..a65857f5 100644 --- a/crates/khive-pack-brain/src/tunable.rs +++ 
b/crates/khive-pack-brain/src/tunable.rs @@ -3,13 +3,16 @@ use khive_runtime::RuntimeError; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::state::{BetaPosterior, BrainState}; +use crate::state::{BalancedRecallState, BetaPosterior}; /// Packs that want auto-tuning implement this trait. +/// /// The brain discovers tunable packs at startup via the PackRegistry. +/// `project_config` now receives a `BalancedRecallState` — the v1 profile +/// state — rather than the old flat `BrainState` scalar map. pub trait PackTunable: PackRuntime { fn parameter_space(&self) -> ParameterSpace; - fn project_config(&self, state: &BrainState) -> Value; + fn project_config(&self, state: &BalancedRecallState) -> Value; fn apply_config(&self, config: Value) -> Result<(), RuntimeError>; } diff --git a/crates/khive-pack-brain/tests/dispatch_hook.rs b/crates/khive-pack-brain/tests/dispatch_hook.rs index 6f976f52..0b12dfe4 100644 --- a/crates/khive-pack-brain/tests/dispatch_hook.rs +++ b/crates/khive-pack-brain/tests/dispatch_hook.rs @@ -1,10 +1,8 @@ //! End-to-end tests for `BrainPack` as a `DispatchHook` (issue #158). //! -//! The audit (parallel opus pass) found that the unit tests covered the -//! DispatchHook trait via mock hooks (`CountingHook` / `NsCapturingHook`) but -//! never wired the real `BrainPack` into a registry. These tests close that -//! gap: register a `BrainPack` as the dispatch hook, fire a verb through the -//! KG pack, and verify the brain's posteriors actually updated. +//! Per ADR-032, `BrainState` now holds a profile registry; the BalancedRecall +//! profile's `total_events` counter lives in `snapshot.balanced_recall.total_events`. +//! These tests verify the dispatch hook still drives the BalancedRecallFold. 
use std::sync::Arc; @@ -42,20 +40,17 @@ async fn brain_pack_dispatch_hook_records_real_dispatch_events() { .await .expect("create entity must succeed"); - // Every successful dispatch increments BrainState.total_events via - // EventFold::step. That counter is the brain's lowest-common-denominator - // observation — it's incremented regardless of whether the event matches - // a recall-specific or entity-specific signal (those drive parameter - // posteriors). If the hook never fired, the counter would stay at baseline. + // Every successful dispatch increments BalancedRecallState.total_events via + // BalancedRecallFold::reduce. If the hook never fired, the counter stays at + // baseline. let after = brain.snapshot(); assert_eq!( - after.total_events, - baseline.total_events + 1, + after.balanced_recall.total_events, + baseline.balanced_recall.total_events + 1, "#158 regression: total_events did not advance after a successful KG \ - verb dispatch. Hook is wired (audit) but evidently no event reached \ - the fold. baseline={}, after={}", - baseline.total_events, - after.total_events, + verb dispatch. 
baseline={}, after={}", + baseline.balanced_recall.total_events, + after.balanced_recall.total_events, ); // Fire two more successful dispatches and verify the counter advances by @@ -75,11 +70,11 @@ async fn brain_pack_dispatch_hook_records_real_dispatch_events() { } let final_state = brain.snapshot(); assert_eq!( - final_state.total_events, - baseline.total_events + 3, + final_state.balanced_recall.total_events, + baseline.balanced_recall.total_events + 3, "hook must fire once per successful dispatch: expected {}+3 events, got {}", - baseline.total_events, - final_state.total_events, + baseline.balanced_recall.total_events, + final_state.balanced_recall.total_events, ); } @@ -101,12 +96,16 @@ async fn brain_pack_hook_does_not_fire_on_unknown_verb() { let _ = registry.dispatch("frobnicate_nonexistent", json!({})).await; let after = brain.snapshot(); - // The verb errored, so parameters should be identical to baseline. + // The verb errored, so BalancedRecallState.total_events must be unchanged. 
assert_eq!( - after.parameters.len(), - baseline.parameters.len(), - "unknown verb must NOT change brain state — got {} params, baseline had {}", - after.parameters.len(), - baseline.parameters.len() + after.balanced_recall.total_events, baseline.balanced_recall.total_events, + "unknown verb must NOT change brain state — got {}, baseline had {}", + after.balanced_recall.total_events, baseline.balanced_recall.total_events, + ); + // The profile registry is also unchanged + assert_eq!( + after.profiles.len(), + baseline.profiles.len(), + "profile registry must not change on failed dispatch" ); } diff --git a/crates/khive-pack-comm/Cargo.toml b/crates/khive-pack-comm/Cargo.toml new file mode 100644 index 00000000..6bb52626 --- /dev/null +++ b/crates/khive-pack-comm/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "khive-pack-comm" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +description = "Communication pack — inter-agent messaging (send, inbox, read, reply) (ADR-040)" + +[dependencies] +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +inventory = { workspace = true } +async-trait = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true, features = ["test-util"] } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } diff --git a/crates/khive-pack-comm/src/handlers.rs b/crates/khive-pack-comm/src/handlers.rs new file mode 100644 index 00000000..c4b5e519 --- /dev/null +++ b/crates/khive-pack-comm/src/handlers.rs @@ -0,0 +1,313 @@ +//! 
Verb handler implementations for the comm pack (ADR-040). +//! +//! All four verbs (`send`, `inbox`, `read`, `reply`) store and query `message` +//! notes in the standard notes table. Message-specific metadata lives in the +//! `properties` JSON column; `content` is the message body. + +use chrono::Utc; +use serde::Deserialize; +use serde_json::{json, Value}; +use uuid::Uuid; + +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError}; +use khive_storage::note::Note; + +fn short_id(uuid: Uuid) -> String { + uuid.as_hyphenated().to_string().chars().take(8).collect() +} + +fn note_to_message_json(note: &Note) -> Value { + json!({ + "id": short_id(note.id), + "full_id": note.id, + "kind": "message", + "content": note.content, + "namespace": note.namespace, + "properties": note.properties, + "created_at": note.created_at, + "updated_at": note.updated_at, + }) +} + +// ── param structs ──────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +pub(crate) struct SendParams { + pub to: String, + pub content: String, + #[serde(default)] + pub subject: Option, + #[serde(default)] + pub thread_id: Option, +} + +#[derive(Deserialize)] +pub(crate) struct InboxParams { + #[serde(default)] + pub limit: Option, + #[serde(default)] + pub status: Option, +} + +#[derive(Deserialize)] +pub(crate) struct ReadParams { + pub id: String, +} + +#[derive(Deserialize)] +pub(crate) struct ReplyParams { + pub id: String, + pub content: String, +} + +fn deser(params: Value) -> Result { + serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(format!("bad params: {e}"))) +} + +// ── handlers ───────────────────────────────────────────────────────────────── + +/// `send` — create a message note in the caller's namespace (ADR-040 §send). 
+pub(crate) async fn handle_send( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: SendParams = deser(params)?; + if p.to.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "send: `to` must not be empty".into(), + )); + } + if p.content.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "send: `content` must not be empty".into(), + )); + } + + let from = token.namespace().as_str().to_string(); + let sent_at = Utc::now().to_rfc3339(); + + let properties = json!({ + "from": from, + "to": p.to, + "direction": "outbound", + "subject": p.subject, + "thread_id": p.thread_id, + "read": false, + "sent_at": sent_at, + }); + + let note = runtime + .create_note( + token, + "message", + p.subject.as_deref(), + &p.content, + None, + Some(properties), + Vec::new(), + ) + .await?; + + Ok(json!({ + "id": short_id(note.id), + "full_id": note.id, + "from": from, + "to": p.to, + "subject": p.subject, + "sent_at": sent_at, + })) +} + +/// `inbox` — list inbound messages for the caller namespace (ADR-040 §inbox). +pub(crate) async fn handle_inbox( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: InboxParams = deser(params)?; + let limit = p.limit.unwrap_or(20).clamp(1, 200); + let status = p.status.as_deref().unwrap_or("unread"); + + // Pull a broad window and filter in-memory for direction + read status. 
+ let notes = runtime + .list_notes(token, Some("message"), limit * 4, 0) + .await?; + + let messages: Vec = notes + .iter() + .filter(|n| n.deleted_at.is_none()) + .filter(|n| { + let props = n.properties.as_ref(); + let direction = props + .and_then(|p| p.get("direction")) + .and_then(Value::as_str); + if direction != Some("inbound") { + return false; + } + let read = props + .and_then(|p| p.get("read")) + .and_then(Value::as_bool) + .unwrap_or(false); + match status { + "unread" => !read, + "read" => read, + _ => true, // "all" + } + }) + .take(limit as usize) + .map(note_to_message_json) + .collect(); + + let count = messages.len(); + Ok(json!({ "messages": messages, "count": count })) +} + +/// `read` — mark a message as read (ADR-040 §read). +pub(crate) async fn handle_read( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: ReadParams = deser(params)?; + let id = Uuid::parse_str(&p.id) + .map_err(|_| RuntimeError::InvalidInput(format!("read: invalid UUID {:?}", p.id)))?; + + let store = runtime.notes(token)?; + let mut note = store + .get_note(id) + .await + .map_err(|e| RuntimeError::Internal(format!("read: get_note: {e}")))? + .ok_or_else(|| RuntimeError::NotFound(format!("read: message {id} not found")))?; + + if note.namespace != token.namespace().as_str() { + return Err(RuntimeError::NotFound(format!( + "read: message {id} not found" + ))); + } + if note.kind != "message" { + return Err(RuntimeError::InvalidInput(format!( + "read: note {id} is kind {:?}, expected \"message\"", + note.kind + ))); + } + + // Merge `read: true` into properties. 
+ let mut props = note.properties.clone().unwrap_or_else(|| json!({})); + props["read"] = json!(true); + note.properties = Some(props.clone()); + note.updated_at = Utc::now().timestamp_micros(); + + store + .upsert_note(note) + .await + .map_err(|e| RuntimeError::Internal(format!("read: upsert_note: {e}")))?; + + Ok(json!({ "id": short_id(id), "full_id": id, "read": true, "properties": props })) +} + +/// `reply` — reply to a message, threading linkage (ADR-040 §reply). +pub(crate) async fn handle_reply( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: ReplyParams = deser(params)?; + let id = Uuid::parse_str(&p.id) + .map_err(|_| RuntimeError::InvalidInput(format!("reply: invalid UUID {:?}", p.id)))?; + if p.content.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "reply: `content` must not be empty".into(), + )); + } + + let store = runtime.notes(token)?; + let original = store + .get_note(id) + .await + .map_err(|e| RuntimeError::Internal(format!("reply: get_note: {e}")))? + .ok_or_else(|| RuntimeError::NotFound(format!("reply: message {id} not found")))?; + + if original.namespace != token.namespace().as_str() { + return Err(RuntimeError::NotFound(format!( + "reply: message {id} not found" + ))); + } + if original.kind != "message" { + return Err(RuntimeError::InvalidInput(format!( + "reply: note {id} is kind {:?}, expected \"message\"", + original.kind + ))); + } + + let orig_props = original + .properties + .as_ref() + .cloned() + .unwrap_or_else(|| json!({})); + + // Thread root: use the original's thread_id if set, else the original's own UUID. 
+ let thread_id = orig_props + .get("thread_id") + .and_then(Value::as_str) + .map(str::to_string) + .unwrap_or_else(|| id.to_string()); + + let original_sender = orig_props + .get("from") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + + let original_subject = orig_props + .get("subject") + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + + let reply_subject = if original_subject.starts_with("Re: ") || original_subject.is_empty() { + original_subject.clone() + } else { + format!("Re: {original_subject}") + }; + + let from = token.namespace().as_str().to_string(); + let sent_at = Utc::now().to_rfc3339(); + + let properties = json!({ + "from": from, + "to": original_sender, + "direction": "outbound", + "subject": reply_subject, + "thread_id": thread_id, + "read": false, + "sent_at": sent_at, + }); + + let reply_note = runtime + .create_note( + token, + "message", + if reply_subject.is_empty() { + None + } else { + Some(reply_subject.as_str()) + }, + &p.content, + None, + Some(properties), + Vec::new(), + ) + .await?; + + Ok(json!({ + "id": short_id(reply_note.id), + "full_id": reply_note.id, + "thread_id": thread_id, + "from": from, + "to": original_sender, + "subject": reply_subject, + "sent_at": sent_at, + })) +} diff --git a/crates/khive-pack-comm/src/lib.rs b/crates/khive-pack-comm/src/lib.rs new file mode 100644 index 00000000..6c11f1d7 --- /dev/null +++ b/crates/khive-pack-comm/src/lib.rs @@ -0,0 +1,110 @@ +//! pack-comm — Communication pack (ADR-040). 
+pub mod handlers; + +use async_trait::async_trait; +use serde_json::Value; + +use khive_runtime::pack::PackRuntime; +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry}; +use khive_types::{HandlerDef, Pack, Visibility}; + +pub struct CommPack { + runtime: KhiveRuntime, +} + +impl Pack for CommPack { + const NAME: &'static str = "comm"; + const NOTE_KINDS: &'static [&'static str] = &["message"]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = &COMM_HANDLERS; + const REQUIRES: &'static [&'static str] = &["kg"]; +} + +static COMM_HANDLERS: [HandlerDef; 4] = [ + HandlerDef { + name: "send", + description: "Send a message, optionally threaded.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Directive, + }, + HandlerDef { + name: "inbox", + description: "List inbound messages for the caller.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Assertive, + }, + HandlerDef { + name: "read", + description: "Mark a message as read.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Declaration, + }, + HandlerDef { + name: "reply", + description: "Reply to a message, threading linkage.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Directive, + }, +]; + +impl CommPack { + pub fn new(runtime: KhiveRuntime) -> Self { + Self { runtime } + } + pub(crate) fn runtime(&self) -> &KhiveRuntime { + &self.runtime + } +} + +struct CommPackFactory; + +impl khive_runtime::PackFactory for CommPackFactory { + fn name(&self) -> &'static str { + "comm" + } + fn requires(&self) -> &'static [&'static str] { + &["kg"] + } + fn create(&self, runtime: KhiveRuntime) -> Box { + Box::new(CommPack::new(runtime)) + } +} + +inventory::submit! 
{ khive_runtime::PackRegistration(&CommPackFactory) } + +#[async_trait] +impl PackRuntime for CommPack { + fn name(&self) -> &str { + ::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + ::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + ::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + &COMM_HANDLERS + } + fn requires(&self) -> &'static [&'static str] { + ::REQUIRES + } + + async fn dispatch( + &self, + verb: &str, + params: Value, + _registry: &VerbRegistry, + token: &NamespaceToken, + ) -> Result { + match verb { + "send" => handlers::handle_send(self.runtime(), token, params).await, + "inbox" => handlers::handle_inbox(self.runtime(), token, params).await, + "read" => handlers::handle_read(self.runtime(), token, params).await, + "reply" => handlers::handle_reply(self.runtime(), token, params).await, + _ => Err(RuntimeError::InvalidInput(format!( + "comm pack does not handle verb {verb:?}" + ))), + } + } +} diff --git a/crates/khive-pack-comm/tests/integration.rs b/crates/khive-pack-comm/tests/integration.rs new file mode 100644 index 00000000..d0bbd5ef --- /dev/null +++ b/crates/khive-pack-comm/tests/integration.rs @@ -0,0 +1,154 @@ +//! Smoke tests for the comm pack (ADR-040). 
+ +use khive_pack_comm::CommPack; +use khive_runtime::{KhiveRuntime, VerbRegistry, VerbRegistryBuilder}; +use khive_types::Pack; + +fn build_registry() -> (VerbRegistry, KhiveRuntime) { + let runtime = KhiveRuntime::memory().expect("in-memory runtime"); + let mut builder = VerbRegistryBuilder::new(); + builder.register(khive_pack_kg::KgPack::new(runtime.clone())); + builder.register(CommPack::new(runtime.clone())); + let registry = builder.build().expect("registry builds"); + (registry, runtime) +} + +#[test] +fn comm_pack_declares_message_note_kind() { + assert!(CommPack::NOTE_KINDS.contains(&"message")); +} + +#[test] +fn comm_pack_declares_four_handlers() { + assert_eq!(CommPack::HANDLERS.len(), 4); + let names: Vec<&str> = CommPack::HANDLERS.iter().map(|h| h.name).collect(); + assert!(names.contains(&"send")); + assert!(names.contains(&"inbox")); + assert!(names.contains(&"read")); + assert!(names.contains(&"reply")); +} + +#[test] +fn comm_pack_requires_kg() { + assert_eq!(CommPack::REQUIRES, &["kg"]); +} + +#[tokio::test] +async fn send_and_inbox_roundtrip() { + let (registry, _rt) = build_registry(); + + // Send a message — creates an outbound message note. + let result = registry + .dispatch( + "send", + serde_json::json!({ "to": "agent:bob", "content": "hello" }), + ) + .await + .expect("send succeeds"); + assert!(result.get("id").is_some(), "send returns id: {result}"); + + // Query the inbox with status=all, which disables only the read/unread filter. + let inbox = registry + .dispatch("inbox", serde_json::json!({ "status": "all", "limit": 10 })) + .await + .expect("inbox succeeds"); + // We sent an outbound message, and inbox lists only inbound messages. + // status=all does not include outbound; the direction filter still applies. + // The test verifies inbox runs without error; count may be 0 for outbound.
+ assert!(inbox.get("count").is_some(), "inbox returns count: {inbox}"); +} + +#[tokio::test] +async fn read_marks_message_as_read() { + let (registry, _rt) = build_registry(); + + // Send a message and capture the full_id. + let sent = registry + .dispatch( + "send", + serde_json::json!({ "to": "agent:alice", "content": "mark me read" }), + ) + .await + .expect("send succeeds"); + let full_id = sent + .get("full_id") + .and_then(|v| v.as_str()) + .expect("send returns full_id"); + + // Call read with the full UUID — must succeed and return read: true. + let result = registry + .dispatch("read", serde_json::json!({ "id": full_id })) + .await + .expect("read succeeds"); + assert_eq!( + result.get("read").and_then(|v| v.as_bool()), + Some(true), + "read returns read:true — got {result}" + ); + assert_eq!( + result.get("full_id").and_then(|v| v.as_str()), + Some(full_id), + "read returns the same message id" + ); +} + +#[tokio::test] +async fn reply_creates_threaded_message() { + let (registry, _rt) = build_registry(); + + // Send the original message. + let original = registry + .dispatch( + "send", + serde_json::json!({ + "to": "agent:carol", + "content": "original message", + "subject": "Hello" + }), + ) + .await + .expect("send original succeeds"); + let original_full_id = original + .get("full_id") + .and_then(|v| v.as_str()) + .expect("send returns full_id"); + + // Reply to the original message. + let reply = registry + .dispatch( + "reply", + serde_json::json!({ + "id": original_full_id, + "content": "this is a reply" + }), + ) + .await + .expect("reply succeeds"); + + // reply must return an id (the new message). + assert!(reply.get("id").is_some(), "reply returns id: {reply}"); + // thread_id must be set to the original message's UUID. + assert_eq!( + reply.get("thread_id").and_then(|v| v.as_str()), + Some(original_full_id), + "reply thread_id matches original full_id: {reply}" + ); + // subject should be prefixed with "Re: ". 
+ assert_eq!( + reply.get("subject").and_then(|v| v.as_str()), + Some("Re: Hello"), + "reply subject is prefixed with Re: — got {reply}" + ); +} + +#[tokio::test] +async fn unknown_verb_returns_error() { + let (registry, _rt) = build_registry(); + let err = registry + .dispatch("comm.does_not_exist", serde_json::Value::Null) + .await + .unwrap_err(); + assert!( + err.to_string().contains("comm.does_not_exist") || err.to_string().contains("unknown verb") + ); +} diff --git a/crates/khive-pack-gtd/Cargo.toml b/crates/khive-pack-gtd/Cargo.toml index 9636f2ec..9a07f7c4 100644 --- a/crates/khive-pack-gtd/Cargo.toml +++ b/crates/khive-pack-gtd/Cargo.toml @@ -11,10 +11,10 @@ categories.workspace = true description = "GTD verb pack — task lifecycle (assign/next/complete/transition) over the notes substrate" [dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } inventory = { workspace = true } -khive-storage = { version = "0.2.0", path = "../khive-storage" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } async-trait = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } @@ -24,4 +24,4 @@ tracing = { workspace = true } [dev-dependencies] tokio = { workspace = true, features = ["test-util"] } -khive-pack-kg = { version = "0.2.0", path = "../khive-pack-kg" } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } diff --git a/crates/khive-pack-gtd/src/handlers.rs b/crates/khive-pack-gtd/src/handlers.rs index f8c57dc1..d79fd5e0 100644 --- a/crates/khive-pack-gtd/src/handlers.rs +++ b/crates/khive-pack-gtd/src/handlers.rs @@ -10,7 +10,8 @@ use serde::Deserialize; use serde_json::{json, Value}; use uuid::Uuid; -use khive_runtime::{KhiveRuntime, Resolved, 
RuntimeError}; +use khive_runtime::{KhiveRuntime, NamespaceToken, Resolved, RuntimeError}; +use khive_storage::types::{SqlStatement, SqlValue}; use khive_storage::EdgeRelation; use crate::schema::{ @@ -19,11 +20,86 @@ use crate::schema::{ }; use crate::GtdPack; +// ── lifecycle audit schema (ADR-019 §schema_plan) ─────────────────────────── + +/// Ensure `gtd_lifecycle_audit` and its index exist on the given runtime. +/// +/// Idempotent (`CREATE TABLE IF NOT EXISTS`). Applied lazily on the first +/// `transition` or `complete` call. Logs a warning and continues if the DDL +/// fails (e.g. read-only replica) — the audit is best-effort, not load-bearing. +/// +/// We intentionally apply the DDL on each call rather than using a global +/// `OnceLock`, because each `KhiveRuntime::memory()` in tests creates a fresh +/// in-memory database that needs its own schema bootstrap. In production the +/// DDL is idempotent and cheap (SQLite skips `IF NOT EXISTS` tables instantly). +async fn ensure_audit_schema(runtime: &KhiveRuntime) { + let script = crate::GTD_SCHEMA_PLAN_STMTS.join(";"); + match runtime.sql().writer().await { + Ok(mut w) => { + if let Err(e) = w.execute_script(script).await { + tracing::warn!(error = %e, "gtd: failed to apply lifecycle_audit schema (non-fatal)"); + } + } + Err(e) => { + tracing::warn!(error = %e, "gtd: failed to acquire SQL writer for audit schema (non-fatal)"); + } + } +} + +/// Append one row to `gtd_lifecycle_audit`. +/// +/// Best-effort: failures are logged and swallowed. The note's successful +/// write has already happened; a missing audit row is degraded, not a failure. 
+async fn write_audit_record( + runtime: &KhiveRuntime, + note_id: Uuid, + from: &str, + to: &str, + transition_note: Option<&str>, +) { + let now = Utc::now().timestamp_micros(); + let stmt = SqlStatement { + sql: "INSERT INTO gtd_lifecycle_audit (note_id, from_state, to_state, note, at) \ + VALUES (?1, ?2, ?3, ?4, ?5)" + .into(), + params: vec![ + SqlValue::Text(note_id.as_hyphenated().to_string()), + SqlValue::Text(from.to_string()), + SqlValue::Text(to.to_string()), + match transition_note { + Some(n) => SqlValue::Text(n.to_string()), + None => SqlValue::Null, + }, + SqlValue::Integer(now), + ], + label: Some("gtd_audit".into()), + }; + match runtime.sql().writer().await { + Ok(mut w) => { + if let Err(e) = w.execute(stmt).await { + tracing::warn!( + note_id = %note_id, + from, + to, + error = %e, + "gtd: audit write failed (non-fatal)" + ); + } + } + Err(e) => { + tracing::warn!( + note_id = %note_id, + error = %e, + "gtd: failed to acquire SQL writer for audit write (non-fatal)" + ); + } + } +} + // ── param structs ──────────────────────────────────────────────────────────── #[derive(Deserialize)] struct AssignParams { - namespace: Option, title: String, #[serde(default)] description: Option, @@ -47,7 +123,6 @@ struct AssignParams { #[derive(Deserialize)] struct NextParams { - namespace: Option, #[serde(default)] limit: Option, #[serde(default)] @@ -56,7 +131,6 @@ struct NextParams { #[derive(Deserialize)] struct CompleteParams { - namespace: Option, id: String, #[serde(default)] result: Option, @@ -64,7 +138,6 @@ struct CompleteParams { #[derive(Deserialize)] struct TasksParams { - namespace: Option, #[serde(default)] status: Option, #[serde(default)] @@ -79,7 +152,6 @@ struct TasksParams { #[derive(Deserialize)] struct TransitionParams { - namespace: Option, id: String, status: String, #[serde(default)] @@ -100,13 +172,13 @@ fn short_id(uuid: Uuid) -> String { pub(crate) async fn resolve_uuid( s: &str, runtime: &KhiveRuntime, - namespace: Option<&str>, + 
token: &NamespaceToken, ) -> Result { if let Ok(uuid) = Uuid::from_str(s) { return Ok(uuid); } if s.len() >= 8 && s.chars().all(|c| c.is_ascii_hexdigit()) { - return match runtime.resolve_prefix(namespace, s).await? { + return match runtime.resolve_prefix(token, s).await? { Some(uuid) => Ok(uuid), None => Err(RuntimeError::InvalidInput(format!( "no record matches prefix: {s:?}" @@ -190,12 +262,12 @@ fn ts_to_rfc(micros: i64) -> String { /// actually `kind = "task"`. Used by `complete` and `transition`. async fn load_task( runtime: &KhiveRuntime, - namespace: Option<&str>, + token: &NamespaceToken, raw_id: &str, ) -> Result<(khive_storage::note::Note, String), RuntimeError> { - let uuid = resolve_uuid(raw_id, runtime, namespace).await?; - let ns = runtime.ns(namespace); - let store = runtime.notes(namespace)?; + let uuid = resolve_uuid(raw_id, runtime, token).await?; + let ns = token.namespace().as_str(); + let store = runtime.notes(token)?; let note = store .get_note(uuid) .await @@ -222,7 +294,11 @@ async fn load_task( // ── handlers ───────────────────────────────────────────────────────────────── impl GtdPack { - pub(crate) async fn handle_assign(&self, params: Value) -> Result { + pub(crate) async fn handle_assign( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: AssignParams = deser(params)?; if p.title.trim().is_empty() { return Err(RuntimeError::InvalidInput("title must not be empty".into())); @@ -255,8 +331,7 @@ impl GtdPack { let mut resolved_deps: Vec = Vec::new(); if let Some(ref deps) = p.depends_on { for raw in deps { - resolved_deps - .push(resolve_uuid(raw, self.runtime(), p.namespace.as_deref()).await?); + resolved_deps.push(resolve_uuid(raw, self.runtime(), token).await?); } } @@ -268,11 +343,7 @@ impl GtdPack { // link failure here would diverge `assign` from `create(note_kind="task")` // and violate the "no failure after successful write" rule). 
for dep_uuid in &resolved_deps { - match self - .runtime() - .resolve(p.namespace.as_deref(), *dep_uuid) - .await? - { + match self.runtime().resolve(token, *dep_uuid).await? { Some(Resolved::Note(n)) if n.kind == "task" => {} Some(Resolved::Note(n)) => { return Err(RuntimeError::InvalidInput(format!( @@ -342,11 +413,11 @@ impl GtdPack { let note = self .runtime() .create_note( - p.namespace.as_deref(), + token, "task", Some(p.title.as_str()), &content, - salience, + Some(salience), Some(props), Vec::new(), ) @@ -362,13 +433,7 @@ impl GtdPack { for dep_uuid in resolved_deps { if let Err(e) = self .runtime() - .link( - p.namespace.as_deref(), - note.id, - dep_uuid, - EdgeRelation::DependsOn, - 1.0, - ) + .link(token, note.id, dep_uuid, EdgeRelation::DependsOn, 1.0, None) .await { tracing::warn!( @@ -383,7 +448,11 @@ impl GtdPack { Ok(render_task(¬e)) } - pub(crate) async fn handle_next(&self, params: Value) -> Result { + pub(crate) async fn handle_next( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: NextParams = deser(params)?; let limit = p.limit.unwrap_or(10).clamp(1, 200); @@ -391,7 +460,7 @@ impl GtdPack { // 500 covers typical inbox/next/active backlogs without paging. 
let notes = self .runtime() - .list_notes(p.namespace.as_deref(), Some("task"), 500, 0) + .list_notes(token, Some("task"), 500, 0) .await?; let mut actionable: Vec<&khive_storage::note::Note> = notes @@ -422,9 +491,13 @@ impl GtdPack { Ok(Value::Array(result)) } - pub(crate) async fn handle_complete(&self, params: Value) -> Result { + pub(crate) async fn handle_complete( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: CompleteParams = deser(params)?; - let (mut note, current) = load_task(self.runtime(), p.namespace.as_deref(), &p.id).await?; + let (mut note, current) = load_task(self.runtime(), token, &p.id).await?; if !can_transition(¤t, "done") { let allowed = allowed_transitions(¤t).join(", "); @@ -445,11 +518,15 @@ impl GtdPack { note.updated_at = Utc::now().timestamp_micros(); self.runtime() - .notes(p.namespace.as_deref())? + .notes(token)? .upsert_note(note.clone()) .await .map_err(|e| RuntimeError::Internal(format!("upsert_note: {e}")))?; + // ADR-019: write lifecycle audit record (best-effort). 
+ ensure_audit_schema(self.runtime()).await; + write_audit_record(self.runtime(), note.id, ¤t, "done", None).await; + Ok(json!({ "completed": true, "id": short_id(note.id), @@ -460,7 +537,11 @@ impl GtdPack { })) } - pub(crate) async fn handle_tasks(&self, params: Value) -> Result { + pub(crate) async fn handle_tasks( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: TasksParams = deser(params)?; let limit = p.limit.unwrap_or(50).clamp(1, 200); let offset = p.offset.unwrap_or(0) as usize; @@ -490,7 +571,7 @@ impl GtdPack { let window = (offset as u32).saturating_add(limit).saturating_add(500); let notes = self .runtime() - .list_notes(p.namespace.as_deref(), Some("task"), window, 0) + .list_notes(token, Some("task"), window, 0) .await?; let filtered: Vec<&khive_storage::note::Note> = notes @@ -531,7 +612,11 @@ impl GtdPack { Ok(Value::Array(result)) } - pub(crate) async fn handle_transition(&self, params: Value) -> Result { + pub(crate) async fn handle_transition( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: TransitionParams = deser(params)?; let target = normalize_status(&p.status); if !is_valid_status(target) { @@ -542,7 +627,7 @@ impl GtdPack { ))); } - let (mut note, current) = load_task(self.runtime(), p.namespace.as_deref(), &p.id).await?; + let (mut note, current) = load_task(self.runtime(), token, &p.id).await?; if current == target { // Idempotent — no write, no transition. @@ -576,11 +661,15 @@ impl GtdPack { note.updated_at = Utc::now().timestamp_micros(); self.runtime() - .notes(p.namespace.as_deref())? + .notes(token)? .upsert_note(note.clone()) .await .map_err(|e| RuntimeError::Internal(format!("upsert_note: {e}")))?; + // ADR-019 + ADR-101: write lifecycle audit record (best-effort). 
+ ensure_audit_schema(self.runtime()).await; + write_audit_record(self.runtime(), note.id, ¤t, target, p.note.as_deref()).await; + Ok(json!({ "transitioned": true, "id": short_id(note.id), diff --git a/crates/khive-pack-gtd/src/hook.rs b/crates/khive-pack-gtd/src/hook.rs index 20012b9d..975a397e 100644 --- a/crates/khive-pack-gtd/src/hook.rs +++ b/crates/khive-pack-gtd/src/hook.rs @@ -17,7 +17,7 @@ use async_trait::async_trait; use serde_json::{json, Value}; use uuid::Uuid; -use khive_runtime::{KhiveRuntime, KindHook, Resolved, RuntimeError}; +use khive_runtime::{KhiveRuntime, KindHook, Namespace, Resolved, RuntimeError}; use khive_storage::EdgeRelation; use crate::handlers::resolve_uuid; @@ -70,10 +70,12 @@ impl KindHook for TaskHook { } let salience = priority.as_deref().map(priority_to_salience).unwrap_or(0.5); - let namespace = args + let token = args .get("namespace") .and_then(Value::as_str) - .map(str::to_string); + .and_then(|s| Namespace::parse(s).ok()) + .map(|ns| runtime.authorize(ns)) + .unwrap_or_else(|| runtime.authorize(Namespace::local())); // Resolve depends_on entries (full UUID or 8+ hex prefix) to canonical // UUID strings — matches the shape gtd's `assign` produces. Also @@ -87,8 +89,8 @@ impl KindHook for TaskHook { let raw = entry.as_str().ok_or_else(|| { RuntimeError::InvalidInput("depends_on entries must be strings".into()) })?; - let uuid = resolve_uuid(raw, runtime, namespace.as_deref()).await?; - match runtime.resolve(namespace.as_deref(), uuid).await? { + let uuid = resolve_uuid(raw, runtime, &token).await?; + match runtime.resolve(&token, uuid).await? 
{ Some(Resolved::Note(n)) if n.kind == "task" => {} Some(Resolved::Note(n)) => { return Err(RuntimeError::InvalidInput(format!( @@ -183,7 +185,12 @@ impl KindHook for TaskHook { .and_then(Value::as_array); if let Some(arr) = deps { - let namespace = args.get("namespace").and_then(Value::as_str); + let token = args + .get("namespace") + .and_then(Value::as_str) + .and_then(|s| Namespace::parse(s).ok()) + .map(|ns| runtime.authorize(ns)) + .unwrap_or_else(|| runtime.authorize(Namespace::local())); for entry in arr { let Some(raw) = entry.as_str() else { continue }; let target = match Uuid::parse_str(raw) { @@ -194,7 +201,7 @@ impl KindHook for TaskHook { } }; if let Err(e) = runtime - .link(namespace, id, target, EdgeRelation::DependsOn, 1.0) + .link(&token, id, target, EdgeRelation::DependsOn, 1.0, None) .await { tracing::warn!( diff --git a/crates/khive-pack-gtd/src/lib.rs b/crates/khive-pack-gtd/src/lib.rs index 2deb9fc5..c98641e3 100644 --- a/crates/khive-pack-gtd/src/lib.rs +++ b/crates/khive-pack-gtd/src/lib.rs @@ -23,8 +23,13 @@ use async_trait::async_trait; use serde_json::Value; use khive_runtime::pack::PackRuntime; -use khive_runtime::{KhiveRuntime, KindHook, RuntimeError, VerbRegistry}; -use khive_types::{EdgeEndpointRule, EdgeRelation, EndpointKind, Pack, VerbDef}; +use khive_runtime::{ + KhiveRuntime, KindHook, NamespaceToken, NoteKindSpec, NoteLifecycleSpec, PackSchemaPlan, + RuntimeError, SchemaPlan, VerbRegistry, +}; +use khive_types::{ + EdgeEndpointRule, EdgeRelation, EndpointKind, HandlerDef, Pack, VerbCategory, Visibility, +}; use crate::hook::TaskHook; @@ -37,9 +42,14 @@ impl Pack for GtdPack { const NAME: &'static str = "gtd"; const NOTE_KINDS: &'static [&'static str] = &["task"]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = >D_VERBS; + const HANDLERS: &'static [HandlerDef] = >D_HANDLERS; const EDGE_RULES: &'static [EdgeEndpointRule] = >D_EDGE_RULES; const REQUIRES: &'static [&'static str] = &["kg"]; + 
const NOTE_KIND_SPECS: &'static [NoteKindSpec] = >D_NOTE_KIND_SPECS; + const SCHEMA_PLAN: Option = Some(PackSchemaPlan { + pack: "gtd", + statements: >D_SCHEMA_PLAN_STMTS, + }); } /// ADR-031: GTD opts task notes into `depends_on` between tasks. The base @@ -51,35 +61,113 @@ static GTD_EDGE_RULES: [EdgeEndpointRule; 1] = [EdgeEndpointRule { target: EndpointKind::NoteOfKind("task"), }]; -// ADR-060: Illocutionary classification (Searle 1976) -// Directive — attempts to get hearer to do something -// Assertive — retrieves/presents state of affairs +/// ADR-004 §NoteKindSpec: lifecycle declaration for the `task` note kind. +/// +/// The lifecycle field is named `kind_status` (not `properties["status"]`) to +/// avoid the semantic collision with `Note.status` (NoteStatus visibility). +/// +/// Phase 1: this spec is declared and collected by the runtime for introspection +/// and documentation. The `task` note kind currently stores lifecycle state in +/// `properties["status"]` (status quo); Phase 2 will migrate to a first-class +/// `kind_status` column once the runtime enforcement layer is in place (c11/c12). +static GTD_NOTE_KIND_SPECS: [NoteKindSpec; 1] = [NoteKindSpec { + kind: "task", + aliases: &["todo", "issue"], + lifecycle: NoteLifecycleSpec { + // ADR-004: lifecycle field name must NOT be "status" to avoid collision + // with NoteStatus. The canonical name is "kind_status". 
+ field: "kind_status", + initial: "inbox", + terminal: &["done", "cancelled"], + transitions: &[ + ("inbox", "next"), + ("inbox", "waiting"), + ("inbox", "someday"), + ("inbox", "active"), + ("inbox", "done"), + ("inbox", "cancelled"), + ("next", "active"), + ("next", "waiting"), + ("next", "someday"), + ("next", "done"), + ("next", "cancelled"), + ("active", "next"), + ("active", "waiting"), + ("active", "done"), + ("active", "cancelled"), + ("waiting", "next"), + ("waiting", "active"), + ("waiting", "done"), + ("waiting", "cancelled"), + ("someday", "next"), + ("someday", "active"), + ("someday", "done"), + ("someday", "cancelled"), + // Reopen paths. + ("done", "next"), + ("done", "active"), + ("cancelled", "next"), + ("cancelled", "active"), + ], + }, +}]; + +/// ADR-019 §schema_plan: pack-auxiliary schema for GTD lifecycle audit. +/// +/// `gtd_lifecycle_audit` records every `transition` (and `complete`) invocation +/// for replay and compliance auditing. The table is idempotent (`CREATE TABLE +/// IF NOT EXISTS`) and is NOT part of the core versioned migration chain. 
+pub(crate) static GTD_SCHEMA_PLAN_STMTS: [&str; 2] = [ + "CREATE TABLE IF NOT EXISTS gtd_lifecycle_audit (\ + note_id TEXT NOT NULL,\ + from_state TEXT NOT NULL,\ + to_state TEXT NOT NULL,\ + note TEXT,\ + at INTEGER NOT NULL\ + )", + "CREATE INDEX IF NOT EXISTS idx_gtd_audit_note \ + ON gtd_lifecycle_audit(note_id, at DESC)", +]; + +// ADR-025: Illocutionary classification (Searle 1976) +// Directive — attempts to get hearer to do something +// Assertive — retrieves/presents state of affairs // Declaration — changes institutional status by fiat -static GTD_VERBS: [VerbDef; 5] = [ +static GTD_HANDLERS: [HandlerDef; 5] = [ // Directive: directs an actor to perform work - VerbDef { + HandlerDef { name: "assign", description: "Create a GTD task (note with kind=task)", + visibility: Visibility::Verb, + category: VerbCategory::Directive, }, // Assertive: retrieves actionable tasks - VerbDef { + HandlerDef { name: "next", description: "List actionable tasks (status=next or active) by priority", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Declaration: declares a task done - VerbDef { + HandlerDef { name: "complete", description: "Mark a task done with an optional result note", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, // Assertive: retrieves filtered task listing - VerbDef { + HandlerDef { name: "tasks", description: "List tasks filtered by status, assignee, priority", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Declaration: changes task lifecycle status - VerbDef { + HandlerDef { name: "transition", description: "Explicit GTD status transition with lifecycle validation", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, ]; @@ -93,7 +181,7 @@ impl GtdPack { } } -// ── ADR-063: inventory self-registration ───────────────────────────────────── +// ── ADR-027: inventory self-registration ───────────────────────────────────── struct GtdPackFactory; @@ -127,8 
+215,8 @@ impl PackRuntime for GtdPack { ::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - >D_VERBS + fn handlers(&self) -> &'static [HandlerDef] { + >D_HANDLERS } fn edge_rules(&self) -> &'static [EdgeEndpointRule] { @@ -139,6 +227,17 @@ impl PackRuntime for GtdPack { ::REQUIRES } + fn note_kind_specs(&self) -> &'static [NoteKindSpec] { + ::NOTE_KIND_SPECS + } + + fn schema_plan(&self) -> SchemaPlan { + SchemaPlan { + pack: "gtd", + statements: >D_SCHEMA_PLAN_STMTS, + } + } + fn kind_hook(&self, kind: &str) -> Option> { match kind { "task" => Some(Arc::new(TaskHook)), @@ -151,13 +250,14 @@ impl PackRuntime for GtdPack { verb: &str, params: Value, _registry: &VerbRegistry, + token: &NamespaceToken, ) -> Result { match verb { - "assign" => self.handle_assign(params).await, - "next" => self.handle_next(params).await, - "complete" => self.handle_complete(params).await, - "tasks" => self.handle_tasks(params).await, - "transition" => self.handle_transition(params).await, + "assign" => self.handle_assign(token, params).await, + "next" => self.handle_next(token, params).await, + "complete" => self.handle_complete(token, params).await, + "tasks" => self.handle_tasks(token, params).await, + "transition" => self.handle_transition(token, params).await, _ => Err(RuntimeError::InvalidInput(format!( "gtd pack does not handle verb {verb:?}" ))), diff --git a/crates/khive-pack-gtd/tests/integration.rs b/crates/khive-pack-gtd/tests/integration.rs index 0d7b6a50..bcd9be5d 100644 --- a/crates/khive-pack-gtd/tests/integration.rs +++ b/crates/khive-pack-gtd/tests/integration.rs @@ -2,8 +2,11 @@ use khive_pack_gtd::GtdPack; use khive_pack_kg::KgPack; -use khive_runtime::pack::VerbDef; -use khive_runtime::{KhiveRuntime, RuntimeError, VerbRegistry, VerbRegistryBuilder}; +use khive_runtime::pack::HandlerDef; +use khive_runtime::{ + KhiveRuntime, Namespace, NoteKindSpec, RuntimeError, SchemaPlan, VerbRegistry, + VerbRegistryBuilder, +}; use serde_json::{json, Value}; fn rt() -> 
KhiveRuntime { @@ -21,7 +24,7 @@ impl Fixture { self.registry.dispatch(verb, args).await } - fn verbs(&self) -> Vec<&'static VerbDef> { + fn verbs(&self) -> Vec<&'static HandlerDef> { self.registry.all_verbs() } @@ -210,7 +213,15 @@ async fn complete_rejects_non_task_notes() { // the task-kind guard fires. let runtime = rt(); let note = runtime - .create_note(None, "observation", None, "hello", 0.5, None, vec![]) + .create_note( + &runtime.authorize(Namespace::local()), + "observation", + None, + "hello", + Some(0.5), + None, + vec![], + ) .await .unwrap(); let pack = pack(runtime); @@ -326,7 +337,9 @@ async fn assign_creates_depends_on_edge_between_tasks() { let dep_uuid = uuid::Uuid::parse_str(dep_full).unwrap(); let blocker_uuid = uuid::Uuid::parse_str(blocker_full).unwrap(); - let graph = rt.graph(None).expect("graph store"); + let graph = rt + .graph(&rt.authorize(Namespace::local())) + .expect("graph store"); let neighbors = graph .neighbors( dep_uuid, @@ -359,11 +372,11 @@ async fn assign_rejects_depends_on_when_target_is_non_task_note() { // the task is never persisted (ADR-030: no failure after successful write). let other = rt .create_note( - None, + &rt.authorize(Namespace::local()), "observation", None, "an observation", - 0.5, + Some(0.5), None, vec![], ) @@ -385,8 +398,10 @@ async fn assign_rejects_depends_on_when_target_is_non_task_note() { ); // Atomicity: the rejected `assign` must not leave a task row behind. 
- let notes = rt.notes(None).expect("note store"); - let page = notes + let notes = rt + .notes(&rt.authorize(Namespace::local())) + .expect("note store"); + let task_page = notes .query_notes( "local", Some("task"), @@ -398,11 +413,313 @@ async fn assign_rejects_depends_on_when_target_is_non_task_note() { .await .expect("query task notes"); assert!( - page.items.is_empty(), + task_page.items.is_empty(), "rejected assign must not persist a task; found {:?}", - page.items + task_page + .items .iter() .filter_map(|n| n.name.clone()) .collect::>() ); } + +// ── ADR-004 / ADR-019 cluster-15 tests ─────────────────────────────────────── + +/// F100: GtdPack exposes a schema_plan() returning the gtd_lifecycle_audit DDL. +#[tokio::test] +async fn pack_runtime_exposes_schema_plan() { + use khive_runtime::PackRuntime; + let pack = GtdPack::new(rt()); + let plan: SchemaPlan = pack.schema_plan(); + assert!( + !plan.is_empty(), + "GtdPack must return a non-empty SchemaPlan" + ); + assert_eq!(plan.pack, "gtd"); + assert!( + !plan.statements.is_empty(), + "schema plan must have at least one DDL statement" + ); + let combined = plan.statements.join(" "); + assert!( + combined.contains("gtd_lifecycle_audit"), + "schema plan must reference gtd_lifecycle_audit table; got: {combined}" + ); + assert!( + combined.contains("CREATE TABLE IF NOT EXISTS"), + "schema plan DDL must be idempotent (CREATE TABLE IF NOT EXISTS)" + ); +} + +/// F100: VerbRegistry aggregates schema plans from loaded packs. +#[tokio::test] +async fn verb_registry_aggregates_schema_plans() { + let fixture = pack(rt()); + let plans = fixture.registry.all_schema_plans(); + assert!( + plans.iter().any(|p| p.pack == "gtd"), + "registry must expose GTD schema plan; got packs: {:?}", + plans.iter().map(|p| p.pack).collect::>() + ); +} + +/// F100 + ADR-004: GtdPack exposes NoteKindSpec for the task kind with lifecycle. 
+#[tokio::test] +async fn pack_runtime_exposes_note_kind_spec_for_task() { + use khive_runtime::PackRuntime; + let pack = GtdPack::new(rt()); + let specs: &[NoteKindSpec] = pack.note_kind_specs(); + assert!( + !specs.is_empty(), + "GtdPack must declare at least one NoteKindSpec" + ); + + let task_spec = specs + .iter() + .find(|s| s.kind == "task") + .expect("GtdPack must have NoteKindSpec for 'task'"); + + // ADR-004: lifecycle field must be "kind_status", NOT "status". + assert_eq!( + task_spec.lifecycle.field, "kind_status", + "ADR-004: lifecycle field must be 'kind_status' to avoid collision with NoteStatus" + ); + assert_eq!( + task_spec.lifecycle.initial, "inbox", + "task lifecycle must start at 'inbox'" + ); + assert!( + task_spec.lifecycle.terminal.contains(&"done"), + "terminal states must include 'done'" + ); + assert!( + task_spec.lifecycle.terminal.contains(&"cancelled"), + "terminal states must include 'cancelled'" + ); +} + +/// F100: VerbRegistry aggregates NoteKindSpecs from loaded packs. +#[tokio::test] +async fn verb_registry_aggregates_note_kind_specs() { + let fixture = pack(rt()); + let specs = fixture.registry.all_note_kind_specs(); + assert!( + specs.iter().any(|s| s.kind == "task"), + "registry must aggregate task NoteKindSpec" + ); +} + +/// ADR-004: lifecycle transitions in NoteKindSpec match the runtime schema. +#[tokio::test] +async fn note_kind_spec_transitions_match_runtime_schema() { + use khive_pack_gtd::schema::{can_transition, is_terminal}; + use khive_runtime::PackRuntime; + + let pack = GtdPack::new(rt()); + let specs = pack.note_kind_specs(); + let task_spec = specs.iter().find(|s| s.kind == "task").unwrap(); + + // Every declared transition in the spec must agree with can_transition(). 
+ for &(from, to) in task_spec.lifecycle.transitions { + assert!( + can_transition(from, to), + "NoteKindSpec declares ({from}→{to}) but schema::can_transition disagrees" + ); + } + // Every terminal status in the spec must agree with is_terminal(). + for &t in task_spec.lifecycle.terminal { + assert!( + is_terminal(t), + "NoteKindSpec declares '{t}' as terminal but schema::is_terminal disagrees" + ); + } +} + +/// F101: transition writes an audit record to gtd_lifecycle_audit. +#[tokio::test] +async fn transition_writes_lifecycle_audit_record() { + use khive_storage::{SqlStatement, SqlValue}; + + let rt = rt(); + let fixture = pack(rt.clone()); + + let resp = assign( + &fixture, + json!({"title": "audit test task", "status": "inbox"}), + ) + .await; + let task_id = resp["full_id"].as_str().unwrap().to_string(); + + fixture + .dispatch( + "transition", + json!({"id": task_id, "status": "next", "note": "moved to next"}), + ) + .await + .expect("transition should succeed"); + + // Query the audit table. 
+ let sql = rt.sql(); + let mut reader = sql.reader().await.expect("sql reader"); + let rows = reader + .query_all(SqlStatement { + sql: "SELECT note_id, from_state, to_state, note FROM gtd_lifecycle_audit \ + WHERE note_id = ?1" + .into(), + params: vec![SqlValue::Text(task_id.clone())], + label: None, + }) + .await + .expect("audit query"); + + assert_eq!( + rows.len(), + 1, + "F101: transition must write exactly one audit row; got {rows:?}" + ); + let row = &rows[0]; + assert_eq!( + row.get("from_state").and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }), + Some("inbox"), + "audit from_state must be 'inbox'" + ); + assert_eq!( + row.get("to_state").and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }), + Some("next"), + "audit to_state must be 'next'" + ); + assert_eq!( + row.get("note").and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }), + Some("moved to next"), + "audit note field must be recorded" + ); +} + +/// F101: complete writes an audit record to gtd_lifecycle_audit. 
+#[tokio::test] +async fn complete_writes_lifecycle_audit_record() { + use khive_storage::{SqlStatement, SqlValue}; + + let rt = rt(); + let fixture = pack(rt.clone()); + + let resp = assign(&fixture, json!({"title": "audit complete test"})).await; + let task_id = resp["full_id"].as_str().unwrap().to_string(); + + fixture + .dispatch("complete", json!({"id": task_id, "result": "done!"})) + .await + .expect("complete should succeed"); + + let sql = rt.sql(); + let mut reader = sql.reader().await.expect("sql reader"); + let rows = reader + .query_all(SqlStatement { + sql: "SELECT from_state, to_state FROM gtd_lifecycle_audit WHERE note_id = ?1".into(), + params: vec![SqlValue::Text(task_id.clone())], + label: None, + }) + .await + .expect("audit query"); + + assert_eq!( + rows.len(), + 1, + "F101: complete must write one audit row; got {rows:?}" + ); + let row = &rows[0]; + assert_eq!( + row.get("to_state").and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }), + Some("done"), + "audit to_state must be 'done'" + ); +} + +/// F101: idempotent same-status transition does NOT write an audit record. +/// +/// Strategy: perform one real transition (inbox → next) to initialize the audit +/// schema and record a baseline row, then attempt a noop (next → next) and +/// confirm only the baseline row exists (count stays at 1, not 2). +#[tokio::test] +async fn noop_transition_does_not_write_audit_record() { + use khive_storage::{SqlStatement, SqlValue}; + + let rt = rt(); + let fixture = pack(rt.clone()); + + let resp = assign( + &fixture, + json!({"title": "noop audit test", "status": "inbox"}), + ) + .await; + let task_id = resp["full_id"].as_str().unwrap().to_string(); + + // Real transition — initializes the audit schema and writes one row. + fixture + .dispatch("transition", json!({"id": task_id, "status": "next"})) + .await + .expect("real transition should succeed"); + + // Noop transition — must not write a second row. 
+ let r = fixture + .dispatch("transition", json!({"id": task_id, "status": "next"})) + .await + .expect("noop transition should return ok"); + assert_eq!( + r["transitioned"], false, + "noop must return transitioned=false" + ); + + // Should still have exactly ONE audit row (from the real transition above). + let sql = rt.sql(); + let mut reader = sql.reader().await.expect("sql reader"); + let rows = reader + .query_all(SqlStatement { + sql: "SELECT COUNT(*) as cnt FROM gtd_lifecycle_audit WHERE note_id = ?1".into(), + params: vec![SqlValue::Text(task_id.clone())], + label: None, + }) + .await + .expect("audit count query"); + + let count = rows + .first() + .and_then(|r| r.get("cnt")) + .and_then(|v| { + if let SqlValue::Integer(n) = v { + Some(*n) + } else { + None + } + }) + .unwrap_or(-1); + + assert_eq!( + count, 1, + "noop transition must not insert an audit row (expected 1 baseline row, got {count})" + ); +} diff --git a/crates/khive-pack-kg/Cargo.toml b/crates/khive-pack-kg/Cargo.toml index 702a7b48..1f27fcb4 100644 --- a/crates/khive-pack-kg/Cargo.toml +++ b/crates/khive-pack-kg/Cargo.toml @@ -11,10 +11,10 @@ categories.workspace = true description = "KG verb pack — entity/note CRUD, graph traversal, hybrid search for research knowledge graphs" [dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } inventory = { workspace = true } -khive-storage = { version = "0.2.0", path = "../khive-storage" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } async-trait = { workspace = true } chrono = { workspace = true } serde = { workspace = true } diff --git a/crates/khive-pack-kg/src/handlers.rs b/crates/khive-pack-kg/src/handlers.rs index 8139744d..6d72e071 100644 --- 
a/crates/khive-pack-kg/src/handlers.rs +++ b/crates/khive-pack-kg/src/handlers.rs @@ -5,19 +5,26 @@ use std::collections::HashMap; use std::str::FromStr; -use serde::Deserialize; +use serde::{Deserialize, Deserializer}; use serde_json::{json, Value}; use uuid::Uuid; use khive_runtime::{ - EdgeListFilter, EntityPatch, KhiveRuntime, MergeStrategy, RuntimeError, VerbRegistry, + ContentMergeStrategy, EdgeListFilter, EdgePatch, EntityDedupMergePolicy, EntityPatch, + KhiveRuntime, LinkSpec, MergeSummary, NamespaceToken, NotePatch, RuntimeError, VerbRegistry, }; use khive_storage::types::{ Direction, NeighborQuery, PageRequest, TraversalOptions, TraversalRequest, }; +use khive_storage::types::{SqlStatement, SqlValue}; use khive_storage::{EdgeRelation, EntityFilter, EventFilter, EventOutcome, SubstrateKind}; -use crate::vocab::{EntityKind, NoteKind}; +use khive_types::{ + EntityKind, EventKind, ProposalChangeset, ProposalCreatedPayload, ProposalDecision, + ProposalReviewedPayload, ProposalWithdrawnPayload, +}; + +use crate::vocab::NoteKind; use crate::KgPack; // ---- Kind canonicalization (ADR-030) ---- @@ -83,6 +90,8 @@ pub(crate) enum KindSpec { Edge, /// `kind="event"` — only valid for `list`; `get` resolves events by UUID. Event, + /// `kind="proposal"` — queries the `proposals_open` projection table (ADR-046). + Proposal, } impl KindSpec { @@ -92,6 +101,7 @@ impl KindSpec { KindSpec::Note { .. 
} => "note", KindSpec::Edge => "edge", KindSpec::Event => "event", + KindSpec::Proposal => "proposal", } } } @@ -115,6 +125,7 @@ pub(crate) fn resolve_kind_spec( "note" => return Ok(KindSpec::Note { specific: None }), "edge" => return Ok(KindSpec::Edge), "event" => return Ok(KindSpec::Event), + "proposal" => return Ok(KindSpec::Proposal), _ => {} } @@ -147,6 +158,7 @@ pub(crate) fn resolve_kind_spec( "note".into(), "edge".into(), "event".into(), + "proposal".into(), ]; all.extend(registry.all_entity_kinds().iter().map(|s| (*s).to_string())); all.extend(registry.all_note_kinds().iter().map(|s| (*s).to_string())); @@ -184,7 +196,7 @@ fn reconcile_specific( #[derive(Deserialize)] struct CreateParams { kind: String, - namespace: Option, + entity_type: Option, name: Option, description: Option, content: Option, @@ -196,17 +208,16 @@ struct CreateParams { #[derive(Deserialize)] struct GetParams { - namespace: Option, id: String, } #[derive(Deserialize)] struct ListParams { kind: String, - namespace: Option, limit: Option, offset: Option, entity_kind: Option, + entity_type: Option, source_id: Option, target_id: Option, relations: Option>, @@ -221,14 +232,24 @@ struct ListParams { substrate: Option, since: Option, until: Option, + event_kind: Option, + event_kinds: Option>, + session_id: Option, + observed: Option>, + selected: Option>, } #[derive(Deserialize)] struct UpdateParams { - namespace: Option, id: String, - name: Option, + kind: String, + name: Option, description: Option, + content: Option, + #[serde(default, deserialize_with = "tri_f64")] + salience: Option>, + #[serde(default, deserialize_with = "tri_f64")] + decay_factor: Option>, properties: Option, tags: Option>, relation: Option, @@ -237,45 +258,71 @@ struct UpdateParams { #[derive(Deserialize)] struct DeleteParams { - namespace: Option, id: String, + kind: String, hard: Option, } #[derive(Deserialize)] struct MergeParams { - namespace: Option, into_id: String, from_id: String, + kind: Option, strategy: 
Option, + content_strategy: Option, + dry_run: Option, + #[allow(dead_code)] + verbose: Option, } #[derive(Deserialize)] struct SearchParams { kind: String, - namespace: Option, query: String, limit: Option, entity_kind: Option, + entity_type: Option, note_kind: Option, + include_superseded: Option, properties: Option, } +/// One entry in a bulk-link request (F205 / ADR-038). #[derive(Deserialize)] -struct LinkParams { - namespace: Option, +struct BulkLinkEntry { source_id: String, target_id: String, relation: String, weight: Option, + metadata: Option, + dependency_kind: Option, +} + +#[derive(Deserialize)] +struct LinkParams { + // Singleton fields (required unless `links` is provided). + source_id: Option, + target_id: Option, + relation: Option, + weight: Option, + /// Edge metadata (open JSON; governed keys validated by runtime). + metadata: Option, + /// Shortcut for `metadata.dependency_kind` on `depends_on` edges. + dependency_kind: Option, /// When `true`, output uses full UUIDs and ISO 8601 timestamps instead of /// the default 8-char short IDs and YYYY/MM/DD date format. verbose: Option, + // Bulk link fields (ADR-038). + /// Multiple edges to create in one call. + links: Option>, + /// When `true` (default), the entire batch is atomic — any failure rolls + /// back all writes. When `false`, errors are collected and returned as + /// warnings while successful entries are committed individually. + atomic: Option, } #[derive(Deserialize)] struct NeighborsParams { - namespace: Option, /// Accepts either `id` (canonical, ADR-148 normalized) or `node_id` (legacy). #[serde(alias = "node_id")] id: String, @@ -287,7 +334,6 @@ struct NeighborsParams { #[derive(Deserialize)] struct TraverseParams { - namespace: Option, /// Accepts either `roots` (legacy) or `ids` (normalized). Each entry may /// be a full UUID or an 8-char prefix; resolved via `resolve_uuid_async`. 
#[serde(alias = "ids")] @@ -302,10 +348,43 @@ struct TraverseParams { #[derive(Deserialize)] struct QueryParams { - namespace: Option, query: String, } +// ---- Proposal param structs (ADR-046) ---- + +#[derive(Deserialize)] +struct ProposeParams { + title: String, + description: String, + changeset: Value, + #[serde(default)] + reviewers: Vec, + expiry: Option, + parent_id: Option, +} + +#[derive(Deserialize)] +struct ReviewParams { + proposal_id: String, + decision: String, + comment: Option, +} + +#[derive(Deserialize)] +struct WithdrawParams { + proposal_id: String, + rationale: Option, +} + +#[derive(Deserialize)] +struct ListProposalsParams { + status: Option, + proposer: Option, + limit: Option, + offset: Option, +} + // ---- Helpers ---- /// Resolve an entity name to its UUID. @@ -320,7 +399,7 @@ struct QueryParams { async fn resolve_name_async( name: &str, runtime: &KhiveRuntime, - namespace: Option<&str>, + token: &NamespaceToken, ) -> Result { // Use EntityFilter.name_prefix with the full name to do an exact match. // The DB implements `name LIKE '?%'` so we get back all names that start @@ -330,9 +409,9 @@ async fn resolve_name_async( ..Default::default() }; let page = runtime - .entities(namespace)? + .entities(token)? .query_entities( - runtime.ns(namespace), + token.namespace().as_str(), filter, khive_storage::types::PageRequest { offset: 0, @@ -376,13 +455,13 @@ async fn resolve_name_async( async fn resolve_uuid_async( s: &str, runtime: &KhiveRuntime, - namespace: Option<&str>, + token: &NamespaceToken, ) -> Result { if let Ok(uuid) = Uuid::from_str(s) { return Ok(uuid); } if s.len() >= 8 && s.chars().all(|c| c.is_ascii_hexdigit()) { - match runtime.resolve_prefix(namespace, s).await { + match runtime.resolve_prefix(token, s).await { Ok(Some(uuid)) => return Ok(uuid), Ok(None) => { return Err(RuntimeError::InvalidInput(format!( @@ -393,7 +472,7 @@ async fn resolve_uuid_async( } } // Fall back to name-based resolution (issue #65). 
- resolve_name_async(s, runtime, namespace).await + resolve_name_async(s, runtime, token).await } // ---- Output formatting helpers (issue #66) ---- @@ -415,13 +494,36 @@ fn parse_direction(s: Option<&str>) -> Direction { } } +/// Merge `dependency_kind` shortcut into `metadata` for `depends_on` edges. +/// +/// When `dependency_kind` is provided separately and `metadata` does not already +/// carry the key, the value is injected into the metadata object. This allows +/// callers to write `dependency_kind: "build"` instead of the full +/// `metadata: { "dependency_kind": "build" }` form. +fn merge_entry_metadata( + metadata: Option, + dependency_kind: Option, +) -> Result, RuntimeError> { + let Some(dk) = dependency_kind else { + return Ok(metadata); + }; + let mut obj = metadata.unwrap_or_else(|| serde_json::json!({})); + let map = obj + .as_object_mut() + .ok_or_else(|| RuntimeError::InvalidInput("metadata must be a JSON object".into()))?; + map.entry("dependency_kind".to_string()) + .or_insert_with(|| serde_json::json!(dk)); + Ok(Some(obj)) +} + fn parse_relation(s: &str) -> Result { s.parse::().map_err(|_| { - RuntimeError::InvalidInput(format!( - "unknown relation {s:?}; valid: contains | part_of | instance_of | extends | \ - variant_of | introduced_by | supersedes | depends_on | enables | implements | \ - competes_with | composed_with | annotates" - )) + let valid = EdgeRelation::ALL + .iter() + .map(|r| r.as_str()) + .collect::>() + .join(" | "); + RuntimeError::InvalidInput(format!("unknown relation {s:?}; valid: {valid}")) }) } @@ -453,6 +555,11 @@ fn parse_event_substrate(raw: &str) -> Result { }) } +fn parse_event_kind(raw: &str) -> Result { + raw.parse::() + .map_err(|e| RuntimeError::InvalidInput(format!("unknown event_kind {raw:?}: {e}"))) +} + fn event_filter_from_params( p: &ListParams, ) -> Result<(EventFilter, Option), RuntimeError> { @@ -471,6 +578,47 @@ fn event_filter_from_params( let outcome = 
p.outcome.as_deref().map(parse_event_outcome).transpose()?; + let mut kinds: Vec = Vec::new(); + if let Some(k) = &p.event_kind { + kinds.push(parse_event_kind(k)?); + } + if let Some(ks) = &p.event_kinds { + for k in ks { + kinds.push(parse_event_kind(k)?); + } + } + + let session_id = p + .session_id + .as_deref() + .map(|s| { + Uuid::from_str(s) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid session_id {s:?}: {e}"))) + }) + .transpose()?; + + let observed = p + .observed + .as_deref() + .unwrap_or(&[]) + .iter() + .map(|s| { + Uuid::from_str(s) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid observed id {s:?}: {e}"))) + }) + .collect::, _>>()?; + + let selected = p + .selected + .as_deref() + .unwrap_or(&[]) + .iter() + .map(|s| { + Uuid::from_str(s) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid selected id {s:?}: {e}"))) + }) + .collect::, _>>()?; + Ok(( EventFilter { verbs, @@ -478,6 +626,10 @@ fn event_filter_from_params( actors: p.actor.clone().into_iter().collect(), after: p.since, before: p.until, + kinds, + session_id, + observed, + selected, ..EventFilter::default() }, outcome, @@ -512,11 +664,112 @@ fn props_match(entity_props: Option<&Value>, filter: &Value) -> bool { .all(|(k, v)| actual.get(k).is_some_and(|av| av == v)) } +// ---- Handler helpers ---- + +fn parse_entity_policy(s: &str) -> Result { + match s { + "prefer_into" => Ok(EntityDedupMergePolicy::PreferInto), + "prefer_from" => Ok(EntityDedupMergePolicy::PreferFrom), + "union" => Ok(EntityDedupMergePolicy::Union), + other => Err(RuntimeError::InvalidInput(format!( + "unknown strategy {other:?}; use prefer_into | prefer_from | union" + ))), + } +} + +fn parse_content_strategy(s: &str) -> Result { + match s { + "append" => Ok(ContentMergeStrategy::Append), + "prefer_into" => Ok(ContentMergeStrategy::PreferInto), + "prefer_from" => Ok(ContentMergeStrategy::PreferFrom), + other => Err(RuntimeError::InvalidInput(format!( + "unknown content_strategy {other:?}; 
use append | prefer_into | prefer_from" + ))), + } +} + +async fn ensure_entity_kind( + runtime: &KhiveRuntime, + token: &NamespaceToken, + id: Uuid, + expected_kind: Option<&str>, +) -> Result<(), RuntimeError> { + let entity = runtime.get_entity(token, id).await?; + if let Some(k) = expected_kind { + if entity.kind != k { + return Err(RuntimeError::NotFound(format!("{k} {id}"))); + } + } + Ok(()) +} + +async fn ensure_note_kind( + runtime: &KhiveRuntime, + token: &NamespaceToken, + id: Uuid, + expected_kind: Option<&str>, +) -> Result<(), RuntimeError> { + let note = runtime + .notes(token)? + .get_note(id) + .await + .map_err(RuntimeError::Storage)? + .ok_or_else(|| RuntimeError::NotFound(format!("note {id}")))?; + if let Some(k) = expected_kind { + if note.kind != k { + return Err(RuntimeError::NotFound(format!("{k} {id}"))); + } + } + Ok(()) +} + +fn description_patch(v: Option) -> Result>, RuntimeError> { + match v { + None => Ok(None), + Some(Value::Null) => Ok(Some(None)), + Some(Value::String(s)) => Ok(Some(Some(s))), + Some(other) => Err(RuntimeError::InvalidInput(format!( + "description must be null or a string, got: {other}" + ))), + } +} + +fn string_value(v: Option, field: &str) -> Result, RuntimeError> { + match v { + None => Ok(None), + Some(Value::String(s)) => Ok(Some(s)), + Some(other) => Err(RuntimeError::InvalidInput(format!( + "{field} must be a string, got: {other}" + ))), + } +} + +fn optional_string_patch( + v: Option, + field: &str, +) -> Result>, RuntimeError> { + match v { + None => Ok(None), + Some(Value::Null) => Ok(Some(None)), + Some(Value::String(s)) => Ok(Some(Some(s))), + Some(other) => Err(RuntimeError::InvalidInput(format!( + "{field} must be null or a string, got: {other}" + ))), + } +} + +/// Serde deserializer for tri-state nullable f64: +/// field absent → outer None, field = null → Some(None), field = number → Some(Some(v)). 
+fn tri_f64<'de, D: Deserializer<'de>>(d: D) -> Result>, D::Error> { + Ok(Some(Option::deserialize(d)?)) +} + // ---- Handler implementations ---- impl KgPack { pub(crate) async fn handle_create( &self, + token: &NamespaceToken, mut params: Value, registry: &VerbRegistry, ) -> Result { @@ -548,7 +801,7 @@ impl KgPack { )? .ok_or_else(|| { RuntimeError::InvalidInput( - "kind=entity requires a specific kind: either kind= directly, or kind=entity + entity_kind=<…>".into(), + "kind=entity requires a specific kind: either kind= directly, or kind=entity + entity_kind=<…>".into(), ) })?; let hook = registry.find_kind_hook(&canonical); @@ -578,6 +831,11 @@ impl KgPack { "kind=edge is not creatable via `create` — use `link` for edges".into(), )); } + KindSpec::Proposal => { + return Err(RuntimeError::InvalidInput( + "kind=proposal is not creatable via `create` — use `propose` to create a proposal".into(), + )); + } }; // Rewrite `kind` to the substrate label so downstream `CreateParams` @@ -594,11 +852,18 @@ impl KgPack { KindSpec::Note { .. } => { obj.insert("note_kind".into(), json!(canonical)); } - KindSpec::Edge | KindSpec::Event => {} + KindSpec::Edge | KindSpec::Event | KindSpec::Proposal => {} } } } + // Propagate the authorized namespace into params so KindHooks can build + // their own NamespaceToken (hooks don't receive a token directly). 
+ if let Some(obj) = params.as_object_mut() { + obj.entry("namespace") + .or_insert_with(|| json!(token.namespace().as_str())); + } + if let Some(ref h) = hook { h.prepare_create(&self.runtime, &mut params).await?; } @@ -615,8 +880,9 @@ impl KgPack { let entity = self .runtime .create_entity( - p.namespace.as_deref(), + token, &canonical, + p.entity_type.as_deref(), &name, p.description.as_deref(), p.properties, @@ -633,20 +899,18 @@ impl KgPack { let content = p.content.ok_or_else(|| { RuntimeError::InvalidInput("kind=note requires 'content'".into()) })?; - let salience = p.salience.unwrap_or(0.5); let mut annotates = Vec::new(); for s in p.annotates.unwrap_or_default() { - annotates - .push(resolve_uuid_async(&s, &self.runtime, p.namespace.as_deref()).await?); + annotates.push(resolve_uuid_async(&s, &self.runtime, token).await?); } let note = self .runtime .create_note( - p.namespace.as_deref(), + token, &canonical, p.name.as_deref(), &content, - salience, + p.salience, p.properties, annotates, ) @@ -675,39 +939,42 @@ impl KgPack { Ok(response) } - pub(crate) async fn handle_get(&self, params: Value) -> Result { + pub(crate) async fn handle_get( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: GetParams = deser(params)?; - let id = resolve_uuid_async(&p.id, &self.runtime, p.namespace.as_deref()).await?; - let ns = p.namespace.as_deref(); + let id = resolve_uuid_async(&p.id, &self.runtime, token).await?; - if let Some(entity) = self.runtime.get_entity(ns, id).await? { + if let Ok(entity) = self.runtime.get_entity(token, id).await { return to_json(&serde_json::json!({"kind": "entity", "data": entity})); } if let Some(note) = self .runtime - .notes(ns)? + .notes(token)? .get_note(id) .await .map_err(RuntimeError::Storage)? 
{ - if note.namespace == self.runtime.ns(ns) { + if note.namespace == token.namespace().as_str() { return to_json(&serde_json::json!({"kind": "note", "data": note})); } } - if let Some(edge) = self.runtime.get_edge(ns, id).await? { + if let Some(edge) = self.runtime.get_edge(token, id).await? { return to_json(&serde_json::json!({"kind": "edge", "data": edge})); } if let Some(event) = self .runtime - .events(ns)? + .events(token)? .get_event(id) .await .map_err(RuntimeError::Storage)? { - if event.namespace == self.runtime.ns(ns) { + if event.namespace == token.namespace().as_str() { return to_json(&serde_json::json!({"kind": "event", "data": event})); } } @@ -717,9 +984,23 @@ impl KgPack { pub(crate) async fn handle_list( &self, + token: &NamespaceToken, params: Value, registry: &VerbRegistry, ) -> Result { + // Fast-path: kind=proposal dispatches to the proposals_open projection + // before deserializing into ListParams, so proposal-specific fields + // (status, proposer) are handled without polluting ListParams. + let raw_kind = params + .get("kind") + .and_then(Value::as_str) + .unwrap_or("") + .trim() + .to_ascii_lowercase(); + if raw_kind == "proposal" { + return self.handle_list_proposals(token, params).await; + } + let p: ListParams = deser(params)?; let spec = resolve_kind_spec(&p.kind, registry)?; match spec { @@ -735,8 +1016,9 @@ impl KgPack { let entities = self .runtime .list_entities( - p.namespace.as_deref(), + token, kind_filter.as_deref(), + p.entity_type.as_deref(), limit, offset, ) @@ -745,15 +1027,11 @@ impl KgPack { } KindSpec::Edge => { let source_id = match p.source_id.as_deref() { - Some(s) => { - Some(resolve_uuid_async(s, &self.runtime, p.namespace.as_deref()).await?) - } + Some(s) => Some(resolve_uuid_async(s, &self.runtime, token).await?), None => None, }; let target_id = match p.target_id.as_deref() { - Some(s) => { - Some(resolve_uuid_async(s, &self.runtime, p.namespace.as_deref()).await?) 
- } + Some(s) => Some(resolve_uuid_async(s, &self.runtime, token).await?), None => None, }; let relations: Vec = p @@ -770,10 +1048,7 @@ impl KgPack { max_weight: p.max_weight, }; let limit = p.limit.unwrap_or(100); - let edges = self - .runtime - .list_edges(p.namespace.as_deref(), filter, limit) - .await?; + let edges = self.runtime.list_edges(token, filter, limit).await?; to_json(&edges) } KindSpec::Note { specific } => { @@ -787,15 +1062,11 @@ impl KgPack { let offset = p.offset.unwrap_or(0); let notes = self .runtime - .list_notes( - p.namespace.as_deref(), - kind_filter.as_deref(), - limit, - offset, - ) + .list_notes(token, kind_filter.as_deref(), limit, offset) .await?; to_json(¬es) } + KindSpec::Proposal => unreachable!("kind=proposal fast-pathed before deser"), KindSpec::Event => { let limit = p.limit.unwrap_or(100).clamp(1, 1000); let offset = p.offset.unwrap_or(0); @@ -816,10 +1087,12 @@ impl KgPack { let page = self .runtime .list_events( - p.namespace.as_deref(), + token, filter.clone(), - batch_size, - raw_offset, + PageRequest { + limit: batch_size, + offset: raw_offset.into(), + }, ) .await?; let batch_len = page.items.len() as u32; @@ -851,7 +1124,14 @@ impl KgPack { } else { let page = self .runtime - .list_events(p.namespace.as_deref(), filter, limit, offset) + .list_events( + token, + filter, + PageRequest { + limit, + offset: offset.into(), + }, + ) .await?; to_json(&page.items) } @@ -859,115 +1139,179 @@ impl KgPack { } } - pub(crate) async fn handle_update(&self, params: Value) -> Result { + pub(crate) async fn handle_update( + &self, + token: &NamespaceToken, + params: Value, + registry: &VerbRegistry, + ) -> Result { let p: UpdateParams = deser(params)?; - let id = resolve_uuid_async(&p.id, &self.runtime, p.namespace.as_deref()).await?; - let ns = p.namespace.as_deref(); - - if self - .runtime - .events(ns)? - .get_event(id) - .await - .map_err(RuntimeError::Storage)? 
- .is_some() - { - return Err(immutable_event_error()); - } + let id = resolve_uuid_async(&p.id, &self.runtime, token).await?; + let spec = resolve_kind_spec(&p.kind, registry)?; - if self.runtime.get_entity(ns, id).await?.is_some() { - let description = match p.description { - None => None, - Some(Value::Null) => Some(None), - Some(Value::String(s)) => Some(Some(s)), - Some(other) => { - return Err(RuntimeError::InvalidInput(format!( - "description must be null or a string, got: {other}" - ))) + match spec { + KindSpec::Entity { specific } => { + let entity = self.runtime.get_entity(token, id).await?; + if specific.as_ref().is_some_and(|k| entity.kind != *k) { + return Err(RuntimeError::NotFound(format!("entity {}", p.id))); } - }; - let patch = EntityPatch { - name: p.name, - description, - properties: p.properties, - tags: p.tags, - }; - let entity = self.runtime.update_entity(ns, id, patch).await?; - return to_json(&entity); - } - - if self.runtime.get_edge(ns, id).await?.is_some() { - let relation = p.relation.as_deref().map(parse_relation).transpose()?; - let edge = self.runtime.update_edge(ns, id, relation, p.weight).await?; - return to_json(&edge); + let patch = EntityPatch { + name: string_value(p.name, "name")?, + description: description_patch(p.description)?, + properties: p.properties, + tags: p.tags, + }; + to_json(&self.runtime.update_entity(token, id, patch).await?) + } + KindSpec::Edge => { + let relation = p.relation.as_deref().map(parse_relation).transpose()?; + let patch = EdgePatch { + relation, + weight: p.weight, + properties: p.properties, + }; + to_json(&self.runtime.update_edge(token, id, patch).await?) + } + KindSpec::Note { specific } => { + let note = self + .runtime + .notes(token)? 
+ .get_note(id) + .await + .map_err(RuntimeError::Storage)?; + if note + .as_ref() + .is_none_or(|n| specific.as_ref().is_some_and(|k| n.kind != *k)) + { + return Err(RuntimeError::NotFound(format!("note {}", p.id))); + } + let patch = NotePatch::new( + optional_string_patch(p.name, "name")?, + p.content, + p.salience, + p.decay_factor, + p.properties, + ); + to_json(&self.runtime.update_note(token, id, patch).await?) + } + KindSpec::Event => Err(immutable_event_error()), + KindSpec::Proposal => Err(RuntimeError::InvalidInput( + "proposal events are immutable — use `withdraw` to rescind a proposal".into(), + )), } - - Err(RuntimeError::NotFound(format!("not found: {}", p.id))) } - pub(crate) async fn handle_delete(&self, params: Value) -> Result { + pub(crate) async fn handle_delete( + &self, + token: &NamespaceToken, + params: Value, + registry: &VerbRegistry, + ) -> Result { let p: DeleteParams = deser(params)?; - let id = resolve_uuid_async(&p.id, &self.runtime, p.namespace.as_deref()).await?; - let ns = p.namespace.as_deref(); - - if self - .runtime - .events(ns)? - .get_event(id) - .await - .map_err(RuntimeError::Storage)? 
- .is_some() - { - return Err(immutable_event_error()); - } - - if self.runtime.get_entity(ns, id).await?.is_some() { - let deleted = self - .runtime - .delete_entity(ns, id, p.hard.unwrap_or(false)) - .await?; - return to_json(&serde_json::json!({ "deleted": deleted, "id": p.id })); - } - - if self.runtime.get_edge(ns, id).await?.is_some() { - let deleted = self.runtime.delete_edge(ns, id).await?; - return to_json(&serde_json::json!({ "deleted": deleted, "id": p.id })); - } + let id = resolve_uuid_async(&p.id, &self.runtime, token).await?; + let spec = resolve_kind_spec(&p.kind, registry)?; - let deleted_note = self - .runtime - .delete_note(ns, id, p.hard.unwrap_or(false)) - .await?; - if deleted_note { - return to_json(&serde_json::json!({ "deleted": true, "id": p.id })); + match spec { + KindSpec::Entity { specific } => { + if let Some(ref expected) = specific { + let entity = self.runtime.get_entity(token, id).await?; + if entity.kind != *expected { + return Err(RuntimeError::NotFound(format!("{} {}", expected, p.id))); + } + } + let deleted = self + .runtime + .delete_entity(token, id, p.hard.unwrap_or(false)) + .await?; + if !deleted { + return Err(RuntimeError::NotFound(format!("entity {}", p.id))); + } + to_json(&serde_json::json!({ "deleted": deleted, "id": p.id, "kind": p.kind })) + } + KindSpec::Note { specific } => { + if let Some(ref expected) = specific { + let note = self + .runtime + .notes(token)? 
+ .get_note(id) + .await + .map_err(RuntimeError::Storage)?; + if note.as_ref().is_none_or(|n| n.kind != *expected) { + return Err(RuntimeError::NotFound(format!("{} {}", expected, p.id))); + } + } + let deleted = self + .runtime + .delete_note(token, id, p.hard.unwrap_or(false)) + .await?; + if !deleted { + return Err(RuntimeError::NotFound(format!("note {}", p.id))); + } + to_json(&serde_json::json!({ "deleted": deleted, "id": p.id, "kind": p.kind })) + } + KindSpec::Edge => { + let deleted = self + .runtime + .delete_edge(token, id, p.hard.unwrap_or(false)) + .await?; + to_json(&serde_json::json!({ "deleted": deleted, "id": p.id, "kind": "edge" })) + } + KindSpec::Event => Err(immutable_event_error()), + KindSpec::Proposal => Err(RuntimeError::InvalidInput( + "proposal events are immutable — use `withdraw` to rescind a proposal".into(), + )), } - - Err(RuntimeError::NotFound(format!("not found: {}", p.id))) } - pub(crate) async fn handle_merge(&self, params: Value) -> Result { + pub(crate) async fn handle_merge( + &self, + token: &NamespaceToken, + params: Value, + registry: &VerbRegistry, + ) -> Result { let p: MergeParams = deser(params)?; - let into_id = resolve_uuid_async(&p.into_id, &self.runtime, p.namespace.as_deref()).await?; - let from_id = resolve_uuid_async(&p.from_id, &self.runtime, p.namespace.as_deref()).await?; - let strategy = match p.strategy.as_deref().unwrap_or("prefer_into") { - "prefer_into" => MergeStrategy::PreferInto, - "prefer_from" => MergeStrategy::PreferFrom, - "union" => MergeStrategy::Union, - other => { - return Err(RuntimeError::InvalidInput(format!( - "unknown strategy {other:?}; use prefer_into | prefer_from | union" - ))) + let into_id = resolve_uuid_async(&p.into_id, &self.runtime, token).await?; + let from_id = resolve_uuid_async(&p.from_id, &self.runtime, token).await?; + let raw_kind = p.kind.as_deref().unwrap_or("entity"); + let spec = resolve_kind_spec(raw_kind, registry)?; + let policy = 
parse_entity_policy(p.strategy.as_deref().unwrap_or("prefer_into"))?; + let content_strategy = + parse_content_strategy(p.content_strategy.as_deref().unwrap_or("append"))?; + let dry_run = p.dry_run.unwrap_or(false); + + let summary: MergeSummary = match spec { + KindSpec::Entity { specific } => { + ensure_entity_kind(&self.runtime, token, into_id, specific.as_deref()).await?; + ensure_entity_kind(&self.runtime, token, from_id, specific.as_deref()).await?; + self.runtime + .merge_entity(token, into_id, from_id, policy, dry_run) + .await? + } + KindSpec::Note { specific } => { + ensure_note_kind(&self.runtime, token, into_id, specific.as_deref()).await?; + ensure_note_kind(&self.runtime, token, from_id, specific.as_deref()).await?; + self.runtime + .merge_note(token, into_id, from_id, policy, content_strategy, dry_run) + .await? + } + KindSpec::Edge => { + return Err(RuntimeError::InvalidInput( + "merge(kind=\"edge\") is unsupported".into(), + )) + } + KindSpec::Event => return Err(immutable_event_error()), + KindSpec::Proposal => { + return Err(RuntimeError::InvalidInput( + "proposal events are immutable and cannot be merged".into(), + )) } }; - let summary = self - .runtime - .merge_entity(p.namespace.as_deref(), into_id, from_id, strategy) - .await?; to_json(&summary) } pub(crate) async fn handle_search( &self, + token: &NamespaceToken, params: Value, registry: &VerbRegistry, ) -> Result { @@ -999,11 +1343,12 @@ impl KgPack { let hits = self .runtime .hybrid_search( - p.namespace.as_deref(), + token, &p.query, None, search_limit, kind_filter.as_deref(), + p.entity_type.as_deref(), ) .await?; @@ -1018,9 +1363,9 @@ impl KgPack { } else { let entities_page = self .runtime - .entities(p.namespace.as_deref())? + .entities(token)? 
.query_entities( - self.runtime.ns(p.namespace.as_deref()), + token.namespace().as_str(), EntityFilter { ids: candidate_ids, ..EntityFilter::default() @@ -1080,11 +1425,12 @@ impl KgPack { let hits = self .runtime .search_notes( - p.namespace.as_deref(), + token, &p.query, None, limit, kind_filter.as_deref(), + p.include_superseded.unwrap_or(false), ) .await?; @@ -1093,7 +1439,7 @@ impl KgPack { let note_kinds: HashMap = if hits.is_empty() { HashMap::new() } else { - let note_store = self.runtime.notes(p.namespace.as_deref())?; + let note_store = self.runtime.notes(token)?; let mut map = HashMap::new(); for h in &hits { if let Ok(Some(n)) = note_store.get_note(h.note_id).await { @@ -1123,29 +1469,170 @@ impl KgPack { KindSpec::Event => Err(RuntimeError::InvalidInput( "search does not support kind=event — use `list(kind=\"event\", ...)` for event browsing".into(), )), + KindSpec::Proposal => Err(RuntimeError::InvalidInput( + "search does not support kind=proposal — use `list(kind=\"proposal\", ...)` for proposal browsing".into(), + )), } } - pub(crate) async fn handle_link(&self, params: Value) -> Result { + pub(crate) async fn handle_link( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: LinkParams = deser(params)?; let verbose = p.verbose.unwrap_or(false); - let source = - resolve_uuid_async(&p.source_id, &self.runtime, p.namespace.as_deref()).await?; - let target = - resolve_uuid_async(&p.target_id, &self.runtime, p.namespace.as_deref()).await?; + + if let Some(entries) = p.links { + let attempted = entries.len(); + if attempted > 1000 { + return Err(RuntimeError::InvalidInput( + "bulk link limited to 1000 entries per request".into(), + )); + } + let atomic = p.atomic.unwrap_or(true); + if atomic { + let mut specs = Vec::with_capacity(attempted); + let mut seen = std::collections::HashSet::new(); + let mut skipped = 0usize; + for entry in entries { + let source = resolve_uuid_async(&entry.source_id, &self.runtime, token).await?; + let 
target = resolve_uuid_async(&entry.target_id, &self.runtime, token).await?; + let relation = parse_relation(&entry.relation)?; + let (source, target) = if relation.is_symmetric() && target < source { + (target, source) + } else { + (source, target) + }; + let key = format!("{source}::{target}::{}", relation.as_str()); + if !seen.insert(key) { + skipped += 1; + continue; + } + let weight = entry.weight.unwrap_or(1.0).clamp(0.0, 1.0); + let metadata = merge_entry_metadata(entry.metadata, entry.dependency_kind)?; + specs.push(LinkSpec { + namespace: Some(token.namespace().as_str().to_owned()), + source_id: source, + target_id: target, + relation, + weight, + metadata, + }); + } + let edges = self.runtime.link_many(token, specs).await?; + let mut resp = serde_json::json!({ + "attempted": attempted, + "created": edges.len(), + "skipped": skipped, + "failed": 0, + }); + if verbose { + resp["edges"] = serde_json::to_value(&edges) + .map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; + } + return to_json(&resp); + } else { + let mut results: Vec = Vec::new(); + let mut error_list: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + let mut skipped = 0usize; + for (idx, entry) in entries.into_iter().enumerate() { + let source = + match resolve_uuid_async(&entry.source_id, &self.runtime, token).await { + Ok(id) => id, + Err(e) => { + error_list.push(json!({"index": idx, "error": format!("{e}")})); + continue; + } + }; + let target = + match resolve_uuid_async(&entry.target_id, &self.runtime, token).await { + Ok(id) => id, + Err(e) => { + error_list.push(json!({"index": idx, "error": format!("{e}")})); + continue; + } + }; + let relation = match parse_relation(&entry.relation) { + Ok(r) => r, + Err(e) => { + error_list.push(json!({"index": idx, "error": format!("{e}")})); + continue; + } + }; + let (source, target) = if relation.is_symmetric() && target < source { + (target, source) + } else { + (source, target) + }; + let key = 
format!("{source}::{target}::{}", relation.as_str()); + if !seen.insert(key) { + skipped += 1; + continue; + } + let weight = entry.weight.unwrap_or(1.0).clamp(0.0, 1.0); + let metadata = match merge_entry_metadata(entry.metadata, entry.dependency_kind) + { + Ok(m) => m, + Err(e) => { + error_list.push(json!({"index": idx, "error": format!("{e}")})); + continue; + } + }; + match self + .runtime + .link(token, source, target, relation, weight, metadata) + .await + { + Ok(edge) => results.push(to_json(&edge)?), + Err(e) => error_list.push(json!({"index": idx, "error": format!("{e}")})), + } + } + let mut resp = serde_json::json!({ + "attempted": attempted, + "created": results.len(), + "skipped": skipped, + "failed": error_list.len(), + "errors": error_list, + }); + if verbose { + resp["edges"] = serde_json::Value::Array(results); + } + return to_json(&resp); + } + } + + // Singleton path. + let source_id_str = p.source_id.ok_or_else(|| { + RuntimeError::InvalidInput("link requires source_id (or links for bulk)".into()) + })?; + let target_id_str = p.target_id.ok_or_else(|| { + RuntimeError::InvalidInput("link requires target_id (or links for bulk)".into()) + })?; + let relation_str = p.relation.ok_or_else(|| { + RuntimeError::InvalidInput("link requires relation (or links for bulk)".into()) + })?; + let source = resolve_uuid_async(&source_id_str, &self.runtime, token).await?; + let target = resolve_uuid_async(&target_id_str, &self.runtime, token).await?; let weight = p.weight.unwrap_or(1.0).clamp(0.0, 1.0); - let relation = parse_relation(&p.relation)?; + let relation = parse_relation(&relation_str)?; + let metadata = merge_entry_metadata(p.metadata, p.dependency_kind)?; let edge = self .runtime - .link(p.namespace.as_deref(), source, target, relation, weight) + .link(token, source, target, relation, weight, metadata) .await?; let raw = to_json(&edge)?; Ok(format_edge_output(raw, verbose)) } - pub(crate) async fn handle_neighbors(&self, params: Value) -> Result { + 
pub(crate) async fn handle_neighbors( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: NeighborsParams = deser(params)?; - let node_id = resolve_uuid_async(&p.id, &self.runtime, p.namespace.as_deref()).await?; + let node_id = resolve_uuid_async(&p.id, &self.runtime, token).await?; let direction = parse_direction(p.direction.as_deref()); let relations: Option> = p .relations @@ -1158,7 +1645,7 @@ impl KgPack { let hits = self .runtime .neighbors_with_query( - p.namespace.as_deref(), + token, node_id, NeighborQuery { direction, @@ -1171,11 +1658,15 @@ impl KgPack { to_json(&hits) } - pub(crate) async fn handle_traverse(&self, params: Value) -> Result { + pub(crate) async fn handle_traverse( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: TraverseParams = deser(params)?; let mut roots = Vec::with_capacity(p.roots.len()); for s in &p.roots { - roots.push(resolve_uuid_async(s, &self.runtime, p.namespace.as_deref()).await?); + roots.push(resolve_uuid_async(s, &self.runtime, token).await?); } let direction = parse_direction(p.direction.as_deref()); let relations: Option> = p @@ -1198,19 +1689,698 @@ impl KgPack { options, include_roots: p.include_roots.unwrap_or(true), }; - let paths = self - .runtime - .traverse(p.namespace.as_deref(), request) - .await?; + let paths = self.runtime.traverse(token, request).await?; to_json(&paths) } - pub(crate) async fn handle_query(&self, params: Value) -> Result { + pub(crate) async fn handle_query( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: QueryParams = deser(params)?; - let result = self - .runtime - .query_with_metadata(p.namespace.as_deref(), &p.query) - .await?; + let result = self.runtime.query_with_metadata(token, &p.query).await?; to_json(&result) } + + // ---- Proposal verbs (ADR-046) ---- + + /// `propose` — commissive verb. Emits a `ProposalCreated` event and inserts + /// a row into the `proposals_open` projection table. 
+ pub(crate) async fn handle_propose( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { + let p: ProposeParams = deser(params)?; + if p.title.is_empty() { + return Err(RuntimeError::InvalidInput( + "propose requires a non-empty 'title'".into(), + )); + } + if p.description.is_empty() { + return Err(RuntimeError::InvalidInput( + "propose requires a non-empty 'description'".into(), + )); + } + + let _changeset: ProposalChangeset = serde_json::from_value(p.changeset.clone()) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid changeset: {e}")))?; + + let proposal_id = Uuid::new_v4(); + let actor = token.actor().id.clone(); + let ns = token.namespace().as_str().to_owned(); + let now = chrono::Utc::now().timestamp_micros(); + + let payload = ProposalCreatedPayload { + proposal_id: khive_types::Id128::from_u128(proposal_id.as_u128()), + proposer: actor.clone(), + title: p.title.clone(), + description: p.description.clone(), + changeset: _changeset, + reviewers: p.reviewers.clone(), + expiry: p + .expiry + .map(|v| khive_types::Timestamp::from_micros(v as u64)), + parent_id: p + .parent_id + .as_deref() + .map(|s| { + Uuid::from_str(s) + .map(|u| khive_types::Id128::from_u128(u.as_u128())) + .map_err(|e| { + RuntimeError::InvalidInput(format!("invalid parent_id {s:?}: {e}")) + }) + }) + .transpose()?, + }; + + let event_payload_json = serde_json::to_value(&payload) + .map_err(|e| RuntimeError::Internal(format!("serialize proposal payload: {e}")))?; + + let mut event = khive_storage::event::Event::new( + &ns, + "propose", + EventKind::ProposalCreated, + SubstrateKind::Entity, + &actor, + ); + event.payload = event_payload_json; + event.aggregate_kind = Some("proposal".to_string()); + event.aggregate_id = Some(proposal_id); + + let event_store = self.runtime.events(token)?; + event_store + .append_event(event) + .await + .map_err(RuntimeError::Storage)?; + + let expiry_val = p.expiry; + let sql = self.runtime.sql(); + let mut writer = 
sql.writer().await.map_err(RuntimeError::Storage)?; + writer + .execute(SqlStatement { + sql: "\ + INSERT INTO proposals_open \ + (proposal_id, namespace, proposer, title, status, \ + created_at, updated_at, expiry) \ + VALUES (?1, ?2, ?3, ?4, 'open', ?5, ?5, ?6)" + .to_string(), + params: vec![ + SqlValue::Text(proposal_id.to_string()), + SqlValue::Text(ns), + SqlValue::Text(actor.clone()), + SqlValue::Text(p.title.clone()), + SqlValue::Integer(now), + match expiry_val { + Some(v) => SqlValue::Integer(v), + None => SqlValue::Null, + }, + ], + label: Some("proposals_open.insert".into()), + }) + .await + .map_err(RuntimeError::Storage)?; + + to_json(&serde_json::json!({ + "proposal_id": proposal_id.to_string(), + "status": "open", + "proposer": actor, + "title": p.title, + })) + } + + /// `review` — declaration verb. Emits a `ProposalReviewed` event and updates + /// the `proposals_open` projection table (counts, status, last_decision). + pub(crate) async fn handle_review( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { + let p: ReviewParams = deser(params)?; + let proposal_id = Uuid::from_str(&p.proposal_id).map_err(|e| { + RuntimeError::InvalidInput(format!("invalid proposal_id {:?}: {e}", p.proposal_id)) + })?; + // Actor is always the authenticated token identity — client cannot override. 
+ let actor = token.actor().id.clone(); + let ns = token.namespace().as_str().to_owned(); + let now = chrono::Utc::now().timestamp_micros(); + + let decision: ProposalDecision = match p.decision.trim().to_ascii_lowercase().as_str() { + "approve" => ProposalDecision::Approve, + "reject" => ProposalDecision::Reject, + "comment" => ProposalDecision::Comment, + "request_changes" | "requestchanges" => ProposalDecision::RequestChanges, + other => { + return Err(RuntimeError::InvalidInput(format!( + "unknown decision {other:?}; valid: approve | reject | comment | request_changes" + ))); + } + }; + + let sql = self.runtime.sql(); + let mut reader = sql.reader().await.map_err(RuntimeError::Storage)?; + + let row = reader + .query_row(SqlStatement { + sql: "SELECT proposer, status FROM proposals_open \ + WHERE proposal_id = ?1 AND namespace = ?2" + .to_string(), + params: vec![ + SqlValue::Text(proposal_id.to_string()), + SqlValue::Text(ns.clone()), + ], + label: Some("proposals_open.get".into()), + }) + .await + .map_err(RuntimeError::Storage)? + .ok_or_else(|| RuntimeError::NotFound(format!("proposal {}", p.proposal_id)))?; + + let proposer = row + .get("proposer") + .and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.clone()) + } else { + None + } + }) + .unwrap_or_default(); + + let current_status = row + .get("status") + .and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }) + .unwrap_or("open"); + + if matches!(current_status, "applied" | "withdrawn" | "rejected") { + return Err(RuntimeError::InvalidInput(format!( + "proposal {} is already {current_status} and cannot be reviewed", + p.proposal_id + ))); + } + + // Self-approval guard: the proposer cannot approve their own proposal. + // Exception: OSS local mode (`actor == "local"`) operates as a single-user + // system where every operation runs under the same anonymous identity, so + // the guard would unconditionally block all approvals. Skip it in that case. 
+ // Multi-actor deployments (where distinct actor IDs are assigned) enforce + // the guard normally. + if decision == ProposalDecision::Approve && actor == proposer && actor != "local" { + return Err(RuntimeError::InvalidInput(format!( + "self-approval is forbidden: proposer {actor:?} cannot approve their own proposal" + ))); + } + + let payload = ProposalReviewedPayload { + proposal_id: khive_types::Id128::from_u128(proposal_id.as_u128()), + reviewer: actor.clone(), + decision, + comment: p.comment.clone(), + }; + let event_payload_json = serde_json::to_value(&payload) + .map_err(|e| RuntimeError::Internal(format!("serialize review payload: {e}")))?; + + let mut event = khive_storage::event::Event::new( + &ns, + "review", + EventKind::ProposalReviewed, + SubstrateKind::Entity, + &actor, + ); + event.payload = event_payload_json; + event.aggregate_kind = Some("proposal".to_string()); + event.aggregate_id = Some(proposal_id); + + let event_store = self.runtime.events(token)?; + event_store + .append_event(event) + .await + .map_err(RuntimeError::Storage)?; + + let (new_status, approve_delta, reject_delta) = match decision { + ProposalDecision::Approve => ("approved", 1i64, 0i64), + ProposalDecision::Reject => ("rejected", 0, 1), + ProposalDecision::Comment => (current_status, 0, 0), + ProposalDecision::RequestChanges => ("changes_requested", 0, 0), + }; + + let last_decision_json = serde_json::to_string(&decision) + .map_err(|e| RuntimeError::Internal(format!("serialize decision: {e}")))?; + + let mut writer = sql.writer().await.map_err(RuntimeError::Storage)?; + writer + .execute(SqlStatement { + sql: "UPDATE proposals_open \ + SET status = ?1, updated_at = ?2, last_decision = ?3, \ + review_count = review_count + 1, \ + approve_count = approve_count + ?4, \ + reject_count = reject_count + ?5 \ + WHERE proposal_id = ?6 AND namespace = ?7" + .to_string(), + params: vec![ + SqlValue::Text(new_status.to_string()), + SqlValue::Integer(now), + 
SqlValue::Text(last_decision_json), + SqlValue::Integer(approve_delta), + SqlValue::Integer(reject_delta), + SqlValue::Text(proposal_id.to_string()), + SqlValue::Text(ns), + ], + label: Some("proposals_open.update_review".into()), + }) + .await + .map_err(RuntimeError::Storage)?; + + to_json(&serde_json::json!({ + "proposal_id": proposal_id.to_string(), + "reviewer": actor, + "decision": p.decision, + "status": new_status, + })) + } + + /// `withdraw` — commissive verb. Emits a `ProposalWithdrawn` event and updates + /// the `proposals_open` projection table to status='withdrawn'. + pub(crate) async fn handle_withdraw( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { + let p: WithdrawParams = deser(params)?; + let proposal_id = Uuid::from_str(&p.proposal_id).map_err(|e| { + RuntimeError::InvalidInput(format!("invalid proposal_id {:?}: {e}", p.proposal_id)) + })?; + // Actor is always the authenticated token identity — client cannot override. + let actor = token.actor().id.clone(); + let ns = token.namespace().as_str().to_owned(); + let now = chrono::Utc::now().timestamp_micros(); + + let sql = self.runtime.sql(); + let mut reader = sql.reader().await.map_err(RuntimeError::Storage)?; + + let row = reader + .query_row(SqlStatement { + sql: "SELECT proposer, status FROM proposals_open \ + WHERE proposal_id = ?1 AND namespace = ?2" + .to_string(), + params: vec![ + SqlValue::Text(proposal_id.to_string()), + SqlValue::Text(ns.clone()), + ], + label: Some("proposals_open.get_for_withdraw".into()), + }) + .await + .map_err(RuntimeError::Storage)? 
+ .ok_or_else(|| RuntimeError::NotFound(format!("proposal {}", p.proposal_id)))?; + + let proposer = row + .get("proposer") + .and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.clone()) + } else { + None + } + }) + .unwrap_or_default(); + + if actor != proposer { + return Err(RuntimeError::InvalidInput(format!( + "only the original proposer {proposer:?} may withdraw this proposal" + ))); + } + + let current_status = row + .get("status") + .and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.as_str()) + } else { + None + } + }) + .unwrap_or("open"); + + if matches!(current_status, "applied" | "withdrawn") { + return Err(RuntimeError::InvalidInput(format!( + "proposal {} is already {current_status}", + p.proposal_id + ))); + } + + let payload = ProposalWithdrawnPayload { + proposal_id: khive_types::Id128::from_u128(proposal_id.as_u128()), + by: actor.clone(), + reason: p.rationale.clone(), + }; + let event_payload_json = serde_json::to_value(&payload) + .map_err(|e| RuntimeError::Internal(format!("serialize withdraw payload: {e}")))?; + + let mut event = khive_storage::event::Event::new( + &ns, + "withdraw", + EventKind::ProposalWithdrawn, + SubstrateKind::Entity, + &actor, + ); + event.payload = event_payload_json; + event.aggregate_kind = Some("proposal".to_string()); + event.aggregate_id = Some(proposal_id); + + let event_store = self.runtime.events(token)?; + event_store + .append_event(event) + .await + .map_err(RuntimeError::Storage)?; + + let mut writer = sql.writer().await.map_err(RuntimeError::Storage)?; + writer + .execute(SqlStatement { + sql: "UPDATE proposals_open \ + SET status = 'withdrawn', updated_at = ?1 \ + WHERE proposal_id = ?2 AND namespace = ?3" + .to_string(), + params: vec![ + SqlValue::Integer(now), + SqlValue::Text(proposal_id.to_string()), + SqlValue::Text(ns), + ], + label: Some("proposals_open.withdraw".into()), + }) + .await + .map_err(RuntimeError::Storage)?; + + to_json(&serde_json::json!({ + "proposal_id": 
proposal_id.to_string(), + "status": "withdrawn", + "by": actor, + })) + } + + /// `list(kind=proposal)` — assertive verb. Queries the `proposals_open` + /// projection table with optional status / proposer filters. + pub(crate) async fn handle_list_proposals( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { + let p: ListProposalsParams = serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(format!("bad params: {e}")))?; + let ns = token.namespace().as_str().to_owned(); + let limit = p.limit.unwrap_or(50).min(500) as i64; + let offset = p.offset.unwrap_or(0) as i64; + + let mut sql_str = "\ + SELECT proposal_id, proposer, title, status, created_at, updated_at, \ + expiry, last_decision, review_count, approve_count, reject_count \ + FROM proposals_open \ + WHERE namespace = ?1" + .to_string(); + let mut sql_params: Vec = vec![SqlValue::Text(ns)]; + let mut param_idx = 2usize; + + if let Some(status) = &p.status { + sql_str.push_str(&format!(" AND status = ?{param_idx}")); + sql_params.push(SqlValue::Text(status.clone())); + param_idx += 1; + } + if let Some(proposer) = &p.proposer { + sql_str.push_str(&format!(" AND proposer = ?{param_idx}")); + sql_params.push(SqlValue::Text(proposer.clone())); + param_idx += 1; + } + + sql_str.push_str(&format!( + " ORDER BY updated_at DESC LIMIT ?{param_idx} OFFSET ?{}", + param_idx + 1 + )); + sql_params.push(SqlValue::Integer(limit)); + sql_params.push(SqlValue::Integer(offset)); + + let sql = self.runtime.sql(); + let mut reader = sql.reader().await.map_err(RuntimeError::Storage)?; + let rows = reader + .query_all(SqlStatement { + sql: sql_str, + params: sql_params, + label: Some("proposals_open.list".into()), + }) + .await + .map_err(RuntimeError::Storage)?; + + let items: Vec = rows + .into_iter() + .map(|row| { + let get_text = |name: &str| -> String { + row.get(name) + .and_then(|v| { + if let SqlValue::Text(s) = v { + Some(s.clone()) + } else { + None + } + }) + .unwrap_or_default() 
+ }; + let get_int = |name: &str| -> Option { + row.get(name).and_then(|v| { + if let SqlValue::Integer(i) = v { + Some(*i) + } else { + None + } + }) + }; + serde_json::json!({ + "proposal_id": get_text("proposal_id"), + "proposer": get_text("proposer"), + "title": get_text("title"), + "status": get_text("status"), + "created_at": get_int("created_at"), + "updated_at": get_int("updated_at"), + "expiry": get_int("expiry"), + "last_decision": get_text("last_decision"), + "review_count": get_int("review_count").unwrap_or(0), + "approve_count": get_int("approve_count").unwrap_or(0), + "reject_count": get_int("reject_count").unwrap_or(0), + }) + }) + .collect(); + + to_json(&items) + } +} + +#[cfg(test)] +mod tests { + use super::{parse_relation, UpdateParams}; + use serde_json::json; + + // F009 (CRIT): error text must be derived from EdgeRelation::ALL, not a hardcoded list. + // ADR-002 mandates 15 relations; error text must include derived_from and precedes. + #[test] + fn parse_relation_error_lists_all_relations() { + let err = parse_relation("not_a_relation").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("derived_from"), + "F009: parse_relation error must list derived_from (ADR-002); got: {msg}" + ); + assert!( + msg.contains("precedes"), + "F009: parse_relation error must list precedes (ADR-002); got: {msg}" + ); + } + + // ADR-014: wire-level tri-state nullable f64 for `update`. + // absent → outer None (preserve existing value) + // null → Some(None) (clear the value) + // number → Some(Some(v)) (set to v) + // + // Regression for round-3 finding: the previous `Option<f64>` representation + // collapsed absent and null into the same `None`, so JSON null could not + // distinguish "clear" from "preserve" through the MCP wire surface.
+ #[test] + fn update_params_tri_state_salience() { + let absent: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note"})).unwrap(); + assert_eq!( + absent.salience, None, + "absent salience key must deserialize to outer None (preserve)" + ); + + let cleared: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note", "salience": null})).unwrap(); + assert_eq!( + cleared.salience, + Some(None), + "salience=null must deserialize to Some(None) (clear)" + ); + + let set: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note", "salience": 0.5})).unwrap(); + assert_eq!( + set.salience, + Some(Some(0.5)), + "salience=0.5 must deserialize to Some(Some(0.5)) (set)" + ); + } + + #[test] + fn update_params_tri_state_decay_factor() { + let absent: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note"})).unwrap(); + assert_eq!( + absent.decay_factor, None, + "absent decay_factor key must deserialize to outer None (preserve)" + ); + + let cleared: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note", "decay_factor": null})) + .unwrap(); + assert_eq!( + cleared.decay_factor, + Some(None), + "decay_factor=null must deserialize to Some(None) (clear)" + ); + + let set: UpdateParams = + serde_json::from_value(json!({"id": "x", "kind": "note", "decay_factor": 0.6})) + .unwrap(); + assert_eq!( + set.decay_factor, + Some(Some(0.6)), + "decay_factor=0.6 must deserialize to Some(Some(0.6)) (set)" + ); + } + + // ADR-046: resolve_kind_spec must recognise "proposal" as KindSpec::Proposal + #[test] + fn resolve_kind_spec_proposal() { + use super::{resolve_kind_spec, KindSpec}; + use crate::KgPack; + use khive_runtime::VerbRegistryBuilder; + + let rt = khive_runtime::KhiveRuntime::memory().expect("in-memory runtime"); + let mut builder = VerbRegistryBuilder::new(); + builder.register(KgPack::new(rt.clone())); + let registry = builder.build().expect("registry build"); + + let spec = 
resolve_kind_spec("proposal", &registry).expect("should resolve proposal"); + assert_eq!( + spec, + KindSpec::Proposal, + "kind=proposal must resolve to KindSpec::Proposal" + ); + + let spec_upper = + resolve_kind_spec("Proposal", &registry).expect("should be case-insensitive"); + assert_eq!( + spec_upper, + KindSpec::Proposal, + "kind=Proposal (mixed case) must resolve" + ); + } + + // ADR-046: propose param deserialization + #[test] + fn propose_params_deserialization() { + use super::ProposeParams; + let p: ProposeParams = serde_json::from_value(json!({ + "title": "Add RoPE", + "description": "Add RoPE entity to the graph", + "changeset": { + "kind": "add_entity", + "entity": "{\"kind\":\"concept\",\"name\":\"RoPE\"}" + }, + "reviewers": ["alice"], + })) + .expect("ProposeParams must deserialize"); + assert_eq!(p.title, "Add RoPE"); + assert_eq!(p.reviewers, vec!["alice"]); + assert!(p.parent_id.is_none()); + assert!(p.expiry.is_none()); + } + + // ADR-046: review param deserialization with all valid decisions + #[test] + fn review_params_decisions() { + use super::ReviewParams; + for decision in ["approve", "reject", "comment", "request_changes"] { + let p: ReviewParams = serde_json::from_value(json!({ + "proposal_id": "00000000-0000-0000-0000-000000000001", + "decision": decision, + })) + .expect("ReviewParams must deserialize"); + assert_eq!(p.decision, decision); + } + } + + // CRIT-2 regression: ReviewParams must not accept an `actor` field. + // The actor is always derived from the NamespaceToken at dispatch time. + // If a client passes actor=<value>, the field is ignored (unknown fields + // are allowed by serde default, so the struct simply lacks the field). + #[test] + fn review_params_no_actor_field() { + use super::ReviewParams; + // Baseline: ReviewParams works without actor.
+ let p: ReviewParams = serde_json::from_value(json!({ + "proposal_id": "00000000-0000-0000-0000-000000000001", + "decision": "approve", + })) + .expect("ReviewParams must deserialize without actor"); + assert_eq!(p.proposal_id, "00000000-0000-0000-0000-000000000001"); + assert_eq!(p.decision, "approve"); + } + + // CRIT-2 regression: WithdrawParams must not accept an `actor` field. + #[test] + fn withdraw_params_no_actor_field() { + use super::WithdrawParams; + let p: WithdrawParams = serde_json::from_value(json!({ + "proposal_id": "00000000-0000-0000-0000-000000000002", + })) + .expect("WithdrawParams must deserialize without actor"); + assert_eq!(p.proposal_id, "00000000-0000-0000-0000-000000000002"); + assert!(p.rationale.is_none()); + } + + // CRIT-2 regression: ProposeParams must not accept an `actor` field. + #[test] + fn propose_params_no_actor_field() { + use super::ProposeParams; + let p: ProposeParams = serde_json::from_value(json!({ + "title": "Fix RoPE", + "description": "Fix RoPE entity", + "changeset": {"kind": "add_entity", "entity": "{}"}, + })) + .expect("ProposeParams must deserialize without actor"); + assert_eq!(p.title, "Fix RoPE"); + } + + // ADR-046: KG pack must expose exactly 14 handlers including propose/review/withdraw + #[test] + fn kg_pack_exposes_14_handlers() { + use crate::KgPack; + use khive_types::Pack; + let handlers = KgPack::HANDLERS; + assert_eq!( + handlers.len(), + 14, + "ADR-046: kg pack must expose 14 handlers (was 11, +3 for propose/review/withdraw)" + ); + let names: Vec<&str> = handlers.iter().map(|h| h.name).collect(); + assert!(names.contains(&"propose"), "propose must be in KG_HANDLERS"); + assert!(names.contains(&"review"), "review must be in KG_HANDLERS"); + assert!( + names.contains(&"withdraw"), + "withdraw must be in KG_HANDLERS" + ); + } } diff --git a/crates/khive-pack-kg/src/lib.rs b/crates/khive-pack-kg/src/lib.rs index b04a54ef..7474207a 100644 --- a/crates/khive-pack-kg/src/lib.rs +++ 
b/crates/khive-pack-kg/src/lib.rs @@ -1,8 +1,8 @@ //! pack-kg — Knowledge Graph verb pack for khive. //! -//! Provides 11 verbs for managing entities, notes, edges, and graph queries -//! in a research knowledge graph. This is the first-party pack shipped with -//! the khive binary. +//! Provides 14 verbs for managing entities, notes, edges, graph queries, and +//! event-sourced proposals (ADR-046) in a research knowledge graph. This is +//! the first-party pack shipped with the khive binary. pub mod handlers; pub mod vocab; @@ -11,10 +11,11 @@ use async_trait::async_trait; use serde_json::Value; use khive_runtime::pack::PackRuntime; -use khive_runtime::{KhiveRuntime, RuntimeError, VerbRegistry}; -use khive_types::{Pack, VerbDef}; +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry}; +use khive_types::{HandlerDef, Pack, VerbCategory, Visibility}; -pub use vocab::{EntityKind, NoteKind}; +pub use khive_types::EntityKind; +pub use vocab::NoteKind; /// KG pack vocabulary declaration. pub struct KgPack { @@ -30,70 +31,116 @@ impl Pack for KgPack { "decision", "reference", ]; - const ENTITY_KINDS: &'static [&'static str] = - &["concept", "document", "dataset", "project", "person", "org"]; - const VERBS: &'static [VerbDef] = &KG_VERBS; + const ENTITY_KINDS: &'static [&'static str] = &[ + "concept", "document", "dataset", "project", "person", "org", "artifact", "service", + ]; + const HANDLERS: &'static [HandlerDef] = &KG_HANDLERS; } -// ADR-060: Illocutionary classification (Searle 1976) -// Assertive — retrieves/presents state of affairs +// ADR-060 / ADR-025: Illocutionary classification (Searle 1976) +// Assertive — retrieves/presents state of affairs // Commissive — commits caller to a persistent change // Declaration — changes institutional status by fiat -static KG_VERBS: [VerbDef; 11] = [ +// +// Verbs 12-14 (propose, review, withdraw) added per ADR-046 (cluster-22). 
+static KG_HANDLERS: [HandlerDef; 14] = [ // Commissive: commits an entity or note to the namespace - VerbDef { + HandlerDef { name: "create", description: "Create an entity or note", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, // Assertive: retrieves and presents a record - VerbDef { + HandlerDef { name: "get", description: "Fetch any record by UUID", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Assertive: retrieves and presents filtered records - VerbDef { + HandlerDef { name: "list", description: "List records with optional filtering", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Declaration: changes entity or edge state by fiat - VerbDef { + HandlerDef { name: "update", description: "Patch entity or edge fields", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, // Declaration: declares a record removed - VerbDef { + HandlerDef { name: "delete", description: "Soft or hard delete a record", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, // Declaration: declares two entities identical - VerbDef { + HandlerDef { name: "merge", description: "Deduplicate two entities", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, }, // Assertive: retrieves and presents search results - VerbDef { + HandlerDef { name: "search", description: "Hybrid FTS + vector search", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Commissive: commits a typed edge to the graph - VerbDef { + HandlerDef { name: "link", description: "Create a typed directed edge", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, // Assertive: retrieves immediate graph neighbors - VerbDef { + HandlerDef { name: "neighbors", description: "Immediate graph neighbors", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Assertive: retrieves multi-hop traversal results - VerbDef { + 
HandlerDef { name: "traverse", description: "Multi-hop BFS traversal", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, // Assertive: retrieves pattern-matched results - VerbDef { + HandlerDef { name: "query", description: "GQL/SPARQL pattern matching", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, + }, + // Commissive: commits a proposal to the namespace event log (ADR-046) + HandlerDef { + name: "propose", + description: "Create an event-sourced change proposal", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, + }, + // Declaration: approves/rejects/comments on a proposal (ADR-046) + HandlerDef { + name: "review", + description: "Approve, reject, comment, or request changes on a proposal", + visibility: Visibility::Verb, + category: VerbCategory::Declaration, + }, + // Commissive: rescinds an open proposal (ADR-046) + HandlerDef { + name: "withdraw", + description: "Withdraw an open proposal (proposer-only)", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, ]; @@ -103,7 +150,7 @@ impl KgPack { } } -// ── ADR-063: inventory self-registration ───────────────────────────────────── +// ── ADR-027: inventory self-registration ───────────────────────────────────── struct KgPackFactory; @@ -133,8 +180,8 @@ impl PackRuntime for KgPack { ::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - &KG_VERBS + fn handlers(&self) -> &'static [HandlerDef] { + &KG_HANDLERS } async fn dispatch( @@ -142,19 +189,23 @@ impl PackRuntime for KgPack { verb: &str, params: Value, registry: &VerbRegistry, + token: &NamespaceToken, ) -> Result { match verb { - "create" => self.handle_create(params, registry).await, - "get" => self.handle_get(params).await, - "list" => self.handle_list(params, registry).await, - "update" => self.handle_update(params).await, - "delete" => self.handle_delete(params).await, - "merge" => self.handle_merge(params).await, - "search" => self.handle_search(params, 
registry).await, - "link" => self.handle_link(params).await, - "neighbors" => self.handle_neighbors(params).await, - "traverse" => self.handle_traverse(params).await, - "query" => self.handle_query(params).await, + "create" => self.handle_create(token, params, registry).await, + "get" => self.handle_get(token, params).await, + "list" => self.handle_list(token, params, registry).await, + "update" => self.handle_update(token, params, registry).await, + "delete" => self.handle_delete(token, params, registry).await, + "merge" => self.handle_merge(token, params, registry).await, + "search" => self.handle_search(token, params, registry).await, + "link" => self.handle_link(token, params).await, + "neighbors" => self.handle_neighbors(token, params).await, + "traverse" => self.handle_traverse(token, params).await, + "query" => self.handle_query(token, params).await, + "propose" => self.handle_propose(token, params).await, + "review" => self.handle_review(token, params).await, + "withdraw" => self.handle_withdraw(token, params).await, _ => Err(RuntimeError::InvalidInput(format!( "kg pack does not handle verb {verb:?}" ))), diff --git a/crates/khive-pack-kg/src/vocab.rs b/crates/khive-pack-kg/src/vocab.rs index 0f1ce403..ccf759b4 100644 --- a/crates/khive-pack-kg/src/vocab.rs +++ b/crates/khive-pack-kg/src/vocab.rs @@ -1,6 +1,6 @@ -//! KG-pack vocabulary — closed enums for the 6 entity kinds and 5 note kinds. +//! KG-pack vocabulary — pack-owned entity and note vocabulary. //! -//! These enums validate and canonicalize kind strings at the pack boundary. +//! Entity kind validation now uses `khive_types::EntityKind` directly. //! The runtime accepts any String — validation is the pack's responsibility. use core::fmt; @@ -73,7 +73,7 @@ impl std::str::FromStr for EntityKind { } } -/// Closed taxonomy for note classification (ADR-019). +/// KG pack note kinds. Public note kind validation is canonical-only per ADR-013. 
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] pub enum NoteKind { #[default] @@ -129,11 +129,11 @@ impl std::str::FromStr for NoteKind { fn from_str(s: &str) -> Result { match s.trim().to_ascii_lowercase().as_str() { - "observation" | "obs" => Ok(Self::Observation), - "insight" | "finding" => Ok(Self::Insight), - "question" | "q" => Ok(Self::Question), - "decision" | "choice" => Ok(Self::Decision), - "reference" | "ref" | "citation" => Ok(Self::Reference), + "observation" => Ok(Self::Observation), + "insight" => Ok(Self::Insight), + "question" => Ok(Self::Question), + "decision" => Ok(Self::Decision), + "reference" => Ok(Self::Reference), other => Err(UnknownVariant::new("note_kind", other, Self::NAMES)), } } @@ -144,28 +144,6 @@ mod tests { use super::*; use std::str::FromStr; - #[test] - fn entity_kind_roundtrip() { - for kind in EntityKind::ALL { - let parsed = EntityKind::from_str(kind.name()).unwrap(); - assert_eq!(parsed, kind); - } - } - - #[test] - fn entity_kind_aliases() { - assert_eq!(EntityKind::from_str("paper").unwrap(), EntityKind::Document); - assert_eq!(EntityKind::from_str("repo").unwrap(), EntityKind::Project); - assert_eq!(EntityKind::from_str("lab").unwrap(), EntityKind::Org); - } - - #[test] - fn entity_kind_unknown_errors_with_valid_list() { - let err = EntityKind::from_str("gadget").unwrap_err(); - assert_eq!(err.domain, "entity_kind"); - assert!(err.valid.contains(&"concept")); - } - #[test] fn note_kind_roundtrip() { for kind in NoteKind::ALL { @@ -175,8 +153,13 @@ mod tests { } #[test] - fn note_kind_aliases() { - assert_eq!(NoteKind::from_str("obs").unwrap(), NoteKind::Observation); - assert_eq!(NoteKind::from_str("ref").unwrap(), NoteKind::Reference); + fn note_kind_aliases_rejected() { + // Aliases were removed per ADR-013 — only canonical names are accepted. 
+ assert!(NoteKind::from_str("obs").is_err()); + assert!(NoteKind::from_str("finding").is_err()); + assert!(NoteKind::from_str("q").is_err()); + assert!(NoteKind::from_str("choice").is_err()); + assert!(NoteKind::from_str("ref").is_err()); + assert!(NoteKind::from_str("citation").is_err()); } } diff --git a/crates/khive-pack-kg/tests/integration.rs b/crates/khive-pack-kg/tests/integration.rs index b0f76395..76547bf0 100644 --- a/crates/khive-pack-kg/tests/integration.rs +++ b/crates/khive-pack-kg/tests/integration.rs @@ -6,7 +6,7 @@ use async_trait::async_trait; use khive_pack_kg::KgPack; -use khive_runtime::pack::{PackRuntime, VerbDef}; +use khive_runtime::pack::{HandlerDef, PackRuntime}; use khive_runtime::{KhiveRuntime, RuntimeError, VerbRegistry, VerbRegistryBuilder}; use khive_types::Pack; use serde_json::{json, Value}; @@ -27,7 +27,7 @@ impl Fixture { self.registry.dispatch(verb, args).await } - fn verbs(&self) -> Vec<&'static VerbDef> { + fn verbs(&self) -> Vec<&'static HandlerDef> { self.registry.all_verbs() } } @@ -43,7 +43,8 @@ fn pack() -> Fixture { fn pack_with_events() -> Fixture { let rt = KhiveRuntime::memory().expect("in-memory runtime must succeed"); - let event_store = rt.events(None).expect("event store must be available"); + let tok = rt.authorize(khive_runtime::Namespace::local()); + let event_store = rt.events(&tok).expect("event store must be available"); let mut builder = VerbRegistryBuilder::new(); builder.with_event_store(event_store); builder.register(KgPack::new(rt)); @@ -65,13 +66,15 @@ fn invalid_input_message(err: &RuntimeError) -> &str { // ---- PackRuntime trait: verbs() and unknown-verb dispatch ---- +// ADR-046 (cluster-22) added propose, review, and withdraw — bringing the +// handler count from 11 to 14. 
#[test] -fn pack_verbs_returns_eleven() { +fn pack_verbs_returns_fourteen() { let pack = pack(); assert_eq!( pack.verbs().len(), - 11, - "KgPack must expose exactly 11 verbs" + 14, + "KgPack must expose exactly 14 verbs (11 original + propose/review/withdraw)" ); } @@ -91,6 +94,9 @@ fn pack_verbs_names_are_correct() { "neighbors", "traverse", "query", + "propose", + "review", + "withdraw", ] { assert!(names.contains(expected), "verbs() missing {expected:?}"); } @@ -259,7 +265,8 @@ async fn create_note_no_kind_defaults_to_observation() { } #[tokio::test] -async fn create_note_alias_obs_works() { +async fn create_note_alias_obs_rejected() { + // Aliases removed per ADR-013 (F071) — only canonical note kind names accepted. let pack = pack(); let result = pack .dispatch( @@ -271,11 +278,16 @@ async fn create_note_alias_obs_works() { }), ) .await; - assert!(result.is_ok(), "alias 'obs' must succeed: {:?}", result); + assert!( + result.is_err(), + "alias 'obs' must be rejected: {:?}", + result + ); } #[tokio::test] -async fn create_note_alias_finding_normalizes_to_insight() { +async fn create_note_alias_finding_rejected() { + // Aliases removed per ADR-013 (F071) — only canonical note kind names accepted. 
let pack = pack(); let result = pack .dispatch( @@ -286,13 +298,11 @@ async fn create_note_alias_finding_normalizes_to_insight() { "note_kind": "finding" }), ) - .await - .expect("alias 'finding' must succeed"); - let stored_kind = result.get("kind").and_then(Value::as_str); - assert_eq!( - stored_kind, - Some("insight"), - "alias 'finding' must normalize to 'insight'; got: {result}" + .await; + assert!( + result.is_err(), + "alias 'finding' must be rejected: {:?}", + result ); } @@ -874,7 +884,7 @@ async fn neighbors_enriches_with_name_and_kind() { let tgt = pack .dispatch( "create", - json!({"kind": "entity", "name": "GQA", "entity_kind": "project"}), + json!({"kind": "entity", "name": "GQA", "entity_kind": "concept"}), ) .await .unwrap(); @@ -905,7 +915,7 @@ async fn neighbors_enriches_with_name_and_kind() { ); assert_eq!( hit.get("kind").and_then(Value::as_str), - Some("project"), + Some("concept"), "neighbor hit must carry entity kind (#162); hit={hit}" ); } @@ -1117,7 +1127,7 @@ async fn soft_delete_entity_not_found_on_get() { .to_string(); let del = pack - .dispatch("delete", json!({"id": id})) + .dispatch("delete", json!({"id": id, "kind": "entity"})) .await .expect("delete must succeed"); assert_eq!( @@ -1139,7 +1149,7 @@ async fn delete_nonexistent_id_returns_not_found() { let err = pack .dispatch( "delete", - json!({"id": "00000000-0000-0000-0000-000000000002"}), + json!({"id": "00000000-0000-0000-0000-000000000002", "kind": "entity"}), ) .await .unwrap_err(); @@ -1203,7 +1213,7 @@ impl Pack for FakeMemoryPack { const NAME: &'static str = "memory"; const NOTE_KINDS: &'static [&'static str] = &["memory"]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; const REQUIRES: &'static [&'static str] = &["kg"]; } @@ -1221,8 +1231,8 @@ impl PackRuntime for FakeMemoryPack { FakeMemoryPack::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - FakeMemoryPack::VERBS + fn 
handlers(&self) -> &'static [HandlerDef] { + FakeMemoryPack::HANDLERS } fn requires(&self) -> &'static [&'static str] { @@ -1234,6 +1244,7 @@ impl PackRuntime for FakeMemoryPack { verb: &str, _params: Value, _registry: &VerbRegistry, + _token: &khive_runtime::NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!( "FakeMemoryPack does not handle verb {verb:?}" @@ -1560,7 +1571,7 @@ async fn update_event_uuid_returns_immutable_error() { let err = pack .dispatch( "update", - json!({"id": event_id, "name": "should-not-apply"}), + json!({"id": event_id, "kind": "event", "name": "should-not-apply"}), ) .await .unwrap_err(); @@ -1599,7 +1610,7 @@ async fn delete_event_uuid_returns_immutable_error_and_event_persists() { .to_string(); let err = pack - .dispatch("delete", json!({"id": event_id})) + .dispatch("delete", json!({"id": event_id, "kind": "event"})) .await .unwrap_err(); assert!( @@ -1954,3 +1965,469 @@ async fn link_output_returns_full_uuids_and_iso_dates() { "created_at must be ISO 8601; got: {created_at:?}" ); } + +// ── Bulk link: entry limit, dedup, and response shape ──────────────────────── + +// Fix 2: >1000 entries must return InvalidInput immediately. 
+#[tokio::test] +async fn bulk_link_over_1000_entries_returns_error() { + let pack = pack(); + let a = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "BulkA", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let a_id = a.get("id").and_then(Value::as_str).unwrap().to_string(); + let b = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "BulkB", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let b_id = b.get("id").and_then(Value::as_str).unwrap().to_string(); + + let entries: Vec = (0..1001) + .map(|_| { + json!({ + "source_id": a_id, + "target_id": b_id, + "relation": "extends", + }) + }) + .collect(); + + let err = pack + .dispatch("link", json!({"links": entries})) + .await + .expect_err("1001 entries must return an error"); + assert!( + matches!(err, khive_runtime::RuntimeError::InvalidInput(_)), + "expected InvalidInput for >1000 bulk entries, got {err:?}" + ); +} + +// Fix 3: duplicate entries in a bulk request must be deduplicated (skipped count > 0). +// Fix 4: response shape must have attempted/created/skipped/failed keys. +#[tokio::test] +async fn bulk_link_dedup_and_response_shape() { + let pack = pack(); + let a = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "DedupA", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let a_id = a.get("id").and_then(Value::as_str).unwrap().to_string(); + let b = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "DedupB", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let b_id = b.get("id").and_then(Value::as_str).unwrap().to_string(); + let c = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "DedupC", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let c_id = c.get("id").and_then(Value::as_str).unwrap().to_string(); + + // 3 entries: A->B extends, A->B extends (dup), A->C extends. 
+ let result = pack + .dispatch( + "link", + json!({ + "links": [ + {"source_id": a_id, "target_id": b_id, "relation": "extends"}, + {"source_id": a_id, "target_id": b_id, "relation": "extends"}, + {"source_id": a_id, "target_id": c_id, "relation": "extends"}, + ], + "atomic": true, + }), + ) + .await + .expect("bulk link must succeed"); + + assert_eq!( + result.get("attempted").and_then(Value::as_u64), + Some(3), + "attempted must be 3; got {result:?}" + ); + assert_eq!( + result.get("created").and_then(Value::as_u64), + Some(2), + "created must be 2 (one dup skipped); got {result:?}" + ); + assert_eq!( + result.get("skipped").and_then(Value::as_u64), + Some(1), + "skipped must be 1; got {result:?}" + ); + assert_eq!( + result.get("failed").and_then(Value::as_u64), + Some(0), + "failed must be 0; got {result:?}" + ); + // ADR-038: edges key must be absent when verbose is not set (F205). + assert!( + result.get("edges").is_none(), + "edges must be absent without verbose=true (ADR-038 F205); got {result:?}" + ); +} + +// F205: bulk link with verbose=true must include edges array; without verbose it must be absent. +#[tokio::test] +async fn bulk_link_verbose_controls_edges_key() { + let pack = pack(); + let a = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "VerbA", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let a_id = a.get("id").and_then(Value::as_str).unwrap().to_string(); + let b = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "VerbB", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let b_id = b.get("id").and_then(Value::as_str).unwrap().to_string(); + + // Without verbose: no edges key. 
+ let result_no_verbose = pack + .dispatch( + "link", + json!({ + "links": [{"source_id": a_id, "target_id": b_id, "relation": "extends"}], + }), + ) + .await + .expect("bulk link must succeed"); + assert!( + result_no_verbose.get("edges").is_none(), + "edges must be absent without verbose=true (ADR-038 F205); got {result_no_verbose:?}" + ); + + // With verbose=true: edges key present. + let c = pack + .dispatch( + "create", + json!({"kind": "entity", "name": "VerbC", "entity_kind": "concept"}), + ) + .await + .unwrap(); + let c_id = c.get("id").and_then(Value::as_str).unwrap().to_string(); + let result_verbose = pack + .dispatch( + "link", + json!({ + "links": [{"source_id": a_id, "target_id": c_id, "relation": "extends"}], + "verbose": true, + }), + ) + .await + .expect("bulk link with verbose must succeed"); + assert!( + result_verbose + .get("edges") + .and_then(Value::as_array) + .is_some(), + "edges must be present with verbose=true (ADR-038 F205); got {result_verbose:?}" + ); +} + +// ---- ADR-014 curation event payload regression tests (codex round-2) ---- + +/// Update an entity → list entity_updated events → assert payload has id, namespace, +/// changed_fields per ADR-014. +#[tokio::test] +async fn curation_update_entity_event_payload_has_adr014_fields() { + let pack = pack_with_events(); + + // Create then update with a name change. + let created = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "PayloadTestEntity"}), + ) + .await + .expect("create must succeed"); + let entity_id = created + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + pack.dispatch( + "update", + json!({"id": entity_id, "kind": "entity", "name": "PayloadTestEntityRenamed"}), + ) + .await + .expect("update must succeed"); + + // Retrieve the entity_updated event. 
+ let events = pack + .dispatch( + "list", + json!({"kind": "event", "event_kind": "entity_updated", "limit": 10}), + ) + .await + .expect("list entity_updated events must succeed"); + let arr = events.as_array().expect("list must return array"); + assert!( + !arr.is_empty(), + "at least one entity_updated event must be present after update" + ); + + // Find the event for our specific entity (by target_id). + let our_event = arr + .iter() + .find(|e| { + e.get("target_id") + .and_then(Value::as_str) + .is_some_and(|t| t == entity_id || t.starts_with(&entity_id[..8])) + }) + .unwrap_or(&arr[0]); + + let payload = our_event + .get("payload") + .expect("event must have payload field"); + assert!( + payload.get("id").is_some(), + "entity_updated payload must contain 'id'; got {payload}" + ); + assert!( + payload.get("namespace").is_some(), + "entity_updated payload must contain 'namespace'; got {payload}" + ); + let changed = payload + .get("changed_fields") + .and_then(Value::as_array) + .expect("entity_updated payload must contain 'changed_fields' array"); + assert!( + changed.iter().any(|v| v.as_str() == Some("name")), + "changed_fields must include 'name' when name was updated; got {changed:?}" + ); +} + +/// Merge two entities → list entity_merged events → assert payload has into_id, from_id, +/// policy, edges_rewired per ADR-014. 
+#[tokio::test] +async fn curation_merge_entity_event_payload_has_adr014_fields() { + let pack = pack_with_events(); + + let into_e = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "MergeIntoEntity"}), + ) + .await + .expect("create into must succeed"); + let into_id = into_e + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + let from_e = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "MergeFromEntity"}), + ) + .await + .expect("create from must succeed"); + let from_id = from_e + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + pack.dispatch("merge", json!({"into_id": into_id, "from_id": from_id})) + .await + .expect("merge must succeed"); + + let events = pack + .dispatch( + "list", + json!({"kind": "event", "event_kind": "entity_merged", "limit": 10}), + ) + .await + .expect("list entity_merged events must succeed"); + let arr = events.as_array().expect("list must return array"); + assert!( + !arr.is_empty(), + "at least one entity_merged event must be present" + ); + + let event = &arr[0]; + let payload = event.get("payload").expect("event must have payload field"); + assert!( + payload.get("into_id").is_some(), + "entity_merged payload must contain 'into_id'; got {payload}" + ); + assert!( + payload.get("from_id").is_some(), + "entity_merged payload must contain 'from_id'; got {payload}" + ); + assert!( + payload.get("policy").is_some(), + "entity_merged payload must contain 'policy'; got {payload}" + ); + assert!( + payload.get("edges_rewired").is_some(), + "entity_merged payload must contain 'edges_rewired'; got {payload}" + ); +} + +/// Delete an entity with hard=true → list entity_deleted events → assert payload has +/// id, namespace, hard=true per ADR-014. 
+#[tokio::test] +async fn curation_delete_entity_hard_event_payload_has_adr014_fields() { + let pack = pack_with_events(); + + let created = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "HardDeletePayloadEntity"}), + ) + .await + .expect("create must succeed"); + let entity_id = created + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + pack.dispatch( + "delete", + json!({"id": entity_id, "kind": "entity", "hard": true}), + ) + .await + .expect("hard delete must succeed"); + + let events = pack + .dispatch( + "list", + json!({"kind": "event", "event_kind": "entity_deleted", "limit": 10}), + ) + .await + .expect("list entity_deleted events must succeed"); + let arr = events.as_array().expect("list must return array"); + assert!( + !arr.is_empty(), + "at least one entity_deleted event must be present" + ); + + let event = &arr[0]; + let payload = event.get("payload").expect("event must have payload field"); + assert!( + payload.get("id").is_some(), + "entity_deleted payload must contain 'id'; got {payload}" + ); + assert!( + payload.get("namespace").is_some(), + "entity_deleted payload must contain 'namespace'; got {payload}" + ); + assert_eq!( + payload.get("hard").and_then(Value::as_bool), + Some(true), + "entity_deleted payload must have hard=true for hard delete; got {payload}" + ); +} + +// ---- ADR-022 provenance filter regression tests (codex round-2) ---- + +/// list(kind="event", observed=[uuid]) must pass the filter down to storage and +/// return only events whose observed list contains that UUID. +#[tokio::test] +async fn list_event_observed_filter_is_wired_through_to_storage() { + let pack = pack_with_events(); + + // Create an entity so we have at least one known-good UUID to search with. 
+ let created = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "ObservedFilterEntity"}), + ) + .await + .expect("create must succeed"); + let entity_id = created + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + // Query with observed=[entity_id] — may return 0 results if the store has no + // observed projections for this entity, but must NOT return an error. + // What we validate: the filter parses and reaches storage without a parse error. + let result = pack + .dispatch( + "list", + json!({"kind": "event", "observed": [entity_id], "limit": 10}), + ) + .await + .expect("list(kind=event, observed=[...]) must not return an error"); + assert!( + result.as_array().is_some(), + "list with observed filter must return an array; got {result}" + ); +} + +/// list(kind="event", selected=[uuid]) must pass the filter down to storage without +/// returning a parse error. +#[tokio::test] +async fn list_event_selected_filter_is_wired_through_to_storage() { + let pack = pack_with_events(); + + let created = pack + .dispatch( + "create", + json!({"kind": "concept", "name": "SelectedFilterEntity"}), + ) + .await + .expect("create must succeed"); + let entity_id = created + .get("id") + .and_then(Value::as_str) + .expect("create must return id") + .to_string(); + + let result = pack + .dispatch( + "list", + json!({"kind": "event", "selected": [entity_id], "limit": 10}), + ) + .await + .expect("list(kind=event, selected=[...]) must not return an error"); + assert!( + result.as_array().is_some(), + "list with selected filter must return an array; got {result}" + ); +} + +/// list(kind="event", observed=["not-a-uuid"]) must return InvalidInput. 
+#[tokio::test] +async fn list_event_observed_filter_invalid_uuid_returns_invalid_input() { + let pack = pack_with_events(); + let err = pack + .dispatch( + "list", + json!({"kind": "event", "observed": ["not-a-valid-uuid"], "limit": 10}), + ) + .await + .unwrap_err(); + assert!( + is_invalid_input(&err), + "invalid UUID in observed must return InvalidInput; got {err:?}" + ); +} diff --git a/crates/khive-pack-memory/Cargo.toml b/crates/khive-pack-memory/Cargo.toml index 5fb377ac..1e668f75 100644 --- a/crates/khive-pack-memory/Cargo.toml +++ b/crates/khive-pack-memory/Cargo.toml @@ -11,11 +11,11 @@ categories.workspace = true description = "Memory verb pack — remember/recall semantics with decay-aware ranking" [dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-pack-brain = { version = "0.2.0", path = "../khive-pack-brain" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-pack-brain = { version = "0.2.1", path = "../khive-pack-brain" } inventory = { workspace = true } -khive-storage = { version = "0.2.0", path = "../khive-storage" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } async-trait = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } @@ -24,7 +24,7 @@ tracing = { workspace = true } chrono = { workspace = true } [dev-dependencies] -khive-pack-kg = { version = "0.2.0", path = "../khive-pack-kg" } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } tokio = { workspace = true, features = ["test-util"] } [[test]] diff --git a/crates/khive-pack-memory/src/config.rs b/crates/khive-pack-memory/src/config.rs index 2d4236b0..103faa5a 100644 --- a/crates/khive-pack-memory/src/config.rs +++ b/crates/khive-pack-memory/src/config.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + 
use serde::{Deserialize, Serialize}; use khive_runtime::{FusionStrategy, RuntimeError}; @@ -15,6 +17,13 @@ pub struct RecallConfig { /// Weight of pure recency. Default 0.10. pub temporal_weight: f64, + // --- Reranker weights (ADR-033 §1) --- + /// Per-reranker weights, keyed by reranker name. Missing keys → 0.0 (disabled). + /// v1 built-in names: "cross_encoder", "salience", "graph_proximity". + pub reranker_weights: HashMap, + /// Per-reranker config params (e.g., graph_proximity anchors, salience α). + pub reranker_params: HashMap, + // --- Temporal parameters --- /// Days for temporal score to halve. Default 30.0. pub temporal_half_life_days: f64, @@ -35,6 +44,11 @@ pub struct RecallConfig { pub min_salience: f64, /// Include per-component score breakdowns in recall responses. Default false. pub include_breakdown: bool, + + // --- Migration behavior (ADR-033 §1, ADR-043) --- + /// When true and no active embedding model is configured, fall back to FTS5-only + /// candidate retrieval rather than failing. Default true. + pub fallback_during_migration: bool, } impl Default for RecallConfig { @@ -43,6 +57,8 @@ impl Default for RecallConfig { relevance_weight: 0.70, importance_weight: 0.20, temporal_weight: 0.10, + reranker_weights: HashMap::new(), + reranker_params: HashMap::new(), temporal_half_life_days: 30.0, decay_model: DecayModel::default(), candidate_multiplier: 20, @@ -51,6 +67,7 @@ impl Default for RecallConfig { min_score: 0.0, min_salience: 0.0, include_breakdown: false, + fallback_during_migration: true, } } } @@ -59,8 +76,8 @@ impl RecallConfig { /// Validate that the config is internally consistent. 
/// /// Rejects: - /// - Negative weights - /// - All three weights summing to zero (no scoring signal) + /// - Negative weights (base or reranker) + /// - All three base weights summing to zero (no scoring signal) /// - Non-positive temporal half-life pub fn validate(&self) -> Result<(), RuntimeError> { if self.relevance_weight < 0.0 { @@ -84,6 +101,13 @@ impl RecallConfig { "at least one of relevance_weight / importance_weight / temporal_weight must be positive".to_string(), )); } + for (name, &weight) in &self.reranker_weights { + if weight < 0.0 { + return Err(RuntimeError::InvalidInput(format!( + "reranker_weights[{name:?}] must be non-negative" + ))); + } + } if self.temporal_half_life_days <= 0.0 { return Err(RuntimeError::InvalidInput( "temporal_half_life_days must be positive".to_string(), @@ -112,9 +136,11 @@ impl RecallConfig { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] #[serde(rename_all = "snake_case")] pub enum DecayModel { - /// `salience * exp(-age * ln2 / half_life)` + /// `salience * exp(-decay_factor * age_days)` — uses the note's own decay_factor directly. /// - /// This is the original formula; it is the default. + /// This is the ADR-021 §5 formula. The note's `decay_factor` controls the decay rate; + /// `temporal_half_life_days` is used only by the temporal recency score, not here. + /// Default `decay_factor=0.01` gives a ~69-day half-life: exp(-0.01 * 69.3) ≈ 0.5. 
#[default] Exponential, /// `salience / (1 + decay_factor * age_days)` @@ -135,14 +161,13 @@ impl DecayModel { /// - `salience` — raw importance in [0, 1] /// - `age_days` — age of the note in days /// - `decay_factor`— per-note decay rate stored on the note (used by Exponential and Hyperbolic) - /// - `half_life` — config half-life, used by Exponential (as formula half-life) and PowerLaw - pub fn apply(&self, salience: f64, age_days: f64, decay_factor: f64, half_life: f64) -> f64 { + /// - `half_life` — config half-life, used only by PowerLaw (ignored by Exponential) + pub fn apply(&self, salience: f64, age_days: f64, decay_factor: f64, _half_life: f64) -> f64 { match self { DecayModel::Exponential => { - // Uses the proper half-life formula: exp(-age * ln2 / half_life) - // This gives exactly 0.5 at age == half_life. - let k = std::f64::consts::LN_2 / half_life; - salience * (-k * age_days).exp() + // ADR-021 §5: effective_importance = salience * exp(-decay_factor * age_days) + // Uses the note's own decay_factor, not a half-life-derived constant. 
+ salience * (-decay_factor * age_days).exp() } DecayModel::Hyperbolic => salience / (1.0 + decay_factor * age_days), DecayModel::PowerLaw { half_life_days } => { @@ -195,15 +220,18 @@ mod tests { // ── DecayModel ──────────────────────────────────────────────────────────── #[test] - fn exponential_halves_at_half_life() { + fn exponential_halves_at_decay_factor_half_life() { + // ADR-021 §5 formula: salience * exp(-decay_factor * age_days) + // Half-life = ln(2) / decay_factor ≈ 69.3 days for decay_factor=0.01 let model = DecayModel::Exponential; let salience = 1.0; - let half_life = 30.0; - let result = model.apply(salience, half_life, 0.01, half_life); + let decay_factor = 0.01; + let half_life_days = std::f64::consts::LN_2 / decay_factor; + let result = model.apply(salience, half_life_days, decay_factor, 30.0); let diff = (result - 0.5).abs(); assert!( diff < 1e-10, - "exponential should give 0.5 at half-life, got {result}" + "exponential should give 0.5 at ln(2)/decay_factor days, got {result}" ); } @@ -218,6 +246,20 @@ mod tests { ); } + #[test] + fn exponential_uses_note_decay_factor_not_half_life() { + // Verify the formula uses decay_factor param, not the half_life param. + // At age=1 day, decay_factor=1.0 → exp(-1.0) ≈ 0.3679. + // If we were using half_life=10 days, exp(-ln2/10) ≈ 0.933. 
+ let model = DecayModel::Exponential; + let result = model.apply(1.0, 1.0, 1.0, 10.0); + let expected = (-1.0f64).exp(); + assert!( + (result - expected).abs() < 1e-12, + "expected {expected}, got {result}" + ); + } + #[test] fn hyperbolic_halves_at_one_over_decay_factor() { // salience / (1 + k * age) = 0.5 when age = 1/k diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 79ff908d..6667a7f8 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -5,7 +5,7 @@ use serde_json::{json, Value}; use uuid::Uuid; use khive_runtime::fusion::fuse_with_strategy; -use khive_runtime::{RuntimeError, SearchHit, SearchSource, VerbRegistry}; +use khive_runtime::{NamespaceToken, RuntimeError, SearchHit, SearchSource, VerbRegistry}; use khive_storage::types::{ TextFilter, TextQueryMode, TextSearchHit, TextSearchRequest, VectorSearchHit, VectorSearchRequest, @@ -35,7 +35,6 @@ fn validate_memory_type(mt: &str) -> Result<(), RuntimeError> { #[derive(Deserialize)] struct RememberParams { content: String, - namespace: Option, memory_type: Option, #[serde(alias = "salience")] importance: Option, @@ -49,7 +48,6 @@ struct RememberParams { #[derive(Deserialize)] struct RecallParams { query: String, - namespace: Option, limit: Option, memory_type: Option, min_score: Option, @@ -162,18 +160,21 @@ impl MemoryPack { async fn collect_recall_candidates( &self, query: &str, - namespace: Option<&str>, + token: &NamespaceToken, candidate_limit: u32, ) -> Result { - let ns = self.runtime.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_string(); + // F111: restrict text candidates to Note substrate kind so entity records + // cannot fill the candidate pool before any memory note is considered. let text_hits = self .runtime - .text_for_notes(namespace)? + .text_for_notes(token)? 
.search(TextSearchRequest { query: query.to_string(), mode: TextQueryMode::Plain, filter: Some(TextFilter { namespaces: vec![ns.clone()], + kinds: vec![SubstrateKind::Note], ..TextFilter::default() }), top_k: candidate_limit, @@ -184,12 +185,15 @@ impl MemoryPack { let vector_hits = if self.runtime.config().embedding_model.is_some() { let vec = self.runtime.embed(query).await?; self.runtime - .vectors(namespace)? + .vectors(token)? .search(VectorSearchRequest { - query_embedding: vec, + query_vectors: vec![vec], top_k: candidate_limit, namespace: Some(ns.clone()), + // F111: already restricts to Note substrate kind kind: Some(SubstrateKind::Note), + filter: None, + backend_hints: None, }) .await? } else { @@ -205,7 +209,7 @@ impl MemoryPack { async fn load_memory_candidate_notes( &self, - namespace: Option<&str>, + token: &NamespaceToken, text_hits: &[TextSearchHit], vector_hits: &[VectorSearchHit], ) -> Result<(HashSet, HashMap), RuntimeError> { @@ -224,7 +228,7 @@ impl MemoryPack { ids }; - let note_store = self.runtime.notes(namespace)?; + let note_store = self.runtime.notes(token)?; let batch = note_store.get_notes_batch(&candidate_ids).await?; let mut memory_ids = HashSet::new(); let mut notes_by_id = HashMap::new(); @@ -238,7 +242,11 @@ impl MemoryPack { Ok((memory_ids, notes_by_id)) } - pub(crate) async fn handle_remember(&self, params: Value) -> Result { + pub(crate) async fn handle_remember( + &self, + token: &NamespaceToken, + params: Value, + ) -> Result { let p: RememberParams = deser(params)?; if p.content.trim().is_empty() { return Err(RuntimeError::InvalidInput( @@ -246,45 +254,61 @@ impl MemoryPack { )); } - if let Some(mt) = &p.memory_type { - validate_memory_type(mt)?; - } + let memory_type = p.memory_type.as_deref().unwrap_or("episodic"); + validate_memory_type(memory_type)?; - let importance = p.importance.unwrap_or(0.5).clamp(0.0, 1.0); - let decay_factor = p.decay_factor.unwrap_or(0.01).clamp(0.0, 1.0); + // F108: reject out-of-range values 
instead of clamping + let importance = match p.importance { + Some(v) if !(0.0..=1.0).contains(&v) => { + return Err(RuntimeError::InvalidInput(format!( + "importance must be in [0, 1], got {v}" + ))); + } + Some(v) => v, + None => 0.5, + }; + // F108: decay_factor must be >= 0; no upper clamp per ADR-021 + let decay_factor = match p.decay_factor { + Some(v) if v < 0.0 => { + return Err(RuntimeError::InvalidInput(format!( + "decay_factor must be >= 0, got {v}" + ))); + } + Some(v) => v, + None => 0.01, + }; - let mut props = serde_json::json!({}); - if let Some(mt) = &p.memory_type { - props["memory_type"] = json!(mt); - } + // F107: always write memory_type to properties (ADR-021 §4, default "episodic") + let mut props = json!({ "memory_type": memory_type }); if let Some(tags) = &p.tags { if !tags.is_empty() { props["tags"] = json!(tags); } } - let properties = if props.as_object().map(|o| o.is_empty()).unwrap_or(true) { - None - } else { - Some(props) - }; + // F109: reject invalid source_id UUID strings let mut annotates: Vec = vec![]; if let Some(sid) = &p.source_id { - if let Ok(source_uuid) = sid.parse::() { - annotates.push(source_uuid); + match sid.parse::() { + Ok(source_uuid) => annotates.push(source_uuid), + Err(_) => { + return Err(RuntimeError::InvalidInput(format!( + "source_id {sid:?} is not a valid UUID" + ))); + } } } let note = self .runtime .create_note_with_decay( - p.namespace.as_deref(), + token, "memory", None, &p.content, - importance, + Some(importance), decay_factor, - properties, + Some(props), annotates, ) .await?; @@ -294,12 +318,14 @@ impl MemoryPack { "kind": note.kind, "salience": note.salience, "decay_factor": note.decay_factor, + "memory_type": memory_type, "created_at": note.created_at, })) } pub(crate) async fn handle_recall( &self, + token: &NamespaceToken, params: Value, _registry: &VerbRegistry, ) -> Result { @@ -315,14 +341,10 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = 
recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, p.namespace.as_deref(), candidate_limit) + .collect_recall_candidates(&p.query, token, candidate_limit) .await?; let (memory_ids, mut notes_by_id) = self - .load_memory_candidate_notes( - p.namespace.as_deref(), - &candidates.text_hits, - &candidates.vector_hits, - ) + .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) .await?; let fused = fuse_candidates( @@ -356,14 +378,16 @@ impl MemoryPack { continue; } } - if note.salience < cfg.min_salience { + let salience = note.salience.unwrap_or(0.5); + let decay_factor = note.decay_factor.unwrap_or(0.01); + if salience < cfg.min_salience { continue; } let age_micros = (now_micros - note.created_at).max(0) as f64; let age_days = age_micros / (1_000_000.0 * 86_400.0); let (final_score, breakdown) = - compute_score(&cfg, relevance, note.salience, note.decay_factor, age_days); + compute_score(&cfg, relevance, salience, decay_factor, age_days); if final_score < cfg.min_score { continue; @@ -423,6 +447,7 @@ impl MemoryPack { pub(crate) async fn handle_recall_candidates( &self, + token: &NamespaceToken, params: Value, ) -> Result { let p: RecallParams = deser(params)?; @@ -432,7 +457,7 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, p.namespace.as_deref(), candidate_limit) + .collect_recall_candidates(&p.query, token, candidate_limit) .await?; let text_candidates: Vec = candidates @@ -470,6 +495,7 @@ impl MemoryPack { pub(crate) async fn handle_recall_fuse( &self, + token: &NamespaceToken, params: Value, _registry: &VerbRegistry, ) -> Result { @@ -484,14 +510,10 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, 
p.namespace.as_deref(), candidate_limit) + .collect_recall_candidates(&p.query, token, candidate_limit) .await?; let (memory_ids, notes_by_id) = self - .load_memory_candidate_notes( - p.namespace.as_deref(), - &candidates.text_hits, - &candidates.vector_hits, - ) + .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) .await?; let fused = fuse_candidates( @@ -533,6 +555,58 @@ impl MemoryPack { })) } + /// Apply configured rerankers to fused candidates (ADR-033 §2, F222). + /// + /// In v1 with no active rerankers (empty `reranker_weights`), this is a + /// pass-through: each candidate is returned with an empty `rerank_scores` map. + /// When reranker weights are configured in `RecallConfig.reranker_weights`, the + /// named rerankers will populate `rerank_scores[name]` for downstream scoring. + pub(crate) async fn handle_recall_rerank(&self, params: Value) -> Result { + #[derive(Deserialize)] + struct RerankParams { + /// Fused candidate IDs to rerank (from recall_fuse output). + candidates: Vec, + config: Option, + } + let p: RerankParams = deser(params)?; + let cfg = p.config.unwrap_or_else(|| self.active_config()); + cfg.validate()?; + + // Build the set of active rerankers (weight > 0). + let active: Vec<(&String, &f64)> = cfg + .reranker_weights + .iter() + .filter(|(_, &w)| w > 0.0) + .collect(); + + // For each candidate, produce a rerank_scores map with scores from active rerankers. + // v1: no reranker models are loaded, so all scores are 0.0 (reranker not run). 
+ let reranked: Vec = p + .candidates + .iter() + .map(|candidate| { + let id = candidate + .get("note_id") + .cloned() + .unwrap_or(serde_json::Value::Null); + let mut rerank_scores = serde_json::Map::new(); + for (name, _weight) in &active { + // v1: reranker model not loaded → score = 0.0 + rerank_scores.insert(name.to_string(), json!(0.0_f32)); + } + json!({ + "note_id": id, + "rerank_scores": rerank_scores, + }) + }) + .collect(); + + to_json(&json!({ + "reranked": reranked, + "active_rerankers": active.iter().map(|(n, _)| n.as_str()).collect::>(), + })) + } + pub(crate) async fn handle_recall_score(&self, params: Value) -> Result { #[derive(Deserialize)] struct ScoreParams { @@ -582,7 +656,6 @@ mod tests { fn effective_config_uses_defaults() { let p = RecallParams { query: "test".to_string(), - namespace: None, limit: None, memory_type: None, min_score: None, @@ -599,7 +672,6 @@ mod tests { fn effective_config_legacy_overrides() { let p = RecallParams { query: "test".to_string(), - namespace: None, limit: None, memory_type: None, min_score: Some(0.5), @@ -615,7 +687,6 @@ mod tests { fn effective_config_explicit_config_wins() { let p = RecallParams { query: "test".to_string(), - namespace: None, limit: None, memory_type: None, min_score: Some(0.1), @@ -647,15 +718,27 @@ mod tests { } #[test] - fn compute_score_exponential_decay_at_half_life() { - let cfg = RecallConfig::default(); // half_life = 30 days - let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, 30.0); - // At age = half_life: importance_decayed ≈ 0.5, temporal ≈ 0.5 + fn compute_score_exponential_decay_at_decay_factor_half_life() { + let cfg = RecallConfig::default(); // temporal_half_life = 30 days, default decay_factor=0.01 + // ADR-021 §5: importance_decayed = salience * exp(-decay_factor * age_days) + // At age = ln(2)/0.01 ≈ 69.3 days: importance_decayed ≈ 0.5 + let age_days = std::f64::consts::LN_2 / 0.01; + let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, age_days); assert!( 
(bd.importance_decayed - 0.5).abs() < 1e-10, "importance_decayed = {}", bd.importance_decayed ); + // Temporal at age_days=69.3 with half_life=30: exp(-ln2/30 * 69.3) ≈ exp(-1.6) ≈ 0.2 + // Just verify it's < 0.5 (past the temporal half-life) + assert!(bd.temporal < 0.5, "temporal = {}", bd.temporal); + } + + #[test] + fn compute_score_temporal_halves_at_temporal_half_life() { + let cfg = RecallConfig::default(); // temporal_half_life = 30 days + let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, 30.0); + // At age = temporal_half_life = 30 days: temporal = exp(-ln2/30 * 30) = 0.5 assert!( (bd.temporal - 0.5).abs() < 1e-10, "temporal = {}", @@ -675,4 +758,157 @@ mod tests { // Only relevance matters: total = 0.8 assert!((total - 0.8).abs() < 1e-10, "got {total}"); } + + // ── F107: remember always writes memory_type to properties ─────────── + + #[test] + fn remember_params_default_memory_type_is_episodic() { + // When memory_type is absent, validate_memory_type("episodic") must pass. + // This ensures the default "episodic" is valid. 
+ assert!(validate_memory_type("episodic").is_ok()); + } + + // ── F108: reject out-of-range importance and decay_factor ───────────── + + #[test] + fn remember_params_importance_below_zero_rejected() { + // Simulate handler validation path directly + let importance: f64 = -0.1; + let result: Result = if !(0.0..=1.0).contains(&importance) { + Err(RuntimeError::InvalidInput(format!( + "importance must be in [0, 1], got {importance}" + ))) + } else { + Ok(importance) + }; + assert!(result.is_err(), "expected error for importance < 0"); + } + + #[test] + fn remember_params_importance_above_one_rejected() { + let importance: f64 = 1.1; + let result: Result = if !(0.0..=1.0).contains(&importance) { + Err(RuntimeError::InvalidInput(format!( + "importance must be in [0, 1], got {importance}" + ))) + } else { + Ok(importance) + }; + assert!(result.is_err(), "expected error for importance > 1"); + } + + #[test] + fn remember_params_importance_boundary_values_accepted() { + // 0.0 and 1.0 are valid + for val in [0.0_f64, 0.5, 1.0] { + let result: Result<(), RuntimeError> = if !(0.0..=1.0).contains(&val) { + Err(RuntimeError::InvalidInput("out of range".into())) + } else { + Ok(()) + }; + assert!(result.is_ok(), "boundary {val} should be accepted"); + } + } + + #[test] + fn remember_params_decay_factor_below_zero_rejected() { + let df: f64 = -0.01; + let result: Result = if df < 0.0 { + Err(RuntimeError::InvalidInput(format!( + "decay_factor must be >= 0, got {df}" + ))) + } else { + Ok(df) + }; + assert!(result.is_err(), "expected error for decay_factor < 0"); + } + + #[test] + fn remember_params_decay_factor_above_one_accepted() { + // ADR-021 only requires decay_factor >= 0; no upper cap + let df: f64 = 2.5; + let result: Result = if df < 0.0 { + Err(RuntimeError::InvalidInput("negative".into())) + } else { + Ok(df) + }; + assert!(result.is_ok(), "decay_factor > 1 should be accepted"); + } + + // ── F109: invalid source_id UUID string is rejected ────────────────── + + 
#[test] + fn remember_params_invalid_source_id_uuid_is_rejected() { + let sid = "not-a-uuid"; + let result: Result = sid.parse::().map_err(|_| { + RuntimeError::InvalidInput(format!("source_id {sid:?} is not a valid UUID")) + }); + assert!(result.is_err(), "expected error for invalid UUID string"); + } + + #[test] + fn remember_params_valid_source_id_uuid_is_accepted() { + let sid = "00000000-0000-0000-0000-000000000001"; + let result = sid.parse::(); + assert!(result.is_ok(), "valid UUID should parse successfully"); + } + + // ── recall_rerank: pass-through when no rerankers configured ───────── + + #[test] + fn recall_rerank_config_empty_reranker_weights_has_no_active() { + let cfg = RecallConfig::default(); + let active: Vec<_> = cfg + .reranker_weights + .iter() + .filter(|(_, &w)| w > 0.0) + .collect(); + assert!(active.is_empty(), "default config has no active rerankers"); + } + + #[test] + fn recall_rerank_config_with_reranker_weight_is_active() { + let mut cfg = RecallConfig::default(); + cfg.reranker_weights + .insert("cross_encoder".to_string(), 0.5); + let active: Vec<_> = cfg + .reranker_weights + .iter() + .filter(|(_, &w)| w > 0.0) + .collect(); + assert_eq!(active.len(), 1); + assert_eq!(active[0].0, "cross_encoder"); + } + + // ── F186/F223/F230: new RecallConfig fields ─────────────────────────── + + #[test] + fn recall_config_reranker_fields_default_empty() { + let cfg = RecallConfig::default(); + assert!(cfg.reranker_weights.is_empty()); + assert!(cfg.reranker_params.is_empty()); + } + + #[test] + fn recall_config_fallback_during_migration_defaults_true() { + let cfg = RecallConfig::default(); + assert!(cfg.fallback_during_migration); + } + + #[test] + fn recall_config_negative_reranker_weight_fails_validation() { + let mut cfg = RecallConfig::default(); + cfg.reranker_weights + .insert("bad_reranker".to_string(), -0.1); + assert!(cfg.validate().is_err()); + } + + #[test] + fn recall_config_zero_reranker_weight_validates() { + let mut cfg = 
RecallConfig::default(); + // Weight of 0.0 means disabled, not an error + cfg.reranker_weights + .insert("disabled_reranker".to_string(), 0.0); + assert!(cfg.validate().is_ok()); + } } diff --git a/crates/khive-pack-memory/src/lib.rs b/crates/khive-pack-memory/src/lib.rs index 0ce887ee..91838c5c 100644 --- a/crates/khive-pack-memory/src/lib.rs +++ b/crates/khive-pack-memory/src/lib.rs @@ -8,8 +8,8 @@ use async_trait::async_trait; use serde_json::Value; use khive_runtime::pack::PackRuntime; -use khive_runtime::{KhiveRuntime, RuntimeError, VerbRegistry}; -use khive_types::{Pack, VerbDef}; +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry}; +use khive_types::{HandlerDef, Pack, VerbCategory, Visibility}; use crate::config::RecallConfig; @@ -32,39 +32,58 @@ impl Pack for MemoryPack { const NAME: &'static str = "memory"; const NOTE_KINDS: &'static [&'static str] = &["memory"]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &MEMORY_VERBS; + const HANDLERS: &'static [HandlerDef] = &MEMORY_HANDLERS; const REQUIRES: &'static [&'static str] = &["kg"]; } -// ADR-060: Illocutionary classification (Searle 1976) +// ADR-025: Illocutionary classification (Searle 1976) // Commissive — commits caller to a persistent change -// Assertive — retrieves/presents state of affairs -static MEMORY_VERBS: [VerbDef; 6] = [ +// Assertive — retrieves/presents state of affairs +static MEMORY_HANDLERS: [HandlerDef; 7] = [ // Commissive: commits a memory to the namespace - VerbDef { + HandlerDef { name: "remember", description: "Create a memory note with salience and decay", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, // Assertive: retrieves memory notes via decay-aware ranking - VerbDef { + HandlerDef { name: "recall", description: "Recall memory notes with decay-aware hybrid ranking", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, - VerbDef { + HandlerDef { name: 
"recall.embed", description: "Return the embedding vector used by memory recall", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, - VerbDef { + HandlerDef { name: "recall.candidates", description: "Return raw memory recall candidates by retrieval source", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, - VerbDef { + HandlerDef { name: "recall.fuse", description: "Return fused memory recall candidates before final scoring", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, - VerbDef { + // ADR-033 §2, F222: rerank stage between fuse and score + HandlerDef { + name: "recall.rerank", + description: "Apply configured rerankers to fused candidates (ADR-033 §2)", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, + }, + HandlerDef { name: "recall.score", description: "Score a memory recall candidate and return score breakdown", + visibility: Visibility::Subhandler, + category: VerbCategory::Assertive, }, ]; @@ -77,7 +96,7 @@ impl MemoryPack { } } -// ── ADR-063: inventory self-registration ───────────────────────────────────── +// ── ADR-027: inventory self-registration ───────────────────────────────────── struct MemoryPackFactory; @@ -111,8 +130,8 @@ impl PackRuntime for MemoryPack { ::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - &MEMORY_VERBS + fn handlers(&self) -> &'static [HandlerDef] { + &MEMORY_HANDLERS } fn requires(&self) -> &'static [&'static str] { @@ -124,13 +143,15 @@ impl PackRuntime for MemoryPack { verb: &str, params: Value, registry: &VerbRegistry, + token: &NamespaceToken, ) -> Result { match verb { - "remember" => self.handle_remember(params).await, - "recall" => self.handle_recall(params, registry).await, + "remember" => self.handle_remember(token, params).await, + "recall" => self.handle_recall(token, params, registry).await, "recall.embed" => self.handle_recall_embed(params).await, - "recall.candidates" => 
self.handle_recall_candidates(params).await, - "recall.fuse" => self.handle_recall_fuse(params, registry).await, + "recall.candidates" => self.handle_recall_candidates(token, params).await, + "recall.fuse" => self.handle_recall_fuse(token, params, registry).await, + "recall.rerank" => self.handle_recall_rerank(params).await, "recall.score" => self.handle_recall_score(params).await, _ => Err(RuntimeError::InvalidInput(format!( "memory pack does not handle verb {verb:?}" diff --git a/crates/khive-pack-memory/src/tunable.rs b/crates/khive-pack-memory/src/tunable.rs index 9e89b208..72e59ae0 100644 --- a/crates/khive-pack-memory/src/tunable.rs +++ b/crates/khive-pack-memory/src/tunable.rs @@ -1,4 +1,4 @@ -use khive_pack_brain::state::BrainState; +use khive_pack_brain::state::BalancedRecallState; use khive_pack_brain::tunable::{PackTunable, ParameterDef, ParameterSpace}; use khive_runtime::RuntimeError; use serde_json::Value; @@ -10,8 +10,9 @@ use crate::MemoryPack; /// recall scoring pipeline based on observed usage patterns (Issue #159). /// /// Parameter names (`memory::relevance_weight`, `memory::importance_weight`, -/// `memory::temporal_weight`) match the keys that brain's `EventFold` tracks, -/// so posteriors from real-time dispatch events flow directly into these params. +/// `memory::temporal_weight`) correspond to the three Beta posteriors in +/// `BalancedRecallState` (ADR-032 §5a). Posterior means flow directly into +/// `RecallConfig`. /// /// `project_config` reads posterior means → `RecallConfig`. /// `apply_config` validates and stores the new config; future recall calls @@ -23,7 +24,7 @@ impl PackTunable for MemoryPack { ParameterDef { name: "memory::relevance_weight".into(), // Prior: relevance is the dominant signal (7:3), matching - // EventFold's initial "recall::relevance_weight" posterior. + // BalancedRecallState's `relevance` posterior prior. 
prior_alpha: 7.0, prior_beta: 3.0, bounds: (0.0, 1.0), @@ -46,30 +47,16 @@ impl PackTunable for MemoryPack { } } - /// Project the current `BrainState` posteriors into a `RecallConfig` value. + /// Project the current `BalancedRecallState` posteriors into a `RecallConfig` value. /// - /// Reads `memory::*_weight` posterior means from `state`. Falls back to the + /// Reads the three posterior means from the profile state. Falls back to the /// current active config if a parameter is absent (brain not yet warmed up). - fn project_config(&self, state: &BrainState) -> Value { + fn project_config(&self, state: &BalancedRecallState) -> Value { let current = self.active_config(); - let relevance = state - .parameters - .get("memory::relevance_weight") - .map(|p| p.mean()) - .unwrap_or(current.relevance_weight); - - let importance = state - .parameters - .get("memory::importance_weight") - .map(|p| p.mean()) - .unwrap_or(current.importance_weight); - - let temporal = state - .parameters - .get("memory::temporal_weight") - .map(|p| p.mean()) - .unwrap_or(current.temporal_weight); + let relevance = state.relevance.mean(); + let importance = state.importance.mean(); + let temporal = state.temporal.mean(); let projected = RecallConfig { relevance_weight: relevance, @@ -98,17 +85,28 @@ impl PackTunable for MemoryPack { #[cfg(test)] mod tests { use super::*; - use khive_pack_brain::state::BetaPosterior; + use khive_pack_brain::state::{BalancedRecallState, BetaPosterior}; use khive_runtime::KhiveRuntime; - use std::collections::HashMap; fn make_pack() -> MemoryPack { let rt = KhiveRuntime::memory().expect("in-memory runtime"); MemoryPack::new(rt) } - fn brain_state_with_params(params: HashMap) -> BrainState { - BrainState::new(params, 100) + fn balanced_state_with_means( + relevance_mean: f64, + importance_mean: f64, + temporal_mean: f64, + ) -> BalancedRecallState { + // Construct Beta posteriors whose means match the supplied values. 
+ // Using ESS=10 for each: alpha = mean * 10, beta = (1-mean) * 10. + let to_posterior = + |mean: f64| -> BetaPosterior { BetaPosterior::new(mean * 10.0, (1.0 - mean) * 10.0) }; + let mut state = BalancedRecallState::new(100); + state.relevance = to_posterior(relevance_mean); + state.importance = to_posterior(importance_mean); + state.temporal = to_posterior(temporal_mean); + state } #[test] @@ -125,20 +123,7 @@ mod tests { #[test] fn project_config_reads_posterior_means() { let pack = make_pack(); - let mut params = HashMap::new(); - params.insert( - "memory::relevance_weight".into(), - BetaPosterior::new(6.0, 4.0), // mean = 0.6 - ); - params.insert( - "memory::importance_weight".into(), - BetaPosterior::new(3.0, 7.0), // mean = 0.3 - ); - params.insert( - "memory::temporal_weight".into(), - BetaPosterior::new(1.0, 9.0), // mean = 0.1 - ); - let state = brain_state_with_params(params); + let state = balanced_state_with_means(0.6, 0.3, 0.1); let projected = pack.project_config(&state); let cfg: RecallConfig = serde_json::from_value(projected).unwrap(); @@ -148,9 +133,10 @@ mod tests { } #[test] - fn project_config_falls_back_to_active_when_param_absent() { + fn project_config_with_default_priors_matches_expected_defaults() { + // Default BalancedRecallState priors: Beta(7,3)=0.7, Beta(2,8)=0.2, Beta(1,9)=0.1 let pack = make_pack(); - let state = brain_state_with_params(HashMap::new()); + let state = BalancedRecallState::new(100); let projected = pack.project_config(&state); let cfg: RecallConfig = serde_json::from_value(projected).unwrap(); @@ -199,7 +185,8 @@ mod tests { } #[test] - fn prior_for_relevance_weight_matches_fold_priors() { + fn prior_for_relevance_weight_matches_balanced_recall_state_prior() { + // BalancedRecallState uses Beta(7,3) for relevance; ParameterDef must match. 
let pack = make_pack(); let space = pack.parameter_space(); let def = space diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 199d9075..946856c7 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1,7 +1,7 @@ use khive_pack_brain::tunable::PackTunable; use khive_pack_kg::KgPack; use khive_pack_memory::MemoryPack; -use khive_runtime::{KhiveRuntime, RuntimeConfig, VerbRegistryBuilder}; +use khive_runtime::{KhiveRuntime, Namespace, RuntimeConfig, VerbRegistryBuilder}; use khive_types::Pack; use serde_json::json; use uuid::Uuid; @@ -92,7 +92,7 @@ async fn test_recall_decay_ranking() { // Manually backdate the old note to simulate age let old_uuid: uuid::Uuid = old_id.parse().unwrap(); - let note_store = rt.notes(None).unwrap(); + let note_store = rt.notes(&rt.authorize(Namespace::local())).unwrap(); let mut old_note = note_store.get_note(old_uuid).await.unwrap().unwrap(); old_note.created_at -= 90 * 86_400_000_000i64; // 90 days in microseconds note_store.upsert_note(old_note).await.unwrap(); @@ -275,7 +275,9 @@ async fn test_remember_source_id_not_in_properties() { .parse() .expect("valid uuid"); - let note_store = rt.notes(None).expect("note store"); + let note_store = rt + .notes(&rt.authorize(Namespace::local())) + .expect("note store"); let note = note_store .get_note(note_id) .await @@ -290,23 +292,25 @@ async fn test_remember_source_id_not_in_properties() { } } -/// Regression test for issue #100: decay_factor must be clamped to [0, 1]. +/// ADR-021 §4 (F108): decay_factor >= 0 is the only constraint — no upper cap. +/// Values above 1.0 are valid (fast-fading memories with very short effective half-lives). +/// Negative values are rejected with InvalidInput. 
#[tokio::test] -async fn test_remember_decay_factor_clamped() { +async fn test_remember_decay_factor_no_upper_cap() { let rt = make_runtime(); let registry = make_registry(rt.clone()); - // decay > 1.0 should be clamped to 1.0 + // decay_factor = 5.0 is valid — no upper cap per ADR-021 §4 let result = registry .dispatch( "remember", json!({ - "content": "memory with excessive decay", + "content": "memory with high decay rate", "decay": 5.0 }), ) .await - .expect("remember with large decay"); + .expect("remember with decay_factor > 1.0 should succeed"); let note_id: Uuid = result["note_id"] .as_str() @@ -314,32 +318,177 @@ async fn test_remember_decay_factor_clamped() { .parse() .expect("valid uuid"); - let note_store = rt.notes(None).expect("note store"); + let note_store = rt + .notes(&rt.authorize(Namespace::local())) + .expect("note store"); let note = note_store .get_note(note_id) .await .expect("get note") .expect("note exists"); + let df = note.decay_factor.unwrap_or(0.0); + // Stored value must match exactly (not clamped to 1.0) assert!( - note.decay_factor <= 1.0, - "decay_factor must be <= 1.0 after clamping, got {}", - note.decay_factor + (df - 5.0).abs() < 1e-10, + "decay_factor should be stored as-is (5.0), got {df}" ); +} + +/// ADR-021 §4 (F108): negative decay_factor is rejected. +#[tokio::test] +async fn test_remember_decay_factor_negative_rejected() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + let result = registry + .dispatch( + "remember", + json!({ + "content": "memory with negative decay", + "decay": -0.1 + }), + ) + .await; + + assert!(result.is_err(), "negative decay_factor must be rejected"); +} + +/// ADR-021 §4 (F107): remember always writes memory_type to properties. +/// When memory_type is absent, it defaults to "episodic". 
+#[tokio::test] +async fn test_remember_default_memory_type_written_to_properties() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + let result = registry + .dispatch( + "remember", + json!({ "content": "memory without explicit type" }), + ) + .await + .expect("remember without memory_type"); + + let note_id: Uuid = result["note_id"] + .as_str() + .unwrap() + .parse() + .expect("valid uuid"); + + // The response must carry memory_type + assert_eq!( + result["memory_type"].as_str(), + Some("episodic"), + "response must include default memory_type" + ); + + let note_store = rt + .notes(&rt.authorize(Namespace::local())) + .expect("note store"); + let note = note_store + .get_note(note_id) + .await + .expect("get note") + .expect("note exists"); + + let stored_type = note + .properties + .as_ref() + .and_then(|p| p.get("memory_type")) + .and_then(|v| v.as_str()); + assert_eq!( + stored_type, + Some("episodic"), + "memory_type must be written to properties even when not supplied" + ); +} + +/// ADR-021 §4 (F109): invalid UUID string in source_id is rejected with an error. +#[tokio::test] +async fn test_remember_invalid_source_id_uuid_rejected() { + let rt = make_runtime(); + let registry = make_registry(rt); + + let result = registry + .dispatch( + "remember", + json!({ + "content": "memory with bad source_id", + "source": "not-a-valid-uuid" + }), + ) + .await; + assert!( - note.decay_factor >= 0.0, - "decay_factor must be >= 0.0, got {}", - note.decay_factor + result.is_err(), + "invalid source_id UUID must cause an error, got: {result:?}" ); } +/// ADR-021 §4 (F108): importance outside [0, 1] is rejected. 
+#[tokio::test] +async fn test_remember_importance_out_of_range_rejected() { + let rt = make_runtime(); + let registry = make_registry(rt); + + let neg = registry + .dispatch("remember", json!({ "content": "test", "importance": -0.1 })) + .await; + assert!(neg.is_err(), "negative importance must be rejected"); + + let rt2 = make_runtime(); + let registry2 = make_registry(rt2); + let above = registry2 + .dispatch("remember", json!({ "content": "test", "importance": 1.1 })) + .await; + assert!(above.is_err(), "importance > 1 must be rejected"); +} + +/// ADR-033 §2 (F222): recall.rerank is callable and returns expected shape. +#[tokio::test] +async fn test_recall_rerank_passthrough_with_no_active_rerankers() { + let rt = make_runtime(); + let registry = make_registry(rt); + + let candidates = json!([ + { "note_id": "00000000-0000-0000-0000-000000000001", "fused_score": 0.8 }, + { "note_id": "00000000-0000-0000-0000-000000000002", "fused_score": 0.6 }, + ]); + + let result = registry + .dispatch("recall.rerank", json!({ "candidates": candidates })) + .await + .expect("recall.rerank with no active rerankers"); + + let reranked = result["reranked"].as_array().expect("reranked array"); + assert_eq!(reranked.len(), 2, "must return one entry per candidate"); + for entry in reranked { + let scores = entry["rerank_scores"] + .as_object() + .expect("rerank_scores object"); + assert!( + scores.is_empty(), + "no active rerankers → empty rerank_scores, got {scores:?}" + ); + } + let active = result["active_rerankers"] + .as_array() + .expect("active_rerankers array"); + assert!(active.is_empty(), "no active rerankers expected"); +} + #[test] fn test_memory_dotted_verbs_registered() { - let names: Vec<&str> = MemoryPack::VERBS.iter().map(|v| v.name).collect(); + let names: Vec<&str> = MemoryPack::HANDLERS.iter().map(|v| v.name).collect(); assert!(names.contains(&"recall.candidates")); assert!(names.contains(&"recall.fuse")); assert!(names.contains(&"recall.score")); 
assert!(names.contains(&"recall.embed")); + // F222: recall.rerank must be registered (ADR-033 §2) + assert!( + names.contains(&"recall.rerank"), + "recall.rerank not found in: {names:?}" + ); } #[tokio::test] @@ -557,13 +706,14 @@ async fn test_recall_excludes_non_memory_notes() { // Create 50 observation notes whose content matches the recall query — enough to // dominate a `limit=5` candidate pool at `limit * 4 = 20` without pre-filtering. + let tok = rt.authorize(Namespace::local()); for i in 0..50 { rt.create_note( - None, + &tok, "observation", None, &format!("observation {i} about attention mechanisms in neural networks"), - 0.5, + Some(0.5), None, vec![], ) diff --git a/crates/khive-pack-schedule/Cargo.toml b/crates/khive-pack-schedule/Cargo.toml new file mode 100644 index 00000000..861710d0 --- /dev/null +++ b/crates/khive-pack-schedule/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "khive-pack-schedule" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +description = "Schedule pack — time-triggered intent storage (remind, schedule, agenda, cancel) (ADR-040)" + +[dependencies] +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +inventory = { workspace = true } +async-trait = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true, features = ["test-util"] } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } diff --git a/crates/khive-pack-schedule/src/handlers.rs b/crates/khive-pack-schedule/src/handlers.rs new file mode 100644 index 
00000000..74d634ad --- /dev/null +++ b/crates/khive-pack-schedule/src/handlers.rs @@ -0,0 +1,313 @@ +//! Verb handler implementations for the schedule pack (ADR-040). +//! +//! All four verbs (`remind`, `schedule`, `agenda`, `cancel`) store and query +//! `scheduled_event` notes. Trigger evaluation is NOT performed by the pack — +//! the pack only stores intent. See ADR-040 §Trigger evaluation for execution modes. + +use chrono::Utc; +use serde::Deserialize; +use serde_json::{json, Value}; +use uuid::Uuid; + +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError}; +use khive_storage::note::Note; + +fn short_id(uuid: Uuid) -> String { + uuid.as_hyphenated().to_string().chars().take(8).collect() +} + +fn note_to_event_json(note: &Note) -> Value { + json!({ + "id": short_id(note.id), + "full_id": note.id, + "kind": "scheduled_event", + "content": note.content, + "namespace": note.namespace, + "properties": note.properties, + "created_at": note.created_at, + "updated_at": note.updated_at, + }) +} + +fn deser(params: Value) -> Result { + serde_json::from_value(params) + .map_err(|e| RuntimeError::InvalidInput(format!("bad params: {e}"))) +} + +/// Validate a cron expression (5-field) — only basic structure check in v1. 
+fn validate_repeat(repeat: &str) -> Result<(), RuntimeError> { + match repeat { + "daily" | "weekly" | "monthly" => Ok(()), + cron => { + let fields: Vec<&str> = cron.split_whitespace().collect(); + if fields.len() == 5 { + Ok(()) + } else { + Err(RuntimeError::InvalidInput(format!( + "invalid repeat expression {cron:?}: must be \"daily\", \"weekly\", \ + \"monthly\", or a 5-field cron expression" + ))) + } + } + } +} + +// ── param structs ──────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +pub(crate) struct RemindParams { + pub content: String, + pub at: String, + #[serde(default)] + pub repeat: Option, +} + +#[derive(Deserialize)] +pub(crate) struct ScheduleParams { + pub action: String, + pub at: String, + #[serde(default)] + pub repeat: Option, +} + +#[derive(Deserialize)] +pub(crate) struct AgendaParams { + #[serde(default)] + pub from: Option, + #[serde(default)] + pub to: Option, + #[serde(default)] + pub limit: Option, +} + +#[derive(Deserialize)] +pub(crate) struct CancelParams { + pub id: String, +} + +// ── handlers ───────────────────────────────────────────────────────────────── + +/// `remind` — create a time-triggered reminder (ADR-040 §remind). 
+pub(crate) async fn handle_remind( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: RemindParams = deser(params)?; + if p.content.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "remind: `content` must not be empty".into(), + )); + } + if p.at.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "remind: `at` must not be empty".into(), + )); + } + if let Some(ref r) = p.repeat { + validate_repeat(r)?; + } + + let properties = json!({ + "trigger_at": p.at, + "repeat": p.repeat, + "status": "pending", + "event_type": "remind", + "payload": null, + "fired_at": null, + "cancelled_at": null, + }); + + let note = runtime + .create_note( + token, + "scheduled_event", + None, + &p.content, + None, + Some(properties), + Vec::new(), + ) + .await?; + + Ok(json!({ + "id": short_id(note.id), + "full_id": note.id, + "event_type": "remind", + "trigger_at": p.at, + "repeat": p.repeat, + "status": "pending", + })) +} + +/// `schedule` — schedule a future verb dispatch (ADR-040 §schedule). 
+pub(crate) async fn handle_schedule( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: ScheduleParams = deser(params)?; + if p.action.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "schedule: `action` must not be empty".into(), + )); + } + if p.at.trim().is_empty() { + return Err(RuntimeError::InvalidInput( + "schedule: `at` must not be empty".into(), + )); + } + if let Some(ref r) = p.repeat { + validate_repeat(r)?; + } + + let properties = json!({ + "trigger_at": p.at, + "repeat": p.repeat, + "status": "pending", + "event_type": "schedule", + "payload": p.action, + "fired_at": null, + "cancelled_at": null, + }); + + let note = runtime + .create_note( + token, + "scheduled_event", + None, + &p.action, + None, + Some(properties), + Vec::new(), + ) + .await?; + + Ok(json!({ + "id": short_id(note.id), + "full_id": note.id, + "event_type": "schedule", + "trigger_at": p.at, + "repeat": p.repeat, + "status": "pending", + })) +} + +/// `agenda` — list upcoming scheduled events (ADR-040 §agenda). +pub(crate) async fn handle_agenda( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: AgendaParams = deser(params)?; + let limit = p.limit.unwrap_or(20).clamp(1, 200); + + let notes = runtime + .list_notes(token, Some("scheduled_event"), limit * 4, 0) + .await?; + + let mut events: Vec = notes + .iter() + .filter(|n| n.deleted_at.is_none()) + .filter(|n| { + let status = n + .properties + .as_ref() + .and_then(|p| p.get("status")) + .and_then(Value::as_str) + .unwrap_or(""); + status == "pending" + }) + .filter(|n| { + // Apply from/to window filter when provided. 
+ let trigger_at = n + .properties + .as_ref() + .and_then(|p| p.get("trigger_at")) + .and_then(Value::as_str) + .unwrap_or(""); + if let Some(ref from) = p.from { + if trigger_at < from.as_str() { + return false; + } + } + if let Some(ref to) = p.to { + if trigger_at > to.as_str() { + return false; + } + } + true + }) + .map(note_to_event_json) + .collect(); + + // Sort ascending by trigger_at (lexicographic on ISO 8601 strings works correctly). + events.sort_by(|a, b| { + let ta = a + .get("properties") + .and_then(|p| p.get("trigger_at")) + .and_then(Value::as_str) + .unwrap_or(""); + let tb = b + .get("properties") + .and_then(|p| p.get("trigger_at")) + .and_then(Value::as_str) + .unwrap_or(""); + ta.cmp(tb) + }); + + events.truncate(limit as usize); + let count = events.len(); + + Ok(json!({ "events": events, "count": count })) +} + +/// `cancel` — cancel a scheduled event (ADR-040 §cancel). +pub(crate) async fn handle_cancel( + runtime: &KhiveRuntime, + token: &NamespaceToken, + params: Value, +) -> Result { + let p: CancelParams = deser(params)?; + let id = Uuid::parse_str(&p.id) + .map_err(|_| RuntimeError::InvalidInput(format!("cancel: invalid UUID {:?}", p.id)))?; + + let store = runtime.notes(token)?; + let mut note = store + .get_note(id) + .await + .map_err(|e| RuntimeError::Internal(format!("cancel: get_note: {e}")))? 
+ .ok_or_else(|| RuntimeError::NotFound(format!("cancel: event {id} not found")))?; + + if note.namespace != token.namespace().as_str() { + return Err(RuntimeError::NotFound(format!( + "cancel: event {id} not found" + ))); + } + if note.kind != "scheduled_event" { + return Err(RuntimeError::InvalidInput(format!( + "cancel: note {id} is kind {:?}, expected \"scheduled_event\"", + note.kind + ))); + } + + let cancelled_at = Utc::now().to_rfc3339(); + let mut props = note.properties.clone().unwrap_or_else(|| json!({})); + props["status"] = json!("cancelled"); + props["cancelled_at"] = json!(cancelled_at); + note.properties = Some(props.clone()); + note.updated_at = Utc::now().timestamp_micros(); + + store + .upsert_note(note) + .await + .map_err(|e| RuntimeError::Internal(format!("cancel: upsert_note: {e}")))?; + + Ok(json!({ + "id": short_id(id), + "full_id": id, + "status": "cancelled", + "cancelled_at": cancelled_at, + "properties": props, + })) +} diff --git a/crates/khive-pack-schedule/src/lib.rs b/crates/khive-pack-schedule/src/lib.rs new file mode 100644 index 00000000..998c0f5a --- /dev/null +++ b/crates/khive-pack-schedule/src/lib.rs @@ -0,0 +1,110 @@ +//! pack-schedule — Schedule pack (ADR-040). 
+pub mod handlers; + +use async_trait::async_trait; +use serde_json::Value; + +use khive_runtime::pack::PackRuntime; +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry}; +use khive_types::{HandlerDef, Pack, Visibility}; + +pub struct SchedulePack { + runtime: KhiveRuntime, +} + +impl Pack for SchedulePack { + const NAME: &'static str = "schedule"; + const NOTE_KINDS: &'static [&'static str] = &["scheduled_event"]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = &SCHEDULE_HANDLERS; + const REQUIRES: &'static [&'static str] = &["kg"]; +} + +static SCHEDULE_HANDLERS: [HandlerDef; 4] = [ + HandlerDef { + name: "remind", + description: "Create a time-triggered reminder.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Commissive, + }, + HandlerDef { + name: "schedule", + description: "Schedule a future verb dispatch.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Commissive, + }, + HandlerDef { + name: "agenda", + description: "List upcoming scheduled events.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Assertive, + }, + HandlerDef { + name: "cancel", + description: "Cancel a scheduled event.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Declaration, + }, +]; + +impl SchedulePack { + pub fn new(runtime: KhiveRuntime) -> Self { + Self { runtime } + } + pub(crate) fn runtime(&self) -> &KhiveRuntime { + &self.runtime + } +} + +struct SchedulePackFactory; + +impl khive_runtime::PackFactory for SchedulePackFactory { + fn name(&self) -> &'static str { + "schedule" + } + fn requires(&self) -> &'static [&'static str] { + &["kg"] + } + fn create(&self, runtime: KhiveRuntime) -> Box { + Box::new(SchedulePack::new(runtime)) + } +} + +inventory::submit! 
{ khive_runtime::PackRegistration(&SchedulePackFactory) } + +#[async_trait] +impl PackRuntime for SchedulePack { + fn name(&self) -> &str { + ::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + ::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + ::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + &SCHEDULE_HANDLERS + } + fn requires(&self) -> &'static [&'static str] { + ::REQUIRES + } + + async fn dispatch( + &self, + verb: &str, + params: Value, + _registry: &VerbRegistry, + token: &NamespaceToken, + ) -> Result { + match verb { + "remind" => handlers::handle_remind(self.runtime(), token, params).await, + "schedule" => handlers::handle_schedule(self.runtime(), token, params).await, + "agenda" => handlers::handle_agenda(self.runtime(), token, params).await, + "cancel" => handlers::handle_cancel(self.runtime(), token, params).await, + _ => Err(RuntimeError::InvalidInput(format!( + "schedule pack does not handle verb {verb:?}" + ))), + } + } +} diff --git a/crates/khive-pack-schedule/tests/integration.rs b/crates/khive-pack-schedule/tests/integration.rs new file mode 100644 index 00000000..21b2e8d2 --- /dev/null +++ b/crates/khive-pack-schedule/tests/integration.rs @@ -0,0 +1,115 @@ +//! Smoke tests for the schedule pack (ADR-040). 
+ +use khive_pack_schedule::SchedulePack; +use khive_runtime::{KhiveRuntime, VerbRegistry, VerbRegistryBuilder}; +use khive_types::Pack; + +fn build_registry() -> (VerbRegistry, KhiveRuntime) { + let runtime = KhiveRuntime::memory().expect("in-memory runtime"); + let mut builder = VerbRegistryBuilder::new(); + builder.register(khive_pack_kg::KgPack::new(runtime.clone())); + builder.register(SchedulePack::new(runtime.clone())); + let registry = builder.build().expect("registry builds"); + (registry, runtime) +} + +#[test] +fn schedule_pack_declares_scheduled_event_note_kind() { + assert!(SchedulePack::NOTE_KINDS.contains(&"scheduled_event")); +} + +#[test] +fn schedule_pack_declares_four_handlers() { + assert_eq!(SchedulePack::HANDLERS.len(), 4); + let names: Vec<&str> = SchedulePack::HANDLERS.iter().map(|h| h.name).collect(); + assert!(names.contains(&"remind")); + assert!(names.contains(&"schedule")); + assert!(names.contains(&"agenda")); + assert!(names.contains(&"cancel")); +} + +#[test] +fn schedule_pack_requires_kg() { + assert_eq!(SchedulePack::REQUIRES, &["kg"]); +} + +#[tokio::test] +async fn remind_creates_pending_event() { + let (registry, _rt) = build_registry(); + + let result = registry + .dispatch( + "remind", + serde_json::json!({ + "content": "check status", + "at": "2026-06-01T09:00:00Z" + }), + ) + .await + .expect("remind succeeds"); + + assert!(result.get("id").is_some(), "remind returns id: {result}"); + assert_eq!(result["status"], "pending"); + assert_eq!(result["event_type"], "remind"); +} + +#[tokio::test] +async fn schedule_creates_pending_event_with_action() { + let (registry, _rt) = build_registry(); + + let result = registry + .dispatch( + "schedule", + serde_json::json!({ + "action": "create(kind=entity, name=test)", + "at": "2026-06-01T10:00:00Z" + }), + ) + .await + .expect("schedule succeeds"); + + assert!(result.get("id").is_some(), "schedule returns id: {result}"); + assert_eq!(result["event_type"], "schedule"); +} + 
+#[tokio::test] +async fn agenda_returns_pending_events() { + let (registry, _rt) = build_registry(); + + registry + .dispatch( + "remind", + serde_json::json!({ "content": "hello", "at": "2026-07-01T00:00:00Z" }), + ) + .await + .expect("remind succeeds"); + + let agenda = registry + .dispatch("agenda", serde_json::json!({ "limit": 10 })) + .await + .expect("agenda succeeds"); + + let count = agenda["count"].as_u64().unwrap_or(0); + assert!( + count >= 1, + "agenda should return at least 1 event: {agenda}" + ); +} + +#[tokio::test] +async fn remind_with_invalid_repeat_is_rejected() { + let (registry, _rt) = build_registry(); + + let err = registry + .dispatch( + "remind", + serde_json::json!({ + "content": "hello", + "at": "2026-06-01T09:00:00Z", + "repeat": "not-valid-cron" + }), + ) + .await + .unwrap_err(); + assert!(err.to_string().contains("repeat") || err.to_string().contains("cron")); +} diff --git a/crates/khive-pack-template/Cargo.toml b/crates/khive-pack-template/Cargo.toml new file mode 100644 index 00000000..f2aeff20 --- /dev/null +++ b/crates/khive-pack-template/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "khive-pack-template" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +description = "Reference template for new khive packs (ADR-023 §8). Copy this crate to get a working pack scaffold." 
+ +[dependencies] +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +inventory = { workspace = true } +async-trait = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true, features = ["test-util"] } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } diff --git a/crates/khive-pack-template/src/handlers.rs b/crates/khive-pack-template/src/handlers.rs new file mode 100644 index 00000000..6ae273c9 --- /dev/null +++ b/crates/khive-pack-template/src/handlers.rs @@ -0,0 +1,25 @@ +//! Verb handler stubs for the template pack (ADR-023 §8). +//! +//! Replace each `unimplemented!()` with real logic. See `crates/khive-pack-kg/src/handlers.rs` +//! for a complete reference implementation. +//! +//! Handler signature pattern: +//! `async fn handle_(runtime, token, params) -> Result` +//! +//! Params arrive as `serde_json::Value`; deserialize via `serde_json::from_value`. +//! Return a JSON `Value` or a `RuntimeError`. Errors are caught by the registry and +//! returned as `{ ok: false, error: "..." }` without aborting the batch. + +use serde_json::{json, Value}; + +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError}; + +/// `my_verb` — replace with real logic. +pub(crate) async fn handle_my_verb( + _runtime: &KhiveRuntime, + _token: &NamespaceToken, + params: Value, +) -> Result { + // TODO: implement + Ok(json!({ "ok": true, "params": params })) +} diff --git a/crates/khive-pack-template/src/lib.rs b/crates/khive-pack-template/src/lib.rs new file mode 100644 index 00000000..59e2e11b --- /dev/null +++ b/crates/khive-pack-template/src/lib.rs @@ -0,0 +1,126 @@ +//! khive-pack-template — reference scaffold for new packs (ADR-023 §8). +//! +//! # How to create a new pack +//! +//! 1. 
Copy this crate directory to `crates/khive-pack-/`. +//! 2. Rename the crate in `Cargo.toml` (name, description). +//! 3. Set `PACK_NAME` to your pack's canonical name (e.g. `"exp"`). +//! 4. Update `NOTE_KINDS` / `ENTITY_KINDS` in `vocab.rs`. +//! 5. Add your verbs to `HANDLERS` below; fill in `handlers.rs`. +//! 6. Add the crate to the workspace `Cargo.toml`. +//! 7. Force-link in `khive-mcp/src/pack.rs` and `kkernel/src/lib.rs`. +//! 8. Add the crate dep to `khive-mcp/Cargo.toml` and `kkernel/Cargo.toml`. +//! +//! Reference implementation: `crates/khive-pack-kg/`. +//! +//! No macros, no DSLs. Plain Rust — rust-analyzer, debugger, and LLMs all +//! work directly on this code without expansion. + +pub mod handlers; +pub mod vocab; + +use async_trait::async_trait; +use serde_json::Value; + +use khive_runtime::pack::PackRuntime; +use khive_runtime::{KhiveRuntime, NamespaceToken, RuntimeError, VerbRegistry}; +use khive_types::{HandlerDef, Pack, Visibility}; + +/// Canonical pack name. Must match the factory below and `PackFactory::name()`. +const PACK_NAME: &str = "template"; + +/// Template pack — replace with your pack's struct name and logic. +pub struct TemplatePack { + runtime: KhiveRuntime, +} + +impl Pack for TemplatePack { + const NAME: &'static str = PACK_NAME; + /// Declare note kinds this pack contributes. Must not overlap with other packs. + const NOTE_KINDS: &'static [&'static str] = vocab::NOTE_KINDS; + /// Declare entity kinds this pack contributes. Must not overlap with other packs. + const ENTITY_KINDS: &'static [&'static str] = vocab::ENTITY_KINDS; + /// Handler table. Each entry is one verb or subhandler the pack can dispatch. + const HANDLERS: &'static [HandlerDef] = &TEMPLATE_HANDLERS; + /// Pack dependencies. The named packs must be in the configured `KHIVE_PACKS` list. + const REQUIRES: &'static [&'static str] = &["kg"]; +} + +/// Handler table. Add one `HandlerDef` per verb. 
+/// +/// `Visibility::Verb` = exposed on the MCP `request` tool (agent-facing). +/// `Visibility::Subhandler` = CLI-only / internal; not on the MCP wire. +static TEMPLATE_HANDLERS: [HandlerDef; 1] = [HandlerDef { + name: "my_verb", + description: "Replace with your verb's description.", + visibility: Visibility::Verb, + category: khive_types::VerbCategory::Directive, +}]; + +impl TemplatePack { + pub fn new(runtime: KhiveRuntime) -> Self { + Self { runtime } + } + #[allow(dead_code)] + fn runtime(&self) -> &KhiveRuntime { + &self.runtime + } +} + +// ── ADR-027: inventory self-registration ───────────────────────────────────── +// +// This block registers the pack factory so the linker includes it in the +// binary's inventory at startup. One `inventory::submit!` per pack crate. + +struct TemplatePackFactory; + +impl khive_runtime::PackFactory for TemplatePackFactory { + fn name(&self) -> &'static str { + PACK_NAME + } + fn requires(&self) -> &'static [&'static str] { + &["kg"] + } + fn create(&self, runtime: KhiveRuntime) -> Box { + Box::new(TemplatePack::new(runtime)) + } +} + +inventory::submit! { khive_runtime::PackRegistration(&TemplatePackFactory) } + +// ── PackRuntime impl ───────────────────────────────────────────────────────── + +#[async_trait] +impl PackRuntime for TemplatePack { + fn name(&self) -> &str { + ::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + ::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + ::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + &TEMPLATE_HANDLERS + } + fn requires(&self) -> &'static [&'static str] { + ::REQUIRES + } + + /// Dispatch a verb call. Add a match arm for each entry in `HANDLERS`. 
+ async fn dispatch( + &self, + verb: &str, + params: Value, + _registry: &VerbRegistry, + token: &NamespaceToken, + ) -> Result { + match verb { + "my_verb" => handlers::handle_my_verb(self.runtime(), token, params).await, + _ => Err(RuntimeError::InvalidInput(format!( + "{PACK_NAME} pack does not handle verb {verb:?}" + ))), + } + } +} diff --git a/crates/khive-pack-template/src/vocab.rs b/crates/khive-pack-template/src/vocab.rs new file mode 100644 index 00000000..73d94b8a --- /dev/null +++ b/crates/khive-pack-template/src/vocab.rs @@ -0,0 +1,19 @@ +//! Vocabulary for the template pack. +//! +//! Define your pack's note kinds and entity kinds here. +//! These are registered with the runtime at link time via `Pack` impl constants. +//! +//! ADR-023 §1: packs own closed sets of kinds declared as `&'static [&'static str]`. +//! Kinds must not overlap with other packs in the same binary (boot-time check). + +/// Note kinds this pack contributes to the vocabulary. +/// +/// Example: `"my_note_kind"`. +/// Leave empty (`&[]`) if your pack has no custom note kinds. +pub const NOTE_KINDS: &[&str] = &["template_note"]; + +/// Entity kinds this pack contributes to the vocabulary. +/// +/// Example: `"my_entity_kind"`. +/// Leave empty (`&[]`) if your pack has no custom entity kinds. +pub const ENTITY_KINDS: &[&str] = &[]; diff --git a/crates/khive-pack-template/tests/integration.rs b/crates/khive-pack-template/tests/integration.rs new file mode 100644 index 00000000..ee074fec --- /dev/null +++ b/crates/khive-pack-template/tests/integration.rs @@ -0,0 +1,57 @@ +//! Smoke test for the template pack (ADR-023 §8). +//! +//! Copy and adapt this file when scaffolding a new pack. 
+ +use khive_pack_template::TemplatePack; +use khive_runtime::{KhiveRuntime, VerbRegistry, VerbRegistryBuilder}; +use khive_types::Pack; + +fn build_registry() -> (VerbRegistry, KhiveRuntime) { + let runtime = KhiveRuntime::memory().expect("in-memory runtime"); + let mut builder = VerbRegistryBuilder::new(); + builder.register(khive_pack_kg::KgPack::new(runtime.clone())); + builder.register(TemplatePack::new(runtime.clone())); + let registry = builder.build().expect("registry builds"); + (registry, runtime) +} + +#[test] +fn template_pack_name_is_stable() { + assert_eq!(TemplatePack::NAME, "template"); +} + +#[test] +fn template_pack_declares_expected_note_kind() { + assert!(TemplatePack::NOTE_KINDS.contains(&"template_note")); +} + +#[test] +fn template_pack_requires_kg() { + assert_eq!(TemplatePack::REQUIRES, &["kg"]); +} + +#[tokio::test] +async fn my_verb_returns_ok() { + let (registry, _rt) = build_registry(); + + let result = registry + .dispatch("my_verb", serde_json::json!({ "hello": "world" })) + .await + .expect("my_verb dispatches"); + + assert_eq!(result["ok"], true); +} + +#[tokio::test] +async fn unknown_verb_returns_error() { + let (registry, _rt) = build_registry(); + + let err = registry + .dispatch("no_such_verb_xyz", serde_json::Value::Null) + .await + .unwrap_err(); + + assert!( + err.to_string().contains("no_such_verb_xyz") || err.to_string().contains("unknown verb") + ); +} diff --git a/crates/khive-query/Cargo.toml b/crates/khive-query/Cargo.toml index 88a4744b..d9906312 100644 --- a/crates/khive-query/Cargo.toml +++ b/crates/khive-query/Cargo.toml @@ -11,8 +11,7 @@ categories.workspace = true description = "GQL and SPARQL parsers with SQL compiler for knowledge graph queries." 
[dependencies] -khive-storage = { version = "0.2.0", path = "../khive-storage" } -khive-types = { version = "0.2.0", path = "../khive-types" } +khive-types = { version = "0.2.1", path = "../khive-types" } thiserror = { workspace = true } [dev-dependencies] diff --git a/crates/khive-query/src/ast.rs b/crates/khive-query/src/ast.rs index 0ec1840f..a9cc7401 100644 --- a/crates/khive-query/src/ast.rs +++ b/crates/khive-query/src/ast.rs @@ -2,14 +2,79 @@ use std::collections::HashMap; +/// A SQL parameter value local to the query layer. +/// +/// Deliberately mirrors the subset of `khive_storage::types::SqlValue` that the +/// query compiler needs to emit. The runtime converts these to the storage-layer +/// `SqlValue` at the query–storage boundary (ADR-008 §"Query crate compiles +/// against khive-types only"). +#[derive(Clone, Debug)] +pub enum QueryValue { + Null, + Integer(i64), + Float(f64), + Text(String), + Blob(Vec), +} + #[derive(Debug, Clone)] pub struct GqlQuery { pub pattern: MatchPattern, - pub where_clause: Vec, + pub where_clause: WhereExpr, pub return_items: Vec, pub limit: Option, } +/// A WHERE expression tree supporting AND, OR, and leaf conditions (ADR-008 +/// §"GQL WHERE expression"). +#[derive(Debug, Clone)] +pub enum WhereExpr { + /// AND of two sub-expressions. + And(Box, Box), + /// OR of two sub-expressions. + Or(Box, Box), + /// A single scalar condition. + Condition(Condition), + /// Always-true — used when there is no WHERE clause. + True, +} + +impl WhereExpr { + /// Iterate all leaf conditions in the expression tree (depth-first). 
+ pub fn conditions(&self) -> impl Iterator { + let mut stack = vec![self]; + let mut out: Vec<&Condition> = Vec::new(); + while let Some(expr) = stack.pop() { + match expr { + WhereExpr::Condition(c) => out.push(c), + WhereExpr::And(l, r) | WhereExpr::Or(l, r) => { + stack.push(r); + stack.push(l); + } + WhereExpr::True => {} + } + } + out.into_iter() + } + + /// Mutable walk — applies `f` to every leaf condition. + pub fn for_each_condition_mut(&mut self, f: &mut impl FnMut(&mut Condition)) { + match self { + WhereExpr::Condition(c) => f(c), + WhereExpr::And(l, r) | WhereExpr::Or(l, r) => { + l.for_each_condition_mut(f); + r.for_each_condition_mut(f); + } + WhereExpr::True => {} + } + } + + /// Return `true` when the expression has no conditions (is always-true). + pub fn is_true(&self) -> bool { + matches!(self, WhereExpr::True) + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum ReturnItem { Variable(String), @@ -59,6 +124,9 @@ pub enum PatternElement { pub struct NodePattern { pub variable: Option, pub kind: Option, + /// Governed subtype within the kind (e.g. "researcher" within "person"). + /// Compiled to `entity_type = ?` — a direct column, not a property extraction. + pub entity_type: Option, pub properties: HashMap, } diff --git a/crates/khive-query/src/compilers/sql.rs b/crates/khive-query/src/compilers/sql.rs index c64b9ad9..5ae1cc33 100644 --- a/crates/khive-query/src/compilers/sql.rs +++ b/crates/khive-query/src/compilers/sql.rs @@ -4,20 +4,46 @@ //! - Fixed-length patterns (all edges *1..1) → JOIN chain //! - Variable-length patterns (any edge *N..M where M>1) → recursive CTE //! +//! Synthetic edge paths (ADR-041): +//! - Relations prefixed `observed_as_*` join against `event_observations`, not `graph_edges`. +//! //! Security invariants (MAJ-1/MAJ-2/MAJ-3 from critic review): //! - Namespace injection: WHERE clause always comes from CompileOptions.scopes, never the query. //! 
- Edge property whitelist: only `relation` and `weight` are queryable edge columns. -//! - Depth cap: recursive CTE depth is min(requested, 10). +//! - Depth cap: recursive CTE depth capped at MAX_DEPTH; exceeding it errors at validation. use crate::ast::*; use crate::error::QueryError; use crate::validate::{validate_with_warnings, MAX_DEPTH}; -use khive_storage::types::SqlValue; + +/// Observation roles used by the synthetic edge compiler (ADR-041 §8). +const SYNTHETIC_RELATIONS: &[&str] = &[ + "observed_as_candidate", + "observed_as_selected", + "observed_as_target", + "observed_as_signal", +]; + +/// Returns `true` when the relation string is a synthetic ADR-041 observation edge. +fn is_synthetic(rel: &str) -> bool { + SYNTHETIC_RELATIONS.contains(&rel) +} + +/// Returns the `role` value that maps to the given synthetic relation. +fn synthetic_role(rel: &str) -> Option<&'static str> { + match rel { + "observed_as_candidate" => Some("candidate"), + "observed_as_selected" => Some("selected"), + "observed_as_target" => Some("target"), + "observed_as_signal" => Some("signal"), + _ => None, + } +} #[derive(Debug)] pub struct CompiledQuery { pub sql: String, - pub params: Vec, + pub params: Vec, pub return_vars: Vec, pub warnings: Vec, } @@ -56,18 +82,18 @@ pub fn compile(query: &GqlQuery, opts: &CompileOptions) -> Result) -> String { +fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec) -> String { if opts.scopes.is_empty() { String::new() } else if opts.scopes.len() == 1 { - params.push(SqlValue::Text(opts.scopes[0].clone())); + params.push(QueryValue::Text(opts.scopes[0].clone())); format!(" AND {alias}.namespace = ?{}", params.len()) } else { let placeholders: Vec = opts .scopes .iter() .map(|s| { - params.push(SqlValue::Text(s.clone())); + params.push(QueryValue::Text(s.clone())); format!("?{}", params.len()) }) .collect(); @@ -75,6 +101,42 @@ fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec ( + 
std::collections::HashSet, + std::collections::HashSet, +) { + let mut source_set = std::collections::HashSet::new(); + let mut target_set = std::collections::HashSet::new(); + let mut node_idx = 0usize; + let mut prev_node_idx: Option = None; + for element in elements { + match element { + PatternElement::Node(_) => { + prev_node_idx = Some(node_idx); + node_idx += 1; + } + PatternElement::Edge(ep) => { + let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r)); + if has_synthetic { + if let Some(src_idx) = prev_node_idx { + source_set.insert(src_idx); + // The target is the next node (current node_idx). + target_set.insert(node_idx); + } + } + } + } + } + (source_set, target_set) +} + /// Compile fixed-length patterns to a chain of JOINs. /// /// MATCH (a:concept)-[e:introduced_by]->(b:paper) WHERE ... RETURN a, e, b LIMIT 10 @@ -86,11 +148,14 @@ fn namespace_filter(alias: &str, opts: &CompileOptions, params: &mut Vec Result { - let mut params: Vec = Vec::new(); + let mut params: Vec = Vec::new(); let mut from_parts: Vec = Vec::new(); let mut join_parts: Vec = Vec::new(); let mut where_parts: Vec = Vec::new(); @@ -101,6 +166,11 @@ fn compile_fixed_length( let mut var_to_alias: std::collections::HashMap = std::collections::HashMap::new(); + // Pre-compute which node indices are endpoints of synthetic edges. + // Source nodes bind to `events`; target nodes bind to `notes`. 
+ let (event_source_indices, note_target_indices) = + synthetic_endpoint_node_indices(&query.pattern.elements); + let mut node_idx = 0usize; let mut edge_idx = 0usize; @@ -110,38 +180,121 @@ fn compile_fixed_length( let alias = format!("n{node_idx}"); node_aliases.push(alias.clone()); + let is_event_source = event_source_indices.contains(&node_idx); + let is_note_target = note_target_indices.contains(&node_idx); + if node_idx == 0 { - from_parts.push(format!("entities {alias}")); + if is_event_source { + from_parts.push(format!("events {alias}")); + } else { + // Note targets are joined by the synthetic edge handler, not FROM. + if !is_note_target { + from_parts.push(format!("entities {alias}")); + } + } } - where_parts.push(format!("{alias}.deleted_at IS NULL")); + if is_event_source { + // Events table does not have `deleted_at`; filter is omitted. + // Namespace filter uses the `events.namespace` column directly. + let ns_filter = namespace_filter(&alias, opts, &mut params); + if !ns_filter.is_empty() { + where_parts.push(ns_filter.trim_start_matches(" AND ").to_string()); + } + // `kind` on an event node filters events.kind (e.g. "recall_executed"). + if let Some(ref kind) = np.kind { + params.push(QueryValue::Text(kind.clone())); + where_parts.push(format!("{alias}.kind = ?{}", params.len())); + } + // entity_type and properties are not columns on events — reject explicitly. + if np.entity_type.is_some() { + return Err(QueryError::Compile( + "event nodes do not have an entity_type column".into(), + )); + } + if !np.properties.is_empty() { + return Err(QueryError::Compile( + "event nodes do not support inline property filters; \ + use a WHERE clause on verb, outcome, or payload fields" + .into(), + )); + } + } else if is_note_target { + // Note targets: `notes` table (joined by the synthetic edge handler). 
+ where_parts.push(format!("{alias}.deleted_at IS NULL")); - let ns_filter = namespace_filter(&alias, opts, &mut params); - if !ns_filter.is_empty() { - where_parts.push(ns_filter.trim_start_matches(" AND ").to_string()); - } + let ns_filter = namespace_filter(&alias, opts, &mut params); + if !ns_filter.is_empty() { + where_parts.push(ns_filter.trim_start_matches(" AND ").to_string()); + } - if let Some(ref kind) = np.kind { - params.push(SqlValue::Text(kind.clone())); - where_parts.push(format!("{alias}.kind = ?{}", params.len())); - } + if let Some(ref kind) = np.kind { + params.push(QueryValue::Text(kind.clone())); + where_parts.push(format!("{alias}.kind = ?{}", params.len())); + } - for (key, val) in &np.properties { - params.push(SqlValue::Text(val.clone())); - if key == "name" { - where_parts - .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len())); - } else { - where_parts.push(format!( - "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE", - key.replace('\'', "''"), - params.len() + // entity_type does not exist on notes — reject explicitly. 
+ if np.entity_type.is_some() { + return Err(QueryError::Compile( + "observed note targets do not have an entity_type column".into(), )); } + + for (key, val) in &np.properties { + params.push(QueryValue::Text(val.clone())); + if key == "name" || key == "content" { + where_parts + .push(format!("{alias}.{key} = ?{} COLLATE NOCASE", params.len())); + } else { + where_parts.push(format!( + "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE", + key.replace('\'', "''"), + params.len() + )); + } + } + } else { + where_parts.push(format!("{alias}.deleted_at IS NULL")); + + let ns_filter = namespace_filter(&alias, opts, &mut params); + if !ns_filter.is_empty() { + where_parts.push(ns_filter.trim_start_matches(" AND ").to_string()); + } + + if let Some(ref kind) = np.kind { + params.push(QueryValue::Text(kind.clone())); + where_parts.push(format!("{alias}.kind = ?{}", params.len())); + } + + if let Some(ref et) = np.entity_type { + params.push(QueryValue::Text(et.clone())); + where_parts.push(format!("{alias}.entity_type = ?{}", params.len())); + } + + for (key, val) in &np.properties { + params.push(QueryValue::Text(val.clone())); + if key == "name" { + where_parts + .push(format!("{alias}.name = ?{} COLLATE NOCASE", params.len())); + } else { + where_parts.push(format!( + "json_extract({alias}.properties, '$.{}') = ?{} COLLATE NOCASE", + key.replace('\'', "''"), + params.len() + )); + } + } } if let Some(ref var) = np.variable { - var_to_alias.insert(var.clone(), (alias.clone(), VarKind::Node)); + let kind = if is_event_source { + VarKind::EventNode + } else if is_note_target { + VarKind::NoteNode + } else { + VarKind::Node + }; + var_to_alias.insert(var.clone(), (alias.clone(), kind)); } node_idx += 1; @@ -149,64 +302,124 @@ fn compile_fixed_length( PatternElement::Edge(ep) => { let e_alias = format!("e{edge_idx}"); let prev_node = &node_aliases[node_aliases.len() - 1]; - - edge_aliases.push(e_alias.clone()); - - let (source_join, target_join) = match 
ep.direction { - EdgeDirection::Out => ( - format!("{e_alias}.source_id = {prev_node}.id"), - "target_id", - ), - EdgeDirection::In => ( - format!("{e_alias}.target_id = {prev_node}.id"), - "source_id", - ), - EdgeDirection::Both => ( - format!( - "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)" - ), - "CASE_BOTH", - ), - }; - let next_alias = format!("n{}", node_idx); - let next_join_col = if target_join == "CASE_BOTH" { - format!( - "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END" - ) - } else { - format!("{e_alias}.{target_join}") - }; - - join_parts.push(format!("JOIN graph_edges {e_alias} ON {source_join}")); + edge_aliases.push(e_alias.clone()); - let ens_filter = namespace_filter(&e_alias, opts, &mut params); - if !ens_filter.is_empty() { - where_parts.push(ens_filter.trim_start_matches(" AND ").to_string()); + // Detect synthetic event_observations edges (ADR-041 §8). + // A synthetic edge is one whose only relation(s) are observed_as_* names. + // Mixed synthetic+canonical relations are rejected: the two tables don't share + // a common join key that would make an OR across them meaningful. + let has_synthetic = ep.relations.iter().any(|r| is_synthetic(r)); + let has_canonical = ep.relations.iter().any(|r| !is_synthetic(r)); + if has_synthetic && has_canonical { + return Err(QueryError::Compile( + "cannot mix synthetic observed_as_* relations with canonical edge relations \ + in a single edge pattern" + .into(), + )); } - join_parts.push(format!( - "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}" - )); - - if !ep.relations.is_empty() { - if ep.relations.len() == 1 { - params.push(SqlValue::Text(ep.relations[0].clone())); - where_parts.push(format!("{e_alias}.relation = ?{}", params.len())); - } else { - let placeholders: Vec = ep - .relations + if has_synthetic { + // Synthetic edge: join event_observations. 
+ // Direction is always event → entity/note (OUT from the event node). + // The event node is the source (prev_node); the entity/note is the target. + if !matches!(ep.direction, EdgeDirection::Out) { + return Err(QueryError::Compile( + "synthetic observed_as_* edges are always event → entity (outbound only)".into(), + )); + } + join_parts.push(format!( + "JOIN event_observations {e_alias} ON {e_alias}.event_id = {prev_node}.id" + )); + // Roles: collect the unique role values from the synthetic relation names. + let roles: Vec<&'static str> = ep + .relations + .iter() + .filter_map(|r| synthetic_role(r)) + .collect(); + if roles.len() == 1 { + params.push(QueryValue::Text(roles[0].to_string())); + where_parts.push(format!("{e_alias}.role = ?{}", params.len())); + } else if roles.len() > 1 { + let placeholders: Vec = roles .iter() .map(|r| { - params.push(SqlValue::Text(r.clone())); + params.push(QueryValue::Text(r.to_string())); format!("?{}", params.len()) }) .collect(); - where_parts.push(format!( - "{e_alias}.relation IN ({})", - placeholders.join(", ") - )); + where_parts + .push(format!("{e_alias}.role IN ({})", placeholders.join(", "))); + } + // Join the target node via event_observations.entity_id. + // The `referent_kind` column discriminates between note and entity + // substrates. Per ADR-041, recall/rerank observations always target + // notes (`referent_kind='note'`); we filter to note substrate and join + // the `notes` table. An explicit `AND e0.referent_kind='note'` + // prevents cross-substrate ID collisions. + join_parts.push(format!( + "JOIN notes {next_alias} ON {next_alias}.id = {e_alias}.entity_id \ + AND {e_alias}.referent_kind = 'note'" + )); + } else { + // Standard canonical edge: join graph_edges. 
+ let (source_join, target_join) = match ep.direction { + EdgeDirection::Out => ( + format!("{e_alias}.source_id = {prev_node}.id"), + "target_id", + ), + EdgeDirection::In => ( + format!("{e_alias}.target_id = {prev_node}.id"), + "source_id", + ), + EdgeDirection::Both => ( + format!( + "({e_alias}.source_id = {prev_node}.id OR {e_alias}.target_id = {prev_node}.id)" + ), + "CASE_BOTH", + ), + }; + + let next_join_col = if target_join == "CASE_BOTH" { + format!( + "CASE WHEN {e_alias}.source_id = {prev_node}.id THEN {e_alias}.target_id ELSE {e_alias}.source_id END" + ) + } else { + format!("{e_alias}.{target_join}") + }; + + join_parts.push(format!( + "JOIN graph_edges {e_alias} ON {source_join} AND {e_alias}.deleted_at IS NULL" + )); + + let ens_filter = namespace_filter(&e_alias, opts, &mut params); + if !ens_filter.is_empty() { + where_parts.push(ens_filter.trim_start_matches(" AND ").to_string()); + } + + join_parts.push(format!( + "JOIN entities {next_alias} ON {next_alias}.id = {next_join_col}" + )); + + if !ep.relations.is_empty() { + if ep.relations.len() == 1 { + params.push(QueryValue::Text(ep.relations[0].clone())); + where_parts.push(format!("{e_alias}.relation = ?{}", params.len())); + } else { + let placeholders: Vec = ep + .relations + .iter() + .map(|r| { + params.push(QueryValue::Text(r.clone())); + format!("?{}", params.len()) + }) + .collect(); + where_parts.push(format!( + "{e_alias}.relation IN ({})", + placeholders.join(", ") + )); + } } } @@ -219,71 +432,9 @@ fn compile_fixed_length( } } - // WHERE clause conditions from GQL WHERE - for cond in &query.where_clause { - let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| { - QueryError::Compile(format!( - "unknown variable '{}' in WHERE clause", - cond.variable - )) - })?; - - let col_expr = match kind { - VarKind::Node => { - if cond.property == "name" - || cond.property == "kind" - || cond.property == "namespace" - { - format!("{alias}.{}", cond.property) - } else { - format!( 
- "json_extract({alias}.properties, '$.{}')", - cond.property.replace('\'', "''") - ) - } - } - VarKind::Edge => { - // MAJ-1: edge property whitelist — only relation and weight are queryable - match cond.property.as_str() { - "relation" | "weight" => format!("{alias}.{}", cond.property), - other => { - return Err(QueryError::Validation(format!( - "edge property '{other}' not queryable; use 'relation' or 'weight'" - ))) - } - } - } - }; - - let op_str = match cond.op { - CompareOp::Eq => "=", - CompareOp::Neq => "!=", - CompareOp::Gt => ">", - CompareOp::Lt => "<", - CompareOp::Gte => ">=", - CompareOp::Lte => "<=", - CompareOp::Like => "LIKE", - }; - - match &cond.value { - ConditionValue::String(s) => { - params.push(SqlValue::Text(s.clone())); - let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) { - " COLLATE NOCASE" - } else { - "" - }; - where_parts.push(format!("{col_expr} {op_str} ?{}{}", params.len(), collate)); - } - ConditionValue::Number(n) => { - params.push(SqlValue::Float(*n)); - where_parts.push(format!("{col_expr} {op_str} ?{}", params.len())); - } - ConditionValue::Bool(b) => { - params.push(SqlValue::Integer(if *b { 1 } else { 0 })); - where_parts.push(format!("{col_expr} {op_str} ?{}", params.len())); - } - } + // WHERE clause conditions from GQL WHERE (supports AND / OR tree — ADR-008) + if let Some(where_sql) = compile_where_expr(&query.where_clause, &var_to_alias, &mut params)? 
{ + where_parts.push(where_sql); } // SELECT clause @@ -299,12 +450,34 @@ fn compile_fixed_length( VarKind::Node => { select_parts.push(format!( "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \ - {alias}.kind AS {var}_kind, {alias}.name AS {var}_name, \ + {alias}.kind AS {var}_kind, {alias}.entity_type AS {var}_entity_type, \ + {alias}.name AS {var}_name, \ + {alias}.properties AS {var}_properties, \ + {alias}.created_at AS {var}_created_at, \ + {alias}.updated_at AS {var}_updated_at" + )); + } + VarKind::NoteNode => { + select_parts.push(format!( + "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \ + {alias}.kind AS {var}_kind, {alias}.status AS {var}_status, \ + {alias}.content AS {var}_content, \ + {alias}.salience AS {var}_salience, \ {alias}.properties AS {var}_properties, \ {alias}.created_at AS {var}_created_at, \ {alias}.updated_at AS {var}_updated_at" )); } + VarKind::EventNode => { + select_parts.push(format!( + "{alias}.id AS {var}_id, {alias}.namespace AS {var}_namespace, \ + {alias}.verb AS {var}_verb, {alias}.substrate AS {var}_substrate, \ + {alias}.actor AS {var}_actor, {alias}.kind AS {var}_kind, \ + {alias}.outcome AS {var}_outcome, \ + {alias}.payload AS {var}_payload, \ + {alias}.created_at AS {var}_created_at" + )); + } VarKind::Edge => { select_parts.push(format!( "{alias}.id AS {var}_id, {alias}.source_id AS {var}_source, \ @@ -323,7 +496,7 @@ fn compile_fixed_length( } let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit); - params.push(SqlValue::Integer(limit as i64)); + params.push(QueryValue::Integer(limit as i64)); let sql = format!( "SELECT {} FROM {} {} WHERE {} LIMIT ?{}", @@ -342,6 +515,203 @@ fn compile_fixed_length( }) } +/// Compile a `WhereExpr` tree into a SQL fragment, pushing bound parameters into `params`. +/// +/// Returns `Ok(None)` for `WhereExpr::True` (no fragment needed), or `Ok(Some(sql))` otherwise. 
+/// The caller is responsible for wrapping the result in an AND with the structural predicates. +fn compile_where_expr( + expr: &WhereExpr, + var_to_alias: &std::collections::HashMap, + params: &mut Vec, +) -> Result, QueryError> { + match expr { + WhereExpr::True => Ok(None), + WhereExpr::Condition(cond) => { + let sql = compile_single_condition(cond, var_to_alias, params)?; + Ok(Some(sql)) + } + WhereExpr::And(l, r) => { + let ls = compile_where_expr(l, var_to_alias, params)?; + let rs = compile_where_expr(r, var_to_alias, params)?; + Ok(match (ls, rs) { + (None, None) => None, + (Some(s), None) | (None, Some(s)) => Some(s), + (Some(l), Some(r)) => Some(format!("{l} AND {r}")), + }) + } + WhereExpr::Or(l, r) => { + let ls = compile_where_expr(l, var_to_alias, params)?; + let rs = compile_where_expr(r, var_to_alias, params)?; + Ok(match (ls, rs) { + (None, None) => None, + (Some(s), None) | (None, Some(s)) => Some(s), + (Some(l), Some(r)) => Some(format!("({l} OR {r})")), + }) + } + } +} + +/// Compile a single leaf condition to a SQL predicate string. 
+fn compile_single_condition( + cond: &Condition, + var_to_alias: &std::collections::HashMap, + params: &mut Vec, +) -> Result { + let (alias, kind) = var_to_alias.get(&cond.variable).ok_or_else(|| { + QueryError::Compile(format!( + "unknown variable '{}' in WHERE clause", + cond.variable + )) + })?; + + let col_expr = match kind { + VarKind::Node => { + if cond.property == "name" + || cond.property == "kind" + || cond.property == "entity_type" + || cond.property == "namespace" + { + format!("{alias}.{}", cond.property) + } else { + format!( + "json_extract({alias}.properties, '$.{}')", + cond.property.replace('\'', "''") + ) + } + } + VarKind::NoteNode => { + if NOTE_COLUMNS.contains(&cond.property.as_str()) { + format!("{alias}.{}", cond.property) + } else { + format!( + "json_extract({alias}.properties, '$.{}')", + cond.property.replace('\'', "''") + ) + } + } + VarKind::EventNode => { + // Events table has direct columns only; reject unknown fields. + if EVENT_COLUMNS.contains(&cond.property.as_str()) { + format!("{alias}.{}", cond.property) + } else { + return Err(QueryError::Validation(format!( + "event property '{}' not queryable; valid columns: {}", + cond.property, + EVENT_COLUMNS.join(", ") + ))); + } + } + VarKind::Edge => match cond.property.as_str() { + "relation" | "weight" => format!("{alias}.{}", cond.property), + other => { + return Err(QueryError::Validation(format!( + "edge property '{other}' not queryable; use 'relation' or 'weight'" + ))) + } + }, + }; + + let op_str = match cond.op { + CompareOp::Eq => "=", + CompareOp::Neq => "!=", + CompareOp::Gt => ">", + CompareOp::Lt => "<", + CompareOp::Gte => ">=", + CompareOp::Lte => "<=", + CompareOp::Like => "LIKE", + }; + + let sql = match &cond.value { + ConditionValue::String(s) => { + params.push(QueryValue::Text(s.clone())); + let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) { + " COLLATE NOCASE" + } else { + "" + }; + format!("{col_expr} {op_str} ?{}{}", params.len(), 
collate) + } + ConditionValue::Number(n) => { + params.push(QueryValue::Float(*n)); + format!("{col_expr} {op_str} ?{}", params.len()) + } + ConditionValue::Bool(b) => { + params.push(QueryValue::Integer(if *b { 1 } else { 0 })); + format!("{col_expr} {op_str} ?{}", params.len()) + } + }; + Ok(sql) +} + +/// Returns `true` if the given `WhereExpr` subtree references only the start +/// variable (`start_var`), only the end variable, or neither — but NOT both. +/// +/// Used to detect OR nodes whose branches reference different endpoints, which +/// cannot be correctly compiled by the variable-length leaf-routing approach. +fn expr_endpoint_set( + expr: &WhereExpr, + start_var: Option<&str>, + end_var: Option<&str>, +) -> (bool, bool) { + match expr { + WhereExpr::True => (false, false), + WhereExpr::Condition(c) => { + let is_start = start_var == Some(c.variable.as_str()); + let is_end = end_var == Some(c.variable.as_str()); + (is_start, is_end) + } + WhereExpr::And(l, r) | WhereExpr::Or(l, r) => { + let (ls, le) = expr_endpoint_set(l, start_var, end_var); + let (rs, re) = expr_endpoint_set(r, start_var, end_var); + (ls || rs, le || re) + } + } +} + +/// Walk the expression tree and return `Err(Unsupported)` if any `Or` node has +/// branches that span both start and end endpoint variables. Single-endpoint +/// ORs (e.g. `a.name='X' OR a.name='Y'`) are fine. 
+fn reject_or_spanning_endpoints( + expr: &WhereExpr, + start: &NodePattern, + end: &NodePattern, +) -> Result<(), QueryError> { + let start_var = start.variable.as_deref(); + let end_var = end.variable.as_deref(); + reject_or_spanning_impl(expr, start_var, end_var) +} + +fn reject_or_spanning_impl( + expr: &WhereExpr, + start_var: Option<&str>, + end_var: Option<&str>, +) -> Result<(), QueryError> { + match expr { + WhereExpr::True | WhereExpr::Condition(_) => Ok(()), + WhereExpr::And(l, r) => { + reject_or_spanning_impl(l, start_var, end_var)?; + reject_or_spanning_impl(r, start_var, end_var) + } + WhereExpr::Or(l, r) => { + let (l_start, l_end) = expr_endpoint_set(l, start_var, end_var); + let (r_start, r_end) = expr_endpoint_set(r, start_var, end_var); + let spans_start = l_start || r_start; + let spans_end = l_end || r_end; + if spans_start && spans_end { + return Err(QueryError::Unsupported( + "WHERE clauses that span both endpoints in a variable-length pattern \ + are not yet supported; rewrite as separate queries or restrict each \ + OR branch to one endpoint" + .into(), + )); + } + // Even if this OR is safe, recurse to catch nested ORs. + reject_or_spanning_impl(l, start_var, end_var)?; + reject_or_spanning_impl(r, start_var, end_var) + } + } +} + /// Compile variable-length patterns to a recursive CTE. /// /// Depth is capped at min(requested, 10) — MAJ-2 (parameterized min_depth, not literal). 
@@ -349,7 +719,7 @@ fn compile_variable_length( query: &GqlQuery, opts: &CompileOptions, ) -> Result { - let mut params: Vec = Vec::new(); + let mut params: Vec = Vec::new(); let mut var_to_alias: std::collections::HashMap = std::collections::HashMap::new(); @@ -383,11 +753,15 @@ fn compile_variable_length( } if let Some(ref kind) = start.kind { - params.push(SqlValue::Text(kind.clone())); + params.push(QueryValue::Text(kind.clone())); start_conditions.push(format!("s.kind = ?{}", params.len())); } + if let Some(ref et) = start.entity_type { + params.push(QueryValue::Text(et.clone())); + start_conditions.push(format!("s.entity_type = ?{}", params.len())); + } for (key, val) in &start.properties { - params.push(SqlValue::Text(val.clone())); + params.push(QueryValue::Text(val.clone())); if key == "name" { start_conditions.push(format!("s.name = ?{} COLLATE NOCASE", params.len())); } else { @@ -403,14 +777,14 @@ fn compile_variable_length( let mut relation_condition = String::new(); if !edge.relations.is_empty() { if edge.relations.len() == 1 { - params.push(SqlValue::Text(edge.relations[0].clone())); + params.push(QueryValue::Text(edge.relations[0].clone())); relation_condition = format!(" AND e.relation = ?{}", params.len()); } else { let placeholders: Vec = edge .relations .iter() .map(|r| { - params.push(SqlValue::Text(r.clone())); + params.push(QueryValue::Text(r.clone())); format!("?{}", params.len()) }) .collect(); @@ -443,7 +817,7 @@ fn compile_variable_length( ), }; - params.push(SqlValue::Integer(max_depth as i64)); + params.push(QueryValue::Integer(max_depth as i64)); let depth_param = params.len(); // End-node conditions (applied in outer WHERE). 
`r` is always joined @@ -455,11 +829,15 @@ fn compile_variable_length( end_conditions.push(r_ns_filter.trim_start_matches(" AND ").to_string()); } if let Some(ref kind) = end.kind { - params.push(SqlValue::Text(kind.clone())); + params.push(QueryValue::Text(kind.clone())); end_conditions.push(format!("r.kind = ?{}", params.len())); } + if let Some(ref et) = end.entity_type { + params.push(QueryValue::Text(et.clone())); + end_conditions.push(format!("r.entity_type = ?{}", params.len())); + } for (key, val) in &end.properties { - params.push(SqlValue::Text(val.clone())); + params.push(QueryValue::Text(val.clone())); if key == "name" { end_conditions.push(format!("r.name = ?{} COLLATE NOCASE", params.len())); } else { @@ -471,12 +849,17 @@ fn compile_variable_length( } } - // WHERE clause conditions - for cond in &query.where_clause { - // Map variables to appropriate aliases - let col_alias = if start.variable.as_deref() == Some(&cond.variable) { + // WHERE clause conditions for variable-length patterns. + // Each leaf condition is routed to start_conditions (alias s) or end_conditions + // (alias r) based on which variable it references. OR expressions that span + // both start and end nodes are not supported — reject explicitly with an + // actionable error message rather than silently converting OR to AND. 
+ reject_or_spanning_endpoints(&query.where_clause, start, end)?; + + for cond in query.where_clause.conditions() { + let col_alias = if start.variable.as_deref() == Some(cond.variable.as_str()) { "s" - } else if end.variable.as_deref() == Some(&cond.variable) { + } else if end.variable.as_deref() == Some(cond.variable.as_str()) { "r" } else { return Err(QueryError::Compile(format!( @@ -485,14 +868,16 @@ fn compile_variable_length( ))); }; - let col_expr = if cond.property == "name" || cond.property == "kind" { - format!("{col_alias}.{}", cond.property) - } else { - format!( - "json_extract({col_alias}.properties, '$.{}')", - cond.property.replace('\'', "''") - ) - }; + let col_expr = + if cond.property == "name" || cond.property == "kind" || cond.property == "entity_type" + { + format!("{col_alias}.{}", cond.property) + } else { + format!( + "json_extract({col_alias}.properties, '$.{}')", + cond.property.replace('\'', "''") + ) + }; let op_str = match cond.op { CompareOp::Eq => "=", @@ -506,7 +891,7 @@ fn compile_variable_length( match &cond.value { ConditionValue::String(s) => { - params.push(SqlValue::Text(s.clone())); + params.push(QueryValue::Text(s.clone())); let collate = if matches!(cond.op, CompareOp::Eq | CompareOp::Like) { " COLLATE NOCASE" } else { @@ -520,7 +905,7 @@ fn compile_variable_length( } } ConditionValue::Number(n) => { - params.push(SqlValue::Float(*n)); + params.push(QueryValue::Float(*n)); if col_alias == "s" { start_conditions.push(format!("{col_expr} {op_str} ?{}", params.len())); } else { @@ -528,7 +913,7 @@ fn compile_variable_length( } } ConditionValue::Bool(b) => { - params.push(SqlValue::Integer(if *b { 1 } else { 0 })); + params.push(QueryValue::Integer(if *b { 1 } else { 0 })); if col_alias == "s" { start_conditions.push(format!("{col_expr} {op_str} ?{}", params.len())); } else { @@ -540,12 +925,12 @@ fn compile_variable_length( // MAJ-2: min_depth is always a bound parameter, never a literal if min_depth > 0 { - 
params.push(SqlValue::Integer(min_depth as i64)); + params.push(QueryValue::Integer(min_depth as i64)); end_conditions.push(format!("t.depth >= ?{}", params.len())); } let limit = query.limit.unwrap_or(opts.max_limit).min(opts.max_limit); - params.push(SqlValue::Integer(limit as i64)); + params.push(QueryValue::Integer(limit as i64)); let limit_param = params.len(); // Register variables @@ -569,6 +954,13 @@ fn compile_variable_length( match item { ReturnItem::Property(_, prop) => { let is_start = start.variable.as_deref() == Some(var); + if matches!(kind, VarKind::EventNode | VarKind::NoteNode) { + return Err(QueryError::Unsupported( + "synthetic observed_as_* edges cannot be used in variable-length \ + patterns; use a fixed-length edge pattern instead" + .into(), + )); + } if *kind == VarKind::Node { let tbl = if is_start { "s" } else { "r" }; if is_start { @@ -597,7 +989,8 @@ fn compile_variable_length( has_start = true; select_parts.push(format!( "s.id AS {var}_id, s.namespace AS {var}_namespace, \ - s.kind AS {var}_kind, s.name AS {var}_name, \ + s.kind AS {var}_kind, s.entity_type AS {var}_entity_type, \ + s.name AS {var}_name, \ s.properties AS {var}_properties, \ s.created_at AS {var}_created_at, \ s.updated_at AS {var}_updated_at" @@ -605,13 +998,23 @@ fn compile_variable_length( } else { select_parts.push(format!( "r.id AS {var}_id, r.namespace AS {var}_namespace, \ - r.kind AS {var}_kind, r.name AS {var}_name, \ + r.kind AS {var}_kind, r.entity_type AS {var}_entity_type, \ + r.name AS {var}_name, \ r.properties AS {var}_properties, \ r.created_at AS {var}_created_at, \ r.updated_at AS {var}_updated_at" )); } } + VarKind::EventNode | VarKind::NoteNode => { + // Synthetic observed_as_* edges require a fixed-length pattern; + // variable-length recursion over the events/notes tables is not supported. 
+ return Err(QueryError::Unsupported( + "synthetic observed_as_* edges cannot be used in variable-length \ + patterns; use a fixed-length edge pattern instead" + .into(), + )); + } VarKind::Edge => { select_parts.push(format!( "t.via_edge AS {var}_id, t.via_relation AS {var}_relation, \ @@ -647,7 +1050,7 @@ fn compile_variable_length( SELECT s.id, {seed_next}, 1, s.id || ',' || {seed_next}, e.weight, \ e.id, e.relation, e.weight \ FROM entities s \ - JOIN graph_edges e ON {seed_join}{e_ns_filter}{relation_condition} \ + JOIN graph_edges e ON {seed_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \ WHERE {start_where} \ UNION ALL \ SELECT t.start_id, {recurse_next}, t.depth + 1, \ @@ -655,7 +1058,7 @@ fn compile_variable_length( t.total_weight + e.weight, \ e.id, e.relation, e.weight \ FROM traverse t \ - JOIN graph_edges e ON {recurse_join}{e_ns_filter}{relation_condition} \ + JOIN graph_edges e ON {recurse_join} AND e.deleted_at IS NULL{e_ns_filter}{relation_condition} \ WHERE t.depth < ?{depth_param} \ AND (',' || t.path || ',') NOT LIKE '%,' || {recurse_next} || ',%' \ ) \ @@ -691,6 +1094,10 @@ fn compile_variable_length( #[derive(Clone, Copy, PartialEq, Eq)] enum VarKind { Node, + /// Node that maps to the `events` table (synthetic edge source, ADR-041 §8). + EventNode, + /// Node that maps to the `notes` table (synthetic edge target, ADR-041 §8). + NoteNode, Edge, } @@ -698,26 +1105,54 @@ const NODE_COLUMNS: &[&str] = &[ "id", "name", "kind", + "entity_type", "namespace", "description", "properties", "created_at", "updated_at", ]; +/// Columns available for projection on `notes` table nodes (ADR-041 §8 targets). +const NOTE_COLUMNS: &[&str] = &[ + "id", + "namespace", + "kind", + "status", + "name", + "content", + "salience", + "decay_factor", + "properties", + "created_at", + "updated_at", +]; +/// Columns available for projection on `events` table nodes (ADR-041 §8). 
+const EVENT_COLUMNS: &[&str] = &[ + "id", + "namespace", + "verb", + "substrate", + "actor", + "kind", + "outcome", + "payload", + "duration_us", + "target_id", + "session_id", + "created_at", +]; const EDGE_COLUMNS: &[&str] = &["id", "source_id", "target_id", "relation", "weight"]; fn property_to_column<'a>(prop: &'a str, kind: &VarKind) -> Result<&'a str, QueryError> { - let valid = match kind { - VarKind::Node => NODE_COLUMNS, - VarKind::Edge => EDGE_COLUMNS, + let (valid, kind_name) = match kind { + VarKind::Node => (NODE_COLUMNS, "node"), + VarKind::NoteNode => (NOTE_COLUMNS, "note"), + VarKind::EventNode => (EVENT_COLUMNS, "event"), + VarKind::Edge => (EDGE_COLUMNS, "edge"), }; if valid.contains(&prop) { Ok(prop) } else { - let kind_name = match kind { - VarKind::Node => "node", - VarKind::Edge => "edge", - }; Err(QueryError::Compile(format!( "unknown {kind_name} property '{prop}' in RETURN projection. \ Valid: {}", @@ -773,7 +1208,7 @@ mod tests { let has_ns_param = compiled .params .iter() - .any(|p| matches!(p, SqlValue::Text(s) if s == "research")); + .any(|p| matches!(p, QueryValue::Text(s) if s == "research")); assert!(has_ns_param, "namespace must be a bound parameter"); } @@ -823,19 +1258,32 @@ mod tests { } #[test] - fn depth_cap_at_ten() { - // MAJ-2: depth capped at 10 regardless of query request + fn depth_cap_at_ten_rejects_above_max() { + // ADR-008 §"Depth limits": exceeding MAX_DEPTH is an InvalidInput error at + // validation time — the compiler never sees a query with depth > 10. let q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap(); + let err = compile(&q, &opts()).unwrap_err(); + assert!( + matches!(err, QueryError::InvalidInput(_)), + "expected InvalidInput for depth > 10, got {err:?}" + ); + } + + #[test] + fn depth_within_cap_compiles() { + // depth *1..10 is at the cap — must compile successfully. 
+ let q = gql::parse("MATCH (a)-[:extends*1..10]->(b) RETURN b").unwrap(); let compiled = compile(&q, &opts()).unwrap(); - // The depth parameter must be <= 10 + assert!(compiled.sql.contains("WITH RECURSIVE")); + // The depth parameter must equal 10 let depth_val = compiled.params.iter().find_map(|p| { - if let SqlValue::Integer(n) = p { + if let QueryValue::Integer(n) = p { Some(*n) } else { None } }); - assert!(depth_val.unwrap() <= 10, "depth must be capped at 10"); + assert_eq!(depth_val, Some(10), "depth param should be 10"); } #[test] @@ -845,7 +1293,7 @@ mod tests { let compiled = compile(&q, &opts()).unwrap(); let limit_param = compiled.params.last().unwrap(); assert!( - matches!(limit_param, SqlValue::Integer(500)), + matches!(limit_param, QueryValue::Integer(500)), "expected Integer(500), got {limit_param:?}" ); } @@ -867,7 +1315,7 @@ mod tests { let has_gizmo = compiled .params .iter() - .any(|p| matches!(p, SqlValue::Text(s) if s == "gizmo")); + .any(|p| matches!(p, QueryValue::Text(s) if s == "gizmo")); assert!( has_gizmo, "pack-agnostic: unknown kind must pass through into SQL params" @@ -884,7 +1332,7 @@ mod tests { let has_paper = compiled .params .iter() - .any(|p| matches!(p, SqlValue::Text(s) if s == "paper")); + .any(|p| matches!(p, QueryValue::Text(s) if s == "paper")); assert!( has_paper, "kind 'paper' must pass through unchanged into SQL params" @@ -916,7 +1364,7 @@ mod tests { let has_paper = compiled .params .iter() - .any(|p| matches!(p, SqlValue::Text(s) if s == "paper")); + .any(|p| matches!(p, QueryValue::Text(s) if s == "paper")); assert!( has_paper, "kind 'paper' must pass through unchanged into SQL params" @@ -1058,4 +1506,284 @@ mod tests { compiled.sql ); } + + #[test] + fn entity_type_compiles_as_direct_column_not_json_extract() { + // entity_type in a NodePattern must become `alias.entity_type = ?N` in the WHERE + // clause — a direct column reference, not json_extract from the properties blob. 
+ let q = gql::parse("MATCH (n:document {entity_type: 'paper'})-[:extends]->(m) RETURN n") + .unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains(".entity_type = ?"), + "entity_type must compile to a direct column comparison; sql: {}", + compiled.sql + ); + assert!( + !compiled.sql.contains("json_extract"), + "entity_type must NOT use json_extract; sql: {}", + compiled.sql + ); + let has_paper_param = compiled + .params + .iter() + .any(|p| matches!(p, QueryValue::Text(s) if s == "paper")); + assert!( + has_paper_param, + "entity_type value 'paper' must appear as a bound parameter" + ); + } + + // --- F047: OR support in WHERE clause (ADR-008 §"GQL WHERE expression") --- + + #[test] + fn where_or_compiles_to_sql_or() { + let q = gql::parse( + "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a", + ) + .unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains(" OR "), + "WHERE OR must produce SQL OR; sql: {}", + compiled.sql + ); + let has_lora = compiled + .params + .iter() + .any(|p| matches!(p, QueryValue::Text(s) if s == "LoRA")); + let has_qlora = compiled + .params + .iter() + .any(|p| matches!(p, QueryValue::Text(s) if s == "QLoRA")); + assert!(has_lora && has_qlora, "both OR values must be bound params"); + } + + #[test] + fn where_and_or_precedence() { + // `a AND b OR c` should compile as `(a AND b) OR c` + let q = gql::parse( + "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a" + ).unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + // The SQL should contain an OR at the outer level wrapping the AND group + assert!( + compiled.sql.contains(" OR "), + "expected OR in sql; sql: {}", + compiled.sql + ); + } + + // --- F218: event_observations synthetic edge support (ADR-041 §8) --- + + #[test] + fn synthetic_edge_joins_event_observations() { + let q = gql::parse("MATCH 
(ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains("event_observations"), + "synthetic edge must join event_observations; sql: {}", + compiled.sql + ); + assert!( + !compiled.sql.contains("graph_edges"), + "synthetic edge must NOT join graph_edges; sql: {}", + compiled.sql + ); + let has_role_param = compiled + .params + .iter() + .any(|p| matches!(p, QueryValue::Text(s) if s == "selected")); + assert!(has_role_param, "role 'selected' must be a bound parameter"); + } + + // CRIT-1 regression: event source node must bind to `events` table, not `entities`. + // Previously `FROM entities n0 JOIN event_observations e0 ON e0.event_id = n0.id` + // was emitted — IDs are disjoint so every query returned zero rows. + #[test] + fn synthetic_edge_event_source_binds_events_table() { + let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN ev, m").unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains("FROM events "), + "CRIT-1: event source must come FROM events table, not entities; sql: {}", + compiled.sql + ); + assert!( + !compiled + .sql + .starts_with("SELECT * FROM entities n0 JOIN event_observations"), + "CRIT-1: must not join events via entities table; sql: {}", + compiled.sql + ); + } + + #[test] + fn synthetic_edge_event_observation_join_uses_events_id() { + // The JOIN must be `event_observations.event_id = events_alias.id`, + // not `event_observations.event_id = entities_alias.id`. + let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + // The event alias is n0; the join must reference n0 against `events` table. 
+ assert!( + compiled + .sql + .contains("JOIN event_observations e0 ON e0.event_id = n0.id"), + "CRIT-1: event_observations must join on events.id (n0 is now events); sql: {}", + compiled.sql + ); + } + + #[test] + fn synthetic_edge_event_node_projects_event_columns() { + // The event variable in RETURN must select event-table columns (verb, outcome, …), + // not entity columns (name, entity_type, properties, …). + let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN ev").unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains("ev_verb"), + "CRIT-1: event variable must project verb column; sql: {}", + compiled.sql + ); + assert!( + compiled.sql.contains("ev_outcome"), + "CRIT-1: event variable must project outcome column; sql: {}", + compiled.sql + ); + assert!( + !compiled.sql.contains("ev_name,") && !compiled.sql.contains("ev_name "), + "CRIT-1: event variable must NOT project entity name column; sql: {}", + compiled.sql + ); + assert!( + !compiled.sql.contains("ev_properties"), + "CRIT-1: event variable must NOT project entity properties column; sql: {}", + compiled.sql + ); + } + + #[test] + fn synthetic_edge_namespace_filter_on_events_table() { + // MIN-2: when scoped, the namespace filter must target the events table + // (which has a namespace column) — not rely on entities indirection. + let q = gql::parse("MATCH (ev)-[:observed_as_selected]->(m) RETURN m").unwrap(); + let compiled = compile(&q, &scoped("test-ns")).unwrap(); + // Both the event alias (n0, now from `events`) and the target alias (n1, from `entities`) + // must have namespace filters. 
+ let ns_count = compiled + .params + .iter() + .filter(|p| matches!(p, QueryValue::Text(s) if s == "test-ns")) + .count(); + assert!( + ns_count >= 2, + "MIN-2: namespace must be filtered on both events and target; params: {:?}", + compiled.params + ); + } + + #[test] + fn synthetic_edge_candidate_role() { + let q = gql::parse("MATCH (ev)-[:observed_as_candidate]->(m) RETURN ev, m").unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains("event_observations"), + "sql: {}", + compiled.sql + ); + let has_candidate = compiled + .params + .iter() + .any(|p| matches!(p, QueryValue::Text(s) if s == "candidate")); + assert!(has_candidate, "role 'candidate' must be bound"); + } + + #[test] + fn synthetic_edge_multi_role() { + // Multiple observed_as_* relations compile to a role IN (...) predicate. + let q = + gql::parse("MATCH (ev)-[:observed_as_candidate|observed_as_selected]->(m) RETURN m") + .unwrap(); + let compiled = compile(&q, &opts()).unwrap(); + assert!( + compiled.sql.contains("event_observations"), + "sql: {}", + compiled.sql + ); + assert!( + compiled.sql.contains("IN"), + "multi-role must use IN; sql: {}", + compiled.sql + ); + } + + #[test] + fn mixed_synthetic_and_canonical_rejected() { + let q = gql::parse("MATCH (ev)-[:observed_as_selected|extends]->(m) RETURN m").unwrap(); + let err = compile(&q, &opts()).unwrap_err(); + assert!( + matches!(err, QueryError::Compile(_)), + "mixed synthetic+canonical must be rejected; got {err:?}" + ); + } + + #[test] + fn synthetic_edge_inbound_rejected() { + let q = gql::parse("MATCH (m)<-[:observed_as_selected]-(ev) RETURN m").unwrap(); + let err = compile(&q, &opts()).unwrap_err(); + assert!( + matches!(err, QueryError::Compile(_)), + "inbound synthetic edge must be rejected; got {err:?}" + ); + } + + // --- MAJ-1: OR spanning both endpoints in variable-length patterns must be rejected --- + + #[test] + fn variable_length_or_across_endpoints_rejected() { + // MAJ-1: `WHERE 
a.name='X' OR b.name='Y'` in a variable-length pattern must be + // rejected with Unsupported — not silently compiled to AND. + let q = gql::parse( + "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR b.name = 'Y' RETURN a", + ) + .unwrap(); + let result = compile(&q, &opts()); + assert!( + matches!(result, Err(QueryError::Unsupported(_))), + "MAJ-1: OR spanning both endpoints must return Unsupported; got {result:?}" + ); + let err_msg = result.unwrap_err().to_string(); + assert!( + err_msg.contains("separate queries") || err_msg.contains("one endpoint"), + "error must be actionable; got: {err_msg}" + ); + } + + #[test] + fn variable_length_or_single_endpoint_still_works() { + // OR within a single endpoint (same alias) must still compile successfully. + let q = gql::parse( + "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' OR a.name = 'Y' RETURN a", + ) + .unwrap(); + let result = compile(&q, &opts()); + assert!( + result.is_ok(), + "single-endpoint OR must compile; got {result:?}" + ); + } + + #[test] + fn variable_length_and_across_endpoints_still_works() { + // AND across endpoints must still compile (the existing behavior is correct for AND). + let q = gql::parse( + "MATCH (a)-[:extends*1..3]->(b) WHERE a.name = 'X' AND b.name = 'Y' RETURN a", + ) + .unwrap(); + let result = compile(&q, &opts()); + assert!( + result.is_ok(), + "AND across endpoints must compile; got {result:?}" + ); + } } diff --git a/crates/khive-query/src/error.rs b/crates/khive-query/src/error.rs index 481f2c04..b28663d6 100644 --- a/crates/khive-query/src/error.rs +++ b/crates/khive-query/src/error.rs @@ -12,4 +12,8 @@ pub enum QueryError { #[error("unsupported feature: {0}")] Unsupported(String), + + /// A query parameter value is out of the allowed range (ADR-008 §"Depth limits"). 
+ #[error("invalid input: {0}")] + InvalidInput(String), } diff --git a/crates/khive-query/src/lib.rs b/crates/khive-query/src/lib.rs index c7194122..6339a68f 100644 --- a/crates/khive-query/src/lib.rs +++ b/crates/khive-query/src/lib.rs @@ -23,7 +23,7 @@ pub mod error; pub mod parsers; pub mod validate; -pub use ast::{GqlQuery, ReturnItem}; +pub use ast::{GqlQuery, QueryValue, ReturnItem, WhereExpr}; pub use compilers::sql::{compile, CompileOptions, CompiledQuery}; pub use error::QueryError; pub use validate::{validate, validate_with_warnings, MAX_DEPTH}; diff --git a/crates/khive-query/src/parsers/gql.rs b/crates/khive-query/src/parsers/gql.rs index 12c90aff..92939a59 100644 --- a/crates/khive-query/src/parsers/gql.rs +++ b/crates/khive-query/src/parsers/gql.rs @@ -1,14 +1,15 @@ //! Hand-written recursive descent parser for GQL subset. //! //! Grammar: -//! query = 'MATCH' pattern ['WHERE' conditions] 'RETURN' items ['LIMIT' number] +//! query = 'MATCH' pattern ['WHERE' where_expr] 'RETURN' items ['LIMIT' number] //! pattern = node_pat (edge_pat node_pat)* //! node_pat = '(' [var] [':' ident] [props] ')' //! edge_pat = '-[' [var] [':' rels] [range] ']->' | '<-[' ... ']-' | '-[' ... ']-' //! rels = ident ('|' ident)* //! range = '*' number ['..' number] //! props = '{' key ':' value (',' key ':' value)* '}' -//! conditions = condition ('AND' condition)* +//! where_expr = and_expr ('OR' and_expr)* +//! and_expr = condition ('AND' condition)* //! condition = var '.' prop op value //! items = item (',' item)* //! item = var | var '.' prop @@ -213,6 +214,7 @@ impl Parser { return Ok(NodePattern { variable, kind, + entity_type: None, properties, }); } @@ -245,10 +247,15 @@ impl Parser { properties = self.parse_props()?; } + // Lift entity_type out of properties so the SQL compiler targets the + // dedicated column instead of json_extract(properties, '$.entity_type'). 
+ let entity_type = properties.remove("entity_type"); + self.expect_char(')')?; Ok(NodePattern { variable, kind, + entity_type, properties, }) } @@ -403,28 +410,49 @@ impl Parser { } } - fn parse_conditions(&mut self) -> Result, QueryError> { - let mut conditions = Vec::new(); + fn parse_condition(&mut self) -> Result { + self.skip_whitespace(); + let variable = self.parse_ident()?; + self.expect_char('.')?; + let property = self.parse_ident()?; + let op = self.parse_compare_op()?; + let value = self.parse_value()?; + Ok(Condition { + variable, + property, + op, + value, + }) + } + + /// Parse a single AND-chain of conditions. + fn parse_and_expr(&mut self) -> Result { + let first = WhereExpr::Condition(self.parse_condition()?); + let mut acc = first; loop { self.skip_whitespace(); - let variable = self.parse_ident()?; - self.expect_char('.')?; - let property = self.parse_ident()?; - let op = self.parse_compare_op()?; - let value = self.parse_value()?; - conditions.push(Condition { - variable, - property, - op, - value, - }); + if !self.try_keyword("AND") { + break; + } + let rhs = WhereExpr::Condition(self.parse_condition()?); + acc = WhereExpr::And(Box::new(acc), Box::new(rhs)); + } + Ok(acc) + } + /// Parse a WHERE expression: and_expr ('OR' and_expr)* (ADR-008 §"GQL WHERE expression"). + fn parse_where_expr(&mut self) -> Result { + let first = self.parse_and_expr()?; + let mut acc = first; + loop { self.skip_whitespace(); - if !self.try_keyword("AND") { + if !self.try_keyword("OR") { break; } + let rhs = self.parse_and_expr()?; + acc = WhereExpr::Or(Box::new(acc), Box::new(rhs)); } - Ok(conditions) + Ok(acc) } fn parse_return_items(&mut self) -> Result, QueryError> { @@ -458,9 +486,9 @@ impl Parser { let pattern = self.parse_pattern()?; let where_clause = if self.try_keyword("WHERE") { - self.parse_conditions()? + self.parse_where_expr()? 
} else { - Vec::new() + WhereExpr::True }; self.expect_keyword("RETURN")?; @@ -539,9 +567,51 @@ mod tests { let q = parse( "MATCH (a)-[e:implements]->(b:project) WHERE b.name = 'lattice-inference' RETURN a LIMIT 10" ).unwrap(); - assert_eq!(q.where_clause.len(), 1); - assert_eq!(q.where_clause[0].variable, "b"); - assert_eq!(q.where_clause[0].property, "name"); + let conds: Vec<_> = q.where_clause.conditions().collect(); + assert_eq!(conds.len(), 1); + assert_eq!(conds[0].variable, "b"); + assert_eq!(conds[0].property, "name"); + } + + #[test] + fn where_clause_and() { + let q = parse( + "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' AND b.kind = 'concept' RETURN a, b" + ).unwrap(); + let conds: Vec<_> = q.where_clause.conditions().collect(); + assert_eq!(conds.len(), 2, "AND should produce two leaf conditions"); + assert!( + matches!(&q.where_clause, WhereExpr::And(_, _)), + "should be And node" + ); + } + + #[test] + fn where_clause_or() { + let q = parse( + "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'LoRA' OR a.name = 'QLoRA' RETURN a", + ) + .unwrap(); + let conds: Vec<_> = q.where_clause.conditions().collect(); + assert_eq!(conds.len(), 2, "OR should produce two leaf conditions"); + assert!( + matches!(&q.where_clause, WhereExpr::Or(_, _)), + "should be Or node" + ); + } + + #[test] + fn where_clause_and_or() { + // AND binds tighter than OR: `a AND b OR c` = `(a AND b) OR c` + let q = parse( + "MATCH (a:concept)-[e:extends]->(b) WHERE a.name = 'X' AND a.kind = 'concept' OR b.kind = 'project' RETURN a" + ).unwrap(); + let conds: Vec<_> = q.where_clause.conditions().collect(); + assert_eq!(conds.len(), 3); + assert!( + matches!(&q.where_clause, WhereExpr::Or(_, _)), + "top-level should be Or" + ); } #[test] @@ -567,4 +637,19 @@ mod tests { let nodes: Vec<_> = q.pattern.nodes().collect(); assert_eq!(nodes.len(), 3); } + + #[test] + fn node_pattern_entity_type_lifted_from_properties() { + let q = parse("MATCH (n:document {entity_type: 'paper'}) 
RETURN n").unwrap(); + let nodes: Vec<_> = q.pattern.nodes().collect(); + assert_eq!( + nodes[0].entity_type.as_deref(), + Some("paper"), + "entity_type must be lifted into NodePattern.entity_type" + ); + assert!( + !nodes[0].properties.contains_key("entity_type"), + "entity_type must be removed from the properties map after lifting" + ); + } } diff --git a/crates/khive-query/src/parsers/sparql.rs b/crates/khive-query/src/parsers/sparql.rs index 72d49ee3..c393f600 100644 --- a/crates/khive-query/src/parsers/sparql.rs +++ b/crates/khive-query/src/parsers/sparql.rs @@ -348,7 +348,7 @@ fn triples_to_ast( let mut node_kinds: HashMap = HashMap::new(); let mut node_props: HashMap> = HashMap::new(); let mut edges: Vec<(String, String, String, usize, usize)> = Vec::new(); // (src, tgt, rel, min, max) - let mut where_conditions: Vec = Vec::new(); + let mut where_cond_list: Vec = Vec::new(); for triple in triples { match triple.predicate { @@ -378,7 +378,7 @@ fn triples_to_ast( .insert(name, val); } Object::NumberLiteral(val) => { - where_conditions.push(Condition { + where_cond_list.push(Condition { variable: triple.subject, property: name, op: CompareOp::Eq, @@ -395,6 +395,17 @@ fn triples_to_ast( } } + // Fold the flat condition list into a left-associative AND tree. 
+ let where_conditions = where_cond_list + .into_iter() + .fold(WhereExpr::True, |acc, cond| { + let leaf = WhereExpr::Condition(cond); + match acc { + WhereExpr::True => leaf, + other => WhereExpr::And(Box::new(other), Box::new(leaf)), + } + }); + if edges.is_empty() { return Err(QueryError::Parse { message: "no edge patterns found — need at least one :relation between variables" @@ -502,10 +513,13 @@ fn triples_to_ast( let mut elements: Vec = Vec::new(); let first_var = &ordered_edges[0].0; + let mut first_props = node_props.get(first_var).cloned().unwrap_or_default(); + let first_entity_type = first_props.remove("entity_type"); elements.push(PatternElement::Node(NodePattern { variable: Some(first_var.clone()), kind: node_kinds.get(first_var).cloned(), - properties: node_props.get(first_var).cloned().unwrap_or_default(), + entity_type: first_entity_type, + properties: first_props, })); for (_, tgt, rel, min_hops, max_hops) in &ordered_edges { @@ -516,10 +530,13 @@ fn triples_to_ast( min_hops: *min_hops, max_hops: *max_hops, })); + let mut tgt_props = node_props.get(tgt).cloned().unwrap_or_default(); + let tgt_entity_type = tgt_props.remove("entity_type"); elements.push(PatternElement::Node(NodePattern { variable: Some(tgt.clone()), kind: node_kinds.get(tgt).cloned(), - properties: node_props.get(tgt).cloned().unwrap_or_default(), + entity_type: tgt_entity_type, + properties: tgt_props, })); } diff --git a/crates/khive-query/src/validate.rs b/crates/khive-query/src/validate.rs index e3a86d4b..80c4e1c8 100644 --- a/crates/khive-query/src/validate.rs +++ b/crates/khive-query/src/validate.rs @@ -14,9 +14,9 @@ //! `namespace` in node property maps or `WHERE` conditions — the only valid //! source of namespace filtering is `CompileOptions::scopes`. This matches //! ADR-008 §Validation: "never trust query strings to set namespaces." -//! 4. **Traversal depth** is capped at [`MAX_DEPTH`] (10 hops). Requests above -//! 
the cap are clamped, not rejected — this matches the cap the compiler -//! applies when generating recursive CTEs. +//! 4. **Traversal depth** is limited to [`MAX_DEPTH`] (10 hops). Requests that +//! exceed the cap are rejected with [`QueryError::InvalidInput`] at validation +//! time (ADR-008 §"Depth limits"). use std::collections::HashSet; use std::str::FromStr; @@ -39,9 +39,11 @@ pub fn validate(query: &mut GqlQuery) -> Result<(), QueryError> { /// Validate and normalise an AST in place, returning any warnings generated. /// -/// Currently warns when `max_hops` is clamped to [`MAX_DEPTH`]. +/// Returns an empty `Vec` for forward compatibility; no warning paths +/// are currently emitted. The F048 depth-cap path now returns `InvalidInput` +/// rather than clamping and warning. pub fn validate_with_warnings(query: &mut GqlQuery) -> Result, QueryError> { - let mut warnings = Vec::new(); + let warnings: Vec = Vec::new(); // Pattern variables are bindings — the same variable name appearing twice // would mean "same node/edge" and require alias-equality predicates in @@ -84,6 +86,13 @@ pub fn validate_with_warnings(query: &mut GqlQuery) -> Result, Query } PatternElement::Edge(edge) => { for relation in edge.relations.iter_mut() { + // Synthetic ADR-041 relations (observed_as_*) do not exist + // in the closed EdgeRelation enum — skip taxonomy validation + // for them and leave the string unchanged. The SQL compiler + // handles them via the event_observations join path. + if relation.starts_with("observed_as_") { + continue; + } let parsed = EdgeRelation::from_str(relation) .map_err(|err| QueryError::Validation(err.to_string()))?; *relation = parsed.as_str().to_string(); @@ -112,13 +121,12 @@ pub fn validate_with_warnings(query: &mut GqlQuery) -> Result, Query edge.min_hops, MAX_DEPTH ))); } - // Clamp max_hops to the depth cap; report the narrowing to callers. + // Reject max_hops above the depth cap (ADR-008 §"Depth limits"). 
if edge.max_hops > MAX_DEPTH { - let requested = edge.max_hops; - edge.max_hops = MAX_DEPTH; - warnings.push(format!( - "Query depth capped at {MAX_DEPTH} hops (requested {requested})" - )); + return Err(QueryError::InvalidInput(format!( + "max_hops {} exceeds the depth cap of {}; reduce the range or use a smaller bound", + edge.max_hops, MAX_DEPTH + ))); } } } @@ -144,13 +152,23 @@ pub fn validate_with_warnings(query: &mut GqlQuery) -> Result, Query } } - for cond in query.where_clause.iter_mut() { + // Walk all leaf conditions in the WHERE expression tree. + let mut validate_err: Option = None; + query.where_clause.for_each_condition_mut(&mut |cond| { + if validate_err.is_some() { + return; + } let is_edge = var_kinds .get(cond.variable.as_str()) .copied() .unwrap_or(VarKind::Node) == VarKind::Edge; - validate_condition(cond, is_edge)?; + if let Err(e) = validate_condition(cond, is_edge) { + validate_err = Some(e); + } + }); + if let Some(e) = validate_err { + return Err(e); } Ok(warnings) @@ -226,24 +244,29 @@ mod tests { } #[test] - fn clamps_depth_above_max() { + fn rejects_depth_above_max() { + // ADR-008 §"Depth limits": exceeding MAX_DEPTH is an InvalidInput error, + // not a silent clamp. let mut q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap(); - validate(&mut q).unwrap(); - let edge = q.pattern.edges().next().unwrap(); - assert_eq!(edge.max_hops, MAX_DEPTH); - assert!(edge.min_hops <= edge.max_hops); + let err = validate(&mut q).unwrap_err(); + assert!( + matches!(err, QueryError::InvalidInput(_)), + "expected InvalidInput, got {err:?}" + ); + assert!( + err.to_string().contains("50"), + "error should mention requested depth: {err}" + ); } #[test] - fn warns_when_clamping_depth_above_max() { + fn rejects_depth_above_max_warnings_path() { + // validate_with_warnings must also reject (not clamp + warn). 
let mut q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap(); - let warnings = validate_with_warnings(&mut q).unwrap(); - assert_eq!(q.pattern.edges().next().unwrap().max_hops, MAX_DEPTH); + let err = validate_with_warnings(&mut q).unwrap_err(); assert!( - warnings - .iter() - .any(|w| w.contains("Query depth capped at 10")), - "warnings: {warnings:?}" + matches!(err, QueryError::InvalidInput(_)), + "expected InvalidInput, got {err:?}" ); } @@ -284,17 +307,20 @@ mod tests { assert!(err.to_string().contains("related_to"), "msg: {err}"); } + fn first_condition_string_value(q: &GqlQuery) -> String { + match q.where_clause.conditions().next().unwrap().value { + ConditionValue::String(ref s) => s.clone(), + _ => panic!("expected string condition value"), + } + } + #[test] fn unknown_kind_in_where_passes_through() { // Entity kinds are pack-agnostic strings — any kind string is accepted. let mut q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'gizmo' RETURN a").unwrap(); validate(&mut q).unwrap(); - let val = match &q.where_clause[0].value { - ConditionValue::String(s) => s.clone(), - _ => panic!("expected string"), - }; - assert_eq!(val, "gizmo"); + assert_eq!(first_condition_string_value(&q), "gizmo"); } #[test] @@ -303,11 +329,7 @@ mod tests { let mut q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap(); validate(&mut q).unwrap(); - let val = match &q.where_clause[0].value { - ConditionValue::String(s) => s.clone(), - _ => panic!("expected string"), - }; - assert_eq!(val, "paper"); + assert_eq!(first_condition_string_value(&q), "paper"); } #[test] @@ -316,11 +338,7 @@ mod tests { gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'Introduced_By' RETURN a") .unwrap(); validate(&mut q).unwrap(); - let val = match &q.where_clause[0].value { - ConditionValue::String(s) => s.clone(), - _ => panic!("expected string"), - }; - assert_eq!(val, "introduced_by"); + assert_eq!(first_condition_string_value(&q), 
"introduced_by"); } #[test] @@ -423,13 +441,14 @@ mod tests { } #[test] - fn clamps_max_but_keeps_satisfiable_min() { - // *2..50 — min 2 is satisfiable, max gets clamped to MAX_DEPTH. + fn rejects_max_above_depth_cap_with_satisfiable_min() { + // *2..50 — min 2 is satisfiable but max 50 exceeds MAX_DEPTH; must error. let mut q = gql::parse("MATCH (a)-[:extends*2..50]->(b) RETURN b").unwrap(); - validate(&mut q).unwrap(); - let edge = q.pattern.edges().next().unwrap(); - assert_eq!(edge.min_hops, 2); - assert_eq!(edge.max_hops, MAX_DEPTH); + let err = validate(&mut q).unwrap_err(); + assert!( + matches!(err, QueryError::InvalidInput(_)), + "expected InvalidInput, got {err:?}" + ); } #[test] @@ -439,11 +458,7 @@ mod tests { let mut q = gql::parse("MATCH (a)-[:extends]->(b) WHERE a.relation = 'external' RETURN a").unwrap(); validate(&mut q).unwrap(); - let val = match &q.where_clause[0].value { - ConditionValue::String(s) => s.clone(), - _ => panic!("expected string"), - }; - assert_eq!(val, "external"); + assert_eq!(first_condition_string_value(&q), "external"); } #[test] diff --git a/crates/khive-request/src/lib.rs b/crates/khive-request/src/lib.rs index 1e6f3977..c2be368f 100644 --- a/crates/khive-request/src/lib.rs +++ b/crates/khive-request/src/lib.rs @@ -16,40 +16,114 @@ //! chains, `$prev` substitution, LNDL-style natural-language declarations, //! bash-flavoured redirections — without touching the runtime layering. //! -//! ## Today's syntax (v0.2 — ADR-020) +//! ## Today's syntax (ADR-016) //! -//! - **Function-call form**: `tool_name(arg=value, arg=value)` -//! - **Function-call batch**: `[tool_name(...), tool_name(...)]` +//! - **Single op**: `tool_name(arg=value, arg=value)` — `ExecutionMode::Single` +//! - **Parallel batch**: `[tool_name(...), tool_name(...)]` — `ExecutionMode::Parallel` +//! - **Sequential chain**: `op1(...) | op2(id=$prev.id)` — `ExecutionMode::Chain` //! - **JSON form**: `[{"tool":"...", "args": {...}}, ...]` (or a single object) //! 
//! Argument values are JSON literals — strings, numbers, booleans, `null`, -//! arrays, objects. Top-level operations inside `[...]` run in parallel by -//! convention (the parser preserves order; the transport drives concurrency). -//! -//! ## Planned (deferred to dedicated ADRs) -//! -//! - Pipe chains for sequential dependent ops (`v1(...) | v2(id=$prev.id)`). -//! - LNDL frontend — parses lact-block source and emits the same `ParsedRequest`. -//! - Bash-style redirection / substitution for ops that produce stream output. +//! arrays, objects. Chain-only: `$prev` and `$prev.field.path` references resolve +//! at dispatch time against the preceding op's result. +use std::collections::BTreeMap; use std::fmt; use serde_json::{Map, Value}; -/// Hard cap on operations per request. ADR-020 §Why-100. +/// Hard cap on operations per request. ADR-016 §Why-100. pub const MAX_OPS: usize = 100; +/// Execution mode for a [`ParsedRequest`] (ADR-016). +/// +/// - `Single`: one operation, no batching. +/// - `Parallel`: operations separated by `,` inside `[...]`; run concurrently, +/// results in input order. +/// - `Chain`: operations separated by `|`; run sequentially, each op may +/// reference the prior op's result via `$prev` / `$prev.field.path`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExecutionMode { + /// One operation, no batching or chaining. + Single, + /// `[op1(...), op2(...)]` — parallel, best-effort, independent results. + Parallel, + /// `op1(...) | op2(id=$prev.id)` — sequential, abort-on-failure. + Chain, +} + +/// An argument value in a [`ParsedOp`]. +/// +/// Most arguments are concrete JSON values. In chain ops (ADR-016 §Chain +/// semantics), arguments may reference the preceding op's result via `$prev` +/// or `$prev.dotted.path`. Substitution happens at dispatch time, not at parse +/// time, because the prior result isn't known until runtime. +#[derive(Debug, Clone, PartialEq)] +pub enum ArgValue { + /// A concrete JSON value. 
+ Value(Value), + /// A `$prev` or `$prev.field.path` reference — chain mode only. + /// + /// `path` is the dot-separated field path after `$prev`. Empty string means + /// the whole prior result (`$prev` with no field selector). + PrevRef { path: String }, +} + +impl ArgValue { + /// Returns the contained [`Value`] if this is `ArgValue::Value`. + pub fn as_value(&self) -> Option<&Value> { + match self { + ArgValue::Value(v) => Some(v), + ArgValue::PrevRef { .. } => None, + } + } + + /// Returns `true` if this is a `$prev` reference. + pub fn is_prev_ref(&self) -> bool { + matches!(self, ArgValue::PrevRef { .. }) + } + + /// Resolve a `$prev` reference against a preceding op's result. + /// + /// Returns the extracted field value, or `None` if the path doesn't + /// exist in `prev_result`. Non-`PrevRef` variants return `None`. + pub fn resolve_prev<'a>(&self, prev_result: &'a Value) -> Option<&'a Value> { + let ArgValue::PrevRef { path } = self else { + return None; + }; + if path.is_empty() { + return Some(prev_result); + } + let mut cur = prev_result; + for segment in path.split('.') { + cur = cur.get(segment)?; + } + Some(cur) + } +} + /// A single parsed operation: tool name + named argument bag. +/// +/// Arguments may be concrete [`ArgValue::Value`]s or `$prev` references +/// ([`ArgValue::PrevRef`]) that the dispatcher resolves against the prior op's +/// result (chain mode only). #[derive(Debug, Clone, PartialEq)] pub struct ParsedOp { pub tool: String, - pub args: Map, + pub args: BTreeMap, } -/// Result of parsing a `request` input string. +/// Result of parsing a `request` input string (ADR-016). +/// +/// The `mode` field tells the dispatcher how to execute the operations: +/// - `Single`: dispatch the one op, wrap in a single-element envelope. +/// - `Parallel`: dispatch all ops concurrently via `join_all`, collect in order. 
+/// - `Chain`: dispatch ops sequentially; substitute `$prev` references between +/// ops; abort remaining ops when any op or substitution fails. #[derive(Debug, Clone, PartialEq)] pub struct ParsedRequest { pub ops: Vec, + pub mode: ExecutionMode, } /// Parser error — surfaced as `invalid_params` at the MCP boundary. @@ -85,6 +159,12 @@ pub enum DslError { UnclosedBracket { kind: char, }, + /// `$prev` reference used outside a chain context. + PrevRefOutsideChain { + pos: usize, + }, + /// Mixing `,` and `|` at the top level. + MixedSeparators, } impl fmt::Display for DslError { @@ -119,6 +199,18 @@ impl fmt::Display for DslError { DslError::UnclosedBracket { kind } => { write!(f, "unclosed bracket: {kind:?} has no matching close") } + DslError::PrevRefOutsideChain { pos } => { + write!( + f, + "at position {pos}: $prev reference is only valid in chain (|) mode" + ) + } + DslError::MixedSeparators => { + write!( + f, + "cannot mix ',' (parallel) and '|' (chain) separators at the top level" + ) + } } } } @@ -147,32 +239,77 @@ pub fn parse_request(input: &str) -> Result { return parse_json_form(trimmed); } - // Function-call batch. + // Function-call batch `[...]` — parallel. if first == b'[' { return parse_fn_batch(trimmed); } - // Single op. + // Chain or single: starts with an identifier. + // Parse the first op, then check for `|` to detect chain mode. let mut p = Parser::new(trimmed); - let op = p.parse_op()?; + let first_op = p.parse_op()?; p.skip_ws(); + + if p.eof() { + // Single op — no separator follows. + return Ok(ParsedRequest { + ops: vec![first_op], + mode: ExecutionMode::Single, + }); + } + + if p.peek() == Some('|') { + // Chain mode: `op1 | op2 | ...` + return parse_chain_tail(p, first_op); + } + + // Unexpected trailing content after a single op. + Err(DslError::UnexpectedChar { + pos: p.pos, + found: p.peek().unwrap(), + expected: "'|' or end of input", + }) +} + +/// Parse the rest of a chain after the first op has been consumed. 
+/// +/// Called when we've seen `first_op` followed by `|`. Parses one or more +/// `| op` segments and returns a `Chain` request. +fn parse_chain_tail(mut p: Parser<'_>, first_op: ParsedOp) -> Result { + let mut ops = vec![first_op]; + while p.peek() == Some('|') { + if ops.len() >= MAX_OPS { + return Err(DslError::TooManyOps { + count: ops.len() + 1, + max: MAX_OPS, + }); + } + p.advance(1); // consume '|' + p.skip_ws(); + let op = p.parse_op()?; + ops.push(op); + p.skip_ws(); + } if !p.eof() { return Err(DslError::UnexpectedChar { pos: p.pos, found: p.peek().unwrap(), - expected: "end of input", + expected: "'|' or end of input", }); } - Ok(ParsedRequest { ops: vec![op] }) + Ok(ParsedRequest { + ops, + mode: ExecutionMode::Chain, + }) } fn parse_json_form(input: &str) -> Result { let v: Value = serde_json::from_str(input).map_err(|e| DslError::InvalidJson { error: e.to_string(), })?; - let arr: Vec = match v { - Value::Array(arr) => arr, - Value::Object(_) => vec![v], + let (arr, is_single) = match v { + Value::Array(arr) => (arr, false), + Value::Object(_) => (vec![v], true), other => { return Err(DslError::InvalidJson { error: format!("expected object or array of objects, got {other}"), @@ -201,7 +338,7 @@ fn parse_json_form(input: &str) -> Result { .get("args") .cloned() .unwrap_or_else(|| Value::Object(Map::new())); - let args = match args { + let args_map = match args { Value::Object(m) => m, other => { return Err(DslError::InvalidJson { @@ -209,9 +346,19 @@ fn parse_json_form(input: &str) -> Result { }) } }; + // JSON form does not support $prev references — all args are Values. 
+ let args: BTreeMap = args_map + .into_iter() + .map(|(k, v)| (k, ArgValue::Value(v))) + .collect(); ops.push(ParsedOp { tool, args }); } - Ok(ParsedRequest { ops }) + let mode = if is_single { + ExecutionMode::Single + } else { + ExecutionMode::Parallel + }; + Ok(ParsedRequest { ops, mode }) } fn parse_fn_batch(input: &str) -> Result { @@ -221,7 +368,10 @@ fn parse_fn_batch(input: &str) -> Result { let mut ops = Vec::new(); if p.peek() == Some(']') { p.advance(1); - return Ok(ParsedRequest { ops }); + return Ok(ParsedRequest { + ops, + mode: ExecutionMode::Parallel, + }); } loop { if ops.len() >= MAX_OPS { @@ -260,7 +410,10 @@ fn parse_fn_batch(input: &str) -> Result { expected: "end of input", }); } - Ok(ParsedRequest { ops }) + Ok(ParsedRequest { + ops, + mode: ExecutionMode::Parallel, + }) } // ── recursive-descent parser ──────────────────────────────────────────────── @@ -347,7 +500,7 @@ impl<'a> Parser<'a> { } self.expect_char('(')?; self.skip_ws(); - let mut args: Map = Map::new(); + let mut args: BTreeMap = BTreeMap::new(); if self.peek() == Some(')') { self.advance(1); return Ok(ParsedOp { tool, args }); @@ -356,11 +509,11 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; self.expect_char('=')?; self.skip_ws(); - let value = self.parse_value()?; + let arg_val = self.parse_arg_value()?; if args.contains_key(&name) { return Err(DslError::DuplicateArg { name }); } - args.insert(name, value); + args.insert(name, arg_val); self.skip_ws(); match self.peek() { Some(',') => { @@ -383,6 +536,49 @@ impl<'a> Parser<'a> { } } + /// Parse an argument value — either a `$prev` reference or a JSON literal. + fn parse_arg_value(&mut self) -> Result { + self.skip_ws(); + if self.peek() == Some('$') { + return self.parse_prev_ref(); + } + let v = self.parse_value()?; + Ok(ArgValue::Value(v)) + } + + /// Parse a `$prev` or `$prev.field.path` reference. 
+ /// + /// Grammar: `$prev` optionally followed by `.identifier(.identifier)*` + fn parse_prev_ref(&mut self) -> Result { + let start = self.pos; + // Consume `$` + self.advance(1); + // Must be followed by `prev` + let ident = self + .parse_identifier() + .map_err(|_| DslError::InvalidValue { + pos: start, + error: "expected '$prev' — '$' must be followed by 'prev'".into(), + })?; + if ident != "prev" { + return Err(DslError::InvalidValue { + pos: start, + error: format!("expected '$prev', found '${}'", ident), + }); + } + // Optional dot-path + let mut path = String::new(); + while self.peek() == Some('.') { + self.advance(1); // consume '.' + let segment = self.parse_identifier()?; + if !path.is_empty() { + path.push('.'); + } + path.push_str(&segment); + } + Ok(ArgValue::PrevRef { path }) + } + fn parse_value(&mut self) -> Result { self.skip_ws(); let start = self.pos; @@ -492,25 +688,38 @@ mod tests { use super::*; use serde_json::json; + fn req(s: &str) -> ParsedRequest { + parse_request(s).unwrap_or_else(|e| panic!("parse({s:?}) failed: {e}")) + } + fn ops(s: &str) -> Vec { - parse_request(s) - .unwrap_or_else(|e| panic!("parse({s:?}) failed: {e}")) - .ops + req(s).ops + } + + /// Extract the concrete `Value` from an `ArgValue::Value`, panicking on `PrevRef`. 
+ fn val(arg: &ArgValue) -> &Value { + match arg { + ArgValue::Value(v) => v, + ArgValue::PrevRef { path } => { + panic!("expected Value, got PrevRef {{ path: {path:?} }}") + } + } } #[test] fn single_op_no_args() { - let v = ops("next()"); - assert_eq!(v.len(), 1); - assert_eq!(v[0].tool, "next"); - assert!(v[0].args.is_empty()); + let r = req("next()"); + assert_eq!(r.mode, ExecutionMode::Single); + assert_eq!(r.ops.len(), 1); + assert_eq!(r.ops[0].tool, "next"); + assert!(r.ops[0].args.is_empty()); } #[test] fn single_op_with_string_arg() { let v = ops(r#"assign(title="ship release")"#); assert_eq!(v[0].tool, "assign"); - assert_eq!(v[0].args["title"], json!("ship release")); + assert_eq!(val(&v[0].args["title"]), &json!("ship release")); } #[test] @@ -519,60 +728,63 @@ mod tests { r#"create(kind="entity", entity_kind="concept", name="LoRA", weight=0.9, active=true)"#, ); assert_eq!(v[0].tool, "create"); - assert_eq!(v[0].args["kind"], json!("entity")); - assert_eq!(v[0].args["weight"], json!(0.9)); - assert_eq!(v[0].args["active"], json!(true)); + assert_eq!(val(&v[0].args["kind"]), &json!("entity")); + assert_eq!(val(&v[0].args["weight"]), &json!(0.9)); + assert_eq!(val(&v[0].args["active"]), &json!(true)); } #[test] fn batch_three_ops() { - let v = ops( + let r = req( r#"[create(kind="entity", name="A"), create(kind="entity", name="B"), link(source_id="x", target_id="y", relation="extends")]"#, ); - assert_eq!(v.len(), 3); - assert_eq!(v[0].tool, "create"); - assert_eq!(v[2].tool, "link"); - assert_eq!(v[2].args["relation"], json!("extends")); + assert_eq!(r.mode, ExecutionMode::Parallel); + assert_eq!(r.ops.len(), 3); + assert_eq!(r.ops[0].tool, "create"); + assert_eq!(r.ops[2].tool, "link"); + assert_eq!(val(&r.ops[2].args["relation"]), &json!("extends")); } #[test] fn empty_batch_is_legal() { - let v = ops("[]"); - assert!(v.is_empty()); + let r = req("[]"); + assert_eq!(r.mode, ExecutionMode::Parallel); + assert!(r.ops.is_empty()); } #[test] fn 
nested_array_and_object_values() { let v = ops(r#"assign(title="x", tags=["a","b"], properties={"k":"v","n":1})"#); - assert_eq!(v[0].args["tags"], json!(["a", "b"])); - assert_eq!(v[0].args["properties"], json!({"k": "v", "n": 1})); + assert_eq!(val(&v[0].args["tags"]), &json!(["a", "b"])); + assert_eq!(val(&v[0].args["properties"]), &json!({"k": "v", "n": 1})); } #[test] fn string_with_comma_and_paren_inside() { let v = ops(r#"assign(title="hello, world (now)")"#); - assert_eq!(v[0].args["title"], json!("hello, world (now)")); + assert_eq!(val(&v[0].args["title"]), &json!("hello, world (now)")); } #[test] fn string_with_escaped_quote() { let v = ops(r#"assign(title="he said \"hi\"")"#); - assert_eq!(v[0].args["title"], json!("he said \"hi\"")); + assert_eq!(val(&v[0].args["title"]), &json!("he said \"hi\"")); } #[test] fn null_and_negative_number() { let v = ops(r#"update(id="x", description=null, weight=-0.5)"#); - assert_eq!(v[0].args["description"], json!(null)); - assert_eq!(v[0].args["weight"], json!(-0.5)); + assert_eq!(val(&v[0].args["description"]), &json!(null)); + assert_eq!(val(&v[0].args["weight"]), &json!(-0.5)); } #[test] fn json_form_batch_parses() { - let v = ops(r#"[{"tool":"next","args":{}}, {"tool":"complete","args":{"id":"abc"}}]"#); - assert_eq!(v.len(), 2); - assert_eq!(v[1].tool, "complete"); - assert_eq!(v[1].args["id"], json!("abc")); + let r = req(r#"[{"tool":"next","args":{}}, {"tool":"complete","args":{"id":"abc"}}]"#); + assert_eq!(r.mode, ExecutionMode::Parallel); + assert_eq!(r.ops.len(), 2); + assert_eq!(r.ops[1].tool, "complete"); + assert_eq!(val(&r.ops[1].args["id"]), &json!("abc")); } #[test] @@ -591,9 +803,10 @@ mod tests { #[test] fn json_form_single_object_is_treated_as_one_op() { - let v = ops(r#"{"tool":"next","args":{}}"#); - assert_eq!(v.len(), 1); - assert_eq!(v[0].tool, "next"); + let r = req(r#"{"tool":"next","args":{}}"#); + assert_eq!(r.mode, ExecutionMode::Single); + assert_eq!(r.ops.len(), 1); + 
assert_eq!(r.ops[0].tool, "next"); } #[test] @@ -646,7 +859,7 @@ mod tests { let v = ops(r#"recall(query="test")"#); assert_eq!(v.len(), 1); assert_eq!(v[0].tool, "recall"); - assert_eq!(v[0].args["query"], json!("test")); + assert_eq!(val(&v[0].args["query"]), &json!("test")); } #[test] @@ -654,18 +867,19 @@ mod tests { let v = ops(r#"search(query="test", limit=5)"#); assert_eq!(v.len(), 1); assert_eq!(v[0].tool, "search"); - assert_eq!(v[0].args["query"], json!("test")); - assert_eq!(v[0].args["limit"], json!(5)); + assert_eq!(val(&v[0].args["query"]), &json!("test")); + assert_eq!(val(&v[0].args["limit"]), &json!(5)); } #[test] fn parallel_recall_and_inbox() { - let v = ops(r#"[recall(query="x"), inbox()]"#); - assert_eq!(v.len(), 2); - assert_eq!(v[0].tool, "recall"); - assert_eq!(v[0].args["query"], json!("x")); - assert_eq!(v[1].tool, "inbox"); - assert!(v[1].args.is_empty()); + let r = req(r#"[recall(query="x"), inbox()]"#); + assert_eq!(r.mode, ExecutionMode::Parallel); + assert_eq!(r.ops.len(), 2); + assert_eq!(r.ops[0].tool, "recall"); + assert_eq!(val(&r.ops[0].args["query"]), &json!("x")); + assert_eq!(r.ops[1].tool, "inbox"); + assert!(r.ops[1].args.is_empty()); } // ── JSON form edge cases ─────────────────────────────────────────────────── @@ -697,8 +911,8 @@ mod tests { fn dotted_tool_with_args() { let v = ops(r#"recall.candidates(query="test", limit=5)"#); assert_eq!(v[0].tool, "recall.candidates"); - assert_eq!(v[0].args["query"], json!("test")); - assert_eq!(v[0].args["limit"], json!(5)); + assert_eq!(val(&v[0].args["query"]), &json!("test")); + assert_eq!(val(&v[0].args["limit"]), &json!(5)); } #[test] @@ -727,12 +941,120 @@ mod tests { #[test] fn boolean_false_as_arg_value() { let v = ops("flag(active=false)"); - assert_eq!(v[0].args["active"], json!(false)); + assert_eq!(val(&v[0].args["active"]), &json!(false)); } #[test] fn unicode_string_arg_preserved() { let v = ops(r#"assign(title="café")"#); - assert_eq!(v[0].args["title"], 
json!("café")); + assert_eq!(val(&v[0].args["title"]), &json!("café")); + } + + // ── Chain mode (ADR-016) ────────────────────────────────────────────────── + + #[test] + fn chain_two_ops_with_prev_ref() { + let r = req( + r#"create(kind="entity", entity_kind="concept", name="A") | link(source_id=$prev.id, target_id="abc", relation="extends")"#, + ); + assert_eq!(r.mode, ExecutionMode::Chain); + assert_eq!(r.ops.len(), 2); + assert_eq!(r.ops[0].tool, "create"); + assert_eq!(r.ops[1].tool, "link"); + // The second op's source_id should be a PrevRef + assert_eq!( + r.ops[1].args["source_id"], + ArgValue::PrevRef { path: "id".into() } + ); + // target_id is a concrete value + assert_eq!(val(&r.ops[1].args["target_id"]), &json!("abc")); + } + + #[test] + fn chain_three_ops_mode() { + let r = req( + r#"create(kind="entity", name="A") | link(source_id=$prev.id, target_id="b", relation="extends") | update(id=$prev.id, description="desc")"#, + ); + assert_eq!(r.mode, ExecutionMode::Chain); + assert_eq!(r.ops.len(), 3); + assert_eq!(r.ops[2].args["id"], ArgValue::PrevRef { path: "id".into() }); + } + + #[test] + fn chain_prev_no_field_selector() { + // $prev alone (no dot path) refers to the whole prior result. 
+ let r = req(r#"next() | update(id=$prev)"#); + assert_eq!(r.mode, ExecutionMode::Chain); + assert_eq!(r.ops[1].args["id"], ArgValue::PrevRef { path: "".into() }); + } + + #[test] + fn chain_prev_deep_path() { + let r = req( + r#"create(kind="entity", name="A") | link(source_id=$prev.result.id, target_id="b", relation="extends")"#, + ); + assert_eq!(r.mode, ExecutionMode::Chain); + assert_eq!( + r.ops[1].args["source_id"], + ArgValue::PrevRef { + path: "result.id".into() + } + ); + } + + #[test] + fn single_op_mode() { + let r = req("next()"); + assert_eq!(r.mode, ExecutionMode::Single); + } + + #[test] + fn chain_too_many_ops_rejected() { + let mut s = String::from("next()"); + for _ in 0..MAX_OPS { + s.push_str(" | next()"); + } + let err = parse_request(&s).unwrap_err(); + assert!(matches!(err, DslError::TooManyOps { .. })); + } + + // ── ArgValue helpers ────────────────────────────────────────────────────── + + #[test] + fn arg_value_resolve_prev_simple() { + let prev = json!({"id": "abc-123", "name": "A"}); + let r = ArgValue::PrevRef { path: "id".into() }; + assert_eq!(r.resolve_prev(&prev), Some(&json!("abc-123"))); + } + + #[test] + fn arg_value_resolve_prev_empty_path() { + let prev = json!({"id": "x"}); + let r = ArgValue::PrevRef { path: "".into() }; + assert_eq!(r.resolve_prev(&prev), Some(&prev)); + } + + #[test] + fn arg_value_resolve_prev_nested_path() { + let prev = json!({"result": {"id": "nested-id"}}); + let r = ArgValue::PrevRef { + path: "result.id".into(), + }; + assert_eq!(r.resolve_prev(&prev), Some(&json!("nested-id"))); + } + + #[test] + fn arg_value_resolve_prev_missing_field_returns_none() { + let prev = json!({"id": "x"}); + let r = ArgValue::PrevRef { + path: "nonexistent".into(), + }; + assert_eq!(r.resolve_prev(&prev), None); + } + + #[test] + fn arg_value_value_returns_none_for_resolve_prev() { + let r = ArgValue::Value(json!("hello")); + assert_eq!(r.resolve_prev(&json!({})), None); } } diff --git 
a/crates/khive-retrieval/Cargo.toml b/crates/khive-retrieval/Cargo.toml index 19a761e2..8297d847 100644 --- a/crates/khive-retrieval/Cargo.toml +++ b/crates/khive-retrieval/Cargo.toml @@ -11,15 +11,15 @@ categories.workspace = true description = "Hybrid retrieval composer (HNSW + BM25 + fusion + graph + cross-encoder) with deterministic scoring" [dependencies] -khive-hnsw = { version = "0.2.0", path = "../khive-hnsw" } -khive-bm25 = { version = "0.2.0", path = "../khive-bm25" } -khive-fusion = { version = "0.2.0", path = "../khive-fusion" } -khive-score = { version = "0.2.0", path = "../khive-score" } -khive-types = { version = "0.2.0", path = "../khive-types" } -khive-fold = { version = "0.2.0", path = "../khive-fold", optional = true } -khive-storage = { version = "0.2.0", path = "../khive-storage", optional = true } -khive-db = { version = "0.2.0", path = "../khive-db" } -khive-gate = { version = "0.2.0", path = "../khive-gate", optional = true } +khive-hnsw = { version = "0.2.1", path = "../khive-hnsw" } +khive-bm25 = { version = "0.2.1", path = "../khive-bm25" } +khive-fusion = { version = "0.2.1", path = "../khive-fusion" } +khive-score = { version = "0.2.1", path = "../khive-score" } +khive-types = { version = "0.2.1", path = "../khive-types" } +khive-fold = { version = "0.2.1", path = "../khive-fold", optional = true } +khive-storage = { version = "0.2.1", path = "../khive-storage", optional = true } +khive-db = { version = "0.2.1", path = "../khive-db" } +khive-gate = { version = "0.2.1", path = "../khive-gate", optional = true } lattice-embed = { workspace = true } serde = { workspace = true } @@ -39,15 +39,14 @@ rand = { version = "0.8", optional = true } default = [] # Policy-based access control for search results (uses khive-gate API) policy = ["khive-gate"] -# HNSW checkpoint integration with khive-fold -# Note: khive_hnsw::HnswCheckpoint/HnswCheckpointStore depend on khive_fold::Checkpoint -# which doesn't exist in the current khive-fold API. 
Those re-exports are gated out -# until the khive-fold Checkpoint trait is ported. -checkpoint = ["khive-fold"] +# HNSW checkpoint integration with khive-fold (ADR-030 F167) +# Enables HnswCheckpoint/HnswCheckpointStore re-exports and khive-hnsw checkpoint support. +checkpoint = ["khive-fold", "khive-hnsw/checkpoint"] # SQLite-based persistence for HNSW and BM25 indexes persist = ["rusqlite", "tracing", "rand"] -# Adapters bridging khive-storage backends (sqlite-vec, FTS5) to retrieval search traits -storage-adapters = ["khive-storage"] +# Adapters bridging khive-storage backends (sqlite-vec, FTS5) to retrieval search traits. +# Enables the vectors feature on khive-db so sqlite-vec is loaded for tests. +storage-adapters = ["khive-storage", "khive-db/vectors"] # Native cross-encoder reranking (deferred until khive-inference is ported) native-rerank = [] # Native embedding service (delegated to lattice-embed; reserved for future feature-gating) diff --git a/crates/khive-retrieval/src/adapters/mod.rs b/crates/khive-retrieval/src/adapters/mod.rs index 479e0fd2..bcad7b45 100644 --- a/crates/khive-retrieval/src/adapters/mod.rs +++ b/crates/khive-retrieval/src/adapters/mod.rs @@ -106,10 +106,12 @@ impl VectorSearch for StorageVectorSearch { top_k: usize, ) -> Result> { let request = VectorSearchRequest { - query_embedding: embedding.to_vec(), + query_vectors: vec![embedding.to_vec()], top_k: top_k as u32, namespace: None, kind: None, + filter: None, + backend_hints: None, }; let hits = self @@ -206,11 +208,23 @@ mod tests { let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); store - .insert(id1, SubstrateKind::Entity, "test", vec![1.0, 0.0, 0.0]) + .insert( + id1, + SubstrateKind::Entity, + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], + ) .await .unwrap(); store - .insert(id2, SubstrateKind::Entity, "test", vec![0.0, 1.0, 0.0]) + .insert( + id2, + SubstrateKind::Entity, + "local", + "content", + vec![vec![0.0, 1.0, 0.0]], + ) .await .unwrap(); @@ -236,8 +250,9 @@ 
mod tests { .insert( Uuid::new_v4(), SubstrateKind::Entity, - "test", - vec![1.0, 0.0, 0.0], + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], ) .await .unwrap(); @@ -267,7 +282,13 @@ mod tests { let id = Uuid::new_v4(); store - .insert(id, SubstrateKind::Entity, "test", vec![1.0, 0.0, 0.0]) + .insert( + id, + SubstrateKind::Entity, + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], + ) .await .unwrap(); @@ -413,7 +434,13 @@ mod tests { // Insert into both stores vec_store - .insert(id, SubstrateKind::Note, "test", vec![1.0, 0.0, 0.0]) + .insert( + id, + SubstrateKind::Note, + "local", + "content", + vec![vec![1.0, 0.0, 0.0]], + ) .await .unwrap(); text_store diff --git a/crates/khive-retrieval/src/graph/bfs.rs b/crates/khive-retrieval/src/graph/bfs.rs index e4c5b1d1..f85f5dd7 100644 --- a/crates/khive-retrieval/src/graph/bfs.rs +++ b/crates/khive-retrieval/src/graph/bfs.rs @@ -57,10 +57,10 @@ use super::types::{PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSA /// } /// ``` /// -/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.bfs_terminates` +/// **PROOF CORRESPONDENCE**: `khive.Retrieval.Graph.bfs_terminates` /// Queue shrinks each iteration; visited set prevents re-enqueue; terminates when queue empty. /// -/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.bfs_complete` +/// **PROOF CORRESPONDENCE**: `khive.Retrieval.Graph.bfs_complete` /// All reachable vertices within max_depth are visited; BFS explores level-by-level. pub async fn bfs_traverse( store: &S, @@ -75,7 +75,7 @@ pub async fn bfs_traverse( .min(MAX_TRAVERSAL_RESULTS); let min_weight = options.min_weight.unwrap_or(f64::NEG_INFINITY); - // **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.visited_mono` + // **PROOF CORRESPONDENCE**: `khive.Retrieval.Graph.visited_mono` // Visited set only grows (insert-only); never shrinks during traversal. // EntityRef implements Hash + Eq, enabling direct use as HashMap key. 
let mut visited: HashSet = HashSet::new(); diff --git a/crates/khive-retrieval/src/graph/dfs.rs b/crates/khive-retrieval/src/graph/dfs.rs index 5bed7156..23cf371b 100644 --- a/crates/khive-retrieval/src/graph/dfs.rs +++ b/crates/khive-retrieval/src/graph/dfs.rs @@ -52,7 +52,7 @@ use super::types::{PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSA /// let nodes = dfs_traverse(&store, &ctx, start_ref, &options).await?; /// ``` /// -/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.dfs_terminates_bound` +/// **PROOF CORRESPONDENCE**: `khive.Retrieval.Graph.dfs_terminates_bound` /// Each vertex visited at most once; |visited| bounded by |V|; stack pops exceed pushes eventually. pub async fn dfs_traverse( store: &S, @@ -67,7 +67,7 @@ pub async fn dfs_traverse( .min(MAX_TRAVERSAL_RESULTS); let min_weight = options.min_weight.unwrap_or(f64::NEG_INFINITY); - // **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.visited_mono` + // **PROOF CORRESPONDENCE**: `khive.Retrieval.Graph.visited_mono` // Visited set only grows (insert-only); never shrinks during traversal. // EntityRef implements Hash + Eq, enabling direct use as HashMap key. let mut visited: HashSet = HashSet::new(); diff --git a/crates/khive-retrieval/src/lib.rs b/crates/khive-retrieval/src/lib.rs index 60e61287..ed5e28b8 100644 --- a/crates/khive-retrieval/src/lib.rs +++ b/crates/khive-retrieval/src/lib.rs @@ -146,14 +146,13 @@ pub use khive_hnsw::{ DistanceMetric, HnswCheckpointConfig, HnswConfig, HnswIndex, HnswSearchContext, HnswSnapshot, NodeId, RebuildStats, TombstoneStats, }; -// TODO(port-checkpoint): HnswCheckpoint/HnswCheckpointStore depend on khive_fold::Checkpoint -// which doesn't exist in the current khive-fold API. Re-enable when ported. 
-// #[cfg(feature = "checkpoint")] -// pub use khive_hnsw::{HnswCheckpoint, HnswCheckpointStore}; +// Formal proof: khive.Retrieval.HNSW.checkpoint_correctness pub use hybrid::{ fuse_search_results, DualIndexConfig, DualIndexRouter, DualIndexStrategy, HybridConfig, HybridSearcher, KeywordSearch, Query, Reranker, VectorSearch, }; +#[cfg(feature = "checkpoint")] +pub use khive_hnsw::{HnswCheckpoint, HnswCheckpointStore}; // TODO(port-rerank): native cross-encoder reranking deferred; khive-inference not ported yet // #[cfg(feature = "native-rerank")] // pub use hybrid::{CrossEncoderScorer, NativeCrossEncoderReranker, RerankDocumentResolver}; diff --git a/crates/khive-runtime/Cargo.toml b/crates/khive-runtime/Cargo.toml index e3623dd1..ee41fb8c 100644 --- a/crates/khive-runtime/Cargo.toml +++ b/crates/khive-runtime/Cargo.toml @@ -11,13 +11,13 @@ categories.workspace = true description = "Composable Service API: entity/note CRUD, graph traversal, hybrid search, curation." [dependencies] -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } -khive-storage = { version = "0.2.0", path = "../khive-storage" } -khive-score = { version = "0.2.0", path = "../khive-score" } -khive-fold = { version = "0.2.0", path = "../khive-fold" } -khive-db = { version = "0.2.0", path = "../khive-db", features = ["vectors"] } -khive-query = { version = "0.2.0", path = "../khive-query" } -khive-gate = { version = "0.2.0", path = "../khive-gate" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-score = { version = "0.2.1", path = "../khive-score" } +khive-fold = { version = "0.2.1", path = "../khive-fold" } +khive-db = { version = "0.2.1", path = "../khive-db", features = ["vectors"] } +khive-query = { version = "0.2.1", path = "../khive-query" } +khive-gate = { version = "0.2.1", path = "../khive-gate" } inventory = { workspace = true } tokio = { workspace = 
true } async-trait = { workspace = true } diff --git a/crates/khive-runtime/src/curation.rs b/crates/khive-runtime/src/curation.rs index c9a9c95b..a2bad299 100644 --- a/crates/khive-runtime/src/curation.rs +++ b/crates/khive-runtime/src/curation.rs @@ -13,9 +13,10 @@ use uuid::Uuid; use khive_db::SqliteError; use khive_storage::types::{EdgeFilter, TextDocument}; use khive_storage::{EdgeRelation, Entity, SubstrateKind}; +use khive_types::EventKind; use crate::error::{RuntimeError, RuntimeResult}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; // --------------------------------------------------------------------------- // Public types @@ -46,10 +47,10 @@ pub struct EntityPatch { pub tags: Option>, } -/// Strategy used when merging two entities. +/// Policy used when deduplicating two entities. #[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] -pub enum MergeStrategy { +pub enum EntityDedupMergePolicy { /// `into` values win on conflict. Tags are unioned. Properties from `from` fill in /// keys that `into` doesn't have. This is the default. #[default] @@ -60,7 +61,17 @@ pub enum MergeStrategy { Union, } -/// Result returned by `merge_entity`. +/// Strategy for merging note content when two notes are combined. +#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ContentMergeStrategy { + #[default] + Append, + PreferInto, + PreferFrom, +} + +/// Result returned by `merge_entity` / `merge_note`. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MergeSummary { pub kept_id: Uuid, @@ -68,6 +79,56 @@ pub struct MergeSummary { pub edges_rewired: usize, pub properties_merged: usize, pub tags_unioned: usize, + pub content_appended: bool, + pub dry_run: bool, +} + +/// Patch for `update_edge`. Only `Some(_)` fields are applied; `None` means "leave unchanged". 
+/// +/// For `properties` — replacement semantics (not deep merge): `Some(value)` replaces +/// the entire metadata object. `None` leaves metadata unchanged. +#[derive(Clone, Debug, Default)] +pub struct EdgePatch { + pub relation: Option, + pub weight: Option, + pub properties: Option, +} + +/// Patch for `update_note`. Only `Some(_)` fields are applied; `None` means "leave unchanged". +/// +/// For `salience`/`decay_factor`: +/// - `None` (outer) — leave unchanged +/// - `Some(None)` — clear the value +/// - `Some(Some(v))` — set to v +#[derive(Clone, Debug, Default)] +pub struct NotePatch { + pub name: Option>, + pub content: Option, + pub salience: Option>, + pub decay_factor: Option>, + pub properties: Option, + pub(crate) kind_status: Option, +} + +impl NotePatch { + /// Construct a `NotePatch` from the public fields only. + /// Use this from external crates; `kind_status` is set to `None`. + pub fn new( + name: Option>, + content: Option, + salience: Option>, + decay_factor: Option>, + properties: Option, + ) -> Self { + Self { + name, + content, + salience, + decay_factor, + properties, + kind_status: None, + } + } } /// Filter for `list_edges` / `count_edges`. @@ -108,46 +169,70 @@ impl KhiveRuntime { /// namespace. This enforces ADR-007 namespace isolation at the runtime layer. pub async fn update_entity( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, patch: EntityPatch, ) -> RuntimeResult { - let store = self.entities(namespace)?; + let store = self.entities(token)?; let mut entity = store .get_entity(id) .await? 
.ok_or_else(|| RuntimeError::NotFound(format!("entity {id}")))?; - if entity.namespace != self.ns(namespace) { - return Err(RuntimeError::NotFound(format!("entity {id}"))); - } + self.ensure_namespace(&entity.namespace, token, id)?; let mut text_changed = false; + let mut changed_fields: Vec<&'static str> = Vec::new(); if let Some(name) = patch.name { text_changed |= entity.name != name; entity.name = name; + changed_fields.push("name"); } if let Some(desc_patch) = patch.description { text_changed |= entity.description != desc_patch; entity.description = desc_patch; + changed_fields.push("description"); } if let Some(props) = patch.properties { - let (merged, _) = - merge_properties(&entity.properties, &Some(props), MergeStrategy::PreferFrom); + let (merged, _) = merge_properties( + &entity.properties, + &Some(props), + EntityDedupMergePolicy::PreferFrom, + ); entity.properties = merged; + changed_fields.push("properties"); } if let Some(tags) = patch.tags { entity.tags = tags; + changed_fields.push("tags"); } entity.updated_at = chrono::Utc::now().timestamp_micros(); store.upsert_entity(entity.clone()).await?; if text_changed { - self.reindex_entity(namespace, &entity).await?; + self.reindex_entity(token, &entity).await?; } + let event_store = self.events(token)?; + let event = khive_storage::event::Event::new( + entity.namespace.clone(), + "update", + EventKind::EntityUpdated, + SubstrateKind::Entity, + "", + ) + .with_target(entity.id) + .with_payload(serde_json::json!({ + "id": entity.id, + "namespace": entity.namespace, + "changed_fields": changed_fields, + })); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("update_entity: event store write failed: {e}")) + })?; + Ok(entity) } @@ -155,7 +240,9 @@ impl KhiveRuntime { /// /// All edges incident to `from_id` are rewired to `into_id`. Self-loops that would /// result from the rewire are dropped. Properties and tags are merged per `strategy`. 
- /// `from_id` is hard-deleted and removed from indexes. Returns a summary. + /// `from_id` is tombstoned with merge provenance and removed from indexes. Returns a summary. + /// + /// If `dry_run` is true, computes and returns the planned summary without mutating any rows. /// /// Atomic: all SQL (entity reads/writes, edge rewires, FTS updates, vec-index delete) /// runs on a single pool connection inside one `BEGIN IMMEDIATE` transaction via @@ -163,12 +250,18 @@ impl KhiveRuntime { /// `into_id` is performed after the transaction (requires async embedding computation). pub async fn merge_entity( &self, - namespace: Option<&str>, + token: &NamespaceToken, into_id: Uuid, from_id: Uuid, - strategy: MergeStrategy, + strategy: EntityDedupMergePolicy, + dry_run: bool, ) -> RuntimeResult { - let ns = self.ns(namespace).to_string(); + if into_id == from_id { + return Err(RuntimeError::InvalidInput( + "cannot merge an entity into itself".into(), + )); + } + let ns = token.namespace().as_str().to_owned(); let sanitized_ns: String = ns .chars() .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }) @@ -185,11 +278,11 @@ impl KhiveRuntime { // Ensure all required tables exist before entering the transaction. // Each accessor applies its DDL idempotently via `CREATE TABLE IF NOT EXISTS`. 
- let _ = self.entities(namespace)?; - let _ = self.graph(namespace)?; - let _ = self.text(namespace)?; + let _ = self.entities(token)?; + let _ = self.graph(token)?; + let _ = self.text(token)?; if self.config().embedding_model.is_some() { - let _ = self.vectors(namespace)?; + let _ = self.vectors(token)?; } let pool = self.backend().pool_arc(); @@ -197,7 +290,9 @@ impl KhiveRuntime { let (summary, updated_entity) = tokio::task::spawn_blocking(move || { let guard = pool.writer()?; guard.transaction(|conn| { - merge_entity_sql(conn, ns, fts_table, vec_table, into_id, from_id, strategy) + merge_entity_sql( + conn, ns, fts_table, vec_table, into_id, from_id, strategy, dry_run, + ) }) }) .await @@ -205,10 +300,36 @@ impl KhiveRuntime { // If vectors are configured, reindex into_entity (requires async embedding). // FTS and vec-delete were already committed inside the transaction above. - if self.config().embedding_model.is_some() { - self.reindex_entity(namespace, &updated_entity).await?; + if !dry_run && self.config().embedding_model.is_some() { + self.reindex_entity(token, &updated_entity).await?; } + let event_store = self.events(token)?; + // Mirror the wire-level strategy spelling from MergeParams so consumers + // can round-trip the policy string back into a request. 
+ let policy_str = match strategy { + EntityDedupMergePolicy::PreferInto => "prefer_into", + EntityDedupMergePolicy::PreferFrom => "prefer_from", + EntityDedupMergePolicy::Union => "union", + }; + let event = khive_storage::event::Event::new( + updated_entity.namespace.clone(), + "merge", + EventKind::EntityMerged, + SubstrateKind::Entity, + "", + ) + .with_target(summary.kept_id) + .with_payload(serde_json::json!({ + "into_id": summary.kept_id, + "from_id": summary.removed_id, + "policy": policy_str, + "edges_rewired": summary.edges_rewired, + })); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("merge_entity: event store write failed: {e}")) + })?; + Ok(summary) } @@ -221,16 +342,16 @@ impl KhiveRuntime { /// reindex from writing the search document into the wrong namespace's FTS index. pub(crate) async fn reindex_entity( &self, - namespace: Option<&str>, + token: &NamespaceToken, entity: &Entity, ) -> RuntimeResult<()> { let body = match &entity.description { Some(d) if !d.is_empty() => format!("{} {}", entity.name, d), _ => entity.name.clone(), }; - // Use entity.namespace (authoritative) rather than self.ns(namespace) (caller claim). + // Use entity.namespace (authoritative) rather than token.namespace().as_str() (caller claim). let ns = entity.namespace.clone(); - self.text(namespace)? + self.text(token)? .upsert_document(TextDocument { subject_id: entity.id, kind: SubstrateKind::Entity, @@ -245,8 +366,14 @@ impl KhiveRuntime { if self.config().embedding_model.is_some() { let vector = self.embed(&body).await?; - self.vectors(namespace)? - .insert(entity.id, SubstrateKind::Entity, &ns, vector) + self.vectors(token)? + .insert( + entity.id, + SubstrateKind::Entity, + &ns, + "entity.body", + vec![vector], + ) .await?; } @@ -256,16 +383,176 @@ impl KhiveRuntime { /// Remove an entity from FTS5 and (if configured) vector indexes. 
pub(crate) async fn remove_from_indexes( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, ) -> RuntimeResult<()> { - let ns = self.ns(namespace).to_string(); - self.text(namespace)?.delete_document(&ns, id).await?; + let ns = token.namespace().as_str().to_owned(); + self.text(token)?.delete_document(&ns, id).await?; + if self.config().embedding_model.is_some() { + self.vectors(token)?.delete(id).await?; + } + Ok(()) + } + + /// Re-upsert FTS5 document (and vector if model configured) for the note. + pub(crate) async fn reindex_note( + &self, + token: &NamespaceToken, + note: &khive_storage::note::Note, + ) -> RuntimeResult<()> { + let ns = note.namespace.clone(); + self.text_for_notes(token)? + .upsert_document(TextDocument { + subject_id: note.id, + kind: SubstrateKind::Note, + title: note.name.clone(), + body: note.content.clone(), + tags: Vec::new(), + namespace: ns.clone(), + metadata: note.properties.clone(), + updated_at: chrono::Utc::now(), + }) + .await?; + if self.config().embedding_model.is_some() { - self.vectors(namespace)?.delete(id).await?; + let vector = self.embed(&note.content).await?; + self.vectors(token)? + .insert( + note.id, + SubstrateKind::Note, + &ns, + "note.content", + vec![vector], + ) + .await?; } Ok(()) } + + /// Patch-style note update. + pub async fn update_note( + &self, + token: &NamespaceToken, + id: Uuid, + patch: NotePatch, + ) -> RuntimeResult { + let store = self.notes(token)?; + let mut note = store + .get_note(id) + .await? 
+ .ok_or_else(|| RuntimeError::NotFound(format!("note {id}")))?; + + if note.namespace != token.namespace().as_str() { + return Err(RuntimeError::NotFound(format!("note {id}"))); + } + + let mut text_changed = false; + + if let Some(name_patch) = patch.name { + text_changed |= note.name != name_patch; + note.name = name_patch; + } + if let Some(content) = patch.content { + text_changed |= note.content != content; + note.content = content; + } + if let Some(salience_patch) = patch.salience { + note.salience = salience_patch.map(|s| s.clamp(0.0, 1.0)); + } + if let Some(decay_patch) = patch.decay_factor { + note.decay_factor = decay_patch.map(|d| d.max(0.0)); + } + if let Some(props) = patch.properties { + let (merged, _) = merge_properties( + &note.properties, + &Some(props), + EntityDedupMergePolicy::PreferFrom, + ); + note.properties = merged; + } + if let Some(status) = patch.kind_status { + note.status = status; + } + + note.updated_at = chrono::Utc::now().timestamp_micros(); + store.upsert_note(note.clone()).await?; + + if text_changed { + self.reindex_note(token, &note).await?; + } + + Ok(note) + } + + /// Merge `from_id` note into `into_id` note. + /// + /// Both notes must exist in the namespace and have the same `kind`. Content is merged + /// per `content_strategy`. Properties are merged per `strategy`. `from_id` is + /// tombstoned (status='deleted', deleted_at set). Returns a summary. + /// + /// If `dry_run` is true, computes and returns the planned summary without mutating + /// any rows, edges, or indexes. 
+ pub async fn merge_note( + &self, + token: &NamespaceToken, + into_id: Uuid, + from_id: Uuid, + strategy: EntityDedupMergePolicy, + content_strategy: ContentMergeStrategy, + dry_run: bool, + ) -> RuntimeResult { + if into_id == from_id { + return Err(RuntimeError::InvalidInput( + "cannot merge a note into itself".into(), + )); + } + let ns = token.namespace().as_str().to_string(); + let sanitized_ns: String = ns + .chars() + .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }) + .collect(); + let fts_table = format!("fts_notes_{}", sanitized_ns); + let vec_table = self.config().embedding_model.map(|model| { + let key: String = model + .to_string() + .chars() + .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }) + .collect(); + format!("vec_{}", key) + }); + + let _ = self.notes(token)?; + let _ = self.graph(token)?; + let _ = self.text_for_notes(token)?; + if self.config().embedding_model.is_some() { + let _ = self.vectors(token)?; + } + + let pool = self.backend().pool_arc(); + let (summary, updated_note) = tokio::task::spawn_blocking(move || { + let guard = pool.writer()?; + guard.transaction(|conn| { + merge_note_sql( + conn, + ns, + fts_table, + vec_table, + into_id, + from_id, + strategy, + content_strategy, + dry_run, + ) + }) + }) + .await + .map_err(|e| RuntimeError::Internal(e.to_string()))??; + + if !dry_run && self.config().embedding_model.is_some() { + self.reindex_note(token, &updated_note).await?; + } + Ok(summary) + } } // --------------------------------------------------------------------------- @@ -280,8 +567,8 @@ fn read_merge_entity( ) -> Result { let id_str = id.to_string(); let mut stmt = conn.prepare( - "SELECT id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at \ + "SELECT id, namespace, kind, entity_type, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id \ FROM entities WHERE id = ?1 AND deleted_at IS NULL", )?; let mut rows = 
stmt.query(rusqlite::params![id_str])?; @@ -292,13 +579,16 @@ fn read_merge_entity( let id_s: String = row.get(0)?; let ns: String = row.get(1)?; let kind: String = row.get(2)?; - let name: String = row.get(3)?; - let description: Option = row.get(4)?; - let properties_str: Option = row.get(5)?; - let tags_str: String = row.get(6)?; - let created_at: i64 = row.get(7)?; - let updated_at: i64 = row.get(8)?; - let deleted_at: Option = row.get(9)?; + let entity_type: Option = row.get(3)?; + let name: String = row.get(4)?; + let description: Option = row.get(5)?; + let properties_str: Option = row.get(6)?; + let tags_str: String = row.get(7)?; + let created_at: i64 = row.get(8)?; + let updated_at: i64 = row.get(9)?; + let deleted_at: Option = row.get(10)?; + let merged_into_str: Option = row.get(11)?; + let merge_event_id_str: Option = row.get(12)?; if ns != namespace { return Err(SqliteError::InvalidData(format!( @@ -314,11 +604,22 @@ fn read_merge_entity( .transpose()?; let tags: Vec = serde_json::from_str(&tags_str).map_err(|e| SqliteError::InvalidData(e.to_string()))?; + let merged_into = merged_into_str + .as_deref() + .map(Uuid::parse_str) + .transpose() + .map_err(|e| SqliteError::InvalidData(e.to_string()))?; + let merge_event_id = merge_event_id_str + .as_deref() + .map(Uuid::parse_str) + .transpose() + .map_err(|e| SqliteError::InvalidData(e.to_string()))?; Ok(Entity { id: entity_id, namespace: ns, kind, + entity_type, name, description, properties, @@ -326,14 +627,19 @@ fn read_merge_entity( created_at, updated_at, deleted_at, + merged_into, + merge_event_id, }) } /// All merge SQL on one connection inside an already-open `BEGIN IMMEDIATE` transaction. /// /// Reads both entities, rewires/drops incident edges, merges entity fields, updates FTS, -/// deletes the `from` vec entry (if `vec_table` is Some), and hard-deletes `from` from -/// entities. Returns the updated `into` entity so the caller can do the async vec re-insert. 
+/// deletes the `from` vec entry (if `vec_table` is Some), and tombstones `from` with merge +/// provenance. Returns the updated `into` entity so the caller can do the async vec re-insert. +/// +/// When `dry_run` is true, all reads and computations are performed but no writes are issued. +#[allow(clippy::too_many_arguments)] fn merge_entity_sql( conn: &rusqlite::Connection, namespace: String, @@ -341,12 +647,14 @@ fn merge_entity_sql( vec_table: Option, into_id: Uuid, from_id: Uuid, - strategy: MergeStrategy, + strategy: EntityDedupMergePolicy, + dry_run: bool, ) -> Result<(MergeSummary, Entity), SqliteError> { let into_entity = read_merge_entity(conn, into_id, &namespace)?; let from_entity = read_merge_entity(conn, from_id, &namespace)?; // --- Collect edges incident to from_id --- + #[allow(dead_code)] struct EdgeRow { id: Uuid, source_id: Uuid, @@ -354,6 +662,9 @@ fn merge_entity_sql( relation: String, weight: f64, created_at: i64, + updated_at: i64, + deleted_at: Option, + target_backend: Option, metadata: Option, } @@ -365,7 +676,8 @@ fn merge_entity_sql( let mut outbound: Vec = Vec::new(); { let mut stmt = conn.prepare( - "SELECT id, source_id, target_id, relation, weight, created_at, metadata \ + "SELECT id, source_id, target_id, relation, weight, created_at, \ + updated_at, deleted_at, target_backend, metadata \ FROM graph_edges WHERE namespace = ?1 AND source_id = ?2", )?; let mut rows = stmt.query(rusqlite::params![&namespace, &from_str])?; @@ -377,7 +689,10 @@ fn merge_entity_sql( relation: row.get(3)?, weight: row.get(4)?, created_at: row.get(5)?, - metadata: row.get(6)?, + updated_at: row.get(6)?, + deleted_at: row.get(7)?, + target_backend: row.get(8)?, + metadata: row.get(9)?, }); } } @@ -385,7 +700,8 @@ fn merge_entity_sql( let mut inbound: Vec = Vec::new(); { let mut stmt = conn.prepare( - "SELECT id, source_id, target_id, relation, weight, created_at, metadata \ + "SELECT id, source_id, target_id, relation, weight, created_at, \ + updated_at, 
deleted_at, target_backend, metadata \ FROM graph_edges WHERE namespace = ?1 AND target_id = ?2", )?; let mut rows = stmt.query(rusqlite::params![&namespace, &from_str])?; @@ -397,7 +713,10 @@ fn merge_entity_sql( relation: row.get(3)?, weight: row.get(4)?, created_at: row.get(5)?, - metadata: row.get(6)?, + updated_at: row.get(6)?, + deleted_at: row.get(7)?, + target_backend: row.get(8)?, + metadata: row.get(9)?, }); } } @@ -411,54 +730,6 @@ fn merge_entity_sql( } } - // --- Rewire edges --- - let mut edges_rewired = 0usize; - for edge in all_edges { - let new_src = if edge.source_id == from_id { - into_id - } else { - edge.source_id - }; - let new_tgt = if edge.target_id == from_id { - into_id - } else { - edge.target_id - }; - - if new_src == new_tgt { - conn.execute( - "DELETE FROM graph_edges WHERE namespace = ?1 AND id = ?2", - rusqlite::params![&namespace, edge.id.to_string()], - )?; - continue; - } - - conn.execute( - "INSERT INTO graph_edges \ - (namespace, id, source_id, target_id, relation, weight, created_at, metadata) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) \ - ON CONFLICT(namespace, id) DO UPDATE SET \ - source_id = excluded.source_id, \ - target_id = excluded.target_id, \ - relation = excluded.relation, \ - weight = excluded.weight, \ - created_at = excluded.created_at, \ - metadata = excluded.metadata \ - ON CONFLICT(namespace, source_id, target_id, relation) DO NOTHING", - rusqlite::params![ - &namespace, - edge.id.to_string(), - new_src.to_string(), - new_tgt.to_string(), - &edge.relation, - edge.weight, - edge.created_at, - edge.metadata, - ], - )?; - edges_rewired += 1; - } - // --- Merge entity fields --- let (merged_props, properties_merged) = merge_properties(&into_entity.properties, &from_entity.properties, strategy); @@ -474,89 +745,155 @@ fn merge_entity_sql( .map(|v| serde_json::to_string(v).unwrap_or_default()); let tags_json = serde_json::to_string(&merged_tags).unwrap_or_else(|_| "[]".to_string()); - // --- Upsert merged entity --- 
- conn.execute( - "INSERT OR REPLACE INTO entities \ - (id, namespace, kind, name, description, properties, tags, \ - created_at, updated_at, deleted_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)", - rusqlite::params![ - &into_str, - &namespace, - &into_entity.kind, - &merged_name, - &merged_description, - &props_str, - &tags_json, - into_entity.created_at, - now, - into_entity.deleted_at, - ], - )?; + // --- Rewire edges --- + let mut edges_rewired = 0usize; + if !dry_run { + for edge in all_edges { + let new_src = if edge.source_id == from_id { + into_id + } else { + edge.source_id + }; + let new_tgt = if edge.target_id == from_id { + into_id + } else { + edge.target_id + }; + + if new_src == new_tgt { + conn.execute( + "DELETE FROM graph_edges WHERE namespace = ?1 AND id = ?2", + rusqlite::params![&namespace, edge.id.to_string()], + )?; + continue; + } - // --- Reindex into_id in FTS (delete existing, insert updated) --- - let fts_body = match &merged_description { - Some(d) if !d.is_empty() => format!("{} {}", merged_name, d), - _ => merged_name.clone(), - }; - let kind_str = SubstrateKind::Entity.to_string(); - - conn.execute( - &format!( - "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", - fts_table - ), - rusqlite::params![&namespace, &into_str], - )?; - conn.execute( - &format!( - "INSERT INTO {} \ - (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", - fts_table - ), - rusqlite::params![ - &into_str, - &kind_str, - &merged_name, - &fts_body, - &tags_json, - &namespace, - &props_str, - now, - ], - )?; + let now_ts = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO graph_edges \ + (namespace, id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, target_backend, metadata) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11) \ + ON CONFLICT(namespace, id) DO UPDATE SET \ + source_id = excluded.source_id, \ + target_id = 
excluded.target_id, \ + relation = excluded.relation, \ + weight = excluded.weight, \ + updated_at = excluded.updated_at, \ + metadata = excluded.metadata \ + ON CONFLICT(namespace, source_id, target_id, relation) DO NOTHING", + rusqlite::params![ + &namespace, + edge.id.to_string(), + new_src.to_string(), + new_tgt.to_string(), + &edge.relation, + edge.weight, + edge.created_at, + now_ts, + edge.deleted_at, + edge.target_backend, + edge.metadata, + ], + )?; + edges_rewired += 1; + } - // --- Delete from_id from FTS --- - conn.execute( - &format!( - "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", - fts_table - ), - rusqlite::params![&namespace, &from_str], - )?; + // --- Upsert merged entity --- + conn.execute( + "INSERT OR REPLACE INTO entities \ + (id, namespace, kind, name, description, properties, tags, \ + created_at, updated_at, deleted_at, merged_into, merge_event_id) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", + rusqlite::params![ + &into_str, + &namespace, + &into_entity.kind, + &merged_name, + &merged_description, + &props_str, + &tags_json, + into_entity.created_at, + now, + into_entity.deleted_at, + Option::::None, + Option::::None, + ], + )?; + + // --- Reindex into_id in FTS (delete existing, insert updated) --- + let fts_body = match &merged_description { + Some(d) if !d.is_empty() => format!("{} {}", merged_name, d), + _ => merged_name.clone(), + }; + let kind_str = SubstrateKind::Entity.to_string(); - // --- Delete from_id from vector index if configured --- - if let Some(ref vec_tbl) = vec_table { conn.execute( &format!( - "DELETE FROM {} WHERE subject_id = ?1 AND namespace = ?2", - vec_tbl + "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", + fts_table ), - rusqlite::params![&from_str, &namespace], + rusqlite::params![&namespace, &into_str], + )?; + conn.execute( + &format!( + "INSERT INTO {} \ + (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 
?8)", + fts_table + ), + rusqlite::params![ + &into_str, + &kind_str, + &merged_name, + &fts_body, + &tags_json, + &namespace, + &props_str, + now, + ], )?; - } - // --- Hard-delete from entity --- - conn.execute( - "DELETE FROM entities WHERE id = ?1", - rusqlite::params![&from_str], - )?; + // --- Delete from_id from FTS --- + conn.execute( + &format!( + "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", + fts_table + ), + rusqlite::params![&namespace, &from_str], + )?; + + // --- Delete from_id from vector index if configured --- + if let Some(ref vec_tbl) = vec_table { + conn.execute( + &format!( + "DELETE FROM {} WHERE subject_id = ?1 AND namespace = ?2", + vec_tbl + ), + rusqlite::params![&from_str, &namespace], + )?; + } + + // --- Tombstone from entity (ADR-014: soft-delete with provenance) --- + let merge_event_id = Uuid::new_v4(); + conn.execute( + "UPDATE entities \ + SET deleted_at = ?1, merged_into = ?2, merge_event_id = ?3, updated_at = ?1 \ + WHERE namespace = ?4 AND id = ?5 AND deleted_at IS NULL", + rusqlite::params![ + now, + into_str, + merge_event_id.to_string(), + &namespace, + &from_str, + ], + )?; + } let updated_entity = Entity { id: into_id, namespace, kind: into_entity.kind, + entity_type: into_entity.entity_type, name: merged_name, description: merged_description, properties: merged_props, @@ -564,6 +901,8 @@ fn merge_entity_sql( created_at: into_entity.created_at, updated_at: now, deleted_at: into_entity.deleted_at, + merged_into: None, + merge_event_id: None, }; Ok(( @@ -573,43 +912,444 @@ fn merge_entity_sql( edges_rewired, properties_merged, tags_unioned, + content_appended: false, + dry_run, }, updated_entity, )) } +// --------------------------------------------------------------------------- +// Note merge SQL helpers +// --------------------------------------------------------------------------- + +/// Read one note row by ID within a namespace, returning `SqliteError` on missing/wrong-ns. 
+fn read_merge_note( + conn: &rusqlite::Connection, + id: Uuid, + namespace: &str, +) -> Result { + use khive_storage::note::Note; + let id_str = id.to_string(); + let mut stmt = conn.prepare( + "SELECT id, namespace, kind, status, name, content, salience, decay_factor, \ + expires_at, properties, created_at, updated_at, deleted_at \ + FROM notes WHERE id = ?1 AND deleted_at IS NULL", + )?; + let mut rows = stmt.query(rusqlite::params![id_str])?; + let row = rows + .next()? + .ok_or_else(|| SqliteError::InvalidData(format!("note {id} not found")))?; + + let id_s: String = row.get(0)?; + let ns: String = row.get(1)?; + let kind: String = row.get(2)?; + let status: String = row.get(3)?; + let name: Option = row.get(4)?; + let content: String = row.get(5)?; + let salience: Option = row.get(6)?; + let decay_factor: Option = row.get(7)?; + let expires_at: Option = row.get(8)?; + let properties_str: Option = row.get(9)?; + let created_at: i64 = row.get(10)?; + let updated_at: i64 = row.get(11)?; + let deleted_at: Option = row.get(12)?; + + if ns != namespace { + return Err(SqliteError::InvalidData(format!( + "note {id} belongs to namespace '{ns}', not '{namespace}'" + ))); + } + + let note_id = Uuid::parse_str(&id_s).map_err(|e| SqliteError::InvalidData(e.to_string()))?; + let properties: Option = properties_str + .map(|s| serde_json::from_str(&s).map_err(|e| SqliteError::InvalidData(e.to_string()))) + .transpose()?; + + Ok(Note { + id: note_id, + namespace: ns, + kind, + status, + name, + content, + salience, + decay_factor, + expires_at, + properties, + created_at, + updated_at, + deleted_at, + }) +} + +fn max_option_f64(a: Option, b: Option) -> Option { + match (a, b) { + (Some(x), Some(y)) => Some(x.max(y)), + (Some(x), None) => Some(x), + (None, Some(y)) => Some(y), + (None, None) => None, + } +} + +fn append_merge_history(props: Option, entry: Value) -> Result, SqliteError> { + use serde_json::{json, Map}; + let mut obj: Map = match props { + Some(Value::Object(m)) 
=> m, + Some(other) => { + let mut m = Map::new(); + m.insert("_value".into(), other); + m + } + None => Map::new(), + }; + let history = obj + .entry("_merge_history".to_string()) + .or_insert_with(|| json!([])); + if let Value::Array(arr) = history { + arr.push(entry); + } + Ok(Some(Value::Object(obj))) +} + +/// All note merge SQL on one connection inside a `BEGIN IMMEDIATE` transaction. +/// +/// Reads both notes (must have same `kind`), rewires/drops incident edges, merges content +/// per `content_strategy`, tombstones `from`. Returns the updated `into` note for async +/// re-embedding. +/// +/// When `dry_run` is true, all reads and computations are performed but no writes are issued. +#[allow(clippy::too_many_arguments)] +fn merge_note_sql( + conn: &rusqlite::Connection, + namespace: String, + fts_table: String, + vec_table: Option, + into_id: Uuid, + from_id: Uuid, + strategy: EntityDedupMergePolicy, + content_strategy: ContentMergeStrategy, + dry_run: bool, +) -> Result<(MergeSummary, khive_storage::note::Note), SqliteError> { + let into_note = read_merge_note(conn, into_id, &namespace)?; + let from_note = read_merge_note(conn, from_id, &namespace)?; + + if into_note.kind != from_note.kind { + return Err(SqliteError::InvalidData(format!( + "cannot merge notes of different kinds: {} vs {}", + into_note.kind, from_note.kind + ))); + } + + let now = chrono::Utc::now().timestamp_micros(); + let into_str = into_id.to_string(); + let from_str = from_id.to_string(); + + // Collect edges incident to from_id. 
+ #[allow(dead_code)] + struct EdgeRow { + id: Uuid, + source_id: Uuid, + target_id: Uuid, + relation: String, + weight: f64, + created_at: i64, + updated_at: i64, + deleted_at: Option, + target_backend: Option, + metadata: Option, + } + let parse_id = + |s: String| Uuid::parse_str(&s).map_err(|e| SqliteError::InvalidData(e.to_string())); + + let mut outbound: Vec = Vec::new(); + { + let mut stmt = conn.prepare( + "SELECT id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, target_backend, metadata \ + FROM graph_edges WHERE namespace = ?1 AND source_id = ?2", + )?; + let mut rows = stmt.query(rusqlite::params![&namespace, &from_str])?; + while let Some(row) = rows.next()? { + outbound.push(EdgeRow { + id: parse_id(row.get(0)?)?, + source_id: parse_id(row.get(1)?)?, + target_id: parse_id(row.get(2)?)?, + relation: row.get(3)?, + weight: row.get(4)?, + created_at: row.get(5)?, + updated_at: row.get(6)?, + deleted_at: row.get(7)?, + target_backend: row.get(8)?, + metadata: row.get(9)?, + }); + } + } + let mut inbound: Vec = Vec::new(); + { + let mut stmt = conn.prepare( + "SELECT id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, target_backend, metadata \ + FROM graph_edges WHERE namespace = ?1 AND target_id = ?2", + )?; + let mut rows = stmt.query(rusqlite::params![&namespace, &from_str])?; + while let Some(row) = rows.next()? { + inbound.push(EdgeRow { + id: parse_id(row.get(0)?)?, + source_id: parse_id(row.get(1)?)?, + target_id: parse_id(row.get(2)?)?, + relation: row.get(3)?, + weight: row.get(4)?, + created_at: row.get(5)?, + updated_at: row.get(6)?, + deleted_at: row.get(7)?, + target_backend: row.get(8)?, + metadata: row.get(9)?, + }); + } + } + let mut seen: HashSet = HashSet::new(); + let mut all_edges: Vec = Vec::new(); + for edge in outbound.into_iter().chain(inbound) { + if seen.insert(edge.id) { + all_edges.push(edge); + } + } + + // Merge note fields. 
+ let (merged_content, content_appended) = match content_strategy { + ContentMergeStrategy::Append => { + if from_note.content.is_empty() { + (into_note.content.clone(), false) + } else { + ( + format!("{}\n\n---\n\n{}", into_note.content, from_note.content), + true, + ) + } + } + ContentMergeStrategy::PreferInto => (into_note.content.clone(), false), + ContentMergeStrategy::PreferFrom => (from_note.content.clone(), false), + }; + + let merged_name = match strategy { + EntityDedupMergePolicy::PreferFrom => from_note.name.clone().or(into_note.name.clone()), + _ => into_note.name.clone().or(from_note.name.clone()), + }; + + let (merged_props, properties_merged) = + merge_properties(&into_note.properties, &from_note.properties, strategy); + + // Append merge history to properties. + let merge_history_entry = serde_json::json!({ + "merged_from": from_id.to_string(), + "merged_at": now, + "strategy": format!("{:?}", strategy), + "content_strategy": format!("{:?}", content_strategy), + }); + let merged_props = append_merge_history(merged_props, merge_history_entry)?; + + let merged_salience = max_option_f64(into_note.salience, from_note.salience); + let merged_expires_at = match (into_note.expires_at, from_note.expires_at) { + (Some(a), Some(b)) => Some(a.max(b)), + (Some(a), None) => Some(a), + (None, Some(b)) => Some(b), + (None, None) => None, + }; + + let props_str = merged_props + .as_ref() + .map(|v| serde_json::to_string(v).unwrap_or_default()); + + let mut edges_rewired = 0usize; + if !dry_run { + // Rewire and upsert. 
+ for edge in all_edges { + let new_src = if edge.source_id == from_id { + into_id + } else { + edge.source_id + }; + let new_tgt = if edge.target_id == from_id { + into_id + } else { + edge.target_id + }; + if new_src == new_tgt { + conn.execute( + "DELETE FROM graph_edges WHERE namespace = ?1 AND id = ?2", + rusqlite::params![&namespace, edge.id.to_string()], + )?; + continue; + } + let now_ts = chrono::Utc::now().timestamp(); + conn.execute( + "INSERT INTO graph_edges \ + (namespace, id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, target_backend, metadata) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11) \ + ON CONFLICT(namespace, id) DO UPDATE SET \ + source_id = excluded.source_id, \ + target_id = excluded.target_id, \ + relation = excluded.relation, \ + weight = excluded.weight, \ + updated_at = excluded.updated_at, \ + metadata = excluded.metadata \ + ON CONFLICT(namespace, source_id, target_id, relation) DO NOTHING", + rusqlite::params![ + &namespace, + edge.id.to_string(), + new_src.to_string(), + new_tgt.to_string(), + &edge.relation, + edge.weight, + edge.created_at, + now_ts, + edge.deleted_at, + edge.target_backend, + edge.metadata, + ], + )?; + edges_rewired += 1; + } + + // Upsert merged into-note. + conn.execute( + "INSERT OR REPLACE INTO notes \ + (id, namespace, kind, status, name, content, salience, decay_factor, \ + expires_at, properties, created_at, updated_at, deleted_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)", + rusqlite::params![ + &into_str, + &namespace, + &into_note.kind, + &into_note.status, + &merged_name, + &merged_content, + merged_salience, + into_note.decay_factor, + merged_expires_at, + &props_str, + into_note.created_at, + now, + into_note.deleted_at, + ], + )?; + + // Update FTS for into-note. 
+ conn.execute( + &format!( + "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", + fts_table + ), + rusqlite::params![&namespace, &into_str], + )?; + conn.execute( + &format!( + "INSERT INTO {} \ + (subject_id, kind, title, body, tags, namespace, metadata, updated_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", + fts_table + ), + rusqlite::params![ + &into_str, + SubstrateKind::Note.to_string(), + &merged_name, + &merged_content, + "[]", + &namespace, + &props_str, + now, + ], + )?; + + // Delete from-note from FTS. + conn.execute( + &format!( + "DELETE FROM {} WHERE namespace = ?1 AND subject_id = ?2", + fts_table + ), + rusqlite::params![&namespace, &from_str], + )?; + + // Delete from-note from vector index if configured. + if let Some(ref vec_tbl) = vec_table { + conn.execute( + &format!( + "DELETE FROM {} WHERE subject_id = ?1 AND namespace = ?2", + vec_tbl + ), + rusqlite::params![&from_str, &namespace], + )?; + } + + // Tombstone the from-note. + conn.execute( + "UPDATE notes SET status = 'deleted', deleted_at = ?1, updated_at = ?1 \ + WHERE namespace = ?2 AND id = ?3 AND deleted_at IS NULL", + rusqlite::params![now, &namespace, &from_str], + )?; + } + + let updated_note = khive_storage::note::Note { + id: into_id, + namespace: namespace.clone(), + kind: into_note.kind.clone(), + status: into_note.status.clone(), + name: merged_name, + content: merged_content, + salience: merged_salience, + decay_factor: into_note.decay_factor, + expires_at: merged_expires_at, + properties: merged_props, + created_at: into_note.created_at, + updated_at: now, + deleted_at: into_note.deleted_at, + }; + + Ok(( + MergeSummary { + kept_id: into_id, + removed_id: from_id, + edges_rewired, + properties_merged, + tags_unioned: 0, + content_appended, + dry_run, + }, + updated_note, + )) +} + // --------------------------------------------------------------------------- // Merge helpers (pure functions — easier to unit test) // 
--------------------------------------------------------------------------- -fn merge_string_field(into: &str, from: &str, strategy: MergeStrategy) -> String { +fn merge_string_field(into: &str, from: &str, strategy: EntityDedupMergePolicy) -> String { match strategy { - MergeStrategy::PreferInto | MergeStrategy::Union => into.to_string(), - MergeStrategy::PreferFrom => from.to_string(), + EntityDedupMergePolicy::PreferInto | EntityDedupMergePolicy::Union => into.to_string(), + EntityDedupMergePolicy::PreferFrom => from.to_string(), } } fn merge_option_string_field( into: &Option, from: &Option, - strategy: MergeStrategy, + strategy: EntityDedupMergePolicy, ) -> Option { match strategy { - MergeStrategy::PreferInto => { + EntityDedupMergePolicy::PreferInto => { if into.is_some() { into.clone() } else { from.clone() } } - MergeStrategy::PreferFrom => { + EntityDedupMergePolicy::PreferFrom => { if from.is_some() { from.clone() } else { into.clone() } } - MergeStrategy::Union => { + EntityDedupMergePolicy::Union => { // Keep into's description; if empty, append from's. match (into, from) { (Some(a), _) if !a.is_empty() => Some(a.clone()), @@ -624,7 +1364,7 @@ fn merge_option_string_field( fn merge_properties( into: &Option, from: &Option, - strategy: MergeStrategy, + strategy: EntityDedupMergePolicy, ) -> (Option, usize) { match (into, from) { (None, None) => (None, 0), @@ -641,14 +1381,15 @@ fn merge_properties( } /// Deep-merge two JSON values per strategy. Returns (merged, keys_contributed_by_from). 
-fn merge_json(into: &Value, from: &Value, strategy: MergeStrategy) -> (Value, usize) { +fn merge_json(into: &Value, from: &Value, strategy: EntityDedupMergePolicy) -> (Value, usize) { match (into, from, strategy) { - (Value::Object(a), Value::Object(b), MergeStrategy::Union) => { + (Value::Object(a), Value::Object(b), EntityDedupMergePolicy::Union) => { let mut result = a.clone(); let mut added = 0usize; for (k, v_from) in b { if let Some(v_into) = a.get(k) { - let (merged, sub_added) = merge_json(v_into, v_from, MergeStrategy::Union); + let (merged, sub_added) = + merge_json(v_into, v_from, EntityDedupMergePolicy::Union); result.insert(k.clone(), merged); added += sub_added; } else { @@ -658,7 +1399,7 @@ fn merge_json(into: &Value, from: &Value, strategy: MergeStrategy) -> (Value, us } (Value::Object(result), added) } - (Value::Object(a), Value::Object(b), MergeStrategy::PreferInto) => { + (Value::Object(a), Value::Object(b), EntityDedupMergePolicy::PreferInto) => { let mut result = a.clone(); let mut added = 0usize; for (k, v) in b { @@ -669,7 +1410,7 @@ fn merge_json(into: &Value, from: &Value, strategy: MergeStrategy) -> (Value, us } (Value::Object(result), added) } - (Value::Object(a), Value::Object(b), MergeStrategy::PreferFrom) => { + (Value::Object(a), Value::Object(b), EntityDedupMergePolicy::PreferFrom) => { let mut result = a.clone(); let mut added = 0usize; for (k, v) in b { @@ -681,7 +1422,7 @@ fn merge_json(into: &Value, from: &Value, strategy: MergeStrategy) -> (Value, us (Value::Object(result), added) } // Non-object scalars: apply strategy directly. 
- (_into_val, from_val, MergeStrategy::PreferFrom) => (from_val.clone(), 1), + (_into_val, from_val, EntityDedupMergePolicy::PreferFrom) => (from_val.clone(), 1), _ => (into.clone(), 0), } } @@ -706,7 +1447,7 @@ fn union_tags(into: &[String], from: &[String]) -> (Vec, usize) { #[cfg(test)] mod tests { use super::*; - use crate::runtime::KhiveRuntime; + use crate::runtime::{KhiveRuntime, NamespaceToken}; use khive_storage::types::{Direction, TextFilter, TextQueryMode, TextSearchRequest}; fn rt() -> KhiveRuntime { @@ -714,9 +1455,9 @@ mod tests { } // Helper: search FTS5 for `query` in a runtime namespace. - async fn fts_hit(rt: &KhiveRuntime, namespace: Option<&str>, query: &str) -> Vec { - let ns = rt.ns(namespace).to_string(); - rt.text(namespace) + async fn fts_hit(rt: &KhiveRuntime, token: &NamespaceToken, query: &str) -> Vec { + let ns = token.namespace().as_str().to_string(); + rt.text(token) .unwrap() .search(TextSearchRequest { query: query.to_string(), @@ -738,10 +1479,12 @@ mod tests { #[tokio::test] async fn update_entity_patch_changes_only_specified_fields() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt .create_entity( - None, + &tok, "concept", + None, "OriginalName", Some("orig desc"), Some(serde_json::json!({"k":"v"})), @@ -752,7 +1495,7 @@ mod tests { let updated = rt .update_entity( - None, + &tok, entity.id, EntityPatch { description: Some(Some("new desc".to_string())), @@ -770,10 +1513,12 @@ mod tests { #[tokio::test] async fn update_entity_clear_description_with_some_none() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt .create_entity( - None, + &tok, "concept", + None, "ClearDesc", Some("has description"), None, @@ -784,7 +1529,7 @@ mod tests { let updated = rt .update_entity( - None, + &tok, entity.id, EntityPatch { description: Some(None), @@ -803,20 +1548,21 @@ mod tests { #[tokio::test] async fn update_entity_reindexes_when_name_changes() { let rt = rt(); + let tok = NamespaceToken::local(); let 
entity = rt - .create_entity(None, "concept", "OldName", None, None, vec![]) + .create_entity(&tok, "concept", None, "OldName", None, None, vec![]) .await .unwrap(); // Old name is findable. - let hits_before = fts_hit(&rt, None, "OldName").await; + let hits_before = fts_hit(&rt, &tok, "OldName").await; assert!( hits_before.contains(&entity.id), "entity should be findable by old name" ); rt.update_entity( - None, + &tok, entity.id, EntityPatch { name: Some("NewName".to_string()), @@ -826,8 +1572,8 @@ mod tests { .await .unwrap(); - let hits_old = fts_hit(&rt, None, "OldName").await; - let hits_new = fts_hit(&rt, None, "NewName").await; + let hits_old = fts_hit(&rt, &tok, "OldName").await; + let hits_new = fts_hit(&rt, &tok, "NewName").await; // After rename, old name no longer matches this entity (FTS index updated). assert!( @@ -843,10 +1589,12 @@ mod tests { #[tokio::test] async fn update_entity_properties_merges_preserving_existing_keys() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt .create_entity( - None, + &tok, "concept", + None, "MergeProps", None, Some(serde_json::json!({ @@ -861,7 +1609,7 @@ mod tests { let updated = rt .update_entity( - None, + &tok, entity.id, EntityPatch { properties: Some(serde_json::json!({"status": "implemented"})), @@ -883,18 +1631,19 @@ mod tests { #[tokio::test] async fn update_entity_skips_reindex_when_only_properties_change() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "StableIndexed", None, None, vec![]) + .create_entity(&tok, "concept", None, "StableIndexed", None, None, vec![]) .await .unwrap(); // Verify it's in the index before. - let hits_before = fts_hit(&rt, None, "StableIndexed").await; + let hits_before = fts_hit(&rt, &tok, "StableIndexed").await; assert!(hits_before.contains(&entity.id)); // Only patch properties — text index should be untouched (still findable). 
rt.update_entity( - None, + &tok, entity.id, EntityPatch { properties: Some(serde_json::json!({"new": "prop"})), @@ -904,7 +1653,7 @@ mod tests { .await .unwrap(); - let hits_after = fts_hit(&rt, None, "StableIndexed").await; + let hits_after = fts_hit(&rt, &tok, "StableIndexed").await; assert!( hits_after.contains(&entity.id), "still findable after props-only patch" @@ -914,33 +1663,34 @@ mod tests { #[tokio::test] async fn merge_entity_rewires_edges() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); let d = rt - .create_entity(None, "concept", "D", None, None, vec![]) + .create_entity(&tok, "concept", None, "D", None, None, vec![]) .await .unwrap(); // A→B and C→B; merge B into D → should become A→D and C→D. - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, c.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, c.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let summary = rt - .merge_entity(None, d.id, b.id, MergeStrategy::PreferInto) + .merge_entity(&tok, d.id, b.id, EntityDedupMergePolicy::PreferInto, false) .await .unwrap(); @@ -950,27 +1700,47 @@ mod tests { // Verify edges now point to D. 
let a_neighbors = rt - .neighbors(None, a.id, Direction::Out, None, None) + .neighbors(&tok, a.id, Direction::Out, None, None) .await .unwrap(); assert_eq!(a_neighbors.len(), 1); assert_eq!(a_neighbors[0].node_id, d.id); let c_neighbors = rt - .neighbors(None, c.id, Direction::Out, None, None) + .neighbors(&tok, c.id, Direction::Out, None, None) .await .unwrap(); assert_eq!(c_neighbors.len(), 1); assert_eq!(c_neighbors[0].node_id, d.id); } + #[tokio::test] + async fn merge_entity_self_merge_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "A", None, None, vec![]) + .await + .unwrap(); + let err = rt + .merge_entity(&tok, a.id, a.id, EntityDedupMergePolicy::PreferInto, false) + .await + .unwrap_err(); + assert!( + format!("{err:?}").contains("cannot merge an entity into itself"), + "expected self-merge rejection, got: {err:?}" + ); + } + #[tokio::test] async fn merge_entity_prefer_into_strategy() { let rt = rt(); + let tok = NamespaceToken::local(); let into = rt .create_entity( - None, + &tok, "concept", + None, "Into", None, Some(serde_json::json!({"a": 1})), @@ -980,8 +1750,9 @@ mod tests { .unwrap(); let from = rt .create_entity( - None, + &tok, "concept", + None, "From", None, Some(serde_json::json!({"a": 2, "b": 3})), @@ -990,11 +1761,17 @@ mod tests { .await .unwrap(); - rt.merge_entity(None, into.id, from.id, MergeStrategy::PreferInto) - .await - .unwrap(); + rt.merge_entity( + &tok, + into.id, + from.id, + EntityDedupMergePolicy::PreferInto, + false, + ) + .await + .unwrap(); - let kept = rt.get_entity(None, into.id).await.unwrap().unwrap(); + let kept = rt.get_entity(&tok, into.id).await.unwrap(); let props = kept.properties.unwrap(); // a stays as 1 (into wins), b is added from from. 
assert_eq!(props["a"], 1); @@ -1004,10 +1781,12 @@ mod tests { #[tokio::test] async fn merge_entity_prefer_from_strategy() { let rt = rt(); + let tok = NamespaceToken::local(); let into = rt .create_entity( - None, + &tok, "concept", + None, "Into", None, Some(serde_json::json!({"a": 1})), @@ -1017,8 +1796,9 @@ mod tests { .unwrap(); let from = rt .create_entity( - None, + &tok, "concept", + None, "From", None, Some(serde_json::json!({"a": 2, "b": 3})), @@ -1027,11 +1807,17 @@ mod tests { .await .unwrap(); - rt.merge_entity(None, into.id, from.id, MergeStrategy::PreferFrom) - .await - .unwrap(); + rt.merge_entity( + &tok, + into.id, + from.id, + EntityDedupMergePolicy::PreferFrom, + false, + ) + .await + .unwrap(); - let kept = rt.get_entity(None, into.id).await.unwrap().unwrap(); + let kept = rt.get_entity(&tok, into.id).await.unwrap(); let props = kept.properties.unwrap(); // from wins on a, b also from from. assert_eq!(props["a"], 2); @@ -1041,10 +1827,12 @@ mod tests { #[tokio::test] async fn merge_entity_union_strategy() { let rt = rt(); + let tok = NamespaceToken::local(); let into = rt .create_entity( - None, + &tok, "concept", + None, "Into", None, Some(serde_json::json!({"a": 1})), @@ -1054,8 +1842,9 @@ mod tests { .unwrap(); let from = rt .create_entity( - None, + &tok, "concept", + None, "From", None, Some(serde_json::json!({"a": 2, "b": 3})), @@ -1064,11 +1853,11 @@ mod tests { .await .unwrap(); - rt.merge_entity(None, into.id, from.id, MergeStrategy::Union) + rt.merge_entity(&tok, into.id, from.id, EntityDedupMergePolicy::Union, false) .await .unwrap(); - let kept = rt.get_entity(None, into.id).await.unwrap().unwrap(); + let kept = rt.get_entity(&tok, into.id).await.unwrap(); let props = kept.properties.unwrap(); // Scalar conflict: into wins → a=1. b added from from. 
assert_eq!(props["a"], 1); @@ -1078,10 +1867,12 @@ mod tests { #[tokio::test] async fn merge_entity_unions_tags() { let rt = rt(); + let tok = NamespaceToken::local(); let into = rt .create_entity( - None, + &tok, "concept", + None, "Into", None, None, @@ -1091,8 +1882,9 @@ mod tests { .unwrap(); let from = rt .create_entity( - None, + &tok, "concept", + None, "From", None, None, @@ -1101,11 +1893,17 @@ mod tests { .await .unwrap(); - rt.merge_entity(None, into.id, from.id, MergeStrategy::PreferInto) - .await - .unwrap(); + rt.merge_entity( + &tok, + into.id, + from.id, + EntityDedupMergePolicy::PreferInto, + false, + ) + .await + .unwrap(); - let kept = rt.get_entity(None, into.id).await.unwrap().unwrap(); + let kept = rt.get_entity(&tok, into.id).await.unwrap(); let mut tags = kept.tags.clone(); tags.sort(); assert_eq!(tags, vec!["x", "y", "z"]); @@ -1114,22 +1912,23 @@ mod tests { #[tokio::test] async fn merge_entity_drops_self_loops() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // A `extends` B — merging B into A would produce A `extends` A → drop it. 
- rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let summary = rt - .merge_entity(None, a.id, b.id, MergeStrategy::PreferInto) + .merge_entity(&tok, a.id, b.id, EntityDedupMergePolicy::PreferInto, false) .await .unwrap(); @@ -1139,7 +1938,7 @@ mod tests { ); let a_out = rt - .neighbors(None, a.id, Direction::Out, None, None) + .neighbors(&tok, a.id, Direction::Out, None, None) .await .unwrap(); assert!(a_out.is_empty(), "no self-loop should remain"); @@ -1163,10 +1962,245 @@ mod tests { fn merge_properties_prefer_into_fills_missing_keys() { let a = serde_json::json!({"a": 1}); let b = serde_json::json!({"a": 99, "b": 2}); - let (merged, added) = merge_properties(&Some(a), &Some(b), MergeStrategy::PreferInto); + let (merged, added) = + merge_properties(&Some(a), &Some(b), EntityDedupMergePolicy::PreferInto); let m = merged.unwrap(); assert_eq!(m["a"], 1); assert_eq!(m["b"], 2); assert_eq!(added, 1); } + + // ---- tombstone and note merge tests ---- + + #[tokio::test] + async fn merge_entity_tombstones_source_with_provenance() { + let rt = rt(); + let tok = NamespaceToken::local(); + let into = rt + .create_entity(&tok, "concept", None, "Into", None, None, vec![]) + .await + .unwrap(); + let from = rt + .create_entity(&tok, "concept", None, "From", None, None, vec![]) + .await + .unwrap(); + let from_id = from.id; + + rt.merge_entity( + &tok, + into.id, + from_id, + EntityDedupMergePolicy::PreferInto, + false, + ) + .await + .unwrap(); + + // After merge, get_entity returns an error (soft-deleted rows are excluded). + assert!( + rt.get_entity(&tok, from_id).await.is_err(), + "tombstoned source should not be returned by get_entity" + ); + + // Verify the source row still exists in SQL with provenance. 
+ let pool = rt.backend().pool_arc(); + let (deleted_at, merged_into): (Option, Option) = + tokio::task::spawn_blocking(move || { + let guard = pool.writer().unwrap(); + guard + .conn() + .query_row( + "SELECT deleted_at, merged_into FROM entities WHERE id = ?1", + [from_id.to_string()], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap() + }) + .await + .unwrap(); + assert!( + deleted_at.is_some(), + "tombstoned entity must have deleted_at set" + ); + assert_eq!( + merged_into.as_deref(), + Some(into.id.to_string().as_str()), + "merged_into must point to into_id" + ); + } + + #[tokio::test] + async fn merge_note_same_kind_appends_content() { + let rt = rt(); + let tok = NamespaceToken::local(); + let into = rt + .create_note( + &tok, + "observation", + None, + "Into content", + None, + None, + vec![], + ) + .await + .unwrap(); + let from = rt + .create_note( + &tok, + "observation", + None, + "From content", + None, + None, + vec![], + ) + .await + .unwrap(); + let from_id = from.id; + + let summary = rt + .merge_note( + &tok, + into.id, + from_id, + EntityDedupMergePolicy::PreferInto, + ContentMergeStrategy::Append, + false, + ) + .await + .unwrap(); + + assert_eq!(summary.kept_id, into.id); + assert_eq!(summary.removed_id, from_id); + assert!(summary.content_appended); + assert!(!summary.dry_run); + + // Source is no longer findable. 
+ let from_store = rt.notes(&tok).unwrap(); + assert!( + from_store.get_note(from_id).await.unwrap().is_none(), + "merged-from note should be soft-deleted" + ); + } + + #[tokio::test] + async fn merge_note_different_kinds_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let into = rt + .create_note(&tok, "observation", None, "Into", None, None, vec![]) + .await + .unwrap(); + let from = rt + .create_note(&tok, "decision", None, "From", None, None, vec![]) + .await + .unwrap(); + + let result = rt + .merge_note( + &tok, + into.id, + from.id, + EntityDedupMergePolicy::PreferInto, + ContentMergeStrategy::Append, + false, + ) + .await; + assert!(result.is_err(), "merging different note kinds must fail"); + } + + #[tokio::test] + async fn merge_note_dry_run_leaves_notes_unchanged() { + let rt = rt(); + let tok = NamespaceToken::local(); + let into = rt + .create_note( + &tok, + "observation", + None, + "Into content", + None, + None, + vec![], + ) + .await + .unwrap(); + let from = rt + .create_note( + &tok, + "observation", + None, + "From content", + None, + None, + vec![], + ) + .await + .unwrap(); + let into_id = into.id; + let from_id = from.id; + + let summary = rt + .merge_note( + &tok, + into_id, + from_id, + EntityDedupMergePolicy::PreferInto, + ContentMergeStrategy::Append, + true, + ) + .await + .unwrap(); + + assert!(summary.dry_run); + + // Both notes still exist unchanged. 
+ let store = rt.notes(&tok).unwrap(); + let into_after = store.get_note(into_id).await.unwrap().unwrap(); + let from_after = store.get_note(from_id).await.unwrap().unwrap(); + assert_eq!( + into_after.content, "Into content", + "dry_run must not mutate into-note" + ); + assert_eq!( + from_after.content, "From content", + "dry_run must not mutate from-note" + ); + } + + #[tokio::test] + async fn update_edge_updates_properties() { + use khive_storage::EdgeRelation; + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "A", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "B", None, None, vec![]) + .await + .unwrap(); + let edge = rt + .link(&tok, a.id, b.id, EdgeRelation::Extends, 0.5, None) + .await + .unwrap(); + let edge_id: Uuid = edge.id.into(); + + let updated = rt + .update_edge( + &tok, + edge_id, + EdgePatch { + properties: Some(serde_json::json!({"source": "manual"})), + ..Default::default() + }, + ) + .await + .unwrap(); + + assert_eq!(updated.metadata.as_ref().unwrap()["source"], "manual"); + assert!((updated.weight - 0.5).abs() < 0.001, "weight unchanged"); + } } diff --git a/crates/khive-runtime/src/error.rs b/crates/khive-runtime/src/error.rs index 5f6e7640..5d5f2cc3 100644 --- a/crates/khive-runtime/src/error.rs +++ b/crates/khive-runtime/src/error.rs @@ -103,6 +103,19 @@ pub enum RuntimeError { second_idx: usize, }, + /// Two packs declared the same `Visibility::Verb` handler name (ADR-017 + /// §Boot-time collision checks). `Visibility::Subhandler` entries are + /// pack-prefixed and do not participate in cross-pack collision checks. + #[error( + "verb collision: verb {verb:?} declared by both pack {first_pack:?} and pack \ + {second_pack:?}; rename one handler or use Visibility::Subhandler for internal verbs" + )] + VerbCollision { + verb: String, + first_pack: String, + second_pack: String, + }, + /// Gate denied this verb invocation (ADR-035). 
/// /// Returned by `VerbRegistry::dispatch` when the configured `Gate` returns @@ -116,6 +129,70 @@ pub enum RuntimeError { /// `kind`, `code`, `details`, and `retry_hint` without information loss. #[error("{0}")] Khive(khive_types::KhiveError), + + /// Record exists but belongs to a different namespace than the provided token. + /// + /// Externally reported as "not found in this namespace" to avoid leaking + /// cross-namespace existence information (ADR-007 timing-oracle mitigation). + #[error("not found in this namespace")] + NamespaceMismatch { id: uuid::Uuid }, + + /// A short-prefix lookup matched more than one record (ADR-016 §UUID arguments). + /// + /// `prefix` is the 8+ hex-char prefix supplied by the caller. + /// `matches` holds the full UUIDs of all matching records (at most 2 are + /// reported to bound the scan — callers must supply the full UUID to disambiguate). + #[error("ambiguous prefix {prefix:?}: matches {}", format_uuid_list(matches))] + AmbiguousPrefix { + prefix: String, + matches: Vec, + }, + + /// Cross-backend `merge_entity` is unsupported in v1 (ADR-009 §cross-backend-merge). + /// + /// Both entities must reside on the same backend. To merge entities on different + /// backends, manually export `from_id`, delete it, and re-import on `into_id`'s backend. + #[error( + "cross-backend merge is not supported: \ + into_id {into_id} is on backend '{into_backend}', \ + from_id {from_id} is on backend '{from_backend}'. \ + Both entities must be on the same backend to merge." + )] + CrossBackendMergeUnsupported { + into_id: uuid::Uuid, + from_id: uuid::Uuid, + into_backend: String, + from_backend: String, + }, + + // ── ADR-037: Remote Resolution and Content-Hash Verification ───────────── + /// A `kg://` ref names a remote not declared in `schema.yaml`. + #[error("unknown remote: {name:?}")] + UnknownRemote { name: String }, + + /// A remote cache entry is absent and `--fetch` was not requested. 
+ #[error("remote cache missing for remote={remote:?} namespace={namespace:?}")] + RemoteCacheMissing { remote: String, namespace: String }, + + /// A short ID matches multiple entities in the same namespace or remote cache. + #[error("ambiguous id {id:?}: matched {count} records")] + AmbiguousId { id: String, count: usize }, + + /// A write operation targeted a remote namespace, which is read-only. + #[error("cross-namespace write denied: cannot write to remote namespace {namespace:?}")] + CrossNamespaceWrite { namespace: String }, + + /// A remote fetch failed (network error, authentication failure, etc.). + #[error("remote fetch error for remote={remote:?}: {message}")] + RemoteFetchError { remote: String, message: String }, +} + +fn format_uuid_list(uuids: &[uuid::Uuid]) -> String { + let shorts: Vec = uuids + .iter() + .map(|u| u.to_string()[..8].to_string()) + .collect(); + shorts.join(", ") } impl From for RuntimeError { diff --git a/crates/khive-runtime/src/fusion.rs b/crates/khive-runtime/src/fusion.rs index 3a2de5cc..abeb44dd 100644 --- a/crates/khive-runtime/src/fusion.rs +++ b/crates/khive-runtime/src/fusion.rs @@ -14,7 +14,7 @@ use khive_types::SubstrateKind; use crate::error::RuntimeResult; use crate::retrieval::{SearchHit, SearchSource}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; const CANDIDATE_MULTIPLIER: u32 = 4; @@ -60,7 +60,7 @@ impl KhiveRuntime { /// Hybrid search with a caller-supplied fusion strategy. pub async fn hybrid_search_with_strategy( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_text: &str, query_vector: Option>, strategy: FusionStrategy, @@ -68,9 +68,9 @@ impl KhiveRuntime { ) -> RuntimeResult> { let candidates = limit.saturating_mul(CANDIDATE_MULTIPLIER).max(limit); - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); let text_hits = self - .text(namespace)? + .text(token)? 
.search(TextSearchRequest { query: query_text.to_string(), mode: TextQueryMode::Plain, @@ -85,7 +85,7 @@ impl KhiveRuntime { let vector_hits = if query_vector.is_some() || self.config().embedding_model.is_some() { self.vector_search( - namespace, + token, query_vector, Some(query_text), candidates, @@ -103,9 +103,9 @@ impl KhiveRuntime { if !fused.is_empty() { let candidate_ids: Vec = fused.iter().map(|h| h.entity_id).collect(); let alive_page = self - .entities(namespace)? + .entities(token)? .query_entities( - self.ns(namespace), + token.namespace().as_str(), EntityFilter { ids: candidate_ids, ..EntityFilter::default() diff --git a/crates/khive-runtime/src/graph_traversal.rs b/crates/khive-runtime/src/graph_traversal.rs index 55fb87df..24145225 100644 --- a/crates/khive-runtime/src/graph_traversal.rs +++ b/crates/khive-runtime/src/graph_traversal.rs @@ -20,7 +20,7 @@ use khive_storage::types::{Direction, Edge, LinkId, NeighborQuery}; use khive_storage::EdgeRelation; use crate::error::{RuntimeError, RuntimeResult}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; /// A node in a traversal path. #[derive(Debug, Clone)] @@ -64,11 +64,11 @@ impl KhiveRuntime { /// Nodes already visited are skipped so the result set is deduplicated. pub async fn bfs_traverse( &self, - namespace: Option<&str>, + token: &NamespaceToken, start: Uuid, options: TraversalOptions, ) -> RuntimeResult> { - let graph = self.graph(namespace)?; + let graph = self.graph(token)?; let limit = options.max_results.unwrap_or(usize::MAX); let mut visited: HashSet = HashSet::new(); @@ -134,7 +134,7 @@ impl KhiveRuntime { /// For `from == to` returns `Some` with a single-node path immediately. 
pub async fn shortest_path( &self, - namespace: Option<&str>, + token: &NamespaceToken, from: Uuid, to: Uuid, max_depth: usize, @@ -147,7 +147,7 @@ impl KhiveRuntime { }])); } - let graph = self.graph(namespace)?; + let graph = self.graph(token)?; // Forward map: node -> (depth, parent, edge_id that reached this node) let mut fwd: HashMap, Option)> = HashMap::new(); @@ -318,7 +318,7 @@ impl KhiveRuntime { #[cfg(test)] mod tests { use super::*; - use crate::runtime::KhiveRuntime; + use crate::runtime::{KhiveRuntime, NamespaceToken}; use khive_storage::EdgeRelation; async fn rt() -> KhiveRuntime { @@ -328,15 +328,16 @@ mod tests { #[tokio::test] async fn bfs_max_depth_zero_returns_only_root() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -344,7 +345,7 @@ mod tests { max_depth: 0, ..Default::default() }; - let nodes = rt.bfs_traverse(None, a.id, opts).await.unwrap(); + let nodes = rt.bfs_traverse(&tok, a.id, opts).await.unwrap(); assert_eq!(nodes.len(), 1); assert_eq!(nodes[0].entity_id, a.id); @@ -355,30 +356,31 @@ mod tests { #[tokio::test] async fn bfs_depth_one_returns_root_and_neighbors() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, 
vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, c.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); // Add a node two hops away — it must NOT appear. let d = rt - .create_entity(None, "concept", "D", None, None, vec![]) + .create_entity(&tok, "concept", None, "D", None, None, vec![]) .await .unwrap(); - rt.link(None, b.id, d.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, d.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -386,7 +388,7 @@ mod tests { max_depth: 1, ..Default::default() }; - let nodes = rt.bfs_traverse(None, a.id, opts).await.unwrap(); + let nodes = rt.bfs_traverse(&tok, a.id, opts).await.unwrap(); let ids: HashSet = nodes.iter().map(|n| n.entity_id).collect(); assert!(ids.contains(&a.id)); @@ -404,16 +406,17 @@ mod tests { #[tokio::test] async fn bfs_direction_out_only() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Edge goes B -> A; traversing Out from A should find nothing. 
- rt.link(None, b.id, a.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, a.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -422,7 +425,7 @@ mod tests { direction: Direction::Out, ..Default::default() }; - let nodes = rt.bfs_traverse(None, a.id, opts).await.unwrap(); + let nodes = rt.bfs_traverse(&tok, a.id, opts).await.unwrap(); assert_eq!( nodes.len(), 1, @@ -433,16 +436,17 @@ mod tests { #[tokio::test] async fn bfs_direction_in_only() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Edge goes B -> A; traversing In from A should find B. - rt.link(None, b.id, a.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, a.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -451,7 +455,7 @@ mod tests { direction: Direction::In, ..Default::default() }; - let nodes = rt.bfs_traverse(None, a.id, opts).await.unwrap(); + let nodes = rt.bfs_traverse(&tok, a.id, opts).await.unwrap(); let ids: HashSet = nodes.iter().map(|n| n.entity_id).collect(); assert!( ids.contains(&b.id), @@ -462,22 +466,23 @@ mod tests { #[tokio::test] async fn bfs_relation_filter() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, 
b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::DependsOn, 1.0) + rt.link(&tok, a.id, c.id, EdgeRelation::Enables, 1.0, None) .await .unwrap(); @@ -486,7 +491,7 @@ mod tests { relations: Some(vec![EdgeRelation::Extends]), ..Default::default() }; - let nodes = rt.bfs_traverse(None, a.id, opts).await.unwrap(); + let nodes = rt.bfs_traverse(&tok, a.id, opts).await.unwrap(); let ids: HashSet = nodes.iter().map(|n| n.entity_id).collect(); assert!(ids.contains(&b.id), "B reachable via 'extends'"); assert!( @@ -498,26 +503,27 @@ mod tests { #[tokio::test] async fn shortest_path_connected_nodes() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, b.id, c.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, c.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - let path = rt.shortest_path(None, a.id, c.id, 10).await.unwrap(); + let path = rt.shortest_path(&tok, a.id, c.id, 10).await.unwrap(); let path = path.expect("path should exist"); assert_eq!(path.len(), 3, "A -> B -> C = 3 nodes"); assert_eq!(path[0].entity_id, a.id); @@ -527,29 +533,31 @@ mod tests { #[tokio::test] async fn shortest_path_unreachable_returns_none() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, 
vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // No edges between them. - let path = rt.shortest_path(None, a.id, b.id, 5).await.unwrap(); + let path = rt.shortest_path(&tok, a.id, b.id, 5).await.unwrap(); assert!(path.is_none()); } #[tokio::test] async fn shortest_path_same_node() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); - let path = rt.shortest_path(None, a.id, a.id, 5).await.unwrap(); + let path = rt.shortest_path(&tok, a.id, a.id, 5).await.unwrap(); let path = path.expect("trivial path should always exist"); assert_eq!(path.len(), 1); assert_eq!(path[0].entity_id, a.id); @@ -559,20 +567,21 @@ mod tests { #[tokio::test] async fn shortest_path_max_depth_zero_adjacent() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); // max_depth=0 means only the trivial from==to case succeeds. 
- let path = rt.shortest_path(None, a.id, b.id, 0).await.unwrap(); + let path = rt.shortest_path(&tok, a.id, b.id, 0).await.unwrap(); assert!( path.is_none(), "1-hop path should not be returned at max_depth=0" @@ -582,33 +591,34 @@ mod tests { #[tokio::test] async fn shortest_path_max_depth_one_two_hop_chain() { let rt = rt().await; + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, b.id, c.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, c.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); // max_depth=1 should find A->B but not A->B->C. - let one_hop = rt.shortest_path(None, a.id, b.id, 1).await.unwrap(); + let one_hop = rt.shortest_path(&tok, a.id, b.id, 1).await.unwrap(); assert!( one_hop.is_some(), "1-hop path should be found at max_depth=1" ); - let two_hop = rt.shortest_path(None, a.id, c.id, 1).await.unwrap(); + let two_hop = rt.shortest_path(&tok, a.id, c.id, 1).await.unwrap(); assert!( two_hop.is_none(), "2-hop path should not be returned at max_depth=1" diff --git a/crates/khive-runtime/src/lib.rs b/crates/khive-runtime/src/lib.rs index 7857a22b..385685a4 100644 --- a/crates/khive-runtime/src/lib.rs +++ b/crates/khive-runtime/src/lib.rs @@ -6,18 +6,17 @@ //! //! ```ignore //! use khive_runtime::{KhiveRuntime, RuntimeConfig}; +//! use khive_types::Namespace; //! //! // In-memory for tests: //! let rt = KhiveRuntime::memory()?; -//! -//! 
// Default (production): reads ~/.khive/khive-graph.db -//! let rt = KhiveRuntime::new(RuntimeConfig::default())?; +//! let tok = rt.authorize(Namespace::local()); //! //! // Create an entity: -//! let entity = rt.create_entity(None, "concept", "LoRA", None, None, vec![]).await?; +//! let entity = rt.create_entity(&tok, "concept", None, "LoRA", None, None, vec![]).await?; //! -//! // Link two entities (EdgeRelation is the typed relation): -//! let edge = rt.link(None, entity.id, other_id, EdgeRelation::Extends, 1.0).await?; +//! // Link two entities: +//! let edge = rt.link(&tok, entity.id, other_id, EdgeRelation::Extends, 1.0, None).await?; //! ``` pub mod curation; @@ -28,11 +27,16 @@ pub mod objectives; pub mod operations; pub mod pack; pub mod portability; +pub mod presentation; pub mod registry; pub mod retrieval; pub mod runtime; +pub mod validation; -pub use curation::{EdgeListFilter, EntityPatch, MergeStrategy, MergeSummary}; +pub use curation::{ + ContentMergeStrategy, EdgeListFilter, EdgePatch, EntityDedupMergePolicy, EntityPatch, + MergeSummary, NotePatch, +}; pub use error::{RuntimeError, RuntimeResult}; pub use fusion::FusionStrategy; pub use graph_traversal::{PathNode, TraversalOptions}; @@ -40,16 +44,24 @@ pub use khive_gate::{ ActorRef, AllowAllGate, AuditDecision, AuditEvent, Gate, GateContext, GateDecision, GateError, GateRef, GateRequest, Obligation, }; +pub use khive_storage::{EventObservation, EventView, ObservationRole, ReferentKind}; +pub use khive_types::namespace::Namespace; pub use objectives::{ - GraphProximityObjective, RetrievalCandidate, RrfFusionObjective, TextRelevanceObjective, + DecayAwareImportanceObjective, GraphProximityObjective, NoteCandidate, RerankerObjective, + RetrievalCandidate, RrfFusionObjective, TemporalRecencyObjective, TextRelevanceObjective, VectorSimilarityObjective, }; -pub use operations::{NoteSearchHit, QueryResult, Resolved}; +pub use operations::{LinkSpec, NoteSearchHit, QueryResult, Resolved}; pub use pack::{ 
- DispatchHook, KindHook, PackFactory, PackRegistration, PackRegistry, PackRuntime, VerbRegistry, - VerbRegistryBuilder, + DispatchHook, KindHook, NoteKindSpec, NoteLifecycleSpec, PackFactory, PackRegistration, + PackRegistry, PackRuntime, PackSchemaPlan, SchemaPlan, VerbRegistry, VerbRegistryBuilder, }; pub use portability::{ImportSummary, KgArchive}; +pub use presentation::{present, PresentationMode}; pub use registry::{ObjectiveRegistry, RegisteredObjective}; pub use retrieval::{SearchHit, SearchSource}; -pub use runtime::{parse_pack_list, KhiveRuntime, RuntimeConfig}; +pub use runtime::{parse_pack_list, BackendId, KhiveRuntime, NamespaceToken, RuntimeConfig}; +pub use validation::{ + GraphPatch, GraphSnapshot, RuleFn, RuleId, Severity, ValidationContext, ValidationReport, + ValidationRule, Violation, +}; diff --git a/crates/khive-runtime/src/objectives.rs b/crates/khive-runtime/src/objectives.rs index bec33510..220b539b 100644 --- a/crates/khive-runtime/src/objectives.rs +++ b/crates/khive-runtime/src/objectives.rs @@ -5,6 +5,10 @@ //! and feeds it in via the candidate struct. //! //! See ADR-061 — Retrieval Infrastructure. +//! See ADR-033 — Recall Pipeline (NoteCandidate, DecayAwareImportanceObjective, +//! TemporalRecencyObjective, RerankerObjective). + +use std::collections::HashMap; use uuid::Uuid; @@ -114,6 +118,9 @@ impl Objective for GraphProximityObjective { /// Scores a candidate by its pre-computed RRF fusion score. /// /// Returns `rrf_score` unchanged, or 0.0 when the field is absent. +/// Implements `Objective` for both `RetrievalCandidate` and `NoteCandidate` +/// so the same objective can be used in the general retrieval pipeline +/// and the memory recall pipeline (ADR-033 §4). 
pub struct RrfFusionObjective; impl Objective for RrfFusionObjective { @@ -127,6 +134,171 @@ impl Objective for RrfFusionObjective { } } +impl Objective for RrfFusionObjective { + #[inline] + fn score(&self, candidate: &NoteCandidate, _context: &ObjectiveContext) -> f64 { + candidate.rrf_score.unwrap_or(0.0) + } + + fn name(&self) -> &str { + "RrfFusionObjective" + } +} + +// ── Memory-Recall Objectives (ADR-033 §4) ──────────────────────────────────── + +/// Pre-computed signals for a single memory note candidate. +/// +/// Used by the recall pipeline's `ComposePipeline` to score and rank candidates +/// via `DecayAwareImportanceObjective`, `TemporalRecencyObjective`, and +/// `RerankerObjective` without any IO. The runtime layer populates this struct +/// from stored notes before handing the slice to the pipeline. +/// +/// See ADR-033 §4. +#[derive(Debug, Clone)] +pub struct NoteCandidate { + /// Stable note UUID. + pub id: Uuid, + /// Pre-fused RRF score from the retrieval stage (0.0–1.0). + pub rrf_score: Option, + /// Raw salience stored on the note (0.0–1.0). + pub salience: f64, + /// Per-note exponential decay rate (>= 0.0). + pub decay_factor: f64, + /// Age of the note in days at query time. + pub age_days: f64, + /// Per-reranker scores populated by the rerank stage. + /// Keyed by reranker name (e.g. "cross_encoder", "salience", "graph_proximity"). + pub rerank_scores: HashMap, +} + +impl HasId for NoteCandidate { + #[inline] + fn id(&self) -> Uuid { + self.id + } +} + +// ── DecayAwareImportanceObjective ──────────────────────────────────────────── + +/// Scores a `NoteCandidate` by salience with configurable temporal decay. +/// +/// ADR-021 §5 / ADR-033 §4. The decay formula is determined by the configured +/// `DecayModel` (injected at construction time). The default `DecayModel::Exponential` +/// uses the note's own `decay_factor`: `salience * exp(-decay_factor * age_days)`. 
+/// +/// This objective participates in `WeightedObjective` composition alongside +/// `RrfFusionObjective` and `TemporalRecencyObjective` to form the full recall +/// scoring pipeline. +pub struct DecayAwareImportanceObjective { + /// Exponential decay rate k (>= 0.0), stored at construction. Not read by `score`, + /// which decays by each candidate's own `decay_factor` (ADR-021 per-note parameter). + pub decay_rate: f64, +} + +impl DecayAwareImportanceObjective { + /// Create a new objective with the given exponential decay rate. + /// + /// `decay_rate = 0.01` gives a ~69-day half-life (the ADR-021 default for memory notes). + pub fn new(decay_rate: f64) -> Self { + Self { decay_rate } + } + + /// Default memory decay rate from ADR-021: 0.01 (~69-day half-life). + pub fn default_memory() -> Self { + Self::new(0.01) + } +} + +impl Objective for DecayAwareImportanceObjective { + #[inline] + fn score(&self, candidate: &NoteCandidate, _context: &ObjectiveContext) -> f64 { + // ADR-021 §5 / ADR-033 §4: + // effective_importance = salience * exp(-decay_factor * age_days) + candidate.salience * (-candidate.decay_factor * candidate.age_days).exp() + } + + fn name(&self) -> &str { + "DecayAwareImportanceObjective" + } +} + +// ── TemporalRecencyObjective ───────────────────────────────────────────────── + +/// Scores a `NoteCandidate` by pure temporal recency with a configurable half-life. +/// +/// Formula: `exp(-ln(2) / half_life_days * age_days)` +/// +/// At `age_days = 0` → score 1.0 (brand new note). +/// At `age_days = half_life_days` → score 0.5. +/// +/// Complements `DecayAwareImportanceObjective`: this signal rewards freshness +/// independently of the note's own decay rate. +pub struct TemporalRecencyObjective { + /// Number of days for the recency score to halve. Must be > 0. + pub half_life_days: f64, +} + +impl TemporalRecencyObjective { + /// Create with the ADR-021 default temporal half-life of 30 days.
+ pub fn default_memory() -> Self { + Self { + half_life_days: 30.0, + } + } +} + +impl Objective for TemporalRecencyObjective { + #[inline] + fn score(&self, candidate: &NoteCandidate, _context: &ObjectiveContext) -> f64 { + let k = std::f64::consts::LN_2 / self.half_life_days.max(f64::EPSILON); + (-k * candidate.age_days).exp() + } + + fn name(&self) -> &str { + "TemporalRecencyObjective" + } +} + +// ── RerankerObjective ──────────────────────────────────────────────────────── + +/// Scores a `NoteCandidate` using a named reranker's pre-computed score. +/// +/// Looks up `candidate.rerank_scores[reranker_name]`. Returns 0.0 when the +/// reranker was not run (key absent) — callers should gate on +/// `RecallConfig.reranker_weights[name] > 0.0` before including this objective +/// in a `WeightedObjective` composition. +/// +/// See ADR-033 §4 and ADR-042 §7 for the reranker integration protocol. +pub struct RerankerObjective { + /// Name of the reranker to look up in `candidate.rerank_scores`. + pub reranker_name: String, +} + +impl RerankerObjective { + /// Create a new objective for the named reranker. 
+ pub fn new(name: impl Into) -> Self { + Self { + reranker_name: name.into(), + } + } +} + +impl Objective for RerankerObjective { + #[inline] + fn score(&self, candidate: &NoteCandidate, _context: &ObjectiveContext) -> f64 { + candidate + .rerank_scores + .get(&self.reranker_name) + .copied() + .unwrap_or(0.0) + } + + fn name(&self) -> &str { + "RerankerObjective" + } +} + // ──────────────────────────────────────────────────────────────────────────── #[cfg(test)] @@ -155,6 +327,22 @@ mod tests { } } + fn note_candidate( + rrf: Option, + salience: f64, + decay_factor: f64, + age_days: f64, + ) -> NoteCandidate { + NoteCandidate { + id: Uuid::new_v4(), + rrf_score: rrf, + salience, + decay_factor, + age_days, + rerank_scores: HashMap::new(), + } + } + // ── VectorSimilarityObjective ──────────────────────────────────────── #[test] @@ -332,4 +520,157 @@ mod tests { assert!((top[0].score - 0.9).abs() < 1e-12); assert!((top[1].score - 0.6).abs() < 1e-12); } + + // ── NoteCandidate: HasId ───────────────────────────────────────────── + + #[test] + fn note_candidate_has_id_returns_uuid() { + let id = Uuid::new_v4(); + let c = NoteCandidate { + id, + rrf_score: None, + salience: 0.5, + decay_factor: 0.01, + age_days: 0.0, + rerank_scores: HashMap::new(), + }; + assert_eq!(c.id(), id); + } + + // ── DecayAwareImportanceObjective ──────────────────────────────────── + + #[test] + fn decay_aware_zero_age_returns_full_salience() { + let obj = DecayAwareImportanceObjective::new(0.01); + let c = note_candidate(None, 0.8, 0.01, 0.0); + let score = obj.score(&c, &ctx()); + assert!((score - 0.8).abs() < 1e-12, "got {score}"); + } + + #[test] + fn decay_aware_uses_note_decay_factor_not_field() { + // ADR-021 §5: uses the note's own decay_factor, not the objective's + let obj = DecayAwareImportanceObjective::new(0.99); // obj.decay_rate ignored + // Note's decay_factor = 0.01, age=100 days → exp(-0.01*100) ≈ 0.368 + let c = note_candidate(None, 1.0, 0.01, 100.0); + let score = 
obj.score(&c, &ctx()); + let expected = (-0.01_f64 * 100.0).exp(); + assert!( + (score - expected).abs() < 1e-12, + "got {score}, expected {expected}" + ); + } + + #[test] + fn decay_aware_high_decay_reduces_score_faster() { + // High decay note should score lower at same age + let obj = DecayAwareImportanceObjective::new(0.0); + let slow = note_candidate(None, 1.0, 0.001, 100.0); + let fast = note_candidate(None, 1.0, 0.1, 100.0); + let score_slow = obj.score(&slow, &ctx()); + let score_fast = obj.score(&fast, &ctx()); + assert!( + score_slow > score_fast, + "slow decay should score higher: {score_slow} vs {score_fast}" + ); + } + + // ── TemporalRecencyObjective ───────────────────────────────────────── + + #[test] + fn temporal_score_one_at_zero_age() { + let obj = TemporalRecencyObjective { + half_life_days: 30.0, + }; + let c = note_candidate(None, 0.5, 0.01, 0.0); + let score = obj.score(&c, &ctx()); + assert!((score - 1.0).abs() < 1e-12, "got {score}"); + } + + #[test] + fn temporal_score_half_at_half_life() { + let half_life = 30.0; + let obj = TemporalRecencyObjective { + half_life_days: half_life, + }; + let c = note_candidate(None, 0.5, 0.01, half_life); + let score = obj.score(&c, &ctx()); + assert!( + (score - 0.5).abs() < 1e-10, + "expected 0.5 at half_life, got {score}" + ); + } + + #[test] + fn temporal_score_decreases_with_age() { + let obj = TemporalRecencyObjective { + half_life_days: 30.0, + }; + let young = note_candidate(None, 1.0, 0.01, 10.0); + let old = note_candidate(None, 1.0, 0.01, 100.0); + let score_young = obj.score(&young, &ctx()); + let score_old = obj.score(&old, &ctx()); + assert!( + score_young > score_old, + "younger note should score higher: {score_young} vs {score_old}" + ); + } + + // ── RerankerObjective ──────────────────────────────────────────────── + + #[test] + fn reranker_returns_named_score() { + let mut c = note_candidate(None, 0.5, 0.01, 0.0); + c.rerank_scores.insert("cross_encoder".to_string(), 0.9); + let obj = 
RerankerObjective::new("cross_encoder"); + let score = obj.score(&c, &ctx()); + assert!((score - 0.9).abs() < 1e-12, "got {score}"); + } + + #[test] + fn reranker_absent_key_returns_zero() { + let c = note_candidate(None, 0.5, 0.01, 0.0); + let obj = RerankerObjective::new("cross_encoder"); + let score = obj.score(&c, &ctx()); + assert_eq!(score, 0.0); + } + + #[test] + fn reranker_different_keys_independent() { + let mut c = note_candidate(None, 0.5, 0.01, 0.0); + c.rerank_scores.insert("salience".to_string(), 0.7); + let obj_ce = RerankerObjective::new("cross_encoder"); + let obj_sal = RerankerObjective::new("salience"); + assert_eq!(obj_ce.score(&c, &ctx()), 0.0); + assert!((obj_sal.score(&c, &ctx()) - 0.7).abs() < 1e-12); + } + + // ── Weighted composition of memory objectives ──────────────────────── + + #[test] + fn memory_pipeline_weighted_composition() { + // Reproduce ADR-021 §5 formula via WeightedObjective: + // score = rrf * 0.70 + importance_decayed * 0.20 + temporal * 0.10 + // At age=0: importance_decayed = salience, temporal = 1.0 + let c = NoteCandidate { + id: Uuid::new_v4(), + rrf_score: Some(0.5), + salience: 0.8, + decay_factor: 0.01, + age_days: 0.0, + rerank_scores: HashMap::new(), + }; + let pipeline = WeightedObjective::::new() + .add(Box::new(RrfFusionObjective), 0.70) + .add(Box::new(DecayAwareImportanceObjective::new(0.0)), 0.20) + .add( + Box::new(TemporalRecencyObjective { + half_life_days: 30.0, + }), + 0.10, + ); + let score = pipeline.score(&c, &ctx()); + // (0.7*0.5 + 0.2*0.8 + 0.1*1.0) / 1.0 = 0.35 + 0.16 + 0.10 = 0.61 + assert!((score - 0.61).abs() < 1e-10, "got {score}"); + } } diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 84c75e2d..a5abb6bb 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -14,10 +14,10 @@ use khive_storage::types::{ TextSearchRequest, TraversalRequest, }; use khive_storage::{Edge, EdgeRelation, Entity, 
EntityFilter, Event, EventFilter}; -use khive_types::{EdgeEndpointRule, EndpointKind, SubstrateKind}; +use khive_types::{EdgeEndpointRule, EndpointKind, EventKind, SubstrateKind}; use crate::error::{RuntimeError, RuntimeResult}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; // Test-only failure injection for `create_note_inner`. // @@ -49,6 +49,31 @@ fn text_preview(text: &str, max_chars: usize) -> Option { } } +/// ADR-002: symmetric relations (`competes_with`, `composed_with`) are stored +/// with a canonical source (lower UUID wins), so a directed `Out` or `In` query +/// may miss results. When the relations filter is non-empty and contains **only** +/// symmetric relations, override direction to `Both` so callers always see all +/// edges for these relations regardless of storage canonicalization. +fn normalize_symmetric_direction( + direction: Direction, + relations: Option<&[EdgeRelation]>, +) -> Direction { + let Some(rels) = relations else { + return direction; + }; + if rels.is_empty() { + return direction; + } + let all_symmetric = rels + .iter() + .all(|r| matches!(r, EdgeRelation::CompetesWith | EdgeRelation::ComposedWith)); + if all_symmetric { + Direction::Both + } else { + direction + } +} + fn note_title(note: &Note) -> Option { note.name .clone() @@ -107,21 +132,181 @@ fn pack_rule_allows( }) } +/// ADR-002 base endpoint allowlist for entity→entity relations. +/// +/// Returns `true` if `(src_kind, relation, tgt_kind)` is an explicitly listed +/// triple in the ADR-002 base contract. `"*"` as `src_kind` means "any entity +/// kind" (used for `instance_of` whose source is unrestricted). +/// +/// Pack rules (via `EDGE_RULES`) are additive — they cannot remove rows here. 
+fn base_entity_rule_allows(src_kind: &str, relation: EdgeRelation, tgt_kind: &str) -> bool { + const RULES: &[(&str, EdgeRelation, &str)] = &[ + // Structure + ("concept", EdgeRelation::Contains, "concept"), + ("project", EdgeRelation::Contains, "project"), + ("project", EdgeRelation::Contains, "artifact"), + ("org", EdgeRelation::Contains, "project"), + ("org", EdgeRelation::Contains, "service"), + ("concept", EdgeRelation::PartOf, "concept"), + ("project", EdgeRelation::PartOf, "project"), + ("project", EdgeRelation::PartOf, "org"), + ("*", EdgeRelation::InstanceOf, "concept"), + ("service", EdgeRelation::InstanceOf, "project"), + // Derivation + ("concept", EdgeRelation::Extends, "concept"), + ("concept", EdgeRelation::VariantOf, "concept"), + ("artifact", EdgeRelation::VariantOf, "artifact"), + ("concept", EdgeRelation::IntroducedBy, "document"), + ("concept", EdgeRelation::IntroducedBy, "person"), + ("artifact", EdgeRelation::IntroducedBy, "document"), + // Provenance + ("artifact", EdgeRelation::DerivedFrom, "dataset"), + ("artifact", EdgeRelation::DerivedFrom, "document"), + ("artifact", EdgeRelation::DerivedFrom, "project"), + ("artifact", EdgeRelation::DerivedFrom, "artifact"), + // Temporal + ("document", EdgeRelation::Precedes, "document"), + ("dataset", EdgeRelation::Precedes, "dataset"), + ("artifact", EdgeRelation::Precedes, "artifact"), + ("service", EdgeRelation::Precedes, "service"), + ("project", EdgeRelation::Precedes, "project"), + // Dependency + ("project", EdgeRelation::DependsOn, "project"), + ("service", EdgeRelation::DependsOn, "project"), + ("service", EdgeRelation::DependsOn, "service"), + ("service", EdgeRelation::DependsOn, "artifact"), + ("service", EdgeRelation::DependsOn, "dataset"), + ("artifact", EdgeRelation::DependsOn, "project"), + ("artifact", EdgeRelation::DependsOn, "service"), + ("concept", EdgeRelation::Enables, "concept"), + ("service", EdgeRelation::Enables, "concept"), + ("dataset", EdgeRelation::Enables, "concept"), + 
// Implementation + ("project", EdgeRelation::Implements, "concept"), + ("service", EdgeRelation::Implements, "concept"), + // Lateral + ("concept", EdgeRelation::CompetesWith, "concept"), + ("project", EdgeRelation::CompetesWith, "project"), + ("service", EdgeRelation::CompetesWith, "service"), + ("concept", EdgeRelation::ComposedWith, "concept"), + ("project", EdgeRelation::ComposedWith, "project"), + // Versioning (Supersedes — ADR-002:190-194: Concept/Document/Artifact/Service/Dataset only) + ("concept", EdgeRelation::Supersedes, "concept"), + ("document", EdgeRelation::Supersedes, "document"), + ("artifact", EdgeRelation::Supersedes, "artifact"), + ("service", EdgeRelation::Supersedes, "service"), + ("dataset", EdgeRelation::Supersedes, "dataset"), + ]; + RULES.iter().any(|(src, rel, tgt)| { + *rel == relation && (*src == "*" || *src == src_kind) && *tgt == tgt_kind + }) +} + +/// Canonical endpoint order for symmetric relations (F012). +/// +/// For `competes_with` and `composed_with`, normalises direction so that +/// `source_uuid < target_uuid` (lexicographic on the UUID bytes). This +/// collapses A→B and B→A into a single canonical row, preventing duplicates. +fn canonical_edge_endpoints( + relation: EdgeRelation, + source_id: Uuid, + target_id: Uuid, +) -> (Uuid, Uuid) { + if relation.is_symmetric() && target_id < source_id { + (target_id, source_id) + } else { + (source_id, target_id) + } +} + +/// Infer the default `dependency_kind` from endpoint entity kinds (ADR-002). +fn infer_dependency_kind(src_kind: &str, tgt_kind: &str) -> Option<&'static str> { + match (src_kind, tgt_kind) { + ("project", "project") => Some("build"), + ("service", "service") => Some("runtime"), + ("service", "dataset") => Some("data"), + ("service", "artifact") => Some("artifact"), + ("artifact", "project") | ("artifact", "service") => Some("tooling"), + _ => None, + } +} + +/// Merge an inferred `dependency_kind` into `depends_on` edge metadata. 
+///
+/// If `metadata` already carries a `dependency_kind` key the existing value is
+/// preserved. If the key is absent and the endpoint pair has a known default,
+/// the inferred value is added. Returns `metadata` unchanged for all other
+/// cases (no matching default, or metadata already has the key). Metadata that
+/// is not a JSON object is also returned unchanged.
+fn merge_dependency_kind(
+    src_kind: &str,
+    tgt_kind: &str,
+    metadata: Option<serde_json::Value>,
+) -> Option<serde_json::Value> {
+    // An explicit caller-supplied value always wins over inference.
+    if let Some(ref m) = metadata {
+        if m.get("dependency_kind").is_some() {
+            return metadata;
+        }
+    }
+    // No default for this endpoint pair: hand the caller's metadata back as-is.
+    // (`?` would return `None` here and silently drop the caller's metadata.)
+    let Some(inferred) = infer_dependency_kind(src_kind, tgt_kind) else {
+        return metadata;
+    };
+    let mut obj = metadata.unwrap_or_else(|| serde_json::json!({}));
+    // Only a JSON object can take the key; any other JSON value passes through.
+    if let Some(o) = obj.as_object_mut() {
+        o.insert("dependency_kind".to_string(), serde_json::json!(inferred));
+    }
+    Some(obj)
+}
+
+/// Valid `dependency_kind` values for `depends_on` edges (ADR-002).
+const VALID_DEPENDENCY_KINDS: &[&str] = &["build", "runtime", "data", "artifact", "tooling"];
+
+/// Validate governed edge metadata keys (ADR-002 §Edge Metadata).
+///
+/// Currently enforces:
+/// - `dependency_kind` is only valid on `depends_on` edges.
+/// - `dependency_kind`, when present, must be one of the five governed values.
+fn validate_edge_metadata( + relation: EdgeRelation, + metadata: Option<&serde_json::Value>, +) -> RuntimeResult<()> { + let Some(meta) = metadata else { + return Ok(()); + }; + if let Some(dk) = meta.get("dependency_kind") { + if relation != EdgeRelation::DependsOn { + return Err(RuntimeError::InvalidInput(format!( + "dependency_kind is only valid on depends_on edges (got {})", + relation.as_str() + ))); + } + let dk_str = dk + .as_str() + .ok_or_else(|| RuntimeError::InvalidInput("dependency_kind must be a string".into()))?; + if !VALID_DEPENDENCY_KINDS.contains(&dk_str) { + return Err(RuntimeError::InvalidInput(format!( + "unknown dependency_kind {dk_str:?}; valid: {}", + VALID_DEPENDENCY_KINDS.join(" | ") + ))); + } + } + Ok(()) +} + impl KhiveRuntime { // ---- Entity operations ---- /// Create and persist a new entity. + #[allow(clippy::too_many_arguments)] pub async fn create_entity( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: &str, + entity_type: Option<&str>, name: &str, description: Option<&str>, properties: Option, tags: Vec, ) -> RuntimeResult { - let ns = self.ns(namespace); - let mut entity = Entity::new(ns, kind, name); + let ns = token.namespace().as_str(); + let mut entity = Entity::new(ns, kind, name).with_entity_type(entity_type); if let Some(d) = description { entity = entity.with_description(d); } @@ -131,15 +316,13 @@ impl KhiveRuntime { if !tags.is_empty() { entity = entity.with_tags(tags); } - self.entities(Some(ns))? - .upsert_entity(entity.clone()) - .await?; + self.entities(token)?.upsert_entity(entity.clone()).await?; let body = match &entity.description { Some(d) if !d.is_empty() => format!("{} {}", entity.name, d), _ => entity.name.clone(), }; - self.text(namespace)? + self.text(token)? 
.upsert_document(TextDocument { subject_id: entity.id, kind: SubstrateKind::Entity, @@ -154,38 +337,56 @@ impl KhiveRuntime { if self.config().embedding_model.is_some() { let vector = self.embed(&body).await?; - self.vectors(namespace)? - .insert(entity.id, SubstrateKind::Entity, ns, vector) + self.vectors(token)? + .insert( + entity.id, + SubstrateKind::Entity, + ns, + "entity.body", + vec![vector], + ) .await?; } Ok(entity) } - /// Retrieve an entity by ID. + /// Retrieve an entity by ID, enforcing namespace isolation (ADR-007). /// - /// Returns `None` if the entity does not exist or belongs to a different namespace. - /// This enforces ADR-007 namespace isolation at the runtime layer. - pub async fn get_entity( + /// Returns `Err(NotFound)` if the entity does not exist in storage, + /// or `Err(NamespaceMismatch)` if it exists in a different namespace. + pub async fn get_entity(&self, token: &NamespaceToken, id: Uuid) -> RuntimeResult { + let entity = self + .entities(token)? + .get_entity(id) + .await? + .ok_or_else(|| RuntimeError::NotFound("not found in this namespace".into()))?; + self.ensure_namespace(&entity.namespace, token, id)?; + Ok(entity) + } + + /// Enforce that `actual` matches the token's namespace. + /// + /// Returns `Err(NamespaceMismatch { id })` when they differ, preserving ADR-007 + /// timing-oracle mitigation (the external message is "not found in this namespace"). + pub(crate) fn ensure_namespace( &self, - namespace: Option<&str>, + actual: &str, + token: &NamespaceToken, id: Uuid, - ) -> RuntimeResult> { - let entity = match self.entities(namespace)?.get_entity(id).await? { - Some(e) => e, - None => return Ok(None), - }; - if entity.namespace != self.ns(namespace) { - return Ok(None); + ) -> RuntimeResult<()> { + if actual == token.namespace().as_str() { + return Ok(()); } - Ok(Some(entity)) + Err(RuntimeError::NamespaceMismatch { id }) } - /// List entities in a namespace, optionally filtered by kind. 
+ /// List entities in a namespace, optionally filtered by kind and entity_type. pub async fn list_entities( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: Option<&str>, + entity_type: Option<&str>, limit: u32, offset: u32, ) -> RuntimeResult> { @@ -194,12 +395,16 @@ impl KhiveRuntime { Some(k) => vec![k.to_string()], None => vec![], }, + entity_types: match entity_type { + Some(t) => vec![t.to_string()], + None => vec![], + }, ..Default::default() }; let page = self - .entities(namespace)? + .entities(token)? .query_entities( - self.ns(namespace), + token.namespace().as_str(), filter, PageRequest { offset: offset.into(), @@ -210,26 +415,17 @@ impl KhiveRuntime { Ok(page.items) } - /// List events in a namespace, optionally filtered. + /// List events in the namespace proven by the caller token. pub async fn list_events( &self, - namespace: Option<&str>, + token: &NamespaceToken, filter: EventFilter, - limit: u32, - offset: u32, + page: PageRequest, ) -> RuntimeResult> { - let limit = limit.clamp(1, 1000); - let page = self - .events(namespace)? - .query_events( - filter, - PageRequest { - offset: offset.into(), - limit, - }, - ) - .await?; - Ok(page) + self.events(token)? + .query_events(filter, page) + .await + .map_err(Into::into) } // ---- Edge operations ---- @@ -247,14 +443,14 @@ impl KhiveRuntime { /// the same messages as the previous inline block (byte-identical behaviour). async fn validate_edge_relation_endpoints( &self, - namespace: Option<&str>, + token: &NamespaceToken, source_id: Uuid, target_id: Uuid, relation: EdgeRelation, ) -> RuntimeResult<()> { if relation == EdgeRelation::Annotates { // Source must be a note in namespace. - match self.resolve(namespace, source_id).await? { + match self.resolve(token, source_id).await? { Some(Resolved::Note(_)) => {} Some(_) => { return Err(RuntimeError::InvalidInput(format!( @@ -263,7 +459,7 @@ impl KhiveRuntime { } None => { // Existing edge used as annotates source: wrong kind, not absent. 
- if self.get_edge(namespace, source_id).await?.is_some() { + if self.get_edge(token, source_id).await?.is_some() { return Err(RuntimeError::InvalidInput(format!( "annotates source {source_id} must be a note" ))); @@ -274,7 +470,7 @@ impl KhiveRuntime { } } // Target may be any substrate (entity, note, event, or edge). - if !self.substrate_exists_in_ns(namespace, target_id).await? { + if !self.substrate_exists_in_ns(token, target_id).await? { return Err(RuntimeError::NotFound(format!( "link target {target_id} not found in namespace" ))); @@ -282,10 +478,10 @@ impl KhiveRuntime { } else if relation == EdgeRelation::Supersedes { // supersedes: same-substrate only (note→note or entity→entity). // Event and edge endpoints are invalid regardless of the other endpoint. - let src = match self.resolve(namespace, source_id).await? { + let src = match self.resolve(token, source_id).await? { Some(r) => r, None => { - if self.get_edge(namespace, source_id).await?.is_some() { + if self.get_edge(token, source_id).await?.is_some() { return Err(RuntimeError::InvalidInput(format!( "supersedes source {source_id} must be a note or entity (got edge)" ))); @@ -295,10 +491,10 @@ impl KhiveRuntime { ))); } }; - let tgt = match self.resolve(namespace, target_id).await? { + let tgt = match self.resolve(token, target_id).await? 
{ Some(r) => r, None => { - if self.get_edge(namespace, target_id).await?.is_some() { + if self.get_edge(token, target_id).await?.is_some() { return Err(RuntimeError::InvalidInput(format!( "supersedes target {target_id} must be a note or entity (got edge)" ))); @@ -309,7 +505,16 @@ impl KhiveRuntime { } }; match (&src, &tgt) { - (Resolved::Entity(_), Resolved::Entity(_)) => {} + (Resolved::Entity(src_e), Resolved::Entity(tgt_e)) => { + if !base_entity_rule_allows(&src_e.kind, EdgeRelation::Supersedes, &tgt_e.kind) + { + return Err(RuntimeError::InvalidInput(format!( + "({}) -[supersedes]-> ({}) is not in the ADR-002 base endpoint \ + allowlist; supersedes requires same-kind entity endpoints", + src_e.kind, tgt_e.kind + ))); + } + } (Resolved::Note(_), Resolved::Note(_)) => {} (Resolved::Event(_), _) => { return Err(RuntimeError::InvalidInput(format!( @@ -335,14 +540,14 @@ impl KhiveRuntime { } } } else { - // All 11 entity-default relations: ADR-002 base contract is - // entity→entity. ADR-031 allows packs to extend allowed endpoint - // pairs additively (e.g. GTD lets `depends_on` span task→task). + // All 13 base relations: ADR-002 contract is entity→entity with + // kind-level restrictions (see base allowlist). ADR-031 allows packs + // to extend the allowlist additively via EDGE_RULES. // // Strategy: resolve both endpoints once, consult pack rules; on // miss, fall through to the original base-rule error messages. - let src_res = self.resolve(namespace, source_id).await?; - let tgt_res = self.resolve(namespace, target_id).await?; + let src_res = self.resolve(token, source_id).await?; + let tgt_res = self.resolve(token, target_id).await?; if pack_rule_allows( &self.pack_edge_rules(), @@ -353,9 +558,9 @@ impl KhiveRuntime { return Ok(()); } - // Base-rule check. Same error messages as the pre-ADR-031 surface. - match src_res { - Some(Resolved::Entity(_)) => {} + // Substrate check: both endpoints must be entities. 
+ let src_kind = match src_res { + Some(Resolved::Entity(e)) => e.kind, Some(_) => { return Err(RuntimeError::InvalidInput(format!( "link source {source_id} must be an entity for relation {relation:?} \ @@ -363,7 +568,7 @@ impl KhiveRuntime { ))); } None => { - if self.get_edge(namespace, source_id).await?.is_some() { + if self.get_edge(token, source_id).await?.is_some() { return Err(RuntimeError::InvalidInput(format!( "link source {source_id} must be an entity for relation {relation:?} \ (ADR-002: only `annotates` crosses substrates)" @@ -373,9 +578,9 @@ impl KhiveRuntime { "link source {source_id} not found in namespace" ))); } - } - match tgt_res { - Some(Resolved::Entity(_)) => {} + }; + let tgt_kind = match tgt_res { + Some(Resolved::Entity(e)) => e.kind, Some(_) => { return Err(RuntimeError::InvalidInput(format!( "link target {target_id} must be an entity for relation {relation:?} \ @@ -383,7 +588,7 @@ impl KhiveRuntime { ))); } None => { - if self.get_edge(namespace, target_id).await?.is_some() { + if self.get_edge(token, target_id).await?.is_some() { return Err(RuntimeError::InvalidInput(format!( "link target {target_id} must be an entity for relation {relation:?} \ (ADR-002: only `annotates` crosses substrates)" @@ -393,6 +598,13 @@ impl KhiveRuntime { "link target {target_id} not found in namespace" ))); } + }; + if !base_entity_rule_allows(&src_kind, relation, &tgt_kind) { + return Err(RuntimeError::InvalidInput(format!( + "({src_kind}) -[{}]-> ({tgt_kind}) is not in the ADR-002 base endpoint \ + allowlist; use pack EDGE_RULES to extend the allowlist", + relation.as_str() + ))); } } Ok(()) @@ -403,28 +615,62 @@ impl KhiveRuntime { /// Enforces the ADR-002/ADR-019/ADR-024 three-case relation contract via /// `validate_edge_relation_endpoints`. See that method for the full contract. 
/// + /// For symmetric relations (`competes_with`, `composed_with`) the endpoint + /// pair is canonicalised to `source_uuid < target_uuid` so that A→B and B→A + /// deduplicate to one row (F012). + /// + /// `metadata` is validated against governed keys (ADR-002 §Edge Metadata); + /// `dependency_kind` is inferred for `depends_on` edges when absent (F013). + /// + /// ADR-009 invariant: `target_backend` is always `None` for locally-routed + /// edges written through this path. The `validate_edge_relation_endpoints` + /// call above already ensures both endpoints exist in the local namespace, + /// so setting `target_backend = None` is the only valid choice (F161). + /// /// A record that exists but belongs to a different namespace is treated as not found /// (fail-closed; no cross-namespace existence leak). pub async fn link( &self, - namespace: Option<&str>, + token: &NamespaceToken, source_id: Uuid, target_id: Uuid, relation: EdgeRelation, weight: f64, + metadata: Option, ) -> RuntimeResult { - self.validate_edge_relation_endpoints(namespace, source_id, target_id, relation) + self.validate_edge_relation_endpoints(token, source_id, target_id, relation) .await?; + let (source_id, target_id) = canonical_edge_endpoints(relation, source_id, target_id); + let metadata = if relation == EdgeRelation::DependsOn { + match ( + self.resolve(token, source_id).await?, + self.resolve(token, target_id).await?, + ) { + (Some(Resolved::Entity(src_e)), Some(Resolved::Entity(tgt_e))) => { + merge_dependency_kind(&src_e.kind, &tgt_e.kind, metadata) + } + _ => metadata, + } + } else { + metadata + }; + validate_edge_metadata(relation, metadata.as_ref())?; + let now = chrono::Utc::now(); + let ns = token.namespace().as_str(); let edge = Edge { id: LinkId::from(Uuid::new_v4()), + namespace: ns.to_string(), source_id, target_id, relation, weight, - created_at: chrono::Utc::now(), - metadata: None, + created_at: now, + updated_at: now, + deleted_at: None, + metadata, + target_backend: 
None, }; - self.graph(namespace)?.upsert_edge(edge.clone()).await?; + self.graph(token)?.upsert_edge(edge.clone()).await?; Ok(edge) } @@ -434,29 +680,33 @@ impl KhiveRuntime { /// A record that exists in a different namespace returns `false` (fail-closed). async fn substrate_exists_in_ns( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, ) -> RuntimeResult { - if self.resolve(namespace, id).await?.is_some() { + if self.resolve(token, id).await?.is_some() { return Ok(true); } - Ok(self.get_edge(namespace, id).await?.is_some()) + Ok(self.get_edge(token, id).await?.is_some()) } /// Get immediate neighbors of a node, optionally filtered by relation type. /// /// Pass `relations: Some(vec![EdgeRelation::Annotates])` to retrieve only /// annotation edges, enabling cross-substrate navigation as described in ADR-024. + /// + /// ADR-002: symmetric relations (`competes_with`, `composed_with`) are stored + /// with the canonical source as the lower UUID. Direction normalization is + /// applied in `neighbors_with_query` so both callers see correct results. pub async fn neighbors( &self, - namespace: Option<&str>, + token: &NamespaceToken, node_id: Uuid, direction: Direction, limit: Option, relations: Option>, ) -> RuntimeResult> { self.neighbors_with_query( - namespace, + token, node_id, NeighborQuery { direction, @@ -469,25 +719,31 @@ impl KhiveRuntime { } /// Get neighbors with full query control (includes `min_weight`). + /// + /// Applies symmetric-relation direction normalization (ADR-002): if the + /// relations filter contains only symmetric relations the direction is + /// overridden to `Both` so edges stored in canonical order are always found. 
pub async fn neighbors_with_query( &self, - namespace: Option<&str>, + token: &NamespaceToken, node_id: Uuid, - query: NeighborQuery, + mut query: NeighborQuery, ) -> RuntimeResult> { - let mut hits = self.graph(namespace)?.neighbors(node_id, query).await?; - self.enrich_neighbor_hits(namespace, &mut hits).await; + query.direction = + normalize_symmetric_direction(query.direction, query.relations.as_deref()); + let mut hits = self.graph(token)?.neighbors(node_id, query).await?; + self.enrich_neighbor_hits(token, &mut hits).await; Ok(hits) } /// Traverse the graph from a set of root nodes. pub async fn traverse( &self, - namespace: Option<&str>, + token: &NamespaceToken, request: TraversalRequest, ) -> RuntimeResult> { - let mut paths = self.graph(namespace)?.traverse(request).await?; - self.enrich_path_nodes(namespace, &mut paths).await; + let mut paths = self.graph(token)?.traverse(request).await?; + self.enrich_path_nodes(token, &mut paths).await; Ok(paths) } @@ -498,11 +754,11 @@ impl KhiveRuntime { /// Done as a single batched entity fetch instead of an SQL JOIN at the /// graph store, so test databases that wire up a graph store without an /// entities table still work. Cost: one query per neighbors() call. - async fn enrich_neighbor_hits(&self, namespace: Option<&str>, hits: &mut [NeighborHit]) { + async fn enrich_neighbor_hits(&self, token: &NamespaceToken, hits: &mut [NeighborHit]) { if hits.is_empty() { return; } - let store = match self.entities(namespace) { + let store = match self.entities(token) { Ok(s) => s, Err(_) => return, // no entity store configured; leave name/kind as None }; @@ -516,11 +772,11 @@ impl KhiveRuntime { /// Populate `name` and `kind` on each `PathNode` from the corresponding /// entity record (#162). Same best-effort policy as `enrich_neighbor_hits`. 
- async fn enrich_path_nodes(&self, namespace: Option<&str>, paths: &mut [GraphPath]) { + async fn enrich_path_nodes(&self, token: &NamespaceToken, paths: &mut [GraphPath]) { if paths.is_empty() { return; } - let store = match self.entities(namespace) { + let store = match self.entities(token) { Ok(s) => s, Err(_) => return, }; @@ -547,16 +803,16 @@ impl KhiveRuntime { #[allow(clippy::too_many_arguments)] pub async fn create_note( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: &str, name: Option<&str>, content: &str, - salience: f64, + salience: Option, properties: Option, annotates: Vec, ) -> RuntimeResult { self.create_note_inner( - namespace, kind, name, content, salience, None, properties, annotates, + token, kind, name, content, salience, None, properties, annotates, ) .await } @@ -565,17 +821,17 @@ impl KhiveRuntime { #[allow(clippy::too_many_arguments)] pub async fn create_note_with_decay( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: &str, name: Option<&str>, content: &str, - salience: f64, + salience: Option, decay_factor: f64, properties: Option, annotates: Vec, ) -> RuntimeResult { self.create_note_inner( - namespace, + token, kind, name, content, @@ -590,27 +846,30 @@ impl KhiveRuntime { #[allow(clippy::too_many_arguments)] async fn create_note_inner( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: &str, name: Option<&str>, content: &str, - salience: f64, + salience: Option, decay_factor: Option, properties: Option, annotates: Vec, ) -> RuntimeResult { - let ns = self.ns(namespace); + let ns = token.namespace().as_str(); // Validate all annotates targets before any write (ADR-024:295 atomicity). for &target_id in &annotates { - if !self.substrate_exists_in_ns(namespace, target_id).await? { + if !self.substrate_exists_in_ns(token, target_id).await? 
{ return Err(RuntimeError::NotFound(format!( "create_note annotates target {target_id} not found in namespace" ))); } } - let mut note = Note::new(ns, kind, content).with_salience(salience); + let mut note = Note::new(ns, kind, content); + if let Some(s) = salience { + note = note.with_salience(s); + } if let Some(df) = decay_factor { note = note.with_decay(df); } @@ -620,14 +879,14 @@ impl KhiveRuntime { if let Some(p) = properties { note = note.with_properties(p); } - self.notes(Some(ns))?.upsert_note(note.clone()).await?; + self.notes(token)?.upsert_note(note.clone()).await?; let body = match ¬e.name { Some(n) => format!("{n} {}", note.content), None => note.content.clone(), }; - self.text_for_notes(Some(ns))? + self.text_for_notes(token)? .upsert_document(TextDocument { subject_id: note.id, kind: SubstrateKind::Note, @@ -642,8 +901,14 @@ impl KhiveRuntime { if self.config().embedding_model.is_some() { let vector = self.embed(¬e.content).await?; - self.vectors(Some(ns))? - .insert(note.id, SubstrateKind::Note, ns, vector) + self.vectors(token)? + .insert( + note.id, + SubstrateKind::Note, + ns, + "note.content", + vec![vector], + ) .await?; } @@ -700,8 +965,15 @@ impl KhiveRuntime { let link_result = if let Some(e) = injected_err { Err(e) } else { - self.link(Some(ns), note.id, target_id, EdgeRelation::Annotates, 1.0) - .await + self.link( + token, + note.id, + target_id, + EdgeRelation::Annotates, + 1.0, + None, + ) + .await }; match link_result { @@ -709,16 +981,16 @@ impl KhiveRuntime { Err(e) => { // Best-effort compensation — ignore cleanup errors. 
for edge_id in created_edges { - let _ = self.delete_edge(Some(ns), edge_id).await; + let _ = self.delete_edge(token, edge_id, true).await; } - if let Ok(store) = self.notes(Some(ns)) { + if let Ok(store) = self.notes(token) { let _ = store.delete_note(note.id, DeleteMode::Hard).await; } - if let Ok(fts) = self.text_for_notes(Some(ns)) { + if let Ok(fts) = self.text_for_notes(token) { let _ = fts.delete_document(ns, note.id).await; } if self.config().embedding_model.is_some() { - if let Ok(vs) = self.vectors(Some(ns)) { + if let Ok(vs) = self.vectors(token) { let _ = vs.delete(note.id).await; } } @@ -733,15 +1005,15 @@ impl KhiveRuntime { /// List notes, optionally filtered by kind. pub async fn list_notes( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: Option<&str>, limit: u32, offset: u32, ) -> RuntimeResult> { let page = self - .notes(namespace)? + .notes(token)? .query_notes( - self.ns(namespace), + token.namespace().as_str(), kind, PageRequest { offset: offset.into(), @@ -763,19 +1035,20 @@ impl KhiveRuntime { /// 6. Truncate to `limit`. pub async fn search_notes( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_text: &str, query_vector: Option>, limit: u32, note_kind: Option<&str>, + include_superseded: bool, ) -> RuntimeResult> { const RRF_K: usize = 60; let candidates = limit.saturating_mul(4).max(limit); - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); // FTS5 over the notes index. let text_hits = self - .text_for_notes(namespace)? + .text_for_notes(token)? .search(TextSearchRequest { query: query_text.to_string(), mode: TextQueryMode::Plain, @@ -791,7 +1064,7 @@ impl KhiveRuntime { // Vector search filtered to notes. let vector_hits = if query_vector.is_some() || self.config().embedding_model.is_some() { self.vector_search( - namespace, + token, query_vector, Some(query_text), candidates, @@ -837,7 +1110,7 @@ impl KhiveRuntime { // soft-delete + (optional) kind filtering. 
Notes whose `kind` doesn't // match `note_kind` are dropped post-fetch — they're a small set // bounded by `candidates`, so the extra read is cheap. - let note_store = self.notes(namespace)?; + let note_store = self.notes(token)?; let mut alive_notes: HashMap = HashMap::new(); for id in &candidate_ids { if let Some(note) = note_store.get_note(*id).await? { @@ -853,10 +1126,11 @@ impl KhiveRuntime { } } - // Drop superseded notes: any note targeted by a `supersedes` edge is - // obsolete and excluded from default search (ADR-019, ADR-024). - if !alive_notes.is_empty() { - let graph = self.graph(namespace)?; + // Drop superseded notes unless include_superseded is true: any note targeted + // by a `supersedes` edge is obsolete and excluded from default search + // (ADR-013, ADR-024). + if !include_superseded && !alive_notes.is_empty() { + let graph = self.graph(token)?; let mut superseded: std::collections::HashSet = std::collections::HashSet::new(); for ¬e_id in alive_notes.keys() { let inbound = graph @@ -882,7 +1156,8 @@ impl KhiveRuntime { .into_iter() .filter_map(|(id, bucket)| { let note = alive_notes.get(&id)?; - let weight = 0.5 + 0.5 * note.salience; + let salience = note.salience.unwrap_or(0.5); + let weight = 0.5 + 0.5 * salience; let weighted = DeterministicScore::from_f64(bucket.score.to_f64() * weight); Some(NoteSearchHit { note_id: id, @@ -906,12 +1181,12 @@ impl KhiveRuntime { /// ambiguous (multiple matches). 
pub async fn resolve_prefix( &self, - namespace: Option<&str>, + token: &NamespaceToken, prefix: &str, ) -> RuntimeResult> { use khive_storage::types::{SqlStatement, SqlValue}; - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); let pattern = format!("{}%", prefix); let tables = [ @@ -970,9 +1245,16 @@ impl KhiveRuntime { .map_err(|e| RuntimeError::Internal(format!("stored UUID is invalid: {e}")))?; Ok(Some(uuid)) } - _ => Err(RuntimeError::Ambiguous(format!( - "prefix '{prefix}' matches multiple UUIDs" - ))), + _ => { + let uuids: Vec = matches + .iter() + .filter_map(|s| Uuid::from_str(s).ok()) + .collect(); + Err(RuntimeError::AmbiguousPrefix { + prefix: prefix.to_string(), + matches: uuids, + }) + } } } @@ -982,25 +1264,27 @@ impl KhiveRuntime { /// Cost: at most 3 store lookups per call (cheap for v0.1). pub async fn resolve( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, ) -> RuntimeResult> { - let ns = self.ns(namespace); + let ns = token.namespace().as_str(); - // Entity: use the namespace-checked getter (returns None on mismatch). - if let Some(entity) = self.get_entity(namespace, id).await? { - return Ok(Some(Resolved::Entity(entity))); + // Entity: use the namespace-checked getter (errors on mismatch/absent). + match self.get_entity(token, id).await { + Ok(entity) => return Ok(Some(Resolved::Entity(entity))), + Err(RuntimeError::NotFound(_) | RuntimeError::NamespaceMismatch { .. }) => {} + Err(e) => return Err(e), } // Note: storage get_note is ID-only — verify namespace after fetch. - if let Some(note) = self.notes(namespace)?.get_note(id).await? { + if let Some(note) = self.notes(token)?.get_note(id).await? { if note.namespace == ns { return Ok(Some(Resolved::Note(note))); } } // Event: storage get_event is ID-only — verify namespace after fetch. - if let Some(event) = self.events(namespace)?.get_event(id).await? { + if let Some(event) = self.events(token)?.get_event(id).await? 
{ if event.namespace == ns { return Ok(Some(Resolved::Event(event))); } @@ -1016,22 +1300,22 @@ impl KhiveRuntime { /// references for `annotates` edges that target this note (ADR-002, ADR-024). /// Soft delete also cleans FTS and vector indexes; edges are left in place. /// - /// Returns `false` without deleting if the note does not exist or belongs to - /// a different namespace (ADR-007 namespace isolation). + /// Returns `Ok(false)` if the note does not exist, or `Err(NamespaceMismatch)` + /// if it belongs to a different namespace (ADR-007 namespace isolation). pub async fn delete_note( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, hard: bool, ) -> RuntimeResult { - let ns = self.ns(namespace); - let note_store = self.notes(namespace)?; + let ns = token.namespace().as_str(); + let note_store = self.notes(token)?; let note = match note_store.get_note(id).await? { Some(n) => n, None => return Ok(false), }; if note.namespace != ns { - return Ok(false); + return Err(RuntimeError::NamespaceMismatch { id }); } let mode = if hard { DeleteMode::Hard @@ -1041,7 +1325,7 @@ impl KhiveRuntime { // On hard delete, cascade-remove incident edges and clean up indexes. if hard { - let graph = self.graph(namespace)?; + let graph = self.graph(token)?; for direction in [Direction::Out, Direction::In] { let hits = graph .neighbors( @@ -1055,28 +1339,46 @@ impl KhiveRuntime { ) .await?; for hit in hits { - graph.delete_edge(LinkId::from(hit.edge_id)).await?; + graph + .delete_edge(LinkId::from(hit.edge_id), DeleteMode::Hard) + .await?; } } let ns_str = ns.to_string(); - self.text_for_notes(namespace)? + self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; if self.config().embedding_model.is_some() { - self.vectors(namespace)?.delete(id).await?; + self.vectors(token)?.delete(id).await?; } } let deleted = note_store.delete_note(id, mode).await?; if !hard && deleted { let ns_str = ns.to_string(); - self.text_for_notes(namespace)? 
+ self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; if self.config().embedding_model.is_some() { - self.vectors(namespace)?.delete(id).await?; + self.vectors(token)?.delete(id).await?; } } + if deleted { + let event_store = self.events(token)?; + let ns_str = ns.to_string(); + let event = khive_storage::event::Event::new( + ns_str.clone(), + "delete", + EventKind::NoteDeleted, + SubstrateKind::Note, + "", + ) + .with_target(id) + .with_payload(serde_json::json!({"id": id, "namespace": ns_str, "hard": hard})); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("delete_note: event store write failed: {e}")) + })?; + } Ok(deleted) } } @@ -1097,17 +1399,20 @@ impl KhiveRuntime { /// The query is compiled to SQL with the namespace scope applied. /// GQL syntax: `MATCH (a:concept)-[e:extends]->(b) RETURN a, b LIMIT 10` /// SPARQL syntax: `SELECT ?a WHERE { ?a :kind "concept" . }` - pub async fn query(&self, namespace: Option<&str>, query: &str) -> RuntimeResult> { - Ok(self.query_with_metadata(namespace, query).await?.rows) + pub async fn query(&self, token: &NamespaceToken, query: &str) -> RuntimeResult> { + Ok(self.query_with_metadata(token, query).await?.rows) } /// Execute a GQL/SPARQL query, returning rows and any validation warnings. 
pub async fn query_with_metadata( &self, - namespace: Option<&str>, + token: &NamespaceToken, query: &str, ) -> RuntimeResult { - let ns = self.ns(namespace); + use khive_query::QueryValue; + use khive_storage::types::SqlValue; + + let ns = token.namespace().as_str(); let ast = khive_query::parse_auto(query)?; let opts = khive_query::CompileOptions { scopes: vec![ns.to_string()], @@ -1115,10 +1420,25 @@ impl KhiveRuntime { }; let compiled = khive_query::compile(&ast, &opts)?; let warnings = compiled.warnings; + + // Convert QueryValue params (query-layer type) to SqlValue (storage-layer type) + // at the query–storage boundary (ADR-008 §"Query crate compiles against khive-types only"). + let params: Vec = compiled + .params + .into_iter() + .map(|qv| match qv { + QueryValue::Null => SqlValue::Null, + QueryValue::Integer(n) => SqlValue::Integer(n), + QueryValue::Float(f) => SqlValue::Float(f), + QueryValue::Text(s) => SqlValue::Text(s), + QueryValue::Blob(b) => SqlValue::Blob(b), + }) + .collect(); + let mut reader = self.sql().reader().await?; let stmt = SqlStatement { sql: compiled.sql, - params: compiled.params, + params, label: None, }; let rows = reader.query_all(stmt).await?; @@ -1131,21 +1451,19 @@ impl KhiveRuntime { /// outbound) to prevent dangling references. Soft delete also cleans FTS /// and vector indexes; edges are left in place. /// - /// Returns `false` without deleting if the entity exists but belongs to a + /// Returns `Err(NamespaceMismatch)` if the entity exists but belongs to a /// different namespace (ADR-007 namespace isolation). pub async fn delete_entity( &self, - namespace: Option<&str>, + token: &NamespaceToken, id: Uuid, hard: bool, ) -> RuntimeResult { - let entity = match self.entities(namespace)?.get_entity(id).await? { + let entity = match self.entities(token)?.get_entity(id).await? 
{ Some(e) => e, None => return Ok(false), }; - if entity.namespace != self.ns(namespace) { - return Ok(false); - } + self.ensure_namespace(&entity.namespace, token, id)?; let mode = if hard { DeleteMode::Hard } else { @@ -1154,7 +1472,7 @@ impl KhiveRuntime { // On hard delete, cascade-remove incident edges to prevent dangling refs. if hard { - let graph = self.graph(namespace)?; + let graph = self.graph(token)?; for direction in [Direction::Out, Direction::In] { let hits = graph .neighbors( @@ -1168,15 +1486,33 @@ impl KhiveRuntime { ) .await?; for hit in hits { - graph.delete_edge(LinkId::from(hit.edge_id)).await?; + graph + .delete_edge(LinkId::from(hit.edge_id), DeleteMode::Hard) + .await?; } } - self.remove_from_indexes(namespace, id).await?; + self.remove_from_indexes(token, id).await?; } - let deleted = self.entities(namespace)?.delete_entity(id, mode).await?; + let deleted = self.entities(token)?.delete_entity(id, mode).await?; if !hard && deleted { - self.remove_from_indexes(namespace, id).await?; + self.remove_from_indexes(token, id).await?; + } + if deleted { + let event_store = self.events(token)?; + let ns = entity.namespace.clone(); + let event = khive_storage::event::Event::new( + ns.clone(), + "delete", + EventKind::EntityDeleted, + SubstrateKind::Entity, + "", + ) + .with_target(id) + .with_payload(serde_json::json!({"id": id, "namespace": ns, "hard": hard})); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("delete_entity: event store write failed: {e}")) + })?; } Ok(deleted) } @@ -1184,7 +1520,7 @@ impl KhiveRuntime { /// Count entities in a namespace, optionally filtered. pub async fn count_entities( &self, - namespace: Option<&str>, + token: &NamespaceToken, kind: Option<&str>, ) -> RuntimeResult { let filter = EntityFilter { @@ -1195,8 +1531,8 @@ impl KhiveRuntime { ..Default::default() }; Ok(self - .entities(namespace)? - .count_entities(self.ns(namespace), filter) + .entities(token)? 
+ .count_entities(token.namespace().as_str(), filter) .await?) } @@ -1205,25 +1541,22 @@ impl KhiveRuntime { /// Fetch a single edge by id. Returns `None` if the edge does not exist. pub async fn get_edge( &self, - namespace: Option<&str>, + token: &NamespaceToken, edge_id: Uuid, ) -> RuntimeResult> { - Ok(self - .graph(namespace)? - .get_edge(LinkId::from(edge_id)) - .await?) + Ok(self.graph(token)?.get_edge(LinkId::from(edge_id)).await?) } /// List edges matching `filter`. `limit` is capped at 1000; defaults to 100. pub async fn list_edges( &self, - namespace: Option<&str>, + token: &NamespaceToken, filter: crate::curation::EdgeListFilter, limit: u32, ) -> RuntimeResult> { let limit = limit.clamp(1, 1000); let page = self - .graph(namespace)? + .graph(token)? .query_edges( filter.into(), vec![SortOrder { @@ -1244,28 +1577,51 @@ impl KhiveRuntime { /// ADR-002/ADR-019/ADR-024 three-case contract; the edge is NOT mutated on error. pub async fn update_edge( &self, - namespace: Option<&str>, + token: &NamespaceToken, edge_id: Uuid, - relation: Option, - weight: Option, + patch: crate::curation::EdgePatch, ) -> RuntimeResult { - let graph = self.graph(namespace)?; + let graph = self.graph(token)?; let mut edge = graph .get_edge(LinkId::from(edge_id)) .await? .ok_or_else(|| crate::RuntimeError::NotFound(format!("edge {edge_id}")))?; - if let Some(r) = relation { + let mut changed_fields: Vec<&'static str> = Vec::new(); + if let Some(r) = patch.relation { // Validate before mutating — use the existing endpoints with the new relation. 
- self.validate_edge_relation_endpoints(namespace, edge.source_id, edge.target_id, r) + self.validate_edge_relation_endpoints(token, edge.source_id, edge.target_id, r) .await?; edge.relation = r; + changed_fields.push("relation"); } - if let Some(w) = weight { + if let Some(w) = patch.weight { edge.weight = w.clamp(0.0, 1.0); + changed_fields.push("weight"); + } + if let Some(props) = patch.properties { + edge.metadata = Some(props); } graph.upsert_edge(edge.clone()).await?; + + let event_store = self.events(token)?; + let ns = token.namespace().as_str().to_string(); + let event = khive_storage::event::Event::new( + ns.clone(), + "update", + EventKind::EdgeUpdated, + SubstrateKind::Entity, + "", + ) + .with_target(edge_id) + .with_payload( + serde_json::json!({"id": edge_id, "namespace": ns, "changed_fields": changed_fields}), + ); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("update_edge: event store write failed: {e}")) + })?; + Ok(edge) } @@ -1279,8 +1635,18 @@ impl KhiveRuntime { /// If `edge_id` does not refer to an edge (e.g. the caller passes an entity or /// note UUID by mistake), this method returns `Ok(false)` immediately with no /// side effects — it does **not** cascade inbound edges of the non-edge record. - pub async fn delete_edge(&self, namespace: Option<&str>, edge_id: Uuid) -> RuntimeResult { - let graph = self.graph(namespace)?; + pub async fn delete_edge( + &self, + token: &NamespaceToken, + edge_id: Uuid, + hard: bool, + ) -> RuntimeResult { + let graph = self.graph(token)?; + let mode = if hard { + DeleteMode::Hard + } else { + DeleteMode::Soft + }; // Guard: verify `edge_id` is actually an edge before touching anything. 
// Without this check, passing an entity/note UUID would delete all inbound @@ -1303,27 +1669,142 @@ impl KhiveRuntime { ) .await?; for hit in inbound { - graph.delete_edge(LinkId::from(hit.edge_id)).await?; + graph + .delete_edge(LinkId::from(hit.edge_id), DeleteMode::Hard) + .await?; } - Ok(graph.delete_edge(LinkId::from(edge_id)).await?) + let deleted = graph.delete_edge(LinkId::from(edge_id), mode).await?; + if deleted { + let event_store = self.events(token)?; + let ns = token.namespace().as_str().to_string(); + let event = khive_storage::event::Event::new( + ns.clone(), + "delete", + EventKind::EdgeDeleted, + SubstrateKind::Entity, + "", + ) + .with_target(edge_id) + .with_payload(serde_json::json!({"id": edge_id, "namespace": ns, "hard": hard})); + event_store.append_event(event).await.map_err(|e| { + RuntimeError::Internal(format!("delete_edge: event store write failed: {e}")) + })?; + } + Ok(deleted) } /// Count edges matching `filter`. pub async fn count_edges( &self, - namespace: Option<&str>, + token: &NamespaceToken, filter: crate::curation::EdgeListFilter, ) -> RuntimeResult { - Ok(self.graph(namespace)?.count_edges(filter.into()).await?) + Ok(self.graph(token)?.count_edges(filter.into()).await?) + } + + /// Validate and construct an edge from a [`LinkSpec`] without writing to storage. + /// + /// Applies the full ADR-002 contract (endpoint validation, symmetric + /// canonicalization, `dependency_kind` inference and metadata validation). + /// Returns the constructed `Edge` on success; the caller is responsible for + /// persisting it (e.g. via `upsert_edge` or `link_many`). + /// + /// The `token` must be a pre-authorized namespace token from the dispatch + /// layer. If `spec.namespace` is set it must match `token.namespace()`; + /// a mismatch returns `RuntimeError::InvalidInput` (ADR-007). 
+ pub async fn build_edge(&self, token: &NamespaceToken, spec: &LinkSpec) -> RuntimeResult { + let ns_str = match &spec.namespace { + Some(s) => { + let spec_ns = crate::Namespace::parse(s) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid namespace: {e}")))?; + if &spec_ns != token.namespace() { + return Err(RuntimeError::InvalidInput( + "LinkSpec namespace does not match token namespace".into(), + )); + } + s.as_str() + } + None => token.namespace().as_str(), + }; + self.validate_edge_relation_endpoints(token, spec.source_id, spec.target_id, spec.relation) + .await?; + let (source_id, target_id) = + canonical_edge_endpoints(spec.relation, spec.source_id, spec.target_id); + let metadata = if spec.relation == EdgeRelation::DependsOn { + match ( + self.resolve(token, source_id).await?, + self.resolve(token, target_id).await?, + ) { + (Some(Resolved::Entity(src_e)), Some(Resolved::Entity(tgt_e))) => { + merge_dependency_kind(&src_e.kind, &tgt_e.kind, spec.metadata.clone()) + } + _ => spec.metadata.clone(), + } + } else { + spec.metadata.clone() + }; + validate_edge_metadata(spec.relation, metadata.as_ref())?; + let now = chrono::Utc::now(); + Ok(Edge { + id: LinkId::from(Uuid::new_v4()), + namespace: ns_str.to_string(), + source_id, + target_id, + relation: spec.relation, + weight: spec.weight, + created_at: now, + updated_at: now, + deleted_at: None, + metadata, + target_backend: None, + }) + } + + /// Validate and atomically upsert a batch of edges. + /// + /// All edges are validated and constructed with `build_edge` before any + /// write. If validation fails for any entry the entire batch is rejected + /// (no writes occur). On success, all edges are persisted in a single + /// atomic transaction via `upsert_edges`. + /// + /// All specs must share the same namespace; the namespace is taken from + /// `token` (or validated against it if `spec.namespace` is set). 
+ pub async fn link_many( + &self, + token: &NamespaceToken, + specs: Vec, + ) -> RuntimeResult> { + if specs.is_empty() { + return Ok(vec![]); + } + let mut edges = Vec::with_capacity(specs.len()); + for spec in &specs { + edges.push(self.build_edge(token, spec).await?); + } + self.graph(token)?.upsert_edges(edges.clone()).await?; + Ok(edges) } } +/// Fully specified edge creation request — input to [`KhiveRuntime::build_edge`] +/// and [`KhiveRuntime::link_many`]. +#[derive(Clone, Debug)] +pub struct LinkSpec { + pub namespace: Option, + pub source_id: Uuid, + pub target_id: Uuid, + pub relation: EdgeRelation, + pub weight: f64, + pub metadata: Option, +} + #[cfg(test)] mod tests { use super::*; use crate::curation::EdgeListFilter; - use crate::runtime::KhiveRuntime; + use crate::runtime::{KhiveRuntime, NamespaceToken}; + use crate::Namespace; fn rt() -> KhiveRuntime { KhiveRuntime::memory().unwrap() @@ -1332,22 +1813,30 @@ mod tests { #[tokio::test] async fn update_edge_changes_weight() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let updated = rt - .update_edge(None, edge_id, None, Some(0.5)) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + weight: Some(0.5), + ..Default::default() + }, + ) .await .unwrap(); assert!((updated.weight - 0.5).abs() < 0.001); @@ -1356,22 +1845,30 @@ mod tests { #[tokio::test] async fn update_edge_changes_relation() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, 
None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let updated = rt - .update_edge(None, edge_id, Some(EdgeRelation::VariantOf), None) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + relation: Some(EdgeRelation::VariantOf), + ..Default::default() + }, + ) .await .unwrap(); assert_eq!(updated.relation, EdgeRelation::VariantOf); @@ -1384,24 +1881,32 @@ mod tests { #[tokio::test] async fn update_edge_annotates_note_to_entity_set_supersedes_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt - .create_note(None, "observation", None, "a note", 0.5, None, vec![]) + .create_note(&tok, "observation", None, "a note", Some(0.5), None, vec![]) .await .unwrap(); let entity = rt - .create_entity(None, "concept", "E", None, None, vec![]) + .create_entity(&tok, "concept", None, "E", None, None, vec![]) .await .unwrap(); // Create a valid note→entity annotates edge. let edge = rt - .link(None, note.id, entity.id, EdgeRelation::Annotates, 1.0) + .link(&tok, note.id, entity.id, EdgeRelation::Annotates, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); // Attempt to change relation to Supersedes (crossing substrates → invalid). let result = rt - .update_edge(None, edge_id, Some(EdgeRelation::Supersedes), None) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + relation: Some(EdgeRelation::Supersedes), + ..Default::default() + }, + ) .await; assert!( matches!(result, Err(RuntimeError::InvalidInput(_))), @@ -1409,7 +1914,7 @@ mod tests { ); // Edge must NOT be mutated — re-fetch and verify relation unchanged. 
- let fetched = rt.get_edge(None, edge_id).await.unwrap().unwrap(); + let fetched = rt.get_edge(&tok, edge_id).await.unwrap().unwrap(); assert_eq!( fetched.relation, EdgeRelation::Annotates, @@ -1422,22 +1927,30 @@ mod tests { #[tokio::test] async fn update_edge_entity_to_entity_set_annotates_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let result = rt - .update_edge(None, edge_id, Some(EdgeRelation::Annotates), None) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + relation: Some(EdgeRelation::Annotates), + ..Default::default() + }, + ) .await; assert!( matches!(result, Err(RuntimeError::InvalidInput(_))), @@ -1450,28 +1963,36 @@ mod tests { #[tokio::test] async fn update_edge_entity_to_entity_set_supersedes_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let updated = rt - .update_edge(None, edge_id, Some(EdgeRelation::Supersedes), None) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + relation: 
Some(EdgeRelation::Supersedes), + ..Default::default() + }, + ) .await .unwrap(); assert_eq!(updated.relation, EdgeRelation::Supersedes); // Verify persisted. - let fetched = rt.get_edge(None, edge_id).await.unwrap().unwrap(); + let fetched = rt.get_edge(&tok, edge_id).await.unwrap().unwrap(); assert_eq!(fetched.relation, EdgeRelation::Supersedes); } @@ -1479,22 +2000,30 @@ mod tests { #[tokio::test] async fn update_edge_weight_only_skips_validation() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let updated = rt - .update_edge(None, edge_id, None, Some(0.3)) + .update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + weight: Some(0.3), + ..Default::default() + }, + ) .await .unwrap(); assert_eq!(updated.relation, EdgeRelation::Extends); @@ -1505,22 +2034,30 @@ mod tests { #[tokio::test] async fn update_edge_same_class_relation_change_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); let updated = rt - .update_edge(None, edge_id, Some(EdgeRelation::VariantOf), None) + 
.update_edge( + &tok, + edge_id, + crate::curation::EdgePatch { + relation: Some(EdgeRelation::VariantOf), + ..Default::default() + }, + ) .await .unwrap(); assert_eq!(updated.relation, EdgeRelation::VariantOf); @@ -1529,23 +2066,24 @@ mod tests { #[tokio::test] async fn list_edges_filters_by_relation() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::DependsOn, 1.0) + rt.link(&tok, a.id, c.id, EdgeRelation::Enables, 1.0, None) .await .unwrap(); @@ -1553,7 +2091,7 @@ mod tests { relations: vec![EdgeRelation::Extends], ..Default::default() }; - let edges = rt.list_edges(None, filter, 100).await.unwrap(); + let edges = rt.list_edges(&tok, filter, 100).await.unwrap(); assert_eq!(edges.len(), 1); assert_eq!(edges[0].relation, EdgeRelation::Extends); } @@ -1561,27 +2099,28 @@ mod tests { #[tokio::test] async fn list_edges_filters_by_source() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await 
.unwrap(); let d = rt - .create_entity(None, "concept", "D", None, None, vec![]) + .create_entity(&tok, "concept", None, "D", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, c.id, d.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, c.id, d.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -1589,7 +2128,7 @@ mod tests { source_id: Some(a.id), ..Default::default() }; - let edges = rt.list_edges(None, filter, 100).await.unwrap(); + let edges = rt.list_edges(&tok, filter, 100).await.unwrap(); assert_eq!(edges.len(), 1); let src: Uuid = edges[0].source_id; assert_eq!(src, a.id); @@ -1598,59 +2137,61 @@ mod tests { #[tokio::test] async fn delete_edge_removes_from_storage() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_id: Uuid = edge.id.into(); - let deleted = rt.delete_edge(None, edge_id).await.unwrap(); + let deleted = rt.delete_edge(&tok, edge_id, true).await.unwrap(); assert!(deleted); - let fetched = rt.get_edge(None, edge_id).await.unwrap(); + let fetched = rt.get_edge(&tok, edge_id).await.unwrap(); assert!(fetched.is_none(), "edge should be gone after delete"); } #[tokio::test] async fn count_edges_matches_filter() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, 
"concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::DependsOn, 1.0) + rt.link(&tok, a.id, c.id, EdgeRelation::Enables, 1.0, None) .await .unwrap(); let all = rt - .count_edges(None, EdgeListFilter::default()) + .count_edges(&tok, EdgeListFilter::default()) .await .unwrap(); assert_eq!(all, 2); let just_extends = rt .count_edges( - None, + &tok, EdgeListFilter { relations: vec![EdgeRelation::Extends], ..Default::default() @@ -1664,50 +2205,84 @@ mod tests { #[tokio::test] async fn get_entity_namespace_isolation() { let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); let entity = rt - .create_entity(Some("ns-a"), "concept", "Alpha", None, None, vec![]) + .create_entity(&ns_a, "concept", None, "Alpha", None, None, vec![]) .await .unwrap(); // Same namespace: visible. - let found = rt.get_entity(Some("ns-a"), entity.id).await.unwrap(); - assert!(found.is_some(), "should be visible in its own namespace"); + let found = rt.get_entity(&ns_a, entity.id).await; + assert!(found.is_ok(), "should be visible in its own namespace"); - // Different namespace: invisible. - let not_found = rt.get_entity(Some("ns-b"), entity.id).await.unwrap(); + // Different namespace: NamespaceMismatch error (ADR-007). + let not_found = rt.get_entity(&ns_b, entity.id).await; assert!( - not_found.is_none(), + not_found.is_err(), "should not be visible across namespaces" ); + // Must be the specific NamespaceMismatch variant, not generic NotFound. 
+ assert!( + matches!(not_found.unwrap_err(), crate::RuntimeError::NamespaceMismatch { id } if id == entity.id), + "cross-namespace get must return NamespaceMismatch with the entity id" + ); } #[tokio::test] - async fn delete_entity_namespace_isolation() { + async fn namespace_mismatch_error_message_is_opaque() { + // ADR-007 timing-oracle mitigation: the external error message must not + // reveal which namespace the record actually lives in. let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("secret-ns").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("other-ns").unwrap()); let entity = rt - .create_entity(Some("ns-a"), "concept", "Beta", None, None, vec![]) + .create_entity(&ns_a, "concept", None, "Hidden", None, None, vec![]) .await .unwrap(); - // Delete from wrong namespace: no-op, returns false. - let deleted = rt - .delete_entity(Some("ns-b"), entity.id, true) + let err = rt.get_entity(&ns_b, entity.id).await.unwrap_err(); + let msg = err.to_string(); + assert!( + !msg.contains("secret-ns"), + "error message must not leak the actual namespace; got: {msg}" + ); + assert!( + !msg.contains("other-ns"), + "error message must not leak the requested namespace; got: {msg}" + ); + } + + #[tokio::test] + async fn delete_entity_namespace_isolation() { + let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); + let entity = rt + .create_entity(&ns_a, "concept", None, "Beta", None, None, vec![]) .await .unwrap(); - assert!(!deleted, "cross-namespace delete must return false"); + + // Delete from wrong namespace: NamespaceMismatch error (ADR-007 — no information leak). 
+ let cross_ns_result = rt.delete_entity(&ns_b, entity.id, true).await; + assert!( + cross_ns_result.is_err(), + "cross-namespace delete must error" + ); + assert!( + matches!(cross_ns_result.unwrap_err(), crate::RuntimeError::NamespaceMismatch { id } if id == entity.id), + "cross-namespace delete must return NamespaceMismatch, not a generic error" + ); // Entity still present in its own namespace. - let still_there = rt.get_entity(Some("ns-a"), entity.id).await.unwrap(); + let still_there = rt.get_entity(&ns_a, entity.id).await; assert!( - still_there.is_some(), + still_there.is_ok(), "entity must survive cross-ns delete attempt" ); // Delete from correct namespace: succeeds. - let deleted_ok = rt - .delete_entity(Some("ns-a"), entity.id, true) - .await - .unwrap(); + let deleted_ok = rt.delete_entity(&ns_a, entity.id, true).await.unwrap(); assert!(deleted_ok, "same-namespace delete must succeed"); } @@ -1716,13 +2291,14 @@ mod tests { #[tokio::test] async fn create_note_indexes_into_fts5() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "FlashAttention reduces memory by using tiling", - 0.8, + Some(0.8), None, vec![], ) @@ -1730,9 +2306,9 @@ mod tests { .unwrap(); // FTS5 should have indexed the note content. 
- let ns = rt.ns(None).to_string(); + let ns = tok.namespace().as_str().to_string(); let hits = rt - .text_for_notes(None) + .text_for_notes(&tok) .unwrap() .search(khive_storage::types::TextSearchRequest { query: "FlashAttention".to_string(), @@ -1756,14 +2332,15 @@ mod tests { #[tokio::test] async fn create_note_with_properties() { let rt = rt(); + let tok = NamespaceToken::local(); let props = serde_json::json!({"source": "arxiv:2205.14135"}); let note = rt .create_note( - None, + &tok, "insight", None, "FlashAttention is IO-aware", - 0.9, + Some(0.9), Some(props.clone()), vec![], ) @@ -1776,18 +2353,19 @@ mod tests { #[tokio::test] async fn create_note_creates_annotates_edges() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "FlashAttention", None, None, vec![]) + .create_entity(&tok, "concept", None, "FlashAttention", None, None, vec![]) .await .unwrap(); let note = rt .create_note( - None, + &tok, "observation", None, "FlashAttention uses SRAM tiling for memory efficiency", - 0.9, + Some(0.9), None, vec![entity.id], ) @@ -1797,7 +2375,7 @@ mod tests { // The note should have an outbound `annotates` edge to the entity. let out_neighbors = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -1812,7 +2390,7 @@ mod tests { // The entity should have an inbound `annotates` edge from the note. 
let in_neighbors = rt .neighbors( - None, + &tok, entity.id, Direction::In, None, @@ -1827,28 +2405,29 @@ mod tests { #[tokio::test] async fn neighbors_without_relation_filter_returns_all() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::DependsOn, 1.0) + rt.link(&tok, a.id, c.id, EdgeRelation::Enables, 1.0, None) .await .unwrap(); let all = rt - .neighbors(None, a.id, Direction::Out, None, None) + .neighbors(&tok, a.id, Direction::Out, None, None) .await .unwrap(); assert_eq!(all.len(), 2); @@ -1857,29 +2436,30 @@ mod tests { #[tokio::test] async fn neighbors_with_relation_filter_returns_subset() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, a.id, c.id, EdgeRelation::DependsOn, 1.0) + rt.link(&tok, a.id, c.id, 
EdgeRelation::Enables, 1.0, None) .await .unwrap(); let filtered = rt .neighbors( - None, + &tok, a.id, Direction::Out, None, @@ -1895,12 +2475,13 @@ mod tests { #[tokio::test] async fn search_notes_returns_relevant_note() { let rt = rt(); + let tok = NamespaceToken::local(); rt.create_note( - None, + &tok, "observation", None, "GQA reduces KV cache memory for large models", - 0.8, + Some(0.8), None, vec![], ) @@ -1908,7 +2489,7 @@ mod tests { .unwrap(); let results = rt - .search_notes(None, "GQA KV cache", None, 10, None) + .search_notes(&tok, "GQA KV cache", None, 10, None, false) .await .unwrap(); @@ -1927,13 +2508,14 @@ mod tests { #[tokio::test] async fn search_notes_excludes_soft_deleted() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "RoPE positional encoding rotary embeddings", - 0.7, + Some(0.7), None, vec![], ) @@ -1941,14 +2523,14 @@ mod tests { .unwrap(); // Soft-delete the note. - rt.notes(None) + rt.notes(&tok) .unwrap() .delete_note(note.id, DeleteMode::Soft) .await .unwrap(); let results = rt - .search_notes(None, "RoPE rotary positional", None, 10, None) + .search_notes(&tok, "RoPE rotary positional", None, 10, None, false) .await .unwrap(); @@ -1961,12 +2543,13 @@ mod tests { #[tokio::test] async fn resolve_returns_entity() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "LoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "LoRA", None, None, vec![]) .await .unwrap(); - let resolved = rt.resolve(None, entity.id).await.unwrap(); + let resolved = rt.resolve(&tok, entity.id).await.unwrap(); match resolved { Some(Resolved::Entity(e)) => assert_eq!(e.id, entity.id), other => panic!("expected Resolved::Entity, got {:?}", other), @@ -1976,20 +2559,21 @@ mod tests { #[tokio::test] async fn resolve_returns_note() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, 
"observation", None, "LoRA fine-tunes LLMs with low-rank adapters", - 0.85, + Some(0.85), None, vec![], ) .await .unwrap(); - let resolved = rt.resolve(None, note.id).await.unwrap(); + let resolved = rt.resolve(&tok, note.id).await.unwrap(); match resolved { Some(Resolved::Note(n)) => assert_eq!(n.id, note.id), other => panic!("expected Resolved::Note, got {:?}", other), @@ -1999,35 +2583,39 @@ mod tests { #[tokio::test] async fn resolve_returns_none_for_unknown_uuid() { let rt = rt(); + let tok = NamespaceToken::local(); let unknown = Uuid::new_v4(); - let resolved = rt.resolve(None, unknown).await.unwrap(); + let resolved = rt.resolve(&tok, unknown).await.unwrap(); assert!(resolved.is_none(), "unknown UUID should resolve to None"); } #[tokio::test] async fn resolve_prefix_finds_entity_in_own_namespace() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "PrefixTest", None, None, vec![]) + .create_entity(&tok, "concept", None, "PrefixTest", None, None, vec![]) .await .unwrap(); let prefix = &entity.id.to_string()[..8]; - let resolved = rt.resolve_prefix(None, prefix).await.unwrap(); + let resolved = rt.resolve_prefix(&tok, prefix).await.unwrap(); assert_eq!(resolved, Some(entity.id)); } #[tokio::test] async fn resolve_prefix_invisible_across_namespaces() { let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); let entity = rt - .create_entity(Some("ns_a"), "concept", "Invisible", None, None, vec![]) + .create_entity(&ns_a, "concept", None, "Invisible", None, None, vec![]) .await .unwrap(); let prefix = &entity.id.to_string()[..8]; // From ns_b, the entity in ns_a should not be visible. 
- let resolved = rt.resolve_prefix(Some("ns_b"), prefix).await.unwrap(); + let resolved = rt.resolve_prefix(&ns_b, prefix).await.unwrap(); assert_eq!(resolved, None); } @@ -2036,6 +2624,7 @@ mod tests { use khive_storage::entity::Entity; let rt = rt(); + let tok = NamespaceToken::local(); // Two entities with UUIDs sharing the same 8-char prefix "aabbccdd". let id_a = Uuid::parse_str("aabbccdd-1111-4000-8000-000000000001").unwrap(); let id_b = Uuid::parse_str("aabbccdd-2222-4000-8000-000000000002").unwrap(); @@ -2045,11 +2634,11 @@ mod tests { let mut entity_b = Entity::new("local", "concept", "AmbigB"); entity_b.id = id_b; - let store = rt.entities(None).unwrap(); + let store = rt.entities(&tok).unwrap(); store.upsert_entity(entity_a).await.unwrap(); store.upsert_entity(entity_b).await.unwrap(); - let result = rt.resolve_prefix(None, "aabbccdd").await; + let result = rt.resolve_prefix(&tok, "aabbccdd").await; assert!( result.is_err(), "shared 8-char prefix must return Ambiguous error" @@ -2065,15 +2654,22 @@ mod tests { #[tokio::test] async fn resolve_finds_event_by_full_uuid() { use khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); - let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "actor"); + let tok = NamespaceToken::local(); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "actor", + ); let event_id = event.id; - rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); - let resolved = rt.resolve(None, event_id).await.unwrap(); + let resolved = rt.resolve(&tok, event_id).await.unwrap(); assert!( matches!(resolved, Some(Resolved::Event(_))), "event UUID must resolve to Resolved::Event, got {resolved:?}" @@ -2083,16 +2679,23 @@ mod tests { #[tokio::test] async fn resolve_prefix_finds_event() { use 
khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); - let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "actor"); + let tok = NamespaceToken::local(); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "actor", + ); let event_id = event.id; - rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); let prefix = &event_id.to_string()[..8]; - let resolved = rt.resolve_prefix(None, prefix).await.unwrap(); + let resolved = rt.resolve_prefix(&tok, prefix).await.unwrap(); assert_eq!( resolved, Some(event_id), @@ -2105,14 +2708,15 @@ mod tests { #[tokio::test] async fn link_phantom_source_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let phantom = Uuid::new_v4(); let result = rt - .link(None, phantom, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, phantom, b.id, EdgeRelation::Extends, 1.0, None) .await; match result { Err(RuntimeError::NotFound(msg)) => { @@ -2128,14 +2732,15 @@ mod tests { #[tokio::test] async fn link_phantom_target_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let phantom = Uuid::new_v4(); let result = rt - .link(None, a.id, phantom, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, phantom, EdgeRelation::Extends, 1.0, None) .await; match result { Err(RuntimeError::NotFound(msg)) => { @@ -2151,17 +2756,18 @@ mod tests { #[tokio::test] async fn link_real_entities_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - 
.create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 0.8) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 0.8, None) .await .unwrap(); assert_eq!(edge.source_id, a.id); @@ -2172,15 +2778,16 @@ mod tests { #[tokio::test] async fn create_note_annotates_phantom_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let phantom = Uuid::new_v4(); let result = rt .create_note( - None, + &tok, "observation", None, "some content", - 0.5, + Some(0.5), None, vec![phantom], ) @@ -2194,18 +2801,19 @@ mod tests { #[tokio::test] async fn create_note_annotates_real_entity_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "RealTarget", None, None, vec![]) + .create_entity(&tok, "concept", None, "RealTarget", None, None, vec![]) .await .unwrap(); let note = rt .create_note( - None, + &tok, "observation", None, "content", - 0.5, + Some(0.5), None, vec![entity.id], ) @@ -2214,7 +2822,7 @@ mod tests { let neighbors = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -2230,22 +2838,23 @@ mod tests { #[tokio::test] async fn create_note_multi_annotates_creates_all_edges() { let rt = rt(); + let tok = NamespaceToken::local(); let t1 = rt - .create_entity(None, "concept", "Target1", None, None, vec![]) + .create_entity(&tok, "concept", None, "Target1", None, None, vec![]) .await .unwrap(); let t2 = rt - .create_entity(None, "concept", "Target2", None, None, vec![]) + .create_entity(&tok, "concept", None, "Target2", None, None, vec![]) .await .unwrap(); let note = rt .create_note( - None, + &tok, "observation", None, "content", - 0.5, + Some(0.5), None, vec![t1.id, t2.id], ) @@ -2254,7 +2863,7 
@@ mod tests { let neighbors = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -2275,18 +2884,20 @@ mod tests { #[tokio::test] async fn link_target_in_different_namespace_returns_not_found() { let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); let a = rt - .create_entity(Some("ns-a"), "concept", "A", None, None, vec![]) + .create_entity(&ns_a, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(Some("ns-b"), "concept", "B", None, None, vec![]) + .create_entity(&ns_b, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Linking from ns-a: target b lives in ns-b — must be treated as not found. let result = rt - .link(Some("ns-a"), a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&ns_a, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await; assert!( matches!(result, Err(RuntimeError::NotFound(_))), @@ -2297,10 +2908,11 @@ mod tests { #[tokio::test] async fn link_phantom_self_loop_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let phantom = Uuid::new_v4(); let result = rt - .link(None, phantom, phantom, EdgeRelation::Extends, 1.0) + .link(&tok, phantom, phantom, EdgeRelation::Extends, 1.0, None) .await; match result { Err(RuntimeError::NotFound(msg)) => { @@ -2318,29 +2930,38 @@ mod tests { #[tokio::test] async fn link_note_to_edge_annotates_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Create a real edge between a and b, capture its UUID. 
let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); // Create a note and annotate the edge itself (edge is a valid substrate target per ADR-024). let note = rt - .create_note(None, "observation", None, "edge note", 0.5, None, vec![]) + .create_note( + &tok, + "observation", + None, + "edge note", + Some(0.5), + None, + vec![], + ) .await .unwrap(); let result = rt - .link(None, note.id, edge_uuid, EdgeRelation::Annotates, 1.0) + .link(&tok, note.id, edge_uuid, EdgeRelation::Annotates, 1.0, None) .await; assert!( result.is_ok(), @@ -2351,27 +2972,28 @@ mod tests { #[tokio::test] async fn create_note_annotates_real_edge_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); let note = rt .create_note( - None, + &tok, "observation", None, "annotating an edge", - 0.5, + Some(0.5), None, vec![edge_uuid], ) @@ -2380,7 +3002,7 @@ mod tests { let neighbors = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -2395,17 +3017,18 @@ mod tests { #[tokio::test] async fn create_note_annotates_phantom_is_atomic_no_note_persisted() { let rt = rt(); + let tok = NamespaceToken::local(); let phantom = Uuid::new_v4(); - let before_count = rt.list_notes(None, None, 1000, 0).await.unwrap().len(); + let before_count = rt.list_notes(&tok, None, 1000, 0).await.unwrap().len(); let result = rt .create_note( - None, + &tok, "observation", None, "should not 
persist", - 0.5, + Some(0.5), None, vec![phantom], ) @@ -2416,7 +3039,7 @@ mod tests { ); // Atomicity: the note row must NOT have been written. - let after_count = rt.list_notes(None, None, 1000, 0).await.unwrap().len(); + let after_count = rt.list_notes(&tok, None, 1000, 0).await.unwrap().len(); assert_eq!( before_count, after_count, "failed create_note must not persist any note row (atomicity)" @@ -2424,7 +3047,7 @@ mod tests { // FTS must not contain the content either. let search_hits = rt - .search_notes(None, "should not persist", None, 10, None) + .search_notes(&tok, "should not persist", None, 10, None, false) .await .unwrap(); assert!( @@ -2441,23 +3064,24 @@ mod tests { #[tokio::test] async fn link_entity_to_edge_uuid_non_annotates_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Create a real edge; capture its UUID as the bad target. 
let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); let result = rt - .link(None, a.id, edge_uuid, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, edge_uuid, EdgeRelation::Extends, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2476,17 +3100,18 @@ mod tests { #[tokio::test] async fn link_note_as_source_non_annotates_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt - .create_note(None, "observation", None, "a note", 0.5, None, vec![]) + .create_note(&tok, "observation", None, "a note", Some(0.5), None, vec![]) .await .unwrap(); let entity = rt - .create_entity(None, "concept", "E", None, None, vec![]) + .create_entity(&tok, "concept", None, "E", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, note.id, entity.id, EdgeRelation::DependsOn, 1.0) + .link(&tok, note.id, entity.id, EdgeRelation::DependsOn, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2503,17 +3128,18 @@ mod tests { #[tokio::test] async fn link_entity_as_annotates_source_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, a.id, b.id, EdgeRelation::Annotates, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Annotates, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2531,23 +3157,24 @@ mod tests { #[tokio::test] async fn link_edge_as_annotates_source_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, 
"concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); // An existing edge used as an annotates source: wrong kind, not absent. let result = rt - .link(None, edge_uuid, a.id, EdgeRelation::Annotates, 1.0) + .link(&tok, edge_uuid, a.id, EdgeRelation::Annotates, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2564,16 +3191,17 @@ mod tests { #[tokio::test] async fn link_note_to_event_annotates_succeeds() { use khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "observing an event", - 0.6, + Some(0.6), None, vec![], ) @@ -2581,13 +3209,19 @@ mod tests { .unwrap(); // Build an event directly via the store (no runtime create_event exists). 
- let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "test_actor"); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "test_actor", + ); let event_id = event.id; - rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); let result = rt - .link(None, note.id, event_id, EdgeRelation::Annotates, 1.0) + .link(&tok, note.id, event_id, EdgeRelation::Annotates, 1.0, None) .await; assert!( result.is_ok(), @@ -2599,21 +3233,28 @@ mod tests { #[tokio::test] async fn create_note_annotates_event_succeeds() { use khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); - let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "test_actor"); + let tok = NamespaceToken::local(); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "test_actor", + ); let event_id = event.id; - rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); let result = rt .create_note( - None, + &tok, "observation", None, "note annotating an event", - 0.5, + Some(0.5), None, vec![event_id], ) @@ -2626,7 +3267,7 @@ mod tests { let note = result.unwrap(); let neighbors = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -2644,13 +3285,14 @@ mod tests { #[tokio::test] async fn link_supersedes_note_to_note_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let old_note = rt .create_note( - None, + &tok, "observation", None, "old observation", - 0.7, + Some(0.7), None, vec![], ) @@ -2658,11 +3300,11 @@ mod tests { .unwrap(); let new_note = rt .create_note( - None, + &tok, "observation", None, "revised observation superseding the old one", - 0.9, + Some(0.9), None, 
vec![], ) @@ -2671,11 +3313,12 @@ mod tests { let result = rt .link( - None, + &tok, new_note.id, old_note.id, EdgeRelation::Supersedes, 1.0, + None, ) .await; assert!( @@ -2687,22 +3330,24 @@ mod tests { #[tokio::test] async fn link_supersedes_entity_to_entity_succeeds() { let rt = rt(); + let tok = NamespaceToken::local(); let old_entity = rt - .create_entity(None, "concept", "OldConcept", None, None, vec![]) + .create_entity(&tok, "concept", None, "OldConcept", None, None, vec![]) .await .unwrap(); let new_entity = rt - .create_entity(None, "concept", "NewConcept", None, None, vec![]) + .create_entity(&tok, "concept", None, "NewConcept", None, None, vec![]) .await .unwrap(); let result = rt .link( - None, + &tok, new_entity.id, old_entity.id, EdgeRelation::Supersedes, 1.0, + None, ) .await; assert!( @@ -2714,17 +3359,25 @@ mod tests { #[tokio::test] async fn link_supersedes_note_to_entity_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt - .create_note(None, "observation", None, "a note", 0.5, None, vec![]) + .create_note(&tok, "observation", None, "a note", Some(0.5), None, vec![]) .await .unwrap(); let entity = rt - .create_entity(None, "concept", "SomeEntity", None, None, vec![]) + .create_entity(&tok, "concept", None, "SomeEntity", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, note.id, entity.id, EdgeRelation::Supersedes, 1.0) + .link( + &tok, + note.id, + entity.id, + EdgeRelation::Supersedes, + 1.0, + None, + ) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2742,17 +3395,25 @@ mod tests { #[tokio::test] async fn link_supersedes_entity_to_note_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "SomeEntity", None, None, vec![]) + .create_entity(&tok, "concept", None, "SomeEntity", None, None, vec![]) .await .unwrap(); let note = rt - .create_note(None, "observation", None, "a note", 0.5, None, 
vec![]) + .create_note(&tok, "observation", None, "a note", Some(0.5), None, vec![]) .await .unwrap(); let result = rt - .link(None, entity.id, note.id, EdgeRelation::Supersedes, 1.0) + .link( + &tok, + entity.id, + note.id, + EdgeRelation::Supersedes, + 1.0, + None, + ) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2770,21 +3431,35 @@ mod tests { #[tokio::test] async fn link_supersedes_event_source_returns_invalid_input() { use khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); - let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "test_actor"); + let tok = NamespaceToken::local(); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "test_actor", + ); let event_id = event.id; - rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); let entity = rt - .create_entity(None, "concept", "SomeEntity", None, None, vec![]) + .create_entity(&tok, "concept", None, "SomeEntity", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, event_id, entity.id, EdgeRelation::Supersedes, 1.0) + .link( + &tok, + event_id, + entity.id, + EdgeRelation::Supersedes, + 1.0, + None, + ) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2799,21 +3474,35 @@ mod tests { #[tokio::test] async fn link_supersedes_event_target_returns_invalid_input() { use khive_storage::Event; - use khive_types::SubstrateKind; + use khive_types::{EventKind, SubstrateKind}; let rt = rt(); - let ns = rt.ns(None); - let event = Event::new(ns, "test_verb", SubstrateKind::Entity, "test_actor"); + let tok = NamespaceToken::local(); + let ns = tok.namespace().as_str(); + let event = Event::new( + ns, + "test_verb", + EventKind::Audit, + SubstrateKind::Entity, + "test_actor", + ); let event_id = event.id; - 
rt.events(None).unwrap().append_event(event).await.unwrap(); + rt.events(&tok).unwrap().append_event(event).await.unwrap(); let entity = rt - .create_entity(None, "concept", "SomeEntity", None, None, vec![]) + .create_entity(&tok, "concept", None, "SomeEntity", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, entity.id, event_id, EdgeRelation::Supersedes, 1.0) + .link( + &tok, + entity.id, + event_id, + EdgeRelation::Supersedes, + 1.0, + None, + ) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2828,22 +3517,23 @@ mod tests { #[tokio::test] async fn link_supersedes_edge_source_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); let result = rt - .link(None, edge_uuid, a.id, EdgeRelation::Supersedes, 1.0) + .link(&tok, edge_uuid, a.id, EdgeRelation::Supersedes, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2858,22 +3548,23 @@ mod tests { #[tokio::test] async fn link_supersedes_edge_target_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, 
None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); let result = rt - .link(None, a.id, edge_uuid, EdgeRelation::Supersedes, 1.0) + .link(&tok, a.id, edge_uuid, EdgeRelation::Supersedes, 1.0, None) .await; match result { Err(RuntimeError::InvalidInput(msg)) => { @@ -2888,13 +3579,14 @@ mod tests { #[tokio::test] async fn link_supersedes_phantom_source_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "existing note", - 0.5, + Some(0.5), None, vec![], ) @@ -2903,7 +3595,7 @@ mod tests { let phantom = Uuid::new_v4(); let result = rt - .link(None, phantom, note.id, EdgeRelation::Supersedes, 1.0) + .link(&tok, phantom, note.id, EdgeRelation::Supersedes, 1.0, None) .await; match result { Err(RuntimeError::NotFound(msg)) => { @@ -2916,13 +3608,14 @@ mod tests { #[tokio::test] async fn link_supersedes_phantom_target_returns_not_found() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "existing note", - 0.5, + Some(0.5), None, vec![], ) @@ -2931,7 +3624,7 @@ mod tests { let phantom = Uuid::new_v4(); let result = rt - .link(None, note.id, phantom, EdgeRelation::Supersedes, 1.0) + .link(&tok, note.id, phantom, EdgeRelation::Supersedes, 1.0, None) .await; match result { Err(RuntimeError::NotFound(msg)) => { @@ -2944,13 +3637,15 @@ mod tests { #[tokio::test] async fn link_supersedes_cross_namespace_source_returns_not_found() { let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); let note_a = rt .create_note( - Some("ns-a"), + &ns_a, "observation", None, "note in ns-a", - 0.5, + Some(0.5), None, vec![], ) @@ -2958,11 +3653,11 @@ mod tests { .unwrap(); let note_b = rt .create_note( - Some("ns-b"), + &ns_b, "observation", None, "note in ns-b", - 0.5, + Some(0.5), None, vec![], ) @@ -2972,11 
+3667,12 @@ mod tests { // From ns-a perspective, note_b is in a different namespace — treated as not found. let result = rt .link( - Some("ns-a"), + &ns_a, note_b.id, note_a.id, EdgeRelation::Supersedes, 1.0, + None, ) .await; assert!( @@ -2989,25 +3685,26 @@ mod tests { #[tokio::test] async fn link_extends_note_source_still_returns_invalid_input() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "a note that cannot be an extends source", - 0.5, + Some(0.5), None, vec![], ) .await .unwrap(); let entity = rt - .create_entity(None, "concept", "E", None, None, vec![]) + .create_entity(&tok, "concept", None, "E", None, None, vec![]) .await .unwrap(); let result = rt - .link(None, note.id, entity.id, EdgeRelation::Extends, 1.0) + .link(&tok, note.id, entity.id, EdgeRelation::Extends, 1.0, None) .await; assert!( matches!(result, Err(RuntimeError::InvalidInput(_))), @@ -3019,27 +3716,28 @@ mod tests { #[tokio::test] async fn link_annotates_note_to_edge_still_succeeds_after_fix() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let edge_uuid: Uuid = edge.id.into(); let note = rt .create_note( - None, + &tok, "observation", None, "annotating an edge", - 0.5, + Some(0.5), None, vec![], ) @@ -3047,7 +3745,7 @@ mod tests { .unwrap(); let result = rt - .link(None, note.id, edge_uuid, EdgeRelation::Annotates, 1.0) + .link(&tok, note.id, edge_uuid, EdgeRelation::Annotates, 1.0, None) .await; assert!( result.is_ok(), @@ -3071,8 +3769,9 @@ mod tests { 
#[tokio::test] async fn create_note_multi_annotates_compensation_cleanup_restores_pristine_state() { let rt = rt(); + let tok = NamespaceToken::local(); let t1 = rt - .create_entity(None, "concept", "T1", None, None, vec![]) + .create_entity(&tok, "concept", None, "T1", None, None, vec![]) .await .unwrap(); @@ -3080,11 +3779,11 @@ mod tests { // note persisted + first annotates edge created. let note = rt .create_note( - None, + &tok, "observation", None, "partial note", - 0.5, + Some(0.5), None, vec![t1.id], ) @@ -3092,11 +3791,11 @@ mod tests { .unwrap(); // Confirm the partial state exists before compensation. - let before_notes = rt.list_notes(None, None, 1000, 0).await.unwrap(); + let before_notes = rt.list_notes(&tok, None, 1000, 0).await.unwrap(); assert_eq!(before_notes.len(), 1, "note must be present before cleanup"); let before_edges = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3112,19 +3811,19 @@ mod tests { let edge_id: Uuid = before_edges[0].edge_id; // Execute the same cleanup sequence that `create_note_inner`'s Err branch runs. 
- rt.delete_edge(None, edge_id).await.unwrap(); - rt.delete_note(None, note.id, true /* hard */) + rt.delete_edge(&tok, edge_id, true).await.unwrap(); + rt.delete_note(&tok, note.id, true /* hard */) .await .unwrap(); // Post-compensation invariants: - let after_notes = rt.list_notes(None, None, 1000, 0).await.unwrap(); + let after_notes = rt.list_notes(&tok, None, 1000, 0).await.unwrap(); assert!( after_notes.is_empty(), "compensation must remove the note row; got {after_notes:?}" ); let search_hits = rt - .search_notes(None, "partial note", None, 10, None) + .search_notes(&tok, "partial note", None, 10, None, false) .await .unwrap(); assert!( @@ -3132,7 +3831,7 @@ mod tests { "compensation must clean the FTS index; got {search_hits:?}" ); let after_edges = rt - .neighbors(None, note.id, Direction::Out, None, None) + .neighbors(&tok, note.id, Direction::Out, None, None) .await .unwrap(); assert!( @@ -3151,17 +3850,18 @@ mod tests { #[tokio::test] async fn annotated_entity_hard_delete_cascades_annotate_edge() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt - .create_entity(None, "concept", "E", None, None, vec![]) + .create_entity(&tok, "concept", None, "E", None, None, vec![]) .await .unwrap(); let note = rt .create_note( - None, + &tok, "observation", None, "note about entity", - 0.5, + Some(0.5), None, vec![entity.id], ) @@ -3171,7 +3871,7 @@ mod tests { // Confirm edge exists before delete. let before = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3186,13 +3886,13 @@ mod tests { ); // Hard delete the entity. - let deleted = rt.delete_entity(None, entity.id, true).await.unwrap(); + let deleted = rt.delete_entity(&tok, entity.id, true).await.unwrap(); assert!(deleted, "entity hard delete must return true"); // Annotates edge must be gone. 
let after = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3209,19 +3909,28 @@ mod tests { #[tokio::test] async fn annotated_note_hard_delete_cascades_annotate_edge() { let rt = rt(); + let tok = NamespaceToken::local(); // note_target is the thing being annotated (a note itself). let note_target = rt - .create_note(None, "observation", None, "target note", 0.5, None, vec![]) + .create_note( + &tok, + "observation", + None, + "target note", + Some(0.5), + None, + vec![], + ) .await .unwrap(); // note_source annotates note_target. let note_source = rt .create_note( - None, + &tok, "insight", None, "annotation", - 0.5, + Some(0.5), None, vec![note_target.id], ) @@ -3230,7 +3939,7 @@ mod tests { let before = rt .neighbors( - None, + &tok, note_source.id, Direction::Out, None, @@ -3245,13 +3954,13 @@ mod tests { ); // Hard delete the annotation TARGET note. - let deleted = rt.delete_note(None, note_target.id, true).await.unwrap(); + let deleted = rt.delete_note(&tok, note_target.id, true).await.unwrap(); assert!(deleted, "note hard delete must return true"); // The annotates edge targeting note_target must be gone. let after = rt .neighbors( - None, + &tok, note_source.id, Direction::Out, None, @@ -3268,17 +3977,18 @@ mod tests { #[tokio::test] async fn annotated_edge_delete_cascades_annotate_edge() { let rt = rt(); + let tok = NamespaceToken::local(); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); // Create an edge to annotate. 
let base_edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let base_edge_uuid: Uuid = base_edge.id.into(); @@ -3286,11 +3996,11 @@ mod tests { // Create a note that annotates the edge. let note = rt .create_note( - None, + &tok, "observation", None, "note about edge", - 0.5, + Some(0.5), None, vec![base_edge_uuid], ) @@ -3299,7 +4009,7 @@ mod tests { let before = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3314,13 +4024,13 @@ mod tests { ); // Delete the base edge. - let deleted = rt.delete_edge(None, base_edge_uuid).await.unwrap(); + let deleted = rt.delete_edge(&tok, base_edge_uuid, true).await.unwrap(); assert!(deleted, "edge delete must return true"); // The annotates edge targeting base_edge must be gone. let after = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3337,23 +4047,24 @@ mod tests { #[tokio::test] async fn mixed_multi_annotates_partial_target_hard_delete_leaves_remaining_edges() { let rt = rt(); + let tok = NamespaceToken::local(); let t1 = rt - .create_entity(None, "concept", "T1", None, None, vec![]) + .create_entity(&tok, "concept", None, "T1", None, None, vec![]) .await .unwrap(); let t2 = rt - .create_entity(None, "concept", "T2", None, None, vec![]) + .create_entity(&tok, "concept", None, "T2", None, None, vec![]) .await .unwrap(); // Note annotates both t1 and t2. let note = rt .create_note( - None, + &tok, "observation", None, "multi-target note", - 0.5, + Some(0.5), None, vec![t1.id, t2.id], ) @@ -3362,7 +4073,7 @@ mod tests { let before = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3377,12 +4088,12 @@ mod tests { ); // Hard delete only t1. - rt.delete_entity(None, t1.id, true).await.unwrap(); + rt.delete_entity(&tok, t1.id, true).await.unwrap(); // Edge to t1 must be gone, edge to t2 must remain. 
let after = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3404,17 +4115,18 @@ mod tests { #[tokio::test] async fn annotated_note_soft_delete_preserves_annotate_edge() { let rt = rt(); + let tok = NamespaceToken::local(); let note_target = rt - .create_note(None, "observation", None, "target", 0.5, None, vec![]) + .create_note(&tok, "observation", None, "target", Some(0.5), None, vec![]) .await .unwrap(); let note_source = rt .create_note( - None, + &tok, "insight", None, "annotation", - 0.5, + Some(0.5), None, vec![note_target.id], ) @@ -3423,7 +4135,7 @@ mod tests { let before = rt .neighbors( - None, + &tok, note_source.id, Direction::Out, None, @@ -3434,12 +4146,12 @@ mod tests { assert_eq!(before.len(), 1); // Soft delete must NOT cascade edges (data-vs-view principle). - let deleted = rt.delete_note(None, note_target.id, false).await.unwrap(); + let deleted = rt.delete_note(&tok, note_target.id, false).await.unwrap(); assert!(deleted, "soft delete must return true"); let after = rt .neighbors( - None, + &tok, note_source.id, Direction::Out, None, @@ -3463,19 +4175,20 @@ mod tests { #[tokio::test] async fn delete_edge_non_edge_uuid_has_no_side_effects() { let rt = rt(); + let tok = NamespaceToken::local(); // Create an entity that has an inbound annotates edge. let entity = rt - .create_entity(None, "concept", "Target", None, None, vec![]) + .create_entity(&tok, "concept", None, "Target", None, None, vec![]) .await .unwrap(); let note = rt .create_note( - None, + &tok, "observation", None, "annotates the entity", - 0.5, + Some(0.5), None, vec![entity.id], ) @@ -3485,7 +4198,7 @@ mod tests { // Confirm the annotates edge exists. let before = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3497,7 +4210,7 @@ mod tests { let annotates_edge_id: Uuid = before[0].edge_id; // Call delete_edge with the entity UUID (NOT an edge UUID). 
- let result = rt.delete_edge(None, entity.id).await; + let result = rt.delete_edge(&tok, entity.id, true).await; assert!( result.is_ok(), "delete_edge must not error on a non-edge UUID" @@ -3510,7 +4223,7 @@ mod tests { // The inbound annotates edge to the entity must still exist — no side effects. let after = rt .neighbors( - None, + &tok, note.id, Direction::Out, None, @@ -3542,12 +4255,13 @@ mod tests { #[tokio::test] async fn create_note_multi_annotates_second_link_failure_rolls_back_partial_write() { let rt = rt(); + let tok = NamespaceToken::local(); let t1 = rt - .create_entity(None, "concept", "T1", None, None, vec![]) + .create_entity(&tok, "concept", None, "T1", None, None, vec![]) .await .unwrap(); let t2 = rt - .create_entity(None, "concept", "T2", None, None, vec![]) + .create_entity(&tok, "concept", None, "T2", None, None, vec![]) .await .unwrap(); @@ -3556,11 +4270,11 @@ mod tests { let result = rt .create_note( - None, + &tok, "observation", None, "rollback target", - 0.5, + Some(0.5), None, vec![t1.id, t2.id], ) @@ -3578,7 +4292,7 @@ mod tests { ); // Compensation must have removed the note row. - let notes = rt.list_notes(None, None, 1000, 0).await.unwrap(); + let notes = rt.list_notes(&tok, None, 1000, 0).await.unwrap(); assert!( notes.is_empty(), "compensation must remove the note row; got {notes:?}" @@ -3586,7 +4300,7 @@ mod tests { // FTS must have no hit for the content. let hits = rt - .search_notes(None, "rollback target", None, 10, None) + .search_notes(&tok, "rollback target", None, 10, None, false) .await .unwrap(); assert!( @@ -3597,7 +4311,7 @@ mod tests { // No partial annotates edges must remain (first edge must have been deleted). 
let edges_from_t1 = rt .neighbors( - None, + &tok, t1.id, Direction::In, None, @@ -3607,7 +4321,7 @@ mod tests { .unwrap(); let edges_from_t2 = rt .neighbors( - None, + &tok, t2.id, Direction::In, None, @@ -3630,10 +4344,12 @@ mod tests { #[tokio::test] async fn soft_delete_entity_removes_indexes() { let rt = rt(); + let tok = NamespaceToken::local(); let entity = rt .create_entity( - None, + &tok, "concept", + None, "QuantumEntanglement", Some("unique FTS term xzqjwv for soft delete test"), None, @@ -3642,10 +4358,10 @@ mod tests { .await .unwrap(); - let ns = rt.ns(None).to_string(); + let ns = tok.namespace().as_str().to_string(); let before = rt - .text(None) + .text(&tok) .unwrap() .search(TextSearchRequest { query: "xzqjwv".to_string(), @@ -3664,11 +4380,11 @@ mod tests { "entity must be in FTS before soft-delete" ); - let deleted = rt.delete_entity(None, entity.id, false).await.unwrap(); + let deleted = rt.delete_entity(&tok, entity.id, false).await.unwrap(); assert!(deleted, "soft delete must return true"); let after = rt - .text(None) + .text(&tok) .unwrap() .search(TextSearchRequest { query: "xzqjwv".to_string(), @@ -3691,13 +4407,14 @@ mod tests { #[tokio::test] async fn soft_delete_note_removes_indexes() { let rt = rt(); + let tok = NamespaceToken::local(); let note = rt .create_note( - None, + &tok, "observation", None, "SpectralDecomposition unique term yvwkqz for soft delete test", - 0.7, + Some(0.7), None, vec![], ) @@ -3705,7 +4422,7 @@ mod tests { .unwrap(); let before = rt - .search_notes(None, "yvwkqz", None, 10, None) + .search_notes(&tok, "yvwkqz", None, 10, None, false) .await .unwrap(); assert!( @@ -3713,11 +4430,11 @@ mod tests { "note must be in FTS before soft-delete" ); - let deleted = rt.delete_note(None, note.id, false).await.unwrap(); + let deleted = rt.delete_note(&tok, note.id, false).await.unwrap(); assert!(deleted, "soft delete must return true"); let after = rt - .search_notes(None, "yvwkqz", None, 10, None) + .search_notes(&tok, 
"yvwkqz", None, 10, None, false) .await .unwrap(); assert!( @@ -3725,4 +4442,442 @@ mod tests { "soft-deleted note must be removed from FTS index" ); } + + // F010 (CRIT): ADR-002 base endpoint allowlist — unlisted triples must fail closed. + // Document->Document Extends is not in the ADR-002 table; current generic fallthrough accepts it. + #[tokio::test] + async fn link_extends_document_to_document_returns_invalid_input() { + let rt = rt(); + let tok = NamespaceToken::local(); + let d1 = rt + .create_entity(&tok, "document", None, "DocA", None, None, vec![]) + .await + .unwrap(); + let d2 = rt + .create_entity(&tok, "document", None, "DocB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, d1.id, d2.id, EdgeRelation::Extends, 1.0, None) + .await; + assert!( + result.is_err(), + "F010: document->document Extends must be rejected by ADR-002 allowlist; \ + current generic entity fallthrough incorrectly accepts it" + ); + } + + // F010 happy path: Concept->Concept Extends is in the ADR-002 allowlist and must succeed. + #[tokio::test] + async fn link_extends_concept_to_concept_succeeds() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "CA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "CB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) + .await; + assert!( + result.is_ok(), + "F010: concept->concept Extends must be allowed (ADR-002 allowlist)" + ); + } + + // F012 (CRIT): CompetesWith is symmetric; reversed pair must deduplicate to one canonical row. + // Current code stores both directions as distinct rows (no canonicalization). 
+ #[tokio::test] + async fn link_symmetric_relation_canonicalizes_endpoint_order() { + use khive_storage::EdgeFilter; + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "ConceptP", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "ConceptQ", None, None, vec![]) + .await + .unwrap(); + // Link A->B then B->A with the same symmetric relation. + rt.link(&tok, a.id, b.id, EdgeRelation::CompetesWith, 1.0, None) + .await + .unwrap(); + rt.link(&tok, b.id, a.id, EdgeRelation::CompetesWith, 1.0, None) + .await + .unwrap(); + let count = rt + .graph(&tok) + .unwrap() + .count_edges(EdgeFilter::default()) + .await + .unwrap(); + assert_eq!( + count, + 1, + "F012: CompetesWith is symmetric; A->B and B->A must deduplicate to one canonical row; \ + found {count} rows (canonicalization not yet implemented)" + ); + } + + // F010 (ADR-002): Supersedes — positive tests for all 5 allowed entity kinds. + #[tokio::test] + async fn f010_supersedes_document_to_document_allowed() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "document", None, "DocA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "document", None, "DocB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + result.is_ok(), + "document->document Supersedes must be allowed (ADR-002:191), got {result:?}" + ); + } + + #[tokio::test] + async fn f010_supersedes_artifact_to_artifact_allowed() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "artifact", None, "ArtA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "artifact", None, "ArtB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + 
result.is_ok(), + "artifact->artifact Supersedes must be allowed (ADR-002:192), got {result:?}" + ); + } + + #[tokio::test] + async fn f010_supersedes_service_to_service_allowed() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "service", None, "SvcA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "service", None, "SvcB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + result.is_ok(), + "service->service Supersedes must be allowed (ADR-002:193), got {result:?}" + ); + } + + #[tokio::test] + async fn f010_supersedes_dataset_to_dataset_allowed() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "dataset", None, "DataA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "dataset", None, "DataB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + result.is_ok(), + "dataset->dataset Supersedes must be allowed (ADR-002:194), got {result:?}" + ); + } + + // F010 (ADR-002): Supersedes — negative tests for rejected entity kinds. 
+ #[tokio::test] + async fn f010_supersedes_project_to_project_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "project", None, "ProjA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "project", None, "ProjB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + matches!(result, Err(RuntimeError::InvalidInput(_))), + "project->project Supersedes must be rejected (not in ADR-002 allowlist), got {result:?}" + ); + } + + #[tokio::test] + async fn f010_supersedes_person_to_person_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "person", None, "Alice", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "person", None, "Bob", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + matches!(result, Err(RuntimeError::InvalidInput(_))), + "person->person Supersedes must be rejected (not in ADR-002 allowlist), got {result:?}" + ); + } + + #[tokio::test] + async fn f010_supersedes_org_to_org_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "org", None, "OrgA", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "org", None, "OrgB", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + matches!(result, Err(RuntimeError::InvalidInput(_))), + "org->org Supersedes must be rejected (not in ADR-002 allowlist), got {result:?}" + ); + } + + // Fix 1: Supersedes entity→entity — same kind (concept→concept) must be allowed. 
+ #[tokio::test] + async fn f010_supersedes_same_kind_entity_allowed() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "OldV", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "NewV", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link(&tok, b.id, a.id, EdgeRelation::Supersedes, 1.0, None) + .await; + assert!( + result.is_ok(), + "concept->concept Supersedes must be allowed by ADR-002 allowlist, got {result:?}" + ); + } + + // F161: ADR-009 target_backend invariant — all edges written through link() must have + // target_backend = None because validate_edge_relation_endpoints already ensured the + // target exists locally. + #[tokio::test] + async fn f161_link_always_writes_null_target_backend() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "A", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "B", None, None, vec![]) + .await + .unwrap(); + let edge = rt + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) + .await + .unwrap(); + assert!( + edge.target_backend.is_none(), + "ADR-009: target_backend must be None for locally-routed edges (F161); got {:?}", + edge.target_backend + ); + } + + // F161: link_many must also write null target_backend for all local edges. 
+ #[tokio::test] + async fn f161_link_many_always_writes_null_target_backend() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "A", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "B", None, None, vec![]) + .await + .unwrap(); + let c = rt + .create_entity(&tok, "concept", None, "C", None, None, vec![]) + .await + .unwrap(); + let specs = vec![ + LinkSpec { + namespace: None, + source_id: a.id, + target_id: b.id, + relation: EdgeRelation::Extends, + weight: 1.0, + metadata: None, + }, + LinkSpec { + namespace: None, + source_id: a.id, + target_id: c.id, + relation: EdgeRelation::Enables, + weight: 1.0, + metadata: None, + }, + ]; + let edges = rt.link_many(&tok, specs).await.unwrap(); + for edge in &edges { + assert!( + edge.target_backend.is_none(), + "ADR-009: target_backend must be None for locally-routed edges in link_many (F161); got {:?}", + edge.target_backend + ); + } + } + + // F012: symmetric relation neighbors — competes_with queried from the non-canonical + // endpoint must still return results when direction=Out is requested. + #[tokio::test] + async fn f012_symmetric_neighbors_visible_from_both_endpoints() { + let rt = rt(); + let tok = NamespaceToken::local(); + let a = rt + .create_entity(&tok, "concept", None, "A", None, None, vec![]) + .await + .unwrap(); + let b = rt + .create_entity(&tok, "concept", None, "B", None, None, vec![]) + .await + .unwrap(); + // Link A→B competes_with; if A.id > B.id the edge is stored as B→A (canonical). + rt.link(&tok, a.id, b.id, EdgeRelation::CompetesWith, 1.0, None) + .await + .unwrap(); + // Both endpoints should see the edge regardless of direction=Out. 
+ let from_a = rt + .neighbors( + &tok, + a.id, + Direction::Out, + None, + Some(vec![EdgeRelation::CompetesWith]), + ) + .await + .unwrap(); + let from_b = rt + .neighbors( + &tok, + b.id, + Direction::Out, + None, + Some(vec![EdgeRelation::CompetesWith]), + ) + .await + .unwrap(); + assert_eq!( + from_a.len(), + 1, + "node A must see competes_with neighbor from Direction::Out (F012); got {from_a:?}" + ); + assert_eq!( + from_b.len(), + 1, + "node B must see competes_with neighbor from Direction::Out (F012); got {from_b:?}" + ); + } + + // Fix 1: Supersedes entity→entity — cross-kind (concept→document) must be rejected. + #[tokio::test] + async fn f010_supersedes_cross_kind_entity_rejected() { + let rt = rt(); + let tok = NamespaceToken::local(); + let concept = rt + .create_entity(&tok, "concept", None, "MyConcept", None, None, vec![]) + .await + .unwrap(); + let doc = rt + .create_entity(&tok, "document", None, "MyDoc", None, None, vec![]) + .await + .unwrap(); + let result = rt + .link( + &tok, + concept.id, + doc.id, + EdgeRelation::Supersedes, + 1.0, + None, + ) + .await; + assert!( + matches!(result, Err(RuntimeError::InvalidInput(_))), + "concept->document Supersedes must be rejected by ADR-002 allowlist, got {result:?}" + ); + } + + #[tokio::test] + async fn delete_note_cross_namespace_returns_mismatch_error() { + let rt = rt(); + let ns_a = NamespaceToken::for_namespace(Namespace::parse("ns-a").unwrap()); + let ns_b = NamespaceToken::for_namespace(Namespace::parse("ns-b").unwrap()); + let note = rt + .create_note( + &ns_a, + "observation", + None, + "note in ns-a", + Some(0.8), + None, + vec![], + ) + .await + .unwrap(); + + // Attempt to delete from a different namespace must return NamespaceMismatch. 
+ let result = rt.delete_note(&ns_b, note.id, true).await; + assert!( + matches!(result.unwrap_err(), crate::RuntimeError::NamespaceMismatch { id } if id == note.id), + "cross-namespace delete_note must return NamespaceMismatch with the note id" + ); + + // Note must still exist in ns-a after the failed cross-ns delete. + let note_store = rt.notes(&ns_a).unwrap(); + let still_there = note_store.get_note(note.id).await.unwrap(); + assert!( + still_there.is_some(), + "note must survive cross-ns delete attempt" + ); + } } diff --git a/crates/khive-runtime/src/pack.rs b/crates/khive-runtime/src/pack.rs index 5a223af9..c8c321aa 100644 --- a/crates/khive-runtime/src/pack.rs +++ b/crates/khive-runtime/src/pack.rs @@ -14,31 +14,71 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::sync::Arc; +use crate::runtime::NamespaceToken; use async_trait::async_trait; use khive_gate::{ActorRef, AllowAllGate, AuditEvent, GateDecision, GateRef, GateRequest}; -use khive_storage::{Event, EventStore, SubstrateKind}; -use khive_types::{EventOutcome, Namespace}; +use khive_storage::{Event, EventStore, EventView, SubstrateKind}; +use khive_types::{EventKind, EventOutcome, Namespace}; use serde_json::Value; -pub use khive_types::{EdgeEndpointRule, EndpointKind, VerbDef}; +pub use khive_types::{ + EdgeEndpointRule, EndpointKind, HandlerDef, NoteKindSpec, NoteLifecycleSpec, PackSchemaPlan, + VerbCategory, Visibility, +}; +// Backward-compat re-export. +#[allow(deprecated)] +pub use khive_types::VerbDef; -/// Hook called after every successful verb dispatch (Issue #158). +use crate::validation::ValidationRule; + +/// Pack-auxiliary schema plan (ADR-017 §Storage profile and pack-auxiliary schema). +/// +/// Declares `CREATE TABLE IF NOT EXISTS` statements for pack-owned tables that +/// are NOT part of the core substrate schema (entities, notes, edges, events). +/// Applied at boot via `StorageBackend::apply_schema` / `apply_pack_schema_plan`. 
/// -/// Packs that want to observe real-time dispatch outcomes (e.g. brain pack -/// updating its posteriors) implement this trait and register it via -/// [`VerbRegistryBuilder::with_dispatch_hook`]. The hook is opt-in: when no -/// hook is registered, dispatch incurs zero overhead. +/// Core substrate tables evolve through versioned migrations (ADR-015). Pack +/// schema is strictly for pack-auxiliary tables (e.g. GTD lifecycle audit, +/// memory index). v1 pack schemas are non-versioned. +#[derive(Debug, Default, Clone)] +pub struct SchemaPlan { + /// Owning pack name. + pub pack: &'static str, + /// DDL statements applied idempotently at boot. + /// Each entry must be a self-contained `CREATE TABLE IF NOT EXISTS` or + /// similar idempotent statement. + pub statements: &'static [&'static str], +} + +impl SchemaPlan { + /// Construct a `SchemaPlan` with no statements. + /// + /// Packs whose state lives entirely in the core substrate tables (entities, + /// notes, edges) use this as their `schema_plan()` return value. + pub const fn empty() -> Self { + Self { + pack: "", + statements: &[], + } + } + + /// Returns `true` when the plan contains no DDL statements. + pub fn is_empty(&self) -> bool { + self.statements.is_empty() + } +} + +/// Hook called after every successful verb dispatch (Issue #158). /// -/// The hook receives the synthesized `Event` that was built from the dispatch -/// outcome — same representation used by the EventStore audit path — so brain -/// pack's `EventFold` can process it without extra conversion. +/// Packs observe enriched event views so provenance-aware consumers can use +/// `view.observations` while legacy folds can still consume `view.event`. #[async_trait] pub trait DispatchHook: Send + Sync { - /// Called with the dispatch-outcome event after a successful pack dispatch. + /// Called with the dispatch-outcome event view after a successful pack dispatch. 
/// /// Errors are logged via `tracing::warn!` and never propagated to the - /// caller — the dispatch has already succeeded. - async fn on_dispatch(&self, event: &Event); + /// caller; the dispatch has already succeeded. + async fn on_dispatch(&self, view: &EventView); } use crate::error::{ @@ -65,8 +105,8 @@ pub trait PackRuntime: Send + Sync { /// Entity kinds this pack owns — must equal `::ENTITY_KINDS`. fn entity_kinds(&self) -> &'static [&'static str]; - /// Verbs this pack handles — must equal `::VERBS`. - fn verbs(&self) -> &'static [VerbDef]; + /// Handlers this pack registers — must equal `::HANDLERS`. + fn handlers(&self) -> &'static [HandlerDef]; /// Pack-extensible edge endpoint rules — must equal `::EDGE_RULES`. /// Defaults to empty so existing packs that don't extend the edge contract @@ -81,6 +121,16 @@ pub trait PackRuntime: Send + Sync { &[] } + /// NoteKindSpec declarations for note kinds this pack owns (ADR-004). + /// + /// Packs that introduce note kinds with explicit lifecycle semantics + /// declare the spec here. The runtime collects these for introspection + /// and future enforcement. Defaults to empty so existing packs compile + /// without changes. + fn note_kind_specs(&self) -> &'static [NoteKindSpec] { + &[] + } + /// Optional per-kind hook for shared CRUD specialization (ADR-030). /// /// When a kind is owned by this pack (declared in `note_kinds()` or @@ -92,15 +142,48 @@ pub trait PackRuntime: Send + Sync { None } + /// Pack-auxiliary schema (ADR-017 §Storage profile and pack-auxiliary schema). + /// + /// Returns DDL statements for pack-owned tables that are NOT part of the + /// core substrate schema. Statements are idempotent (`CREATE TABLE IF NOT + /// EXISTS`) so callers can apply them safely on every registration. Core + /// substrate tables evolve through versioned migrations (ADR-015); pack + /// schema is strictly pack-auxiliary. 
+ /// + /// Defaults to an empty plan — packs that store everything in the core + /// substrate tables (entities, notes, edges, events) return this default. + /// + /// Plans are aggregated via [`VerbRegistry::all_schema_plans`] and applied + /// at startup via `KhiveMcpServer::with_packs` (c12). Packs that need their + /// schema present (e.g. GTD) also self-bootstrap lazily on first call for + /// robustness in test contexts that create fresh in-memory databases. + fn schema_plan(&self) -> SchemaPlan { + SchemaPlan::empty() + } + + /// Domain-specific validation rules contributed by this pack (ADR-034 §9). + /// + /// Rule IDs MUST follow the `/` namespace convention. + /// Built-in rules (no pack prefix) are reserved for the `khive-runtime` + /// validation infrastructure. + /// + /// Defaults to empty — packs with no domain-specific rules return `&[]`. + fn validation_rules(&self) -> &'static [ValidationRule] { + &[] + } + /// Dispatch a verb call. Returns serialized JSON response. /// /// The `registry` parameter gives the handler access to the merged /// vocabulary and kind hooks across all loaded packs (ADR-030). + /// The `token` is an authorized namespace token minted by the dispatch + /// boundary after gate authorization — handlers must use it directly. async fn dispatch( &self, verb: &str, params: Value, registry: &VerbRegistry, + token: &NamespaceToken, ) -> Result; } @@ -174,7 +257,7 @@ impl VerbRegistryBuilder { Self { packs: Vec::new(), gate: std::sync::Arc::new(AllowAllGate), - default_namespace: Namespace::default_ns().as_str().to_string(), + default_namespace: Namespace::local().as_str().to_string(), event_store: None, dispatch_hook: None, } @@ -187,7 +270,7 @@ impl VerbRegistryBuilder { self } - /// Register a boxed pack directly (ADR-063). + /// Register a boxed pack directly (ADR-027). /// /// Crate-private: only [`PackRegistry::register_packs`] should call this. 
/// External callers must use the typed [`Self::register`] which enforces the @@ -326,6 +409,9 @@ impl VerbRegistryBuilder { .map(|idx| slots[idx].take().expect("topological index must exist")) .collect(); + validate_unique_note_kinds(&ordered_packs)?; + validate_unique_verb_names(&ordered_packs)?; + Ok(VerbRegistry { packs: Arc::new(ordered_packs), gate: self.gate, @@ -336,6 +422,52 @@ impl VerbRegistryBuilder { } } +/// Validate that no two packs declare the same note kind (F073). +/// +/// Boot-time duplicate detection prevents pack configuration errors from +/// silently corrupting note kind routing. Returns an error naming the +/// duplicate kind and the two packs that claim it. +fn validate_unique_note_kinds(packs: &[Box]) -> Result<(), RuntimeError> { + let mut seen: HashMap<&str, &str> = HashMap::new(); + for pack in packs { + for &kind in pack.note_kinds() { + if let Some(first_pack) = seen.insert(kind, pack.name()) { + return Err(RuntimeError::InvalidInput(format!( + "duplicate note kind {kind:?}: claimed by both {first_pack:?} and {:?}", + pack.name() + ))); + } + } + } + Ok(()) +} + +/// Validate that no two packs declare the same `Visibility::Verb` handler name +/// (ADR-017 §Boot-time collision checks, F093). +/// +/// `Visibility::Subhandler` entries are pack-prefixed by convention and excluded +/// from cross-pack collision detection. Two packs declaring the same subhandler +/// name prefix (e.g. `recall.embed`) would be a pack-authoring error but does not +/// produce a cross-pack routing conflict since only the owning pack dispatches them. 
+fn validate_unique_verb_names(packs: &[Box]) -> Result<(), RuntimeError> { + let mut seen: HashMap<&str, &str> = HashMap::new(); + for pack in packs { + for handler in pack.handlers() { + if !matches!(handler.visibility, Visibility::Verb) { + continue; + } + if let Some(first_pack) = seen.insert(handler.name, pack.name()) { + return Err(RuntimeError::VerbCollision { + verb: handler.name.to_string(), + first_pack: first_pack.to_string(), + second_pack: pack.name().to_string(), + }); + } + } + } + Ok(()) +} + fn find_pack_dependency_cycle( packs: &[Box], name_to_idx: &HashMap<&str, usize>, @@ -456,12 +588,9 @@ impl VerbRegistry { .and_then(Value::as_str) .map(str::to_string) .unwrap_or_else(|| self.default_namespace.clone()); - let gate_req = GateRequest::new( - ActorRef::anonymous(), - Namespace::new(&ns_str), - verb, - params.clone(), - ); + let ns = Namespace::parse(&ns_str) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid namespace: {e}")))?; + let gate_req = GateRequest::new(ActorRef::anonymous(), ns, verb, params.clone()); // Consult the gate (ADR-029, ADR-035). // @@ -494,11 +623,12 @@ impl VerbRegistry { let storage_event = Event::new( gate_req.namespace.as_str(), verb, + EventKind::Audit, SubstrateKind::Event, format!("{}:{}", gate_req.actor.kind, gate_req.actor.id), ) .with_outcome(outcome) - .with_data(audit_data); + .with_payload(audit_data); if let Err(store_err) = store.append_event(storage_event).await { tracing::warn!( verb, @@ -534,17 +664,34 @@ impl VerbRegistry { }); } + // Mint the authorized namespace token at the dispatch boundary (ADR-007). + // ns_str was already validated above when building the gate request. 
+ let token = NamespaceToken::mint_authorized( + Namespace::parse(&ns_str) + .map_err(|e| RuntimeError::InvalidInput(format!("invalid namespace: {e}")))?, + ActorRef::anonymous(), + ); + for pack in self.packs.iter() { - if pack.verbs().iter().any(|v| v.name == verb) { - let result = pack.dispatch(verb, params, self).await; + if pack.handlers().iter().any(|v| v.name == verb) { + let result = pack.dispatch(verb, params, self, &token).await; // Post-dispatch hook: fires on success, opt-in (Issue #158). if let (Ok(_), Some(hook)) = (&result, &self.dispatch_hook) { - let dispatch_event = - Event::new(ns_str.as_str(), verb, SubstrateKind::Event, pack.name()) - .with_outcome(EventOutcome::Success); + let dispatch_event = Event::new( + ns_str.as_str(), + verb, + EventKind::Audit, + SubstrateKind::Event, + pack.name(), + ) + .with_outcome(EventOutcome::Success); + let dispatch_view = EventView { + event: dispatch_event, + observations: Vec::new(), + }; let hook = Arc::clone(hook); - hook.on_dispatch(&dispatch_event).await; + hook.on_dispatch(&dispatch_view).await; } return result; @@ -553,7 +700,7 @@ impl VerbRegistry { let available: Vec<&str> = self .packs .iter() - .flat_map(|p| p.verbs().iter().map(|v| v.name)) + .flat_map(|p| p.handlers().iter().map(|v| v.name)) .collect(); Err(RuntimeError::InvalidInput(format!( "unknown verb {verb:?}; available: {}", @@ -579,24 +726,43 @@ impl VerbRegistry { None } - /// All verb definitions across all registered packs. + /// All MCP-exposed handlers across all registered packs (`Visibility::Verb` only). /// - /// Returned with `'static` lifetime since pack verbs are `&'static [VerbDef]` - /// constants — callers can keep the slice references beyond the registry's - /// borrow. 
- pub fn all_verbs(&self) -> Vec<&'static VerbDef> { - self.packs.iter().flat_map(|p| p.verbs().iter()).collect() + /// Subhandlers (`Visibility::Subhandler`) are excluded — they are internal + /// pipeline steps not surfaced on the MCP wire (ADR-017 §Visibility filtering, + /// F118). Returned with `'static` lifetime since pack handlers are `&'static + /// [HandlerDef]` constants. + pub fn all_verbs(&self) -> Vec<&'static HandlerDef> { + self.packs + .iter() + .flat_map(|p| p.handlers().iter()) + .filter(|h| matches!(h.visibility, Visibility::Verb)) + .collect() } - /// All verb definitions paired with the name of the pack that owns them. + /// All MCP-exposed handlers paired with the name of the pack that owns them + /// (`Visibility::Verb` only). /// - /// Useful for building catalogs that attribute each verb to its source pack. - /// The pack name has the same lifetime as `&self`; the `VerbDef` reference - /// is `'static`. - pub fn all_verbs_with_names(&self) -> Vec<(&str, &'static VerbDef)> { + /// Subhandlers (`Visibility::Subhandler`) are excluded from the MCP catalog + /// (ADR-017 §Visibility filtering, F118-F123). Use `all_handlers_with_names` + /// when internal handlers must also be enumerated (e.g. runtime introspection). + pub fn all_verbs_with_names(&self) -> Vec<(&str, &'static HandlerDef)> { self.packs .iter() - .flat_map(|p| p.verbs().iter().map(move |v| (p.name(), v))) + .flat_map(|p| p.handlers().iter().map(move |v| (p.name(), v))) + .filter(|(_, h)| matches!(h.visibility, Visibility::Verb)) + .collect() + } + + /// All handler definitions across all registered packs, including subhandlers. + /// + /// Unlike `all_verbs`, this includes `Visibility::Subhandler` entries. Useful + /// for runtime introspection (e.g. `list_handlers`) and tooling that needs + /// the complete handler surface (ADR-017 §Introspection). 
+ pub fn all_handlers_with_names(&self) -> Vec<(&str, &'static HandlerDef)> { + self.packs + .iter() + .flat_map(|p| p.handlers().iter().map(move |v| (p.name(), v))) .collect() } @@ -657,16 +823,16 @@ impl VerbRegistry { .map(|p| p.entity_kinds()) } - /// Verbs declared by a specific registered pack. + /// Handlers declared by a specific registered pack. /// - /// Returns `None` if no pack with `name` is registered. Each `VerbDef` - /// carries name + description — sufficient for introspection clients - /// like `kkernel pack handler` (ADR-076). - pub fn pack_verbs(&self, name: &str) -> Option<&'static [VerbDef]> { + /// Returns `None` if no pack with `name` is registered. Each `HandlerDef` + /// carries name + description + visibility — sufficient for introspection + /// clients like `kkernel pack handler` (ADR-076). + pub fn pack_verbs(&self, name: &str) -> Option<&'static [HandlerDef]> { self.packs .iter() .find(|p| p.name() == name) - .map(|p| p.verbs()) + .map(|p| p.handlers()) } /// All pack-declared edge endpoint rules across registered packs (ADR-031). @@ -680,12 +846,71 @@ impl VerbRegistry { .flat_map(|p| p.edge_rules().iter().copied()) .collect() } + + /// Collect all `NoteKindSpec` declarations from every loaded pack (ADR-004). + /// + /// Used by the runtime for lifecycle introspection and future enforcement. + pub fn all_note_kind_specs(&self) -> Vec<&'static NoteKindSpec> { + self.packs + .iter() + .flat_map(|p| p.note_kind_specs().iter()) + .collect() + } + + /// All pack-contributed validation rules across registered packs (ADR-034 §9). + /// + /// Returns references into the pack-owned `'static` slices — no allocation + /// beyond the outer `Vec`. Rule IDs are namespaced by pack; callers can + /// group by `rule.id.split_once('/')` to attribute rules to their packs. 
+ pub fn all_validation_rules(&self) -> Vec<&'static ValidationRule> { + self.packs + .iter() + .flat_map(|p| p.validation_rules().iter()) + .collect() + } + + /// Pack-auxiliary schema plans for all registered packs (ADR-017). + /// + /// Returns one `SchemaPlan` per pack. Callers (typically the runtime + /// bootstrap) apply each plan to the pack's assigned backend. Empty plans + /// are included so the caller can iterate uniformly; callers that want to + /// skip empty plans should check `plan.is_empty()`. + pub fn all_schema_plans(&self) -> Vec { + self.packs.iter().map(|p| p.schema_plan()).collect() + } + + /// Apply all non-empty pack-auxiliary schema plans to the given backend + /// (ADR-017 §c12 startup application). + /// + /// This is the centralized startup hook that replaced the previous lazy + /// per-pack self-bootstrap pattern. Each pack's `SchemaPlan` carries + /// idempotent `CREATE TABLE IF NOT EXISTS` DDL; calling this more than once + /// is safe. Empty plans are skipped. + /// + /// Errors from individual plans are logged via `tracing::warn!` and not + /// propagated so that a single pack's schema failure does not prevent the + /// rest from loading. Callers that need hard-failure semantics should call + /// `all_schema_plans()` and apply each plan individually. + pub fn apply_schema_plans(&self, backend: &khive_db::StorageBackend) { + for plan in self.all_schema_plans() { + if plan.is_empty() { + continue; + } + if let Err(e) = backend.apply_pack_ddl_statements(plan.statements) { + tracing::warn!( + pack = plan.pack, + error = %e, + "failed to apply pack schema plan at startup (non-fatal)" + ); + } + } + } } -// ── ADR-063: inventory-based dynamic pack loading ───────────────────────────── +// ── ADR-027: inventory-based dynamic pack loading ───────────────────────────── /// Factory for creating pack instances registered via `inventory` at link time -/// (ADR-063). 
Each pack crate submits a `&'static dyn PackFactory` wrapped in a +/// (ADR-027). Each pack crate submits a `&'static dyn PackFactory` wrapped in a /// [`PackRegistration`]; the binary's linker collects them all into a single /// slice iterable at runtime. /// @@ -698,8 +923,9 @@ pub trait PackFactory: Send + Sync + 'static { /// Names of packs that must be loaded before this one (ADR-037). /// /// Defaults to empty so pack crates that have no dependencies compile - /// without changes. [`PackRegistry::register_packs`] uses this to compute - /// the transitive closure of required packs before registering anything. + /// without changes. [`PackRegistry::register_packs`] validates that every + /// name listed here is present in the caller's explicit pack list — absent + /// dependencies are a boot error, not silently auto-added (ADR-027). fn requires(&self) -> &'static [&'static str] { &[] } @@ -711,12 +937,12 @@ pub trait PackFactory: Send + Sync + 'static { /// Newtype wrapper collected by `inventory` so pack crates can submit /// `&'static dyn PackFactory` references without the type-ascription syntax /// that `inventory::submit!` does not support for bare trait-object references -/// (ADR-063). +/// (ADR-027). pub struct PackRegistration(pub &'static dyn PackFactory); inventory::collect!(PackRegistration); -/// Registry of pack factories discovered via `inventory` at link time (ADR-063). +/// Registry of pack factories discovered via `inventory` at link time (ADR-027). /// /// No instance is needed — all methods are associated functions that walk the /// globally-collected [`PackRegistration`] slice. @@ -733,15 +959,17 @@ impl PackRegistry { /// Register the named packs into `builder` using the supplied `runtime`. /// - /// Resolves transitive `requires()` dependencies declared on each - /// [`PackFactory`] before registering anything. A pack that declares - /// `requires = &["kg"]` will cause `"kg"` to be included even if the caller - /// only asked for `"gtd"`. 
The [`VerbRegistryBuilder::build`] topo-sort - /// then ensures correct load order. + /// Validates the explicit pack list against `PackFactory::requires()` — + /// if any requested pack declares a dependency that is absent from `names`, + /// registration fails with `Err(missing_name)` (ADR-027: missing dependency + /// is a boot error, not silently auto-added). Callers must include all + /// required packs explicitly. + /// + /// The [`VerbRegistryBuilder::build`] topo-sort enforces correct load order. /// - /// Returns `Ok(())` when all names (including their transitive deps) are - /// recognised; returns `Err(name)` for the first unrecognised name so - /// callers can surface a clear error. + /// Returns `Ok(())` when all names are recognised and all declared + /// dependencies are satisfied; returns `Err(name)` for the first + /// unrecognised or unsatisfied pack name. pub fn register_packs( names: &[String], runtime: KhiveRuntime, @@ -756,33 +984,27 @@ impl PackRegistry { all.iter().copied().find(|f| f.name() == name) }; - // BFS transitive closure: start with the explicitly requested names, - // then walk each factory's requires() to pull in dependencies. - let mut full_set: std::collections::HashSet<&str> = std::collections::HashSet::new(); - let mut queue: std::collections::VecDeque<&str> = std::collections::VecDeque::new(); - + // Validate that every requested name is a known factory. + let requested: std::collections::HashSet<&str> = names.iter().map(String::as_str).collect(); for name in names { - queue.push_back(name.as_str()); + factory_for(name.as_str()).ok_or_else(|| name.clone())?; } - while let Some(name) = queue.pop_front() { - if !full_set.insert(name) { - continue; // already visited - } - let factory = factory_for(name).ok_or_else(|| name.to_string())?; + // Validate that all requires() dependencies are explicitly present in + // the requested set. ADR-027: missing dep → boot error, not auto-add. 
+ for name in names { + let factory = factory_for(name.as_str()).unwrap(); // validated above for &dep in factory.requires() { - if !full_set.contains(dep) { - queue.push_back(dep); + if !requested.contains(dep) { + return Err(dep.to_string()); } } } - // Register every pack in the resolved set; VerbRegistryBuilder::build() + // Register every requested pack; VerbRegistryBuilder::build() // performs the topo-sort, so insertion order here does not matter. - for name in &full_set { - // factory_for cannot fail here: every name in full_set passed the - // lookup above without returning Err. - let factory = factory_for(name).unwrap(); + for name in names { + let factory = factory_for(name.as_str()).unwrap(); // validated above builder.register_boxed(factory.create(runtime.clone())); } @@ -801,14 +1023,18 @@ mod tests { const NAME: &'static str = "alpha"; const NOTE_KINDS: &'static [&'static str] = &["memo", "log"]; const ENTITY_KINDS: &'static [&'static str] = &["widget"]; - const VERBS: &'static [VerbDef] = &[ - VerbDef { + const HANDLERS: &'static [HandlerDef] = &[ + HandlerDef { name: "create", description: "create a widget", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, - VerbDef { + HandlerDef { name: "list", description: "list widgets", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }, ]; } @@ -824,14 +1050,15 @@ mod tests { fn entity_kinds(&self) -> &'static [&'static str] { AlphaPack::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - AlphaPack::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + AlphaPack::HANDLERS } async fn dispatch( &self, verb: &str, _params: Value, _registry: &VerbRegistry, + _token: &NamespaceToken, ) -> Result { Ok(serde_json::json!({ "pack": "alpha", "verb": verb })) } @@ -841,20 +1068,80 @@ mod tests { impl Pack for BetaPack { const NAME: &'static str = "beta"; - const NOTE_KINDS: &'static [&'static str] = &["log", "alert"]; + const NOTE_KINDS: &'static [&'static str] = 
&["alert"]; const ENTITY_KINDS: &'static [&'static str] = &["widget", "gadget"]; - const VERBS: &'static [VerbDef] = &[ - VerbDef { + const HANDLERS: &'static [HandlerDef] = &[ + HandlerDef { name: "notify", description: "send alert", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }, - VerbDef { + // "create" is Subhandler so it does NOT collide with AlphaPack's + // Verb-visibility "create" — subhandlers are pack-internal and + // excluded from cross-pack collision detection (ADR-017). + HandlerDef { name: "create", - description: "create a gadget", + description: "beta internal create (subhandler)", + visibility: Visibility::Subhandler, + category: VerbCategory::Commissive, }, ]; } + /// Build a registry with AlphaPack + BetaPack. + /// + /// BetaPack's `create` is Subhandler so there is no Verb-visibility + /// collision with AlphaPack's `create` Verb. Tests that need a collision + /// use `build_colliding_registry()` instead. + fn build_registry() -> VerbRegistry { + let mut builder = VerbRegistryBuilder::new(); + builder.register(AlphaPack); + builder.register(BetaPack); + builder.build().expect("registry builds without collision") + } + + /// Build a registry with two packs that declare the same Verb-visibility + /// handler — used to test that `VerbCollision` is raised at build time. 
+ struct CollidingPack; + + impl Pack for CollidingPack { + const NAME: &'static str = "colliding"; + const NOTE_KINDS: &'static [&'static str] = &[]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { + name: "create", + description: "duplicate Verb-visibility create", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, + }]; + } + + #[async_trait] + impl PackRuntime for CollidingPack { + fn name(&self) -> &str { + Self::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + Self::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + Self::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS + } + async fn dispatch( + &self, + verb: &str, + _params: Value, + _registry: &VerbRegistry, + _token: &NamespaceToken, + ) -> Result { + Ok(serde_json::json!({ "pack": "colliding", "verb": verb })) + } + } + #[async_trait] impl PackRuntime for BetaPack { fn name(&self) -> &str { @@ -866,26 +1153,20 @@ mod tests { fn entity_kinds(&self) -> &'static [&'static str] { BetaPack::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - BetaPack::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + BetaPack::HANDLERS } async fn dispatch( &self, verb: &str, _params: Value, _registry: &VerbRegistry, + _token: &NamespaceToken, ) -> Result { Ok(serde_json::json!({ "pack": "beta", "verb": verb })) } } - fn build_registry() -> VerbRegistry { - let mut builder = VerbRegistryBuilder::new(); - builder.register(AlphaPack); - builder.register(BetaPack); - builder.build().expect("registry builds") - } - #[tokio::test] async fn dispatch_routes_to_correct_pack() { let reg = build_registry(); @@ -897,12 +1178,80 @@ mod tests { assert_eq!(res["pack"], "beta"); } - #[tokio::test] - async fn dispatch_first_registered_wins_on_collision() { - let reg = build_registry(); + /// ADR-017 §Boot-time collision checks (F093/F094): two packs declaring the + /// same 
`Visibility::Verb` handler must be rejected at build time — the old + /// "first registered wins" behaviour is replaced by a boot error. + #[test] + fn verb_collision_is_boot_time_error() { + let mut builder = VerbRegistryBuilder::new(); + builder.register(AlphaPack); + builder.register(CollidingPack); + let err = builder + .build() + .err() + .expect("duplicate Verb-visibility handler must be rejected at build time"); + assert!( + matches!(err, RuntimeError::VerbCollision { ref verb, .. } if verb == "create"), + "expected VerbCollision for 'create', got {err:?}" + ); + let msg = err.to_string(); + assert!( + msg.contains("create"), + "error must name the colliding verb: {msg}" + ); + assert!( + msg.contains("alpha") || msg.contains("colliding"), + "error must name one of the conflicting packs: {msg}" + ); + } - let res = reg.dispatch("create", Value::Null).await.unwrap(); - assert_eq!(res["pack"], "alpha", "first registered pack wins"); + /// Subhandler-visibility handlers with the same name across packs are NOT + /// a collision — they are pack-internal and excluded from cross-pack + /// collision detection (ADR-017 §Boot-time collision checks). 
+ #[test] + fn subhandler_same_name_across_packs_is_not_a_collision() { + struct SubhandlerPack; + impl Pack for SubhandlerPack { + const NAME: &'static str = "subhandler_pack"; + const NOTE_KINDS: &'static [&'static str] = &[]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { + name: "create", + description: "internal create", + visibility: Visibility::Subhandler, + category: VerbCategory::Commissive, + }]; + } + #[async_trait] + impl PackRuntime for SubhandlerPack { + fn name(&self) -> &str { + Self::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + Self::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + Self::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS + } + async fn dispatch( + &self, + verb: &str, + _: Value, + _: &VerbRegistry, + _: &NamespaceToken, + ) -> Result { + Ok(serde_json::json!({"pack": "subhandler_pack", "verb": verb})) + } + } + let mut builder = VerbRegistryBuilder::new(); + builder.register(AlphaPack); // AlphaPack has Verb "create" + builder.register(SubhandlerPack); // SubhandlerPack has Subhandler "create" — no collision + builder + .build() + .expect("subhandler same name must NOT be a collision"); } #[tokio::test] @@ -915,21 +1264,42 @@ mod tests { assert!(msg.contains("create")); } + /// `all_verbs` returns only `Visibility::Verb` entries (ADR-017 F118). + /// + /// BetaPack's `create` is `Visibility::Subhandler` — it must NOT appear + /// in `all_verbs()` even though it has the same name as a Verb in AlphaPack. #[test] - fn all_verbs_aggregates_across_packs() { + fn all_verbs_aggregates_across_packs_excludes_subhandlers() { let reg = build_registry(); let verbs: Vec<&str> = reg.all_verbs().iter().map(|v| v.name).collect(); - assert_eq!(verbs, vec!["create", "list", "notify", "create"]); + // BetaPack's "create" (Subhandler) is absent; only Verb-visibility entries appear. 
+ assert_eq!(verbs, vec!["create", "list", "notify"]); } #[test] - fn all_verbs_with_names_pairs_pack_name() { + fn all_verbs_with_names_pairs_pack_name_excludes_subhandlers() { let reg = build_registry(); let pairs: Vec<(&str, &str)> = reg .all_verbs_with_names() .iter() .map(|(pack, v)| (*pack, v.name)) .collect(); + // BetaPack's "create" is Subhandler and must NOT appear here. + assert_eq!( + pairs, + vec![("alpha", "create"), ("alpha", "list"), ("beta", "notify"),] + ); + } + + #[test] + fn all_handlers_with_names_includes_subhandlers() { + let reg = build_registry(); + let pairs: Vec<(&str, &str)> = reg + .all_handlers_with_names() + .iter() + .map(|(pack, v)| (*pack, v.name)) + .collect(); + // BetaPack's Subhandler "create" IS present in the full handler list. assert_eq!( pairs, vec![ @@ -942,12 +1312,67 @@ mod tests { } #[test] - fn note_kinds_are_deduplicated() { + fn note_kinds_are_ordered() { let reg = build_registry(); let kinds = reg.all_note_kinds(); assert_eq!(kinds, vec!["memo", "log", "alert"]); } + #[test] + fn note_kind_duplicate_rejected_at_build_time() { + struct DupPack; + + impl khive_types::Pack for DupPack { + const NAME: &'static str = "dup"; + // "memo" is already declared by AlphaPack — must be rejected at build. 
+ const NOTE_KINDS: &'static [&'static str] = &["memo"]; + const ENTITY_KINDS: &'static [&'static str] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; + } + + #[async_trait] + impl PackRuntime for DupPack { + fn name(&self) -> &str { + Self::NAME + } + fn note_kinds(&self) -> &'static [&'static str] { + Self::NOTE_KINDS + } + fn entity_kinds(&self) -> &'static [&'static str] { + Self::ENTITY_KINDS + } + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS + } + async fn dispatch( + &self, + _verb: &str, + _params: Value, + _registry: &VerbRegistry, + _token: &NamespaceToken, + ) -> Result { + Ok(Value::Null) + } + } + + let mut builder = VerbRegistryBuilder::new(); + builder.register(AlphaPack); + builder.register(DupPack); + let err = builder + .build() + .err() + .expect("duplicate note kind must be rejected"); + let msg = err.to_string(); + assert!( + msg.contains("memo"), + "error must name the duplicate kind: {msg}" + ); + assert!( + msg.contains("alpha") || msg.contains("dup"), + "error must name one of the conflicting packs: {msg}" + ); + } + #[test] fn entity_kinds_are_deduplicated() { let reg = build_registry(); @@ -1080,16 +1505,18 @@ mod tests { .unwrap(); // Missing namespace → registry default. reg.dispatch("list", Value::Null).await.unwrap(); - // Explicit empty namespace string is preserved (it is what - // `KhiveRuntime::ns` would also see). Gate and runtime MUST agree on - // the namespace they observe; coercing here while the runtime - // continues to honor `""` would create an audit blind spot. - reg.dispatch("list", serde_json::json!({"namespace": ""})) + // Empty string is rejected: Namespace::parse("") fails → InvalidInput error. 
+ let err = reg + .dispatch("list", serde_json::json!({"namespace": ""})) .await - .unwrap(); + .unwrap_err(); + assert!( + matches!(err, RuntimeError::InvalidInput(_)), + "empty namespace must return InvalidInput, got {err:?}" + ); let seen = gate.seen.lock().unwrap().clone(); - assert_eq!(seen, vec!["tenant-y", "tenant-x", ""]); + assert_eq!(seen, vec!["tenant-y", "tenant-x"]); } #[tokio::test] @@ -1431,6 +1858,7 @@ mod tests { // ---- Hard enforcement + EventStore persistence (ADR-035) ---- + use crate::runtime::NamespaceToken; use async_trait::async_trait; use khive_storage::{ BatchWriteSummary, Event, EventFilter, EventStore, Page, PageRequest, SubstrateKind, @@ -1525,9 +1953,11 @@ mod tests { const NAME: &'static str = "tracked"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[VerbDef { + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { name: "guarded", description: "a guarded verb", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }]; } @@ -1542,14 +1972,15 @@ mod tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } async fn dispatch( &self, _verb: &str, _params: Value, _registry: &VerbRegistry, + _token: &NamespaceToken, ) -> Result { self.invoked.fetch_add(1, Ordering::SeqCst); Ok(serde_json::json!({"invoked": true})) @@ -1802,14 +2233,11 @@ mod tests { let ev = &page.items[0]; assert_eq!(ev.outcome, EventOutcome::Denied); - // The data field must hold the full AuditEvent envelope (ADR-033 contract). - let data = ev - .data - .as_ref() - .expect("Event.data must be Some — full AuditEvent envelope must be persisted"); + // The payload field must hold the full AuditEvent envelope (ADR-033 contract). 
+ let data = &ev.payload; let audit: khive_gate::AuditEvent = serde_json::from_value(data.clone()) - .expect("Event.data must deserialize to AuditEvent"); + .expect("Event.payload must deserialize to AuditEvent"); assert_eq!( audit.deny_reason.as_deref(), @@ -1870,13 +2298,10 @@ mod tests { let ev = &page.items[0]; assert_eq!(ev.outcome, EventOutcome::Success); - let data = ev - .data - .as_ref() - .expect("Event.data must be Some — AuditEvent envelope must be persisted on allow"); + let data = &ev.payload; let audit: khive_gate::AuditEvent = serde_json::from_value(data.clone()) - .expect("Event.data must deserialize to AuditEvent"); + .expect("Event.payload must deserialize to AuditEvent"); assert_eq!(audit.gate_impl, "ObligationGate"); assert_eq!( @@ -1914,8 +2339,9 @@ mod tests { // events_for_namespace ensures the events schema and returns a SqlEventStore // scoped to "test-ns". The pool is shared so reads and writes see the same data. let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let test_tok = NamespaceToken::for_namespace(Namespace::parse("test-ns").unwrap()); let sql_store = rt - .events(Some("test-ns")) + .events(&test_tok) .expect("events_for_namespace must succeed"); let mut builder = VerbRegistryBuilder::new(); @@ -1954,16 +2380,13 @@ mod tests { let ev = &page.items[0]; assert_eq!(ev.outcome, EventOutcome::Denied); - // Event.data must hold the full AuditEvent serialized as JSON text and + // Event.payload must hold the full AuditEvent serialized as JSON text and // parsed back. If the SQL path was lossy, this deserialization would fail // or the field assertions below would fail. 
- let data = ev - .data - .as_ref() - .expect("Event.data must be Some — SqlEventStore must persist AuditEvent envelope"); + let data = &ev.payload; let audit: khive_gate::AuditEvent = serde_json::from_value(data.clone()) - .expect("Event.data must deserialize to AuditEvent after SQL round-trip"); + .expect("Event.payload must deserialize to AuditEvent after SQL round-trip"); assert_eq!( audit.deny_reason.as_deref(), @@ -2016,8 +2439,9 @@ mod tests { } let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let test_tok = NamespaceToken::for_namespace(Namespace::parse("test-ns").unwrap()); let sql_store = rt - .events(Some("test-ns")) + .events(&test_tok) .expect("events_for_namespace must succeed"); let mut builder = VerbRegistryBuilder::new(); @@ -2051,10 +2475,7 @@ mod tests { let ev = &page.items[0]; assert_eq!(ev.outcome, EventOutcome::Success); - let data = ev - .data - .as_ref() - .expect("Event.data must be Some — SqlEventStore must persist AuditEvent envelope"); + let data = &ev.payload; // Layer 1: raw JSON check — obligations must be a non-empty array in // the persisted TEXT. If the SQL path dropped the field, the default @@ -2148,14 +2569,11 @@ mod tests { "ev.namespace must match the dispatch namespace" ); - // ev.data must hold the full AuditEvent envelope (ADR-033 / ADR-035 contract). - let data = ev - .data - .as_ref() - .expect("ev.data must be Some — full AuditEvent envelope required by ADR-035"); + // ev.payload must hold the full AuditEvent envelope (ADR-033 / ADR-035 contract). 
+ let data = &ev.payload; - let audit: khive_gate::AuditEvent = - serde_json::from_value(data.clone()).expect("ev.data must deserialize to AuditEvent"); + let audit: khive_gate::AuditEvent = serde_json::from_value(data.clone()) + .expect("ev.payload must deserialize to AuditEvent"); assert_eq!( audit.decision, @@ -2204,14 +2622,14 @@ mod dep_tests { const NAME: &'static str = "kg_dep"; const NOTE_KINDS: &'static [&'static str] = &["observation"]; const ENTITY_KINDS: &'static [&'static str] = &["concept"]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; } impl Pack for MemoryDepPack { const NAME: &'static str = "memory_dep"; const NOTE_KINDS: &'static [&'static str] = &["memory"]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; const REQUIRES: &'static [&'static str] = &["kg_dep"]; } @@ -2219,7 +2637,7 @@ mod dep_tests { const NAME: &'static str = "pack_a"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; const REQUIRES: &'static [&'static str] = &["pack_b"]; } @@ -2227,7 +2645,7 @@ mod dep_tests { const NAME: &'static str = "pack_b"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; const REQUIRES: &'static [&'static str] = &["pack_a"]; } @@ -2242,14 +2660,15 @@ mod dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } async fn dispatch( &self, verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!( "KgDepPack has no verbs: {verb}" @@ -2268,8 +2687,8 @@ 
mod dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } fn requires(&self) -> &'static [&'static str] { Self::REQUIRES @@ -2279,6 +2698,7 @@ mod dep_tests { verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!( "MemoryDepPack has no verbs: {verb}" @@ -2297,8 +2717,8 @@ mod dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } fn requires(&self) -> &'static [&'static str] { Self::REQUIRES @@ -2308,6 +2728,7 @@ mod dep_tests { verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!( "ADepPack has no verbs: {verb}" @@ -2326,8 +2747,8 @@ mod dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } fn requires(&self) -> &'static [&'static str] { Self::REQUIRES @@ -2337,6 +2758,7 @@ mod dep_tests { verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!( "BDepPack has no verbs: {verb}" @@ -2412,14 +2834,14 @@ mod dep_tests { const NAME: &'static str = "no_deps_a"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; } impl Pack for NoDepsB { const NAME: &'static str = "no_deps_b"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[]; + const HANDLERS: &'static [HandlerDef] = &[]; } #[async_trait] @@ -2433,14 +2855,15 @@ mod 
dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } async fn dispatch( &self, verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!("NoDepsA: {verb}"))) } @@ -2457,14 +2880,15 @@ mod dep_tests { fn entity_kinds(&self) -> &'static [&'static str] { Self::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - Self::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + Self::HANDLERS } async fn dispatch( &self, verb: &str, _: Value, _: &VerbRegistry, + _: &NamespaceToken, ) -> Result { Err(RuntimeError::InvalidInput(format!("NoDepsB: {verb}"))) } @@ -2495,9 +2919,11 @@ mod hook_tests { const NAME: &'static str = "simple"; const NOTE_KINDS: &'static [&'static str] = &[]; const ENTITY_KINDS: &'static [&'static str] = &[]; - const VERBS: &'static [VerbDef] = &[VerbDef { + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { name: "ping", description: "ping", + visibility: Visibility::Verb, + category: VerbCategory::Assertive, }]; } @@ -2512,14 +2938,15 @@ mod hook_tests { fn entity_kinds(&self) -> &'static [&'static str] { SimplePack::ENTITY_KINDS } - fn verbs(&self) -> &'static [VerbDef] { - SimplePack::VERBS + fn handlers(&self) -> &'static [HandlerDef] { + SimplePack::HANDLERS } async fn dispatch( &self, verb: &str, _params: Value, _registry: &VerbRegistry, + _token: &NamespaceToken, ) -> Result { Ok(serde_json::json!({ "verb": verb })) } @@ -2534,9 +2961,9 @@ mod hook_tests { #[async_trait] impl DispatchHook for CountingHook { - async fn on_dispatch(&self, event: &Event) { + async fn on_dispatch(&self, view: &EventView) { self.calls.fetch_add(1, Ordering::SeqCst); - *self.last_verb.lock().unwrap() = event.verb.clone(); + *self.last_verb.lock().unwrap() = view.event.verb.clone(); } } @@ -2638,8 +3065,8 @@ mod hook_tests { #[async_trait] impl 
DispatchHook for NsCapturingHook { - async fn on_dispatch(&self, event: &Event) { - *self.ns.lock().unwrap() = event.namespace.clone(); + async fn on_dispatch(&self, view: &EventView) { + *self.ns.lock().unwrap() = view.event.namespace.clone(); } } diff --git a/crates/khive-runtime/src/portability.rs b/crates/khive-runtime/src/portability.rs index b3707ff6..11f20c80 100644 --- a/crates/khive-runtime/src/portability.rs +++ b/crates/khive-runtime/src/portability.rs @@ -24,7 +24,7 @@ use khive_storage::types::{EdgeFilter, LinkId, PageRequest}; use khive_storage::{EdgeRelation, EntityFilter}; use crate::error::{RuntimeError, RuntimeResult}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; // ── Archive types ───────────────────────────────────────────────────────────── @@ -48,6 +48,9 @@ pub struct ExportedEntity { pub id: Uuid, /// Pack-owned kind string (e.g. `"concept"`, `"person"`). pub kind: String, + /// Pack-governed subtype token (e.g. `"paper"`, `"snapshot"`). + #[serde(skip_serializing_if = "Option::is_none")] + pub entity_type: Option, pub name: String, #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, @@ -96,12 +99,12 @@ impl KhiveRuntime { /// Edge collection: all entity IDs in the namespace are gathered first; /// `query_edges` is then called with those IDs as `source_ids`. This /// captures every edge whose source entity belongs to the namespace. - pub async fn export_kg(&self, namespace: Option<&str>) -> RuntimeResult { - let ns = self.ns(namespace).to_string(); + pub async fn export_kg(&self, token: &NamespaceToken) -> RuntimeResult { + let ns = token.namespace().as_str().to_owned(); // 1. Collect all entities in the namespace. let entity_page = self - .entities(Some(&ns))? + .entities(token)? 
.query_entities( &ns, EntityFilter::default(), @@ -123,6 +126,7 @@ impl KhiveRuntime { ExportedEntity { id: e.id, kind: e.kind.to_string(), + entity_type: e.entity_type, name: e.name, description: e.description, properties: e.properties, @@ -143,7 +147,7 @@ impl KhiveRuntime { ..Default::default() }; let edge_page = self - .graph(Some(&ns))? + .graph(token)? .query_edges( filter, Vec::new(), @@ -180,8 +184,8 @@ impl KhiveRuntime { } /// Export to a JSON string (convenience wrapper around `export_kg`). - pub async fn export_kg_json(&self, namespace: Option<&str>) -> RuntimeResult { - let archive = self.export_kg(namespace).await?; + pub async fn export_kg_json(&self, token: &NamespaceToken) -> RuntimeResult { + let archive = self.export_kg(token).await?; serde_json::to_string(&archive).map_err(|e| RuntimeError::InvalidInput(e.to_string())) } @@ -196,7 +200,7 @@ impl KhiveRuntime { pub async fn import_kg( &self, archive: &KgArchive, - target_namespace: Option<&str>, + token: &NamespaceToken, ) -> RuntimeResult { // Format validation. if archive.format != "khive-kg" { @@ -212,10 +216,10 @@ impl KhiveRuntime { ))); } - let ns = target_namespace.unwrap_or(&archive.namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); // Import entities. 
- let store = self.entities(Some(&ns))?; + let store = self.entities(token)?; let mut entities_imported = 0usize; for ee in &archive.entities { let created_micros = ee.created_at.timestamp_micros(); @@ -224,6 +228,7 @@ impl KhiveRuntime { id: ee.id, namespace: ns.clone(), kind: ee.kind.clone(), + entity_type: ee.entity_type.clone(), name: ee.name.clone(), description: ee.description.clone(), properties: ee.properties.clone(), @@ -231,11 +236,13 @@ impl KhiveRuntime { created_at: created_micros, updated_at: updated_micros, deleted_at: None, + merged_into: None, + merge_event_id: None, }; store.upsert_entity(entity.clone()).await?; // Index into FTS5 (and vector store if a model is configured) so that // imported entities are visible to hybrid_search immediately. - self.reindex_entity(Some(&ns), &entity).await?; + self.reindex_entity(token, &entity).await?; entities_imported += 1; } @@ -246,11 +253,15 @@ impl KhiveRuntime { // such edges would leave dangling references in the graph store. We // therefore check each endpoint with `get_entity` (namespace-scoped, // fail-closed) and skip any edge whose source or target is absent. - let graph = self.graph(Some(&ns))?; + let graph = self.graph(token)?; let mut edges_imported = 0usize; let mut edges_skipped = 0usize; for ee in &archive.edges { - let source_ok = self.get_entity(Some(&ns), ee.source).await?.is_some(); + let source_ok = match self.get_entity(token, ee.source).await { + Ok(_) => true, + Err(RuntimeError::NotFound(_) | RuntimeError::NamespaceMismatch { .. }) => false, + Err(e) => return Err(e), + }; if !source_ok { tracing::warn!( source = %ee.source, @@ -261,7 +272,11 @@ impl KhiveRuntime { edges_skipped += 1; continue; } - let target_ok = self.get_entity(Some(&ns), ee.target).await?.is_some(); + let target_ok = match self.get_entity(token, ee.target).await { + Ok(_) => true, + Err(RuntimeError::NotFound(_) | RuntimeError::NamespaceMismatch { .. 
}) => false, + Err(e) => return Err(e), + }; if !target_ok { tracing::warn!( source = %ee.source, @@ -272,14 +287,19 @@ impl KhiveRuntime { edges_skipped += 1; continue; } + let now = Utc::now(); let edge = khive_storage::types::Edge { id: LinkId::from(ee.edge_id), + namespace: ns.clone(), source_id: ee.source, target_id: ee.target, relation: ee.relation, weight: ee.weight, - created_at: Utc::now(), + created_at: now, + updated_at: now, + deleted_at: None, metadata: None, + target_backend: None, }; graph.upsert_edge(edge).await?; edges_imported += 1; @@ -296,11 +316,11 @@ impl KhiveRuntime { pub async fn import_kg_json( &self, json: &str, - target_namespace: Option<&str>, + token: &NamespaceToken, ) -> RuntimeResult { let archive: KgArchive = serde_json::from_str(json).map_err(|e| RuntimeError::InvalidInput(e.to_string()))?; - self.import_kg(&archive, target_namespace).await + self.import_kg(&archive, token).await } } @@ -309,7 +329,8 @@ impl KhiveRuntime { #[cfg(test)] mod tests { use super::*; - use crate::runtime::KhiveRuntime; + use crate::runtime::{KhiveRuntime, NamespaceToken}; + use crate::Namespace; use khive_storage::EdgeRelation; async fn make_rt() -> KhiveRuntime { @@ -320,10 +341,12 @@ mod tests { #[tokio::test] async fn roundtrip_entities_and_edges() { let src = make_rt().await; + let tok = NamespaceToken::local(); let e1 = src .create_entity( - None, + &tok, "concept", + None, "FlashAttention", Some("fast attention"), None, @@ -332,35 +355,49 @@ mod tests { .await .unwrap(); let e2 = src - .create_entity(None, "concept", "FlashAttention-2", None, None, vec![]) + .create_entity( + &tok, + "concept", + None, + "FlashAttention-2", + None, + None, + vec![], + ) .await .unwrap(); let e3 = src - .create_entity(None, "person", "Tri Dao", None, None, vec!["author".into()]) + .create_entity( + &tok, + "person", + None, + "Tri Dao", + None, + None, + vec!["author".into()], + ) .await .unwrap(); - src.link(None, e2.id, e1.id, EdgeRelation::Extends, 1.0) + 
src.link(&tok, e2.id, e1.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - src.link(None, e1.id, e3.id, EdgeRelation::IntroducedBy, 0.9) + src.link(&tok, e1.id, e3.id, EdgeRelation::IntroducedBy, 0.9, None) .await .unwrap(); - let archive = src.export_kg(None).await.unwrap(); + let archive = src.export_kg(&tok).await.unwrap(); assert_eq!(archive.entities.len(), 3); assert_eq!(archive.edges.len(), 2); assert_eq!(archive.format, "khive-kg"); assert_eq!(archive.version, "0.1"); let dst = make_rt().await; - let summary = dst.import_kg(&archive, None).await.unwrap(); + let summary = dst.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.entities_imported, 3); assert_eq!(summary.edges_imported, 2); // Spot-check: the imported entity is retrievable. - let got = dst.get_entity(None, e1.id).await.unwrap(); - assert!(got.is_some()); - let got = got.unwrap(); + let got = dst.get_entity(&tok, e1.id).await.unwrap(); assert_eq!(got.name, "FlashAttention"); assert_eq!(got.description.as_deref(), Some("fast attention")); } @@ -369,10 +406,12 @@ mod tests { #[tokio::test] async fn json_roundtrip() { let src = make_rt().await; + let tok = NamespaceToken::local(); let e1 = src .create_entity( - None, + &tok, "concept", + None, "LoRA", Some("low-rank adaptation"), Some(serde_json::json!({"year": "2021"})), @@ -381,22 +420,22 @@ mod tests { .await .unwrap(); let e2 = src - .create_entity(None, "concept", "QLoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "QLoRA", None, None, vec![]) .await .unwrap(); - src.link(None, e2.id, e1.id, EdgeRelation::VariantOf, 0.9) + src.link(&tok, e2.id, e1.id, EdgeRelation::VariantOf, 0.9, None) .await .unwrap(); - let json_str = src.export_kg_json(None).await.unwrap(); + let json_str = src.export_kg_json(&tok).await.unwrap(); assert!(json_str.contains("khive-kg")); let dst = make_rt().await; - let summary = dst.import_kg_json(&json_str, None).await.unwrap(); + let summary = dst.import_kg_json(&json_str, 
&tok).await.unwrap(); assert_eq!(summary.entities_imported, 2); assert_eq!(summary.edges_imported, 1); - let got = dst.get_entity(None, e1.id).await.unwrap().unwrap(); + let got = dst.get_entity(&tok, e1.id).await.unwrap(); assert_eq!(got.tags, vec!["fine-tuning"]); } @@ -409,29 +448,31 @@ mod tests { #[tokio::test] async fn namespace_targeting() { let src = make_rt().await; - src.create_entity(Some("a"), "concept", "Sinkhorn", None, None, vec![]) + let tok_a = NamespaceToken::for_namespace(Namespace::parse("a").unwrap()); + let tok_b = NamespaceToken::for_namespace(Namespace::parse("b").unwrap()); + src.create_entity(&tok_a, "concept", None, "Sinkhorn", None, None, vec![]) .await .unwrap(); - let archive = src.export_kg(Some("a")).await.unwrap(); + let archive = src.export_kg(&tok_a).await.unwrap(); assert_eq!(archive.namespace, "a"); // Import into a fresh runtime, targeting namespace "b". let dst = make_rt().await; - let summary = dst.import_kg(&archive, Some("b")).await.unwrap(); + let summary = dst.import_kg(&archive, &tok_b).await.unwrap(); assert_eq!(summary.entities_imported, 1); // Entity is in "b" on the destination runtime. - let in_b = dst.list_entities(Some("b"), None, 100, 0).await.unwrap(); + let in_b = dst.list_entities(&tok_b, None, None, 100, 0).await.unwrap(); assert_eq!(in_b.len(), 1); assert_eq!(in_b[0].name, "Sinkhorn"); // Namespace "a" on the source runtime is unchanged. - let in_a = src.list_entities(Some("a"), None, 100, 0).await.unwrap(); + let in_a = src.list_entities(&tok_a, None, None, 100, 0).await.unwrap(); assert_eq!(in_a.len(), 1); // Namespace "a" on the destination runtime has nothing (only "b" was written). 
- let dst_a = dst.list_entities(Some("a"), None, 100, 0).await.unwrap(); + let dst_a = dst.list_entities(&tok_a, None, None, 100, 0).await.unwrap(); assert_eq!(dst_a.len(), 0); } @@ -439,6 +480,7 @@ mod tests { #[tokio::test] async fn format_validation_rejects_wrong_format() { let rt = make_rt().await; + let tok = NamespaceToken::local(); let bad = KgArchive { format: "wrong".to_string(), version: "0.1".to_string(), @@ -447,7 +489,7 @@ mod tests { entities: vec![], edges: vec![], }; - let err = rt.import_kg(&bad, None).await.unwrap_err(); + let err = rt.import_kg(&bad, &tok).await.unwrap_err(); assert!(matches!(err, RuntimeError::InvalidInput(_))); } @@ -455,6 +497,7 @@ mod tests { #[tokio::test] async fn import_unsupported_archive_version_returns_error() { let rt = make_rt().await; + let tok = NamespaceToken::local(); let bad = KgArchive { format: "khive-kg".to_string(), version: "999.0".to_string(), @@ -463,7 +506,7 @@ mod tests { entities: vec![], edges: vec![], }; - let err = rt.import_kg(&bad, None).await.unwrap_err(); + let err = rt.import_kg(&bad, &tok).await.unwrap_err(); assert!( matches!(err, RuntimeError::InvalidInput(_)), "expected InvalidInput, got {err:?}" @@ -506,9 +549,10 @@ mod tests { let phantom_source = Uuid::parse_str("deadbeef-dead-4ead-dead-deadbeefcafe").unwrap(); let rt = make_rt().await; + let tok = NamespaceToken::local(); // Create an entity that will be the real target. 
let real = rt - .create_entity(None, "concept", "Real", None, None, vec![]) + .create_entity(&tok, "concept", None, "Real", None, None, vec![]) .await .unwrap(); @@ -521,6 +565,7 @@ mod tests { entities: vec![ExportedEntity { id: real.id, kind: "concept".to_string(), + entity_type: None, name: "Real".to_string(), description: None, properties: None, @@ -538,7 +583,7 @@ mod tests { }; let dst = make_rt().await; - let summary = dst.import_kg(&archive, None).await.unwrap(); + let summary = dst.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.entities_imported, 1); assert_eq!( summary.edges_imported, 0, @@ -559,8 +604,9 @@ mod tests { let phantom_target = Uuid::parse_str("cafebabe-cafe-4abe-cafe-cafebabecafe").unwrap(); let rt = make_rt().await; + let tok = NamespaceToken::local(); let real = rt - .create_entity(None, "concept", "Source", None, None, vec![]) + .create_entity(&tok, "concept", None, "Source", None, None, vec![]) .await .unwrap(); @@ -572,6 +618,7 @@ mod tests { entities: vec![ExportedEntity { id: real.id, kind: "concept".to_string(), + entity_type: None, name: "Source".to_string(), description: None, properties: None, @@ -589,7 +636,7 @@ mod tests { }; let dst = make_rt().await; - let summary = dst.import_kg(&archive, None).await.unwrap(); + let summary = dst.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.entities_imported, 1); assert_eq!( summary.edges_imported, 0, @@ -610,16 +657,17 @@ mod tests { let phantom = Uuid::parse_str("11111111-1111-4111-8111-111111111111").unwrap(); let src = make_rt().await; + let tok = NamespaceToken::local(); let a = src - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = src - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, "B", None, None, vec![]) .await .unwrap(); let c = src - .create_entity(None, "concept", "C", None, None, vec![]) + 
.create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); @@ -633,6 +681,7 @@ mod tests { ExportedEntity { id: a.id, kind: "concept".to_string(), + entity_type: None, name: "A".to_string(), description: None, properties: None, @@ -643,6 +692,7 @@ mod tests { ExportedEntity { id: b.id, kind: "concept".to_string(), + entity_type: None, name: "B".to_string(), description: None, properties: None, @@ -653,6 +703,7 @@ mod tests { ExportedEntity { id: c.id, kind: "concept".to_string(), + entity_type: None, name: "C".to_string(), description: None, properties: None, @@ -690,7 +741,7 @@ mod tests { }; let dst = make_rt().await; - let summary = dst.import_kg(&archive, None).await.unwrap(); + let summary = dst.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.entities_imported, 3); assert_eq!( summary.edges_imported, 2, @@ -706,21 +757,22 @@ mod tests { #[tokio::test] async fn import_all_valid_edges_reports_zero_skipped() { let src = make_rt().await; + let tok = NamespaceToken::local(); let e1 = src - .create_entity(None, "concept", "E1", None, None, vec![]) + .create_entity(&tok, "concept", None, "E1", None, None, vec![]) .await .unwrap(); let e2 = src - .create_entity(None, "concept", "E2", None, None, vec![]) + .create_entity(&tok, "concept", None, "E2", None, None, vec![]) .await .unwrap(); - src.link(None, e1.id, e2.id, EdgeRelation::VariantOf, 0.7) + src.link(&tok, e1.id, e2.id, EdgeRelation::VariantOf, 0.7, None) .await .unwrap(); - let archive = src.export_kg(None).await.unwrap(); + let archive = src.export_kg(&tok).await.unwrap(); let dst = make_rt().await; - let summary = dst.import_kg(&archive, None).await.unwrap(); + let summary = dst.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.edges_imported, 1); assert_eq!( summary.edges_skipped, 0, @@ -734,21 +786,22 @@ mod tests { #[tokio::test] async fn export_kg_preserves_edge_id() { let rt = make_rt().await; + let tok = NamespaceToken::local(); let a = rt - 
.create_entity(None, "concept", "Alpha", None, None, vec![]) + .create_entity(&tok, "concept", None, "Alpha", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "Beta", None, None, vec![]) + .create_entity(&tok, "concept", None, "Beta", None, None, vec![]) .await .unwrap(); let stored_edge = rt - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let stored_id: Uuid = stored_edge.id.into(); - let archive = rt.export_kg(None).await.unwrap(); + let archive = rt.export_kg(&tok).await.unwrap(); assert_eq!(archive.edges.len(), 1); assert_eq!( archive.edges[0].edge_id, stored_id, @@ -760,26 +813,27 @@ mod tests { #[tokio::test] async fn import_kg_persists_edge_id() { let src = make_rt().await; + let tok = NamespaceToken::local(); let a = src - .create_entity(None, "concept", "Alpha", None, None, vec![]) + .create_entity(&tok, "concept", None, "Alpha", None, None, vec![]) .await .unwrap(); let b = src - .create_entity(None, "concept", "Beta", None, None, vec![]) + .create_entity(&tok, "concept", None, "Beta", None, None, vec![]) .await .unwrap(); let stored_edge = src - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let original_id: Uuid = stored_edge.id.into(); - let archive = src.export_kg(None).await.unwrap(); + let archive = src.export_kg(&tok).await.unwrap(); let dst = make_rt().await; - dst.import_kg(&archive, None).await.unwrap(); + dst.import_kg(&archive, &tok).await.unwrap(); // The imported edge must carry the same UUID as the original. - let imported_edge = dst.get_edge(None, original_id).await.unwrap(); + let imported_edge = dst.get_edge(&tok, original_id).await.unwrap(); assert!( imported_edge.is_some(), "imported edge must be retrievable by the original edge_id" @@ -837,14 +891,15 @@ mod tests { // Import into a fresh runtime and verify the generated ID is persisted. 
let rt = make_rt().await; - let summary = rt.import_kg(&archive, None).await.unwrap(); + let tok = NamespaceToken::local(); + let summary = rt.import_kg(&archive, &tok).await.unwrap(); assert_eq!(summary.entities_imported, 2); assert_eq!( summary.edges_imported, 1, "edge must be imported when both endpoints exist" ); - let stored = rt.get_edge(None, generated_id).await.unwrap(); + let stored = rt.get_edge(&tok, generated_id).await.unwrap(); assert!( stored.is_some(), "imported edge must be retrievable by the generated edge_id" @@ -856,7 +911,7 @@ mod tests { ); // Re-export and verify the same UUID appears in the archive. - let re_archive = rt.export_kg(None).await.unwrap(); + let re_archive = rt.export_kg(&tok).await.unwrap(); assert_eq!(re_archive.edges.len(), 1); assert_eq!( re_archive.edges[0].edge_id, generated_id, @@ -872,22 +927,23 @@ mod tests { async fn export_import_export_edge_id_equality() { // Build a graph on the source runtime. let src = make_rt().await; + let tok = NamespaceToken::local(); let a = src - .create_entity(None, "concept", "NodeA", None, None, vec![]) + .create_entity(&tok, "concept", None, "NodeA", None, None, vec![]) .await .unwrap(); let b = src - .create_entity(None, "concept", "NodeB", None, None, vec![]) + .create_entity(&tok, "concept", None, "NodeB", None, None, vec![]) .await .unwrap(); let stored = src - .link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + .link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); let original_edge_id: Uuid = stored.id.into(); // First export. - let archive1 = src.export_kg(None).await.unwrap(); + let archive1 = src.export_kg(&tok).await.unwrap(); assert_eq!(archive1.edges.len(), 1); assert_eq!( archive1.edges[0].edge_id, original_edge_id, @@ -896,10 +952,10 @@ mod tests { // Import into a fresh runtime. let dst = make_rt().await; - dst.import_kg(&archive1, None).await.unwrap(); + dst.import_kg(&archive1, &tok).await.unwrap(); // Second export from the destination runtime. 
- let archive2 = dst.export_kg(None).await.unwrap(); + let archive2 = dst.export_kg(&tok).await.unwrap(); assert_eq!(archive2.edges.len(), 1); // Find the edge by (source, target, relation) and assert the ID is unchanged. diff --git a/crates/khive-runtime/src/presentation.rs b/crates/khive-runtime/src/presentation.rs new file mode 100644 index 00000000..3c08e9be --- /dev/null +++ b/crates/khive-runtime/src/presentation.rs @@ -0,0 +1,456 @@ +//! Verb response presentation modes and transformation (ADR-045). +//! +//! Handlers always return a canonical (verbose) shape. This module transforms +//! that shape into a caller-appropriate form AFTER dispatch, BEFORE wire +//! serialization. +//! +//! ## Transformation rules +//! +//! | Field type | Verbose form | Agent form | +//! | ------------------- | ----------------------------- | --------------------- | +//! | UUID (36-char) | `"a1b2c3d4-e5f6-..."` | `"a1b2c3d4"` (8 chars)| +//! | ISO-8601 timestamp | `"2026-05-23T16:18:15.234Z"` | `"2026-05-23T16:18"` (< 24h: `"3m ago"`) | +//! | Empty string `""` | included | dropped | +//! | Empty array `[]` | included | dropped | +//! | Empty object `{}` | included | dropped | +//! | `null` (non-lifecycle) | included | dropped | +//! | `null` (lifecycle `*_at`, relationship markers) | included | preserved | +//! | Score fields | `0.1234567890` | `0.123` (3 sig figs) | +//! +//! `Verbose` mode passes through canonically. `Human` mode is delegated to the +//! CLI layer and is not transformed here (returned as-is from this crate). +//! +//! **Chain invariant:** `present_response` MUST NOT be called on intermediate +//! chain results — only on the final response envelope after all `$prev` +//! substitutions complete. + +use std::collections::HashSet; + +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; + +/// How the response envelope is presented to the caller (ADR-045). 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum PresentationMode {
+    /// Token-efficient. Default for MCP callers (agents).
+    ///
+    /// Short UUIDs (8-char), compact timestamps (minute granularity or
+    /// relative), empty fields dropped, lifecycle nulls preserved, score
+    /// fields rounded to 3 significant figures.
+    #[default]
+    Agent,
+    /// Full canonical shape. Default for `kkernel call` and CI/scripted callers.
+    ///
+    /// No transformation — handler output passes through as-is.
+    Verbose,
+    /// Pretty-printed terminal output. Default for `khive` CLI.
+    ///
+    /// Formatting is delegated to the CLI layer; this crate returns the value
+    /// unchanged (same as Verbose at the runtime level).
+    Human,
+}
+
+/// Field names whose `null` value is PRESERVED in Agent mode.
+///
+/// These fields carry lifecycle meaning (absent ≠ null) and must not be dropped.
+/// Matching is by exact field name at any nesting depth.
+/// ADR-045 §3 Agent mode — "Drop semantics — lifecycle null preservation".
+const LIFECYCLE_NULL_PRESERVE: &[&str] = &[
+    // Lifecycle timestamps.
+    "completed_at",
+    "deleted_at",
+    "due_at",
+    "read_at",
+    "started_at",
+    "superseded_at",
+    "applied_at",
+    "withdrawn_at",
+    "reviewed_at",
+    // Relationship markers.
+    "parent_id",
+    "superseded_by",
+    "replaced_by",
+];
+
+/// Score field names that are reduced to 3 significant figures in Agent mode.
+///
+/// Only numeric values stored under exactly these names are affected.
+/// ADR-045 §3 Agent mode — "Score truncation".
+const SCORE_FIELDS: &[&str] = &[
+    "score",
+    "salience",
+    "decay_factor",
+    "rrf_score",
+    "similarity",
+    "cross_encoder_score",
+    "graph_proximity_score",
+];
+
+/// Length of a canonical hyphenated UUID string, any version
+/// (8-4-4-4-12 = 32 hex + 4 dashes = 36).
+const UUID_CANONICAL_LEN: usize = 36;
+
+/// Transform a successful verb result value according to the given
+/// [`PresentationMode`].
+///
+/// - `Verbose` / `Human`: returns `value` unchanged.
+/// - `Agent`: applies UUID shortening, timestamp compaction, empty-field
+///   dropping, lifecycle-null preservation, and score truncation.
+///
+/// `now_unix_seconds` is sampled once per response and passed through so all
+/// relative datetime renderings within a response use the same instant.
+pub fn present(value: Value, mode: PresentationMode, now_unix_seconds: i64) -> Value {
+    match mode {
+        PresentationMode::Verbose | PresentationMode::Human => value,
+        PresentationMode::Agent => {
+            let lifecycle_preserve: HashSet<&str> =
+                LIFECYCLE_NULL_PRESERVE.iter().copied().collect();
+            let score_fields: HashSet<&str> = SCORE_FIELDS.iter().copied().collect();
+            transform_agent(value, &lifecycle_preserve, &score_fields, now_unix_seconds)
+        }
+    }
+}
+
+/// Apply the Agent-mode transform to an arbitrary JSON value.
+///
+/// Objects are rebuilt field by field via [`transform_field_agent`]; arrays
+/// are mapped element-wise through this function. Any other value is returned
+/// unchanged, so scalars that sit directly inside an array (for example a list
+/// of UUID strings) are not shortened, compacted, or dropped — only named
+/// object fields are.
+fn transform_agent(
+    value: Value,
+    lifecycle: &HashSet<&str>,
+    scores: &HashSet<&str>,
+    now: i64,
+) -> Value {
+    match value {
+        Value::Object(map) => {
+            let mut out = Map::new();
+            for (k, v) in map {
+                // `None` means "drop this field from the output".
+                if let Some(tv) = transform_field_agent(&k, v, lifecycle, scores, now) {
+                    out.insert(k, tv);
+                }
+            }
+            Value::Object(out)
+        }
+        Value::Array(arr) => {
+            let items: Vec<Value> = arr
+                .into_iter()
+                .map(|v| transform_agent(v, lifecycle, scores, now))
+                .collect();
+            Value::Array(items)
+        }
+        other => other,
+    }
+}
+
+/// Transform a single named field value under Agent mode.
+///
+/// Returns `None` if the field should be dropped.
+///
+/// Arm order matters: the emptiness guards run on the incoming value, before
+/// any recursion, so a container that only becomes empty after its own
+/// children are dropped is kept (as `{}` / `[]`).
+fn transform_field_agent(
+    key: &str,
+    value: Value,
+    lifecycle: &HashSet<&str>,
+    scores: &HashSet<&str>,
+    now: i64,
+) -> Option<Value> {
+    match &value {
+        // Preserve lifecycle nulls; drop other nulls.
+        Value::Null => {
+            if lifecycle.contains(key) {
+                Some(value)
+            } else {
+                None
+            }
+        }
+        // Drop empty strings, arrays, objects.
+        Value::String(s) if s.is_empty() => None,
+        Value::Array(a) if a.is_empty() => None,
+        Value::Object(o) if o.is_empty() => None,
+        // Truncate score fields.
+        Value::Number(_) if scores.contains(key) => {
+            if let Some(f) = value.as_f64() {
+                Some(truncate_to_3_sig_figs(f))
+            } else {
+                Some(value)
+            }
+        }
+        // Shorten UUIDs in string fields. The slice is safe: a canonical UUID
+        // is pure ASCII, so byte offset 8 is always a char boundary.
+        Value::String(s) if is_canonical_uuid(s) => Some(Value::String(s[..8].to_string())),
+        // Compact ISO-8601 timestamps in string fields.
+        Value::String(s) if looks_like_iso8601(s) => {
+            Some(Value::String(compact_timestamp(s, now)))
+        }
+        // Recurse into objects and arrays.
+        Value::Object(_) | Value::Array(_) => {
+            Some(transform_agent(value, lifecycle, scores, now))
+        }
+        // Everything else passes through.
+        _ => Some(value),
+    }
+}
+
+/// Returns `true` if `s` looks like a canonical UUID (36 chars, standard form).
+///
+/// Only the shape is checked (hex digits with dashes at offsets 8, 13, 18 and
+/// 23); version and variant bits are not inspected.
+fn is_canonical_uuid(s: &str) -> bool {
+    if s.len() != UUID_CANONICAL_LEN {
+        return false;
+    }
+    // Pattern: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+    s.bytes().enumerate().all(|(i, c)| match i {
+        8 | 13 | 18 | 23 => c == b'-',
+        _ => c.is_ascii_hexdigit(),
+    })
+}
+
+/// Returns `true` if `s` looks like an ISO-8601 datetime string.
+///
+/// Heuristic: starts with `YYYY-MM-DDTHH:MM` (16 bytes, digits and separators
+/// in the expected positions). The minute digits are checked as well because
+/// the compacted form keeps exactly these first 16 characters.
+fn looks_like_iso8601(s: &str) -> bool {
+    if s.len() < 16 {
+        return false;
+    }
+    s.bytes().take(16).enumerate().all(|(i, c)| match i {
+        4 | 7 => c == b'-',
+        10 => c == b'T',
+        13 => c == b':',
+        _ => c.is_ascii_digit(),
+    })
+}
+
+/// Compact an ISO-8601 timestamp for Agent mode.
+///
+/// - Within the last 24 hours: relative form (e.g. `"3m ago"`, `"2h ago"`).
+/// - Older: minute-granularity absolute form `"YYYY-MM-DDTHH:MM"`.
+///
+/// Timestamps later than `now`, and strings that cannot be parsed, also take
+/// the absolute form.
+fn compact_timestamp(s: &str, now: i64) -> String {
+    // Parse Unix seconds from the timestamp if possible; fall back to truncation.
+    if let Some(unix) = parse_iso8601_unix(s) {
+        let diff = now - unix;
+        if (0..86400).contains(&diff) {
+            return relative_time(diff);
+        }
+    }
+    // Minute granularity: take the first 16 chars. Iterating by `char` keeps
+    // this panic-free even if the input contains multi-byte characters.
+    s.chars().take(16).collect()
+}
+
+/// Attempt to parse an ISO-8601 datetime string to Unix seconds.
+///
+/// Only handles the subset produced by khive handlers:
+/// `YYYY-MM-DDTHH:MM:SS[.frac][Z]`. Returns `None` for anything we can't parse
+/// (graceful degradation — the timestamp is still compacted by truncation).
+///
+/// Anything after the seconds field (fraction, `Z`, or a numeric offset) is
+/// ignored: the wall-clock fields are always interpreted as UTC. Field ranges
+/// (month 1-12, hour 0-23, ...) are not validated.
+fn parse_iso8601_unix(s: &str) -> Option<i64> {
+    // Minimum parseable: "YYYY-MM-DDTHH:MM:SS"
+    if s.len() < 19 {
+        return None;
+    }
+    let b = s.as_bytes();
+    let year: i64 = parse_digits(&b[0..4])?;
+    let month: i64 = parse_digits(&b[5..7])?;
+    let day: i64 = parse_digits(&b[8..10])?;
+    let hour: i64 = parse_digits(&b[11..13])?;
+    let minute: i64 = parse_digits(&b[14..16])?;
+    let second: i64 = parse_digits(&b[17..19])?;
+
+    // Simple Gregorian → Unix seconds (no timezone offsets other than 'Z').
+    // Close enough for relative-time comparisons; not for calendar correctness.
+    let days_since_epoch = days_from_civil(year, month, day);
+    Some(days_since_epoch * 86400 + hour * 3600 + minute * 60 + second)
+}
+
+/// Parse a fixed-width run of ASCII decimal digits.
+///
+/// Returns `None` unless every byte is `0`-`9`. `str::parse::<i64>` on its own
+/// would also accept a leading `+` or `-`, which is never valid inside a
+/// timestamp field.
+fn parse_digits(b: &[u8]) -> Option<i64> {
+    if b.is_empty() || !b.iter().all(u8::is_ascii_digit) {
+        return None;
+    }
+    let s = std::str::from_utf8(b).ok()?;
+    s.parse().ok()
+}
+
+/// Gregorian date → days since 1970-01-01. Algorithm: Howard Hinnant's civil.
+fn days_from_civil(y: i64, m: i64, d: i64) -> i64 {
+    // Shift the year so it starts on 1 March; January and February then count
+    // as months 10 and 11 of the previous year. Inputs are not range-checked.
+    let y = if m <= 2 { y - 1 } else { y };
+    let era = y.div_euclid(400);
+    let yoe = y - era * 400;
+    let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
+    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
+    // 146097 days per 400-year era; 719468 days from 0000-03-01 to 1970-01-01.
+    era * 146097 + doe - 719468
+}
+
+/// Format a duration in seconds as a relative time string (e.g. `"3m ago"`).
+///
+/// Uses whole seconds below one minute, whole minutes below one hour, and
+/// whole hours otherwise (integer division, rounding down). The only caller
+/// passes values in `0..86400`.
+fn relative_time(diff_secs: i64) -> String {
+    if diff_secs < 60 {
+        format!("{diff_secs}s ago")
+    } else if diff_secs < 3600 {
+        format!("{}m ago", diff_secs / 60)
+    } else {
+        format!("{}h ago", diff_secs / 3600)
+    }
+}
+
+/// Round a float to 3 significant figures, returning a `serde_json::Value`.
+///
+/// Despite the name, the value is rounded to nearest, not truncated
+/// (`0.9999` becomes `1.0`). Zero and non-finite inputs are returned as-is.
+fn truncate_to_3_sig_figs(f: f64) -> Value {
+    if f == 0.0 || !f.is_finite() {
+        return Value::from(f);
+    }
+    let magnitude = f.abs().log10().floor() as i32;
+    let factor = 10f64.powi(2 - magnitude);
+    let rounded = (f * factor).round() / factor;
+    // For |f| below roughly 1e-306 the scale factor overflows to infinity and
+    // the quotient becomes NaN, which would serialize as `null`. Keep the
+    // original value rather than losing the score.
+    if !rounded.is_finite() {
+        return Value::from(f);
+    }
+    serde_json::Number::from_f64(rounded)
+        .map(Value::Number)
+        .unwrap_or_else(|| Value::from(rounded))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    /// A fixed "now" for deterministic tests: 1_748_016_480 is 2025-05-23T16:08:00Z.
+ const NOW: i64 = 1_748_016_480; + + fn agent(v: Value) -> Value { + present(v, PresentationMode::Agent, NOW) + } + + #[test] + fn verbose_passthrough() { + let v = json!({"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "title": "X"}); + let out = present(v.clone(), PresentationMode::Verbose, NOW); + assert_eq!(out, v); + } + + #[test] + fn agent_shortens_uuid() { + let v = json!({"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"}); + let out = agent(v); + assert_eq!(out["id"], json!("a1b2c3d4")); + } + + #[test] + fn agent_drops_empty_string() { + let v = json!({"title": "ok", "description": ""}); + let out = agent(v); + assert!(out.get("description").is_none()); + assert_eq!(out["title"], json!("ok")); + } + + #[test] + fn agent_drops_empty_array() { + let v = json!({"tags": [], "title": "ok"}); + let out = agent(v); + assert!(out.get("tags").is_none()); + } + + #[test] + fn agent_drops_empty_object() { + let v = json!({"properties": {}, "title": "ok"}); + let out = agent(v); + assert!(out.get("properties").is_none()); + } + + #[test] + fn agent_drops_non_lifecycle_null() { + let v = json!({"result": null, "title": "ok"}); + let out = agent(v); + assert!(out.get("result").is_none()); + } + + #[test] + fn agent_preserves_lifecycle_null() { + let v = json!({"completed_at": null, "due_at": null, "title": "ok"}); + let out = agent(v); + assert_eq!(out["completed_at"], json!(null)); + assert_eq!(out["due_at"], json!(null)); + } + + #[test] + fn agent_preserves_relationship_null() { + let v = json!({"parent_id": null, "superseded_by": null}); + let out = agent(v); + assert_eq!(out["parent_id"], json!(null)); + assert_eq!(out["superseded_by"], json!(null)); + } + + #[test] + fn agent_truncates_score_field() { + let v = json!({"score": 0.12345678}); + let out = agent(v); + let s = out["score"].as_f64().unwrap(); + assert!((s - 0.123).abs() < 1e-9, "expected ~0.123, got {s}"); + } + + #[test] + fn agent_compacts_old_timestamp_to_minutes() { + // Far past — not within 24h of 
NOW. Should be truncated to 16 chars. + let v = json!({"created_at": "2020-01-01T10:30:45.123456Z"}); + let out = agent(v); + assert_eq!(out["created_at"], json!("2020-01-01T10:30")); + } + + #[test] + fn agent_compacts_recent_timestamp_to_relative() { + // 3 minutes before NOW: diff = 180s. + let ts_unix = NOW - 180; + // Format as ISO-8601. + let ts = unix_to_iso8601(ts_unix); + let v = json!({"updated_at": ts}); + let out = agent(v); + assert_eq!(out["updated_at"], json!("3m ago")); + } + + #[test] + fn agent_recurses_into_nested_objects() { + let v = json!({ + "items": [ + { + "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "tags": [], + "score": 0.9999 + } + ] + }); + let out = agent(v); + let item = &out["items"][0]; + assert_eq!(item["id"], json!("a1b2c3d4")); + assert!(item.get("tags").is_none()); + let s = item["score"].as_f64().unwrap(); + assert!((s - 1.0).abs() < 1e-9); + } + + #[test] + fn is_canonical_uuid_recognizes_valid() { + assert!(is_canonical_uuid("a1b2c3d4-e5f6-7890-abcd-ef1234567890")); + assert!(!is_canonical_uuid("a1b2c3d4")); + assert!(!is_canonical_uuid("not-a-uuid-at-all-here---------")); + } + + #[test] + fn looks_like_iso8601_recognizes_valid() { + assert!(looks_like_iso8601("2026-05-23T16:18:15.234567Z")); + assert!(!looks_like_iso8601("not a timestamp")); + assert!(!looks_like_iso8601("2026-05-23")); + } + + /// Format Unix seconds as ISO-8601 for test construction. 
+ fn unix_to_iso8601(unix: i64) -> String { + let (y, mo, d, h, mi, s) = unix_to_civil(unix); + format!("{y:04}-{mo:02}-{d:02}T{h:02}:{mi:02}:{s:02}Z") + } + + fn unix_to_civil(unix: i64) -> (i64, i64, i64, i64, i64, i64) { + let s = unix % 86400; + let days = unix / 86400; + let h = s / 3600; + let m = (s % 3600) / 60; + let sec = s % 60; + // Howard Hinnant civil_from_days + let z = days + 719468; + let era = z.div_euclid(146097); + let doe = z - era * 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let mo = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if mo <= 2 { y + 1 } else { y }; + (y, mo, d, h, m, sec) + } +} diff --git a/crates/khive-runtime/src/registry.rs b/crates/khive-runtime/src/registry.rs index fe700da1..9e84e236 100644 --- a/crates/khive-runtime/src/registry.rs +++ b/crates/khive-runtime/src/registry.rs @@ -54,7 +54,11 @@ impl RegisteredObjective { candidates: &'a [T], context: &ObjectiveContext, ) -> ObjectiveResult> { - self.objective.select(candidates, context) + self.objective + .select(candidates, context) + .into_iter() + .next() + .ok_or_else(|| ObjectiveError::NoMatch("No candidate selected".into())) } } @@ -179,7 +183,11 @@ impl ObjectiveRegistry { context: &ObjectiveContext, ) -> ObjectiveResult> { let objective = self.get(name)?; - objective.select(candidates, context) + objective + .select(candidates, context) + .into_iter() + .next() + .ok_or_else(|| ObjectiveError::NoMatch("No candidate selected".into())) } pub fn select_default<'a>( @@ -188,7 +196,11 @@ impl ObjectiveRegistry { context: &ObjectiveContext, ) -> ObjectiveResult> { let objective = self.get_default()?; - objective.select(candidates, context) + objective + .select(candidates, context) + .into_iter() + .next() + .ok_or_else(|| ObjectiveError::NoMatch("No candidate selected".into())) } } diff 
--git a/crates/khive-runtime/src/retrieval.rs b/crates/khive-runtime/src/retrieval.rs index cb379840..78585c2e 100644 --- a/crates/khive-runtime/src/retrieval.rs +++ b/crates/khive-runtime/src/retrieval.rs @@ -7,7 +7,7 @@ use std::collections::{HashMap, HashSet}; use uuid::Uuid; use crate::error::{RuntimeError, RuntimeResult}; -use crate::runtime::KhiveRuntime; +use crate::runtime::{KhiveRuntime, NamespaceToken}; use khive_score::{rrf_score, DeterministicScore}; use khive_storage::types::{ PageRequest, TextFilter, TextQueryMode, TextSearchHit, TextSearchRequest, VectorSearchHit, @@ -80,7 +80,7 @@ impl KhiveRuntime { /// runtime embeds internally. pub async fn vector_search( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_embedding: Option>, query_text: Option<&str>, top_k: u32, @@ -103,14 +103,16 @@ impl KhiveRuntime { } }; - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); Ok(self - .vectors(namespace)? + .vectors(token)? .search(VectorSearchRequest { - query_embedding: embedding, + query_vectors: vec![embedding], top_k, namespace: Some(ns), kind, + filter: None, + backend_hints: None, }) .await?) } @@ -129,19 +131,21 @@ impl KhiveRuntime { /// The fused candidate set is kept untruncated until after the alive + kind filter so /// that right-kind hits ranked below `limit` in the raw fusion still surface when /// higher-ranked candidates are wrong-kind or soft-deleted. + #[allow(clippy::too_many_arguments)] pub async fn hybrid_search( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_text: &str, query_vector: Option>, limit: u32, entity_kind: Option<&str>, + entity_type: Option<&str>, ) -> RuntimeResult> { let candidates = limit.saturating_mul(CANDIDATE_MULTIPLIER).max(limit); - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); let text_hits = self - .text(namespace)? + .text(token)? 
.search(TextSearchRequest { query: query_text.to_string(), mode: TextQueryMode::Plain, @@ -156,7 +160,7 @@ impl KhiveRuntime { let vector_hits = if query_vector.is_some() || self.config().embedding_model.is_some() { self.vector_search( - namespace, + token, query_vector, Some(query_text), candidates, @@ -177,12 +181,13 @@ impl KhiveRuntime { if !fused.is_empty() { let candidate_ids: Vec = fused.iter().map(|h| h.entity_id).collect(); let alive_page = self - .entities(namespace)? + .entities(token)? .query_entities( - self.ns(namespace), + token.namespace().as_str(), EntityFilter { ids: candidate_ids, kinds: entity_kind.map(|k| vec![k.to_string()]).unwrap_or_default(), + entity_types: entity_type.map(|t| vec![t.to_string()]).unwrap_or_default(), ..EntityFilter::default() }, PageRequest { @@ -225,18 +230,20 @@ impl KhiveRuntime { /// thousands of vectors) this is well within latency budgets. pub async fn knn( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_vector: Vec, top_k: u32, ) -> RuntimeResult> { - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); Ok(self - .vectors(namespace)? + .vectors(token)? .search(VectorSearchRequest { - query_embedding: query_vector, + query_vectors: vec![query_vector], top_k, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + filter: None, + backend_hints: None, }) .await?) } @@ -248,20 +255,22 @@ impl KhiveRuntime { /// Returns hits sorted by similarity (highest first), truncated to `top_k`. pub async fn rerank( &self, - namespace: Option<&str>, + token: &NamespaceToken, query_vector: &[f32], candidate_ids: &[Uuid], top_k: u32, ) -> RuntimeResult> { let candidate_set: HashSet = candidate_ids.iter().copied().collect(); - let ns = self.ns(namespace).to_string(); + let ns = token.namespace().as_str().to_owned(); let all_hits = self - .vectors(namespace)? + .vectors(token)? 
.search(VectorSearchRequest { - query_embedding: query_vector.to_vec(), + query_vectors: vec![query_vector.to_vec()], top_k: candidate_ids.len() as u32, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + filter: None, + backend_hints: None, }) .await?; let mut hits: Vec = all_hits @@ -337,8 +346,9 @@ fn rrf_fuse( #[cfg(test)] mod tests { use super::*; - use crate::runtime::{KhiveRuntime, RuntimeConfig}; + use crate::runtime::{KhiveRuntime, NamespaceToken, RuntimeConfig}; use khive_storage::types::{TextSearchHit, VectorSearchHit}; + use khive_types::namespace::Namespace; use lattice_embed::EmbeddingModel; fn text_hit(id: Uuid, rank: u32, title: &str) -> TextSearchHit { @@ -455,7 +465,7 @@ mod tests { fn embed_batch_count_matches_input() { let config = RuntimeConfig { db_path: None, - default_namespace: "test".to_string(), + default_namespace: Namespace::parse("test").unwrap(), embedding_model: Some(EmbeddingModel::AllMiniLmL6V2), packs: vec!["kg".to_string()], ..RuntimeConfig::default() @@ -472,9 +482,10 @@ mod tests { #[test] fn vector_search_requires_embedding_or_text() { let rt = KhiveRuntime::memory().unwrap(); + let tok = NamespaceToken::local(); let result = tokio::runtime::Runtime::new() .unwrap() - .block_on(rt.vector_search(None, None, None, 10, Some(SubstrateKind::Entity))); + .block_on(rt.vector_search(&tok, None, None, 10, Some(SubstrateKind::Entity))); match result { Err(crate::RuntimeError::InvalidInput(msg)) => { assert!(msg.contains("query_embedding or query_text"), "msg: {msg}"); @@ -486,10 +497,11 @@ mod tests { #[test] fn vector_search_text_without_model_returns_unconfigured() { let rt = KhiveRuntime::memory().unwrap(); + let tok = NamespaceToken::local(); let result = tokio::runtime::Runtime::new() .unwrap() .block_on(rt.vector_search( - None, + &tok, None, Some("attention"), 10, @@ -507,7 +519,7 @@ mod tests { let model = EmbeddingModel::AllMiniLmL6V2; let config = RuntimeConfig { db_path: None, - default_namespace: "test".to_string(), + 
default_namespace: Namespace::parse("test").unwrap(), embedding_model: Some(model), packs: vec!["kg".to_string()], ..RuntimeConfig::default() @@ -526,9 +538,11 @@ mod tests { #[tokio::test] async fn hybrid_search_entity_hit_has_title() { let rt = KhiveRuntime::memory().unwrap(); + let tok = NamespaceToken::local(); rt.create_entity( - None, + &tok, "concept", + None, "FlashAttention", Some("IO-aware exact attention using tiling"), None, @@ -538,7 +552,7 @@ mod tests { .unwrap(); let hits = rt - .hybrid_search(None, "FlashAttention", None, 10, None) + .hybrid_search(&tok, "FlashAttention", None, 10, None, None) .await .unwrap(); diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 047db1aa..1babe5b8 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -3,9 +3,9 @@ use std::sync::{Arc, RwLock}; use khive_db::StorageBackend; -use khive_gate::{AllowAllGate, GateRef}; +use khive_gate::{ActorRef, AllowAllGate, GateRef}; use khive_storage::{EntityStore, EventStore, GraphStore, NoteStore, SqlAccess}; -use khive_types::EdgeEndpointRule; +use khive_types::{EdgeEndpointRule, Namespace}; use lattice_embed::{ CachedEmbeddingService, EmbeddingModel, EmbeddingService, NativeEmbeddingService, }; @@ -13,15 +13,128 @@ use tokio::sync::OnceCell; use crate::error::RuntimeResult; +// ---- BackendId ---- + +/// Identifies a named backend in a multi-backend deployment (ADR-009, ADR-028). +/// +/// The `main` backend is the default single-backend name. Multi-backend deployments +/// assign each `[[backends]]` entry a distinct `BackendId`. The +/// [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator) in `kkernel` +/// uses `BackendId` for node-to-backend resolution and cross-backend edge routing. +/// +/// A single-backend `KhiveRuntime` always has `BackendId("main")` by default. 
+/// The boot path in `kkernel` or `khive-mcp` sets the id via `RuntimeConfig::backend_id`
+/// when constructing per-pack runtimes.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct BackendId(pub String);
+
+impl BackendId {
+    /// The default single-backend name.
+    pub const MAIN: &'static str = "main";
+
+    /// Construct from a string name.
+    ///
+    /// Accepts anything convertible into a `String` (`&str`, `String`, ...).
+    /// The name is stored verbatim; no validation is performed.
+    pub fn new(name: impl Into<String>) -> Self {
+        Self(name.into())
+    }
+
+    /// The default `main` backend id.
+    pub fn main() -> Self {
+        Self(Self::MAIN.to_string())
+    }
+
+    /// Return the backend name as a `&str`.
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for BackendId {
+    /// Writes the bare backend name, with no quoting or decoration.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+// ---- Sealed token ----
+
+mod private {
+    /// Marker type nameable only inside this crate; used as a private field
+    /// of `NamespaceToken`.
+    #[derive(Clone, Debug)]
+    pub(crate) struct Sealed;
+}
+
+/// Authorization proof that a caller is permitted to access a specific namespace.
+///
+/// Created by [`VerbRegistry::dispatch`] after the gate approves the request.
+/// The sealed inner field prevents external code from constructing a token
+/// without going through the authorization path.
+#[derive(Clone, Debug)]
+pub struct NamespaceToken {
+    namespace: Namespace,
+    actor: ActorRef,
+    _sealed: private::Sealed,
+}
+
+impl NamespaceToken {
+    /// Mint an authorized token. Only callable from within `khive-runtime`.
+    pub(crate) fn mint_authorized(namespace: Namespace, actor: ActorRef) -> Self {
+        Self {
+            namespace,
+            actor,
+            _sealed: private::Sealed,
+        }
+    }
+
+    /// Convenience constructor for the local namespace with an anonymous actor.
+    ///
+    /// Only callable from within `khive-runtime`. External callers must use
+    /// [`KhiveRuntime::authorize`] to mint tokens.
+    // Used only in #[cfg(test)] blocks within this crate's src/ files.
+ #[allow(dead_code)] + pub(crate) fn local() -> Self { + Self::mint_authorized(Namespace::local(), ActorRef::anonymous()) + } + + /// Convenience constructor for a specific namespace with an anonymous actor. + /// + /// Only callable from within `khive-runtime`. External callers must use + /// [`KhiveRuntime::authorize`] to mint tokens. + // Used only in #[cfg(test)] blocks within this crate's src/ files. + #[allow(dead_code)] + pub(crate) fn for_namespace(ns: Namespace) -> Self { + Self::mint_authorized(ns, ActorRef::anonymous()) + } + + pub fn namespace(&self) -> &Namespace { + &self.namespace + } + + pub fn actor(&self) -> &ActorRef { + &self.actor + } +} + +// ---- RuntimeConfig ---- + /// Runtime configuration. +/// +/// Per ADR-028, the `db_path` and `embedding_model` fields are deprecated in favour of +/// constructing the backend externally and calling [`KhiveRuntime::from_backend`]. +/// They remain for backward compatibility with tests and single-binary deployments. #[derive(Clone, Debug)] pub struct RuntimeConfig { /// Path to the SQLite database file. `None` = in-memory (tests). + /// + /// Deprecated: use [`KhiveRuntime::from_backend`] instead. The boot path + /// constructs backends from `khive.toml` (`AppConfig`) and passes them to + /// `from_backend`. Direct `db_path` usage persists only in tests. pub db_path: Option, /// Namespace used when no explicit namespace is provided. - pub default_namespace: String, + pub default_namespace: Namespace, /// Local embedding model. `None` disables embedding and hybrid vector search; /// `hybrid_search` then falls back to text-only. + /// + /// Deprecated: per ADR-028/ADR-031, embedding engines move to a per-pack + /// `EmbedderRegistry`. This field persists for backward compatibility until + /// the embedder registry is fully plumbed. pub embedding_model: Option, /// Authorization gate consulted before each verb dispatch (ADR-029). /// Default: `AllowAllGate` (permissive). 
For production policy enforcement, @@ -33,6 +146,11 @@ pub struct RuntimeConfig { /// by the transport, not silently ignored. /// Default: `["kg"]`. pub packs: Vec, + /// Identifies this runtime's backend in a multi-backend deployment (ADR-009, ADR-028). + /// + /// Set by the boot path when constructing per-pack runtimes from `khive.toml`. + /// Single-backend deployments use the default `BackendId::MAIN`. + pub backend_id: BackendId, } /// Parse a comma- or whitespace-separated pack list from a single string. @@ -62,14 +180,17 @@ impl Default for RuntimeConfig { .unwrap_or_else(|| vec!["kg".to_string()]); Self { db_path, - default_namespace: "local".to_string(), + default_namespace: Namespace::local(), embedding_model, gate: Arc::new(AllowAllGate), packs, + backend_id: BackendId::main(), } } } +// ---- KhiveRuntime ---- + /// Composable runtime handle used by the MCP server. /// /// Wraps a `StorageBackend` and provides namespace-scoped accessor methods @@ -88,6 +209,10 @@ pub struct KhiveRuntime { impl KhiveRuntime { /// Create a new runtime with the given config. + /// + /// The config's `db_path` is used to open or create the SQLite backend. + /// For the preferred boot path in multi-backend deployments, use + /// [`from_backend`](Self::from_backend) instead. pub fn new(config: RuntimeConfig) -> RuntimeResult { let backend = match &config.db_path { Some(path) => { @@ -106,17 +231,44 @@ impl KhiveRuntime { }) } + /// Construct a runtime from an already-opened backend (ADR-028 boot path). + /// + /// This is the preferred constructor for multi-backend deployments. The caller + /// (boot path in `kkernel` or `khive-mcp`) opens each backend from `khive.toml`, + /// then constructs a `KhiveRuntime` per pack using this method. + /// + /// The returned runtime has `db_path = None` and `embedding_model = None`; all + /// storage access is through the provided `backend`. 
Set `backend_id` and + /// `default_namespace` via the config builder pattern if non-defaults are needed. + pub fn from_backend(backend: Arc, config: RuntimeConfig) -> Self { + Self { + backend, + config, + embedder: Arc::new(OnceCell::new()), + edge_rules: Arc::new(RwLock::new(Vec::new())), + } + } + /// Create an in-memory runtime (for tests and ephemeral use). pub fn memory() -> RuntimeResult { Self::new(RuntimeConfig { db_path: None, - default_namespace: "local".to_string(), + default_namespace: Namespace::local(), embedding_model: None, gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], + backend_id: BackendId::main(), }) } + /// Return the [`BackendId`] for this runtime's backend. + /// + /// Used by the [`SubstrateCoordinator`](kkernel::coordinator::SubstrateCoordinator) + /// to identify which backend owns a given node, and to detect cross-backend merges. + pub fn backend_id(&self) -> &BackendId { + &self.config.backend_id + } + /// Return a reference to the runtime config. pub fn config(&self) -> &RuntimeConfig { &self.config @@ -127,31 +279,34 @@ impl KhiveRuntime { &self.backend } - /// Resolve namespace: use provided value or fall back to `default_namespace`. - pub fn ns<'a>(&'a self, namespace: Option<&'a str>) -> &'a str { - namespace.unwrap_or(&self.config.default_namespace) - } + // ---- Store accessors (token-scoped) ---- - // ---- Store accessors ---- - - /// Get an EntityStore scoped to the given namespace (or default). - pub fn entities(&self, namespace: Option<&str>) -> RuntimeResult> { - Ok(self.backend.entities_for_namespace(self.ns(namespace))?) + /// Get an EntityStore scoped to the token's namespace. + pub fn entities(&self, token: &NamespaceToken) -> RuntimeResult> { + Ok(self + .backend + .entities_for_namespace(token.namespace().as_str())?) } - /// Get a GraphStore scoped to the given namespace (or default). 
- pub fn graph(&self, namespace: Option<&str>) -> RuntimeResult> { - Ok(self.backend.graph_for_namespace(self.ns(namespace))?) + /// Get a GraphStore scoped to the token's namespace. + pub fn graph(&self, token: &NamespaceToken) -> RuntimeResult> { + Ok(self + .backend + .graph_for_namespace(token.namespace().as_str())?) } - /// Get a NoteStore scoped to the given namespace (or default). - pub fn notes(&self, namespace: Option<&str>) -> RuntimeResult> { - Ok(self.backend.notes_for_namespace(self.ns(namespace))?) + /// Get a NoteStore scoped to the token's namespace. + pub fn notes(&self, token: &NamespaceToken) -> RuntimeResult> { + Ok(self + .backend + .notes_for_namespace(token.namespace().as_str())?) } - /// Get an EventStore scoped to the given namespace (or default). - pub fn events(&self, namespace: Option<&str>) -> RuntimeResult> { - Ok(self.backend.events_for_namespace(self.ns(namespace))?) + /// Get an EventStore scoped to the token's namespace. + pub fn events(&self, token: &NamespaceToken) -> RuntimeResult> { + Ok(self + .backend + .events_for_namespace(token.namespace().as_str())?) } /// Get the raw SQL access capability (for ad-hoc queries). @@ -159,12 +314,12 @@ impl KhiveRuntime { self.backend.sql() } - /// Get a VectorStore for the configured embedding model, scoped to the namespace. + /// Get a VectorStore for the configured embedding model, scoped to the token's namespace. /// /// Returns `Unconfigured("embedding_model")` if no model is set. pub fn vectors( &self, - namespace: Option<&str>, + token: &NamespaceToken, ) -> RuntimeResult> { let model = self .config @@ -173,28 +328,38 @@ impl KhiveRuntime { Ok(self.backend.vectors_for_namespace( &vec_model_key(model), model.dimensions(), - self.ns(namespace), + token.namespace().as_str(), )?) } - /// Get a TextSearch index for the namespace's entity corpus. + /// Get a TextSearch index for the token's namespace entity corpus. 
pub fn text( &self, - namespace: Option<&str>, + token: &NamespaceToken, ) -> RuntimeResult> { - let key = format!("entities_{}", sanitize_key(self.ns(namespace))); + let key = format!("entities_{}", sanitize_key(token.namespace().as_str())); Ok(self.backend.text(&key)?) } - /// Get a TextSearch index for the namespace's notes corpus. + /// Get a TextSearch index for the token's namespace notes corpus. pub fn text_for_notes( &self, - namespace: Option<&str>, + token: &NamespaceToken, ) -> RuntimeResult> { - let key = format!("notes_{}", sanitize_key(self.ns(namespace))); + let key = format!("notes_{}", sanitize_key(token.namespace().as_str())); Ok(self.backend.text(&key)?) } + /// Mint an authorization token for the given namespace. + /// + /// This is the official OSS API for obtaining a [`NamespaceToken`]. In + /// local / single-user mode (the default) this always succeeds — there is + /// no multi-tenant gate to consult. Multi-tenant deployments replace the + /// gate with a policy-backed impl; this method would then enforce it. + pub fn authorize(&self, ns: Namespace) -> NamespaceToken { + NamespaceToken::mint_authorized(ns, ActorRef::anonymous()) + } + /// Install the pack-aggregated edge endpoint rules (ADR-031). 
/// /// Called by the transport layer after the `VerbRegistry` is built so @@ -268,36 +433,54 @@ mod tests { let path = dir.path().join("test.db"); let config = RuntimeConfig { db_path: Some(path.clone()), - default_namespace: "test".to_string(), + default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], + backend_id: BackendId::main(), }; let rt = KhiveRuntime::new(config).expect("file runtime should create"); assert!(path.exists()); - assert_eq!(rt.config().default_namespace, "test"); + assert_eq!(rt.config().default_namespace.as_str(), "test"); + } + + #[test] + fn from_backend_uses_provided_backend() { + let backend = Arc::new(StorageBackend::memory().expect("memory backend")); + let config = RuntimeConfig { + db_path: None, + default_namespace: Namespace::local(), + embedding_model: None, + gate: Arc::new(AllowAllGate), + packs: vec!["kg".to_string()], + backend_id: BackendId::new("lore"), + }; + let rt = KhiveRuntime::from_backend(backend, config); + assert_eq!(rt.backend_id().as_str(), "lore"); + assert!(rt.config().db_path.is_none()); } #[test] - fn ns_defaults_to_config_namespace() { + fn backend_id_defaults_to_main() { let rt = KhiveRuntime::memory().unwrap(); - assert_eq!(rt.ns(None), "local"); - assert_eq!(rt.ns(Some("custom")), "custom"); + assert_eq!(rt.backend_id().as_str(), BackendId::MAIN); } #[test] fn store_accessors_return_ok() { let rt = KhiveRuntime::memory().unwrap(); - assert!(rt.entities(None).is_ok()); - assert!(rt.graph(None).is_ok()); - assert!(rt.notes(None).is_ok()); - assert!(rt.events(None).is_ok()); + let tok = NamespaceToken::local(); + assert!(rt.entities(&tok).is_ok()); + assert!(rt.graph(&tok).is_ok()); + assert!(rt.notes(&tok).is_ok()); + assert!(rt.events(&tok).is_ok()); } #[test] fn vectors_returns_unconfigured_without_model() { let rt = KhiveRuntime::memory().unwrap(); - match rt.vectors(None) { + let tok = NamespaceToken::local(); + match 
rt.vectors(&tok) { Err(crate::RuntimeError::Unconfigured(s)) => assert_eq!(s, "embedding_model"), Err(other) => panic!("expected Unconfigured, got {:?}", other), Ok(_) => panic!("expected Err, got Ok"), @@ -323,11 +506,7 @@ mod tests { #[test] fn default_config_uses_allow_all_gate() { let cfg = RuntimeConfig::default(); - // Default gate is permissive — checked via type identity (no leak of - // concrete gate kind otherwise). - assert_eq!(cfg.default_namespace, "local"); - // `gate` is non-`Debug`-comparable; smoke-check by running a request - // through it via the registry layer would belong in pack.rs tests. + assert_eq!(cfg.default_namespace.as_str(), "local"); let _: GateRef = cfg.gate.clone(); } @@ -369,7 +548,6 @@ mod tests { #[test] fn default_config_uses_minilm_when_env_unset() { - // Snapshot + clear the env var so this test is deterministic. let prior = std::env::var("KHIVE_EMBEDDING_MODEL").ok(); // SAFETY: tests are serial by default for env mutation here; if other tests // mutate this var, mark them with the same scope. diff --git a/crates/khive-runtime/src/validation.rs b/crates/khive-runtime/src/validation.rs new file mode 100644 index 00000000..e9b02564 --- /dev/null +++ b/crates/khive-runtime/src/validation.rs @@ -0,0 +1,281 @@ +//! Validation pipeline types for pack-contributed KG rules (ADR-034). +//! +//! This module defines the trait surface and supporting types used by packs +//! to contribute domain-specific validation rules. Rules are compiled into the +//! pack binary and collected at boot time via the `Pack::VALIDATION_RULES` IDs +//! plus runtime rule implementations registered through `PackRuntime`. +//! +//! # Two rule shapes +//! +//! ADR-034 §9a defines two complementary rule shapes: +//! +//! - **`CorpusCheck`**: whole-corpus rules that receive all entities and edges +//! together. Right for rules that need cross-entity joins (referential +//! integrity, remote resolution, min-edge-density). +//! +//! 
- **`StreamingRule`**: per-record rules that evaluate one record at a time. +//! Cheaper for rules that check individual entities or edges without joins +//! (required properties, naming conventions, no-self-loops). +//! +//! Both shapes return `Vec<Violation>` per invocation. The validator aggregates +//! them into a `ValidationReport`. + +use std::collections::BTreeMap; + +// ── Rule identity ───────────────────────────────────────────────────────────── + +/// Stable rule identifier, namespaced by pack: `"<pack>/<rule>"`. +/// +/// Built-in rules use no namespace prefix (e.g. `"min-edge-density"`). +/// Pack-contributed rules MUST be namespaced (e.g. `"biology/required-taxa-rank"`). +pub type RuleId = &'static str; + +/// Severity of a validation finding (ADR-034 §1). +/// +/// - `Error`: causes `kkernel kg validate` to exit with code 1. +/// - `Warning`: reported but does not affect exit code (unless `--strict`). +/// - `Info`: informational; no exit-code effect. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Severity { + Info, + Warning, + Error, +} + +// ── Corpus snapshot ─────────────────────────────────────────────────────────── + +/// Opaque snapshot of the KG corpus passed to `CorpusCheck::check`. +/// +/// v1 exposes the bare field set needed for the built-in rules. Pack authors +/// that need richer access should open an ADR to extend this surface — do NOT +/// reach through this struct to the storage layer. +#[non_exhaustive] +pub struct GraphSnapshot { + /// Total entity count in the snapshot. + pub entity_count: usize, + /// Total edge count in the snapshot. + pub edge_count: usize, +} + +/// Context passed to all rule implementations. +/// +/// Carries configuration overrides from `.khive/kg/rules.yaml` merged with +/// pack defaults. Rules read per-rule config from `config[rule_id]`. +#[non_exhaustive] +pub struct ValidationContext<'a> { + /// The corpus snapshot for whole-corpus rules. 
+ pub snapshot: &'a GraphSnapshot, + /// Per-rule config overrides, keyed by rule ID. + pub config: &'a BTreeMap<&'static str, serde_json::Value>, +} + +// ── Violation ───────────────────────────────────────────────────────────────── + +/// A single rule violation produced by a rule implementation (ADR-034 §5). +#[non_exhaustive] +pub struct Violation { + /// The rule that produced this violation. + pub rule_id: &'static str, + /// Violation severity (may differ from rule-level severity for pack rules + /// that emit mixed-severity output within one rule). + pub severity: Severity, + /// Human-readable explanation of the violation. + pub message: String, + /// Whether the violation can be fixed by `kkernel kg validate --fix`. + pub fixable: bool, + /// Optional entity UUID (short-form) that the violation targets. + pub entity_id: Option, + /// Optional edge UUID (short-form) that the violation targets. + pub edge_id: Option, +} + +impl Violation { + /// Construct a non-fixable violation without a specific entity/edge target. + pub fn new(rule_id: &'static str, severity: Severity, message: impl Into) -> Self { + Self { + rule_id, + severity, + message: message.into(), + fixable: false, + entity_id: None, + edge_id: None, + } + } + + /// Attach an entity identifier to an existing violation. + pub fn with_entity(mut self, id: impl Into) -> Self { + self.entity_id = Some(id.into()); + self + } +} + +// ── Rule function type ──────────────────────────────────────────────────────── + +/// Whole-corpus check function type (ADR-034 §2, §9a). +/// +/// Receives the corpus snapshot and config context; returns all violations +/// produced by the rule in one call. +pub type RuleFn = fn(&ValidationContext<'_>) -> Vec; + +/// Optional auto-fix function type (ADR-034 §7). +/// +/// Receives the context and violations emitted by the corresponding `RuleFn`. +/// Returns a `GraphPatch` (opaque in v1 — see below) that the validator applies +/// before writing NDJSON. 
Returning `None` leaves the graph unchanged. +/// +/// `GraphPatch` is a placeholder type in v1; the git-native write path +/// (ADR-020) is out of scope for this cluster. +pub type FixFn = fn(&ValidationContext<'_>, &[Violation]) -> Option; + +/// Opaque graph patch produced by a fix function (ADR-034 §7). +/// +/// v1 carries no fields — the auto-fix machinery is stubbed. The type exists +/// so pack authors can write `fix: Some(my_fix as FixFn)` without a +/// compile-time change when the v1 fix path is wired up. +#[non_exhaustive] +pub struct GraphPatch; + +// ── ValidationRule ──────────────────────────────────────────────────────────── + +/// A pack-contributed validation rule (ADR-034 §9). +/// +/// Pack authors declare an array of these in their `Pack` implementation +/// (through the runtime `PackRuntime::validation_rules()` method). Rule IDs +/// must follow the `<pack>/<rule>` namespace convention. +/// +/// # Example +/// +/// ```ignore +/// use khive_runtime::validation::{ValidationRule, Severity}; +/// +/// fn check_taxa(ctx: &ValidationContext<'_>) -> Vec<Violation> { +/// // ... domain-specific check ... +/// vec![] +/// } +/// +/// pub const RULES: &[ValidationRule] = &[ +/// ValidationRule { +/// id: "biology/required-taxa-rank", +/// severity: Severity::Warning, +/// description: "All species entities must carry a taxa_rank property", +/// check: check_taxa, +/// fix: None, +/// }, +/// ]; +/// ``` +pub struct ValidationRule { + /// Stable rule identifier in `<pack>/<rule>` format. + pub id: RuleId, + /// Default severity; can be overridden in `.khive/kg/rules.yaml`. + pub severity: Severity, + /// Human-readable description shown in `kkernel kg validate` output. + pub description: &'static str, + /// Whole-corpus check function. + pub check: RuleFn, + /// Optional auto-fix function (ADR-034 §7). `None` for unfixable rules. 
+ pub fix: Option, +} + +// ── Aggregated report ───────────────────────────────────────────────────────── + +/// Aggregated result of running the full rule pipeline (ADR-034 §5). +#[derive(Default)] +pub struct ValidationReport { + /// Violations grouped by rule ID, sorted canonically per ADR-034 §9a. + pub violations_by_rule: BTreeMap>, +} + +impl ValidationReport { + /// Add violations for a given rule to the report. + pub fn add(&mut self, rule_id: &str, violations: Vec) { + self.violations_by_rule + .entry(rule_id.to_string()) + .or_default() + .extend(violations); + } + + /// Total number of violations at `Severity::Error` across all rules. + pub fn error_count(&self) -> usize { + self.violations_by_rule + .values() + .flat_map(|vs| vs.iter()) + .filter(|v| v.severity == Severity::Error) + .count() + } + + /// Total number of violations at `Severity::Warning` across all rules. + pub fn warning_count(&self) -> usize { + self.violations_by_rule + .values() + .flat_map(|vs| vs.iter()) + .filter(|v| v.severity == Severity::Warning) + .count() + } + + /// `true` when no errors were found (the standard exit-0 condition). 
+ pub fn passed(&self) -> bool { + self.error_count() == 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn violation_builder() { + let v = Violation::new("test/rule", Severity::Warning, "something is off") + .with_entity("abc123"); + assert_eq!(v.rule_id, "test/rule"); + assert_eq!(v.severity, Severity::Warning); + assert!(!v.fixable); + assert_eq!(v.entity_id.as_deref(), Some("abc123")); + } + + #[test] + fn report_error_count() { + let mut report = ValidationReport::default(); + report.add( + "test/rule", + vec![ + Violation::new("test/rule", Severity::Error, "bad"), + Violation::new("test/rule", Severity::Warning, "meh"), + ], + ); + assert_eq!(report.error_count(), 1); + assert_eq!(report.warning_count(), 1); + assert!(!report.passed()); + } + + #[test] + fn report_passed_when_no_errors() { + let mut report = ValidationReport::default(); + report.add( + "test/rule", + vec![Violation::new("test/rule", Severity::Warning, "meh")], + ); + assert!(report.passed()); + } + + #[test] + fn graph_patch_is_constructible() { + // Ensure the placeholder type can be named and constructed. + let _patch = GraphPatch; + } + + #[test] + fn validation_rule_fields() { + fn dummy_check(_ctx: &ValidationContext<'_>) -> Vec { + vec![] + } + let rule = ValidationRule { + id: "bio/taxa", + severity: Severity::Warning, + description: "taxa must exist", + check: dummy_check, + fix: None, + }; + assert_eq!(rule.id, "bio/taxa"); + assert!(rule.fix.is_none()); + } +} diff --git a/crates/khive-runtime/tests/integration.rs b/crates/khive-runtime/tests/integration.rs index 5877df94..7775386b 100644 --- a/crates/khive-runtime/tests/integration.rs +++ b/crates/khive-runtime/tests/integration.rs @@ -3,9 +3,10 @@ //! Tests cover entity CRUD, graph operations, note memory, GQL query, //! and namespace isolation using an in-memory runtime. 
-use khive_runtime::{KhiveRuntime, RuntimeConfig}; +use khive_runtime::{KhiveRuntime, Namespace, RuntimeConfig}; use khive_storage::types::{Direction, TraversalOptions, TraversalRequest}; -use khive_storage::EdgeRelation; +use khive_storage::{EdgeRelation, Event}; +use khive_types::{EventKind, SubstrateKind}; use uuid::Uuid; fn rt() -> KhiveRuntime { @@ -19,11 +20,13 @@ fn rt() -> KhiveRuntime { #[tokio::test] async fn entity_create_and_get_roundtrip() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); let entity = rt .create_entity( - None, + &tok, "concept", + None, "LoRA", Some("Low-Rank Adaptation"), None, @@ -32,9 +35,7 @@ async fn entity_create_and_get_roundtrip() { .await .unwrap(); - let fetched = rt.get_entity(None, entity.id).await.unwrap(); - assert!(fetched.is_some()); - let fetched = fetched.unwrap(); + let fetched = rt.get_entity(&tok, entity.id).await.unwrap(); assert_eq!(fetched.id, entity.id); assert_eq!(fetched.name, "LoRA"); assert_eq!(fetched.kind, "concept"); @@ -44,12 +45,14 @@ async fn entity_create_and_get_roundtrip() { #[tokio::test] async fn entity_create_with_properties_and_tags() { let rt = rt(); + let research_tok = rt.authorize(Namespace::parse("research").unwrap()); let props = serde_json::json!({"domain": "fine-tuning", "type": "technique"}); let entity = rt .create_entity( - Some("research"), + &research_tok, "concept", + None, "QLoRA", Some("Quantized LoRA"), Some(props.clone()), @@ -58,11 +61,7 @@ async fn entity_create_with_properties_and_tags() { .await .unwrap(); - let fetched = rt - .get_entity(Some("research"), entity.id) - .await - .unwrap() - .unwrap(); + let fetched = rt.get_entity(&research_tok, entity.id).await.unwrap(); assert_eq!(fetched.properties, Some(props)); assert_eq!(fetched.tags, vec!["fine-tuning", "quantization"]); } @@ -70,16 +69,18 @@ async fn entity_create_with_properties_and_tags() { #[tokio::test] async fn entity_list_by_kind() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); - 
rt.create_entity(None, "concept", "FlashAttention", None, None, vec![]) + rt.create_entity(&tok, "concept", None, "FlashAttention", None, None, vec![]) .await .unwrap(); - rt.create_entity(None, "concept", "GQA", None, None, vec![]) + rt.create_entity(&tok, "concept", None, "GQA", None, None, vec![]) .await .unwrap(); rt.create_entity( - None, + &tok, "document", + None, "Attention Is All You Need", None, None, @@ -89,7 +90,7 @@ async fn entity_list_by_kind() { .unwrap(); let concepts = rt - .list_entities(None, Some("concept"), 50, 0) + .list_entities(&tok, Some("concept"), None, 50, 0) .await .unwrap(); assert_eq!(concepts.len(), 2); @@ -97,50 +98,53 @@ async fn entity_list_by_kind() { assert!(concepts.iter().any(|e| e.name == "GQA")); let docs = rt - .list_entities(None, Some("document"), 50, 0) + .list_entities(&tok, Some("document"), None, 50, 0) .await .unwrap(); assert_eq!(docs.len(), 1); assert_eq!(docs[0].name, "Attention Is All You Need"); - let all = rt.list_entities(None, None, 50, 0).await.unwrap(); + let all = rt.list_entities(&tok, None, None, 50, 0).await.unwrap(); assert_eq!(all.len(), 3); } #[tokio::test] async fn entity_delete_soft() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); let entity = rt - .create_entity(None, "concept", "to-delete", None, None, vec![]) + .create_entity(&tok, "concept", None, "to-delete", None, None, vec![]) .await .unwrap(); - let deleted = rt.delete_entity(None, entity.id, false).await.unwrap(); + let deleted = rt.delete_entity(&tok, entity.id, false).await.unwrap(); assert!(deleted); - let fetched = rt.get_entity(None, entity.id).await.unwrap(); - assert!(fetched.is_none()); + // Soft-deleted entity is not found via get_entity + let fetched = rt.get_entity(&tok, entity.id).await; + assert!(fetched.is_err()); } #[tokio::test] async fn entity_count_by_kind() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); for _ in 0..3 { - rt.create_entity(None, "concept", "concept-X", None, None, vec![]) + 
rt.create_entity(&tok, "concept", None, "concept-X", None, None, vec![]) .await .unwrap(); } for _ in 0..2 { - rt.create_entity(None, "document", "doc-Y", None, None, vec![]) + rt.create_entity(&tok, "document", None, "doc-Y", None, None, vec![]) .await .unwrap(); } - let concept_count = rt.count_entities(None, Some("concept")).await.unwrap(); - let doc_count = rt.count_entities(None, Some("document")).await.unwrap(); - let total = rt.count_entities(None, None).await.unwrap(); + let concept_count = rt.count_entities(&tok, Some("concept")).await.unwrap(); + let doc_count = rt.count_entities(&tok, Some("document")).await.unwrap(); + let total = rt.count_entities(&tok, None).await.unwrap(); assert_eq!(concept_count, 3); assert_eq!(doc_count, 2); @@ -154,22 +158,23 @@ async fn entity_count_by_kind() { #[tokio::test] async fn link_and_neighbors() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); let lora = rt - .create_entity(None, "concept", "LoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "LoRA", None, None, vec![]) .await .unwrap(); let qlora = rt - .create_entity(None, "concept", "QLoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "QLoRA", None, None, vec![]) .await .unwrap(); - rt.link(None, qlora.id, lora.id, EdgeRelation::VariantOf, 1.0) + rt.link(&tok, qlora.id, lora.id, EdgeRelation::VariantOf, 1.0, None) .await .unwrap(); let hits = rt - .neighbors(None, qlora.id, Direction::Out, None, None) + .neighbors(&tok, qlora.id, Direction::Out, None, None) .await .unwrap(); assert_eq!(hits.len(), 1); @@ -180,24 +185,25 @@ async fn link_and_neighbors() { #[tokio::test] async fn traverse_multi_hop() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); let a = rt - .create_entity(None, "concept", "A", None, None, vec![]) + .create_entity(&tok, "concept", None, "A", None, None, vec![]) .await .unwrap(); let b = rt - .create_entity(None, "concept", "B", None, None, vec![]) + .create_entity(&tok, "concept", None, 
"B", None, None, vec![]) .await .unwrap(); let c = rt - .create_entity(None, "concept", "C", None, None, vec![]) + .create_entity(&tok, "concept", None, "C", None, None, vec![]) .await .unwrap(); - rt.link(None, a.id, b.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, a.id, b.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); - rt.link(None, b.id, c.id, EdgeRelation::Extends, 1.0) + rt.link(&tok, b.id, c.id, EdgeRelation::Extends, 1.0, None) .await .unwrap(); @@ -212,7 +218,7 @@ async fn traverse_multi_hop() { include_roots: false, }; - let paths = rt.traverse(None, request).await.unwrap(); + let paths = rt.traverse(&tok, request).await.unwrap(); assert!(!paths.is_empty()); // All traversed nodes should be reachable from a @@ -231,35 +237,36 @@ async fn traverse_multi_hop() { #[tokio::test] async fn create_note_and_list_notes() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); rt.create_note( - None, + &tok, "observation", None, "LoRA is a fine-tuning technique", - 0.9, + Some(0.9), None, vec![], ) .await .unwrap(); rt.create_note( - None, + &tok, "observation", None, "QLoRA uses quantization", - 0.8, + Some(0.8), None, vec![], ) .await .unwrap(); rt.create_note( - None, + &tok, "question", None, "Review LoRA paper", - 0.7, + Some(0.7), None, vec![], ) @@ -267,22 +274,23 @@ async fn create_note_and_list_notes() { .unwrap(); let observations = rt - .list_notes(None, Some("observation"), 50, 0) + .list_notes(&tok, Some("observation"), 50, 0) .await .unwrap(); assert_eq!(observations.len(), 2); - let questions = rt.list_notes(None, Some("question"), 50, 0).await.unwrap(); + let questions = rt.list_notes(&tok, Some("question"), 50, 0).await.unwrap(); assert_eq!(questions.len(), 1); assert_eq!(questions[0].content, "Review LoRA paper"); - let all = rt.list_notes(None, None, 50, 0).await.unwrap(); + let all = rt.list_notes(&tok, None, 50, 0).await.unwrap(); assert_eq!(all.len(), 3); } #[tokio::test] async fn create_all_note_kinds() { let rt = rt(); + let 
tok = rt.authorize(Namespace::local()); for kind in [ "observation", "insight", @@ -290,11 +298,11 @@ async fn create_all_note_kinds() { "decision", "reference", ] { - rt.create_note(None, kind, None, "content", 0.5, None, vec![]) + rt.create_note(&tok, kind, None, "content", Some(0.5), None, vec![]) .await .unwrap(); } - let all = rt.list_notes(None, None, 50, 0).await.unwrap(); + let all = rt.list_notes(&tok, None, 50, 0).await.unwrap(); assert_eq!(all.len(), 5); } @@ -305,24 +313,25 @@ async fn create_all_note_kinds() { #[tokio::test] async fn query_via_gql() { let rt = rt(); + let tok = rt.authorize(Namespace::local()); // Set up entities and edges let lora = rt - .create_entity(None, "concept", "LoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "LoRA", None, None, vec![]) .await .unwrap(); let qlora = rt - .create_entity(None, "concept", "QLoRA", None, None, vec![]) + .create_entity(&tok, "concept", None, "QLoRA", None, None, vec![]) .await .unwrap(); - rt.link(None, qlora.id, lora.id, EdgeRelation::VariantOf, 1.0) + rt.link(&tok, qlora.id, lora.id, EdgeRelation::VariantOf, 1.0, None) .await .unwrap(); // Run a GQL traversal query let rows = rt .query( - None, + &tok, "MATCH (a:concept)-[e:variant_of]->(b:concept) RETURN a, e, b LIMIT 10", ) .await @@ -341,19 +350,27 @@ async fn query_via_gql() { #[tokio::test] async fn namespace_isolation() { let rt = rt(); + let ns_a_tok = rt.authorize(Namespace::parse("ns-a").unwrap()); + let ns_b_tok = rt.authorize(Namespace::parse("ns-b").unwrap()); - rt.create_entity(Some("ns_a"), "concept", "EntityA", None, None, vec![]) + rt.create_entity(&ns_a_tok, "concept", None, "EntityA", None, None, vec![]) .await .unwrap(); - rt.create_entity(Some("ns_b"), "concept", "EntityB", None, None, vec![]) + rt.create_entity(&ns_b_tok, "concept", None, "EntityB", None, None, vec![]) .await .unwrap(); - let a_entities = rt.list_entities(Some("ns_a"), None, 50, 0).await.unwrap(); + let a_entities = rt + 
.list_entities(&ns_a_tok, None, None, 50, 0) + .await + .unwrap(); assert_eq!(a_entities.len(), 1); assert_eq!(a_entities[0].name, "EntityA"); - let b_entities = rt.list_entities(Some("ns_b"), None, 50, 0).await.unwrap(); + let b_entities = rt + .list_entities(&ns_b_tok, None, None, 50, 0) + .await + .unwrap(); assert_eq!(b_entities.len(), 1); assert_eq!(b_entities[0].name, "EntityB"); } @@ -365,10 +382,12 @@ async fn namespace_isolation() { #[tokio::test] async fn create_entity_indexes_into_text_search() { let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let tok = rt.authorize(Namespace::local()); let entity = rt .create_entity( - None, + &tok, "concept", + None, "FlashAttention", Some("efficient attention mechanism"), None, @@ -377,7 +396,7 @@ async fn create_entity_indexes_into_text_search() { .await .unwrap(); let hits = rt - .hybrid_search(None, "FlashAttention", None, 10, None) + .hybrid_search(&tok, "FlashAttention", None, 10, None, None) .await .unwrap(); assert!( @@ -390,8 +409,17 @@ async fn create_entity_indexes_into_text_search() { async fn create_entity_no_embedding_model_does_not_propagate_vector_error() { // KhiveRuntime::memory() has embedding_model: None — vector indexing is silently skipped. 
let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let tok = rt.authorize(Namespace::local()); let result = rt - .create_entity(None, "concept", "SilentVectorSkip", None, None, vec![]) + .create_entity( + &tok, + "concept", + None, + "SilentVectorSkip", + None, + None, + vec![], + ) .await; assert!( result.is_ok(), @@ -407,10 +435,12 @@ async fn create_entity_no_embedding_model_does_not_propagate_vector_error() { #[tokio::test] async fn hybrid_search_excludes_soft_deleted_entities() { let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let tok = rt.authorize(Namespace::local()); let entity = rt .create_entity( - None, + &tok, "concept", + None, "SoftDeleteMe", Some("entity that will be soft-deleted"), None, @@ -421,7 +451,7 @@ async fn hybrid_search_excludes_soft_deleted_entities() { // Confirm the entity is visible before deletion. let hits_before = rt - .hybrid_search(None, "SoftDeleteMe", None, 10, None) + .hybrid_search(&tok, "SoftDeleteMe", None, 10, None, None) .await .unwrap(); assert!( @@ -429,10 +459,10 @@ async fn hybrid_search_excludes_soft_deleted_entities() { "entity should appear in hybrid_search before soft-delete" ); - rt.delete_entity(None, entity.id, false).await.unwrap(); // soft delete + rt.delete_entity(&tok, entity.id, false).await.unwrap(); // soft delete let hits_after = rt - .hybrid_search(None, "SoftDeleteMe", None, 10, None) + .hybrid_search(&tok, "SoftDeleteMe", None, 10, None, None) .await .unwrap(); assert!( @@ -445,10 +475,12 @@ async fn hybrid_search_excludes_soft_deleted_entities() { #[tokio::test] async fn hybrid_search_excludes_hard_deleted_entities() { let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let tok = rt.authorize(Namespace::local()); let entity = rt .create_entity( - None, + &tok, "concept", + None, "HardDeleteMe", Some("entity that will be hard-deleted"), None, @@ -458,7 +490,7 @@ async fn hybrid_search_excludes_hard_deleted_entities() { .unwrap(); let hits_before = rt - 
.hybrid_search(None, "HardDeleteMe", None, 10, None) + .hybrid_search(&tok, "HardDeleteMe", None, 10, None, None) .await .unwrap(); assert!( @@ -466,12 +498,12 @@ async fn hybrid_search_excludes_hard_deleted_entities() { "entity should appear in hybrid_search before hard-delete" ); - rt.delete_entity(None, entity.id, true).await.unwrap(); // hard delete + rt.delete_entity(&tok, entity.id, true).await.unwrap(); // hard delete // Hard-deleted rows are gone from the entity store; the FTS/vector indexes may still // have stale entries. The soft-delete filter sees no alive entity and drops the hit. let hits_after = rt - .hybrid_search(None, "HardDeleteMe", None, 10, None) + .hybrid_search(&tok, "HardDeleteMe", None, 10, None, None) .await .unwrap(); assert!( @@ -486,32 +518,33 @@ async fn list_notes_excludes_soft_deleted() { use khive_storage::types::DeleteMode; let rt = KhiveRuntime::memory().expect("in-memory runtime"); + let tok = rt.authorize(Namespace::local()); let note = rt .create_note( - None, + &tok, "observation", None, "soft-delete-test", - 0.9, + Some(0.9), None, vec![], ) .await .unwrap(); - let notes_before = rt.list_notes(None, None, 50, 0).await.unwrap(); + let notes_before = rt.list_notes(&tok, None, 50, 0).await.unwrap(); assert!( notes_before.iter().any(|n| n.id == note.id), "note should appear before soft-delete" ); - rt.notes(None) + rt.notes(&tok) .unwrap() .delete_note(note.id, DeleteMode::Soft) .await .unwrap(); - let notes_after = rt.list_notes(None, None, 50, 0).await.unwrap(); + let notes_after = rt.list_notes(&tok, None, 50, 0).await.unwrap(); assert!( !notes_after.iter().any(|n| n.id == note.id), "soft-deleted note must not appear in list" @@ -530,13 +563,15 @@ async fn file_backed_runtime_persists() { { let config = RuntimeConfig { db_path: Some(path.clone()), - default_namespace: "local".to_string(), + default_namespace: Namespace::local(), embedding_model: None, gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: 
vec!["kg".to_string()], + backend_id: khive_runtime::BackendId::main(), }; let rt = KhiveRuntime::new(config).unwrap(); - rt.create_entity(None, "concept", "Persistent", None, None, vec![]) + let tok = rt.authorize(Namespace::local()); + rt.create_entity(&tok, "concept", None, "Persistent", None, None, vec![]) .await .unwrap(); } @@ -545,14 +580,110 @@ async fn file_backed_runtime_persists() { { let config = RuntimeConfig { db_path: Some(path.clone()), - default_namespace: "local".to_string(), + default_namespace: Namespace::local(), embedding_model: None, gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], + backend_id: khive_runtime::BackendId::main(), }; let rt = KhiveRuntime::new(config).unwrap(); - let entities = rt.list_entities(None, None, 50, 0).await.unwrap(); + let tok = rt.authorize(Namespace::local()); + let entities = rt.list_entities(&tok, None, None, 50, 0).await.unwrap(); assert_eq!(entities.len(), 1); assert_eq!(entities[0].name, "Persistent"); } } + +// ============================================================================= +// F218 integration: synthetic observed_as_* edge end-to-end (CRIT-1 regression) +// ============================================================================= + +/// This test is the ONLY test that would have caught CRIT-1 (wrong JOIN target). +/// +/// It seeds a real event + event_observations row and executes the canonical +/// ADR-041 §11 synthetic-edge GQL query end-to-end against an in-memory SQLite +/// database. The old code joined `event_observations.event_id = entities.id`, +/// which can never match because the two ID spaces are disjoint. +#[tokio::test] +async fn synthetic_edge_observed_as_selected_returns_memory_note() { + let rt = rt(); + let tok = rt.authorize(Namespace::local()); + let ns = "local"; + + // Step 1: create a memory note (the observed entity). 
+ let memory_note = rt + .create_note( + &tok, + "memory", + None, + "recalled memory content", + Some(0.9), + None, + vec![], + ) + .await + .unwrap(); + let memory_id = memory_note.id; + + // Step 2: create an event of kind RerankExecuted with a payload that + // includes `selected: [memory_id]`. The storage layer's `append_event` + // implementation calls `decode_rank_observations`, which reads + // `payload["selected"]` and inserts a row into `event_observations` with + // role="selected" and entity_id=memory_id. + let event_store = rt.events(&tok).unwrap(); + let mut event = Event::new( + ns, + "rerank", + EventKind::RerankExecuted, + SubstrateKind::Note, + "agent:test", + ); + event.payload = serde_json::json!({ + "candidates": [], + "selected": [memory_id.to_string()] + }); + event_store.append_event(event).await.unwrap(); + + // Step 3: execute the canonical ADR-041 §11 GQL query. + // Before CRIT-1 fix: `FROM entities n0 JOIN event_observations e0 ON e0.event_id = n0.id` + // — IDs are disjoint, so zero rows returned. + // After fix: `FROM events n0 JOIN event_observations e0 ON e0.event_id = n0.id` + // — correct join; the memory note is returned. + let rows = rt + .query( + &tok, + "MATCH (ev)-[:observed_as_selected]->(m:memory) RETURN m", + ) + .await + .unwrap(); + + assert!( + !rows.is_empty(), + "CRIT-1: synthetic edge query must return at least one row (memory note was seeded); \ + got 0 rows — event_observations join is broken" + ); + + // Verify the returned row contains our memory note's UUID. 
+ let memory_id_str = memory_id.to_string(); + let found = rows.iter().any(|row| { + row.columns.iter().any(|col| { + if let khive_storage::types::SqlValue::Text(s) = &col.value { + s.contains(&memory_id_str) + } else { + false + } + }) + }); + assert!( + found, + "CRIT-1: returned rows must include the seeded memory note id {}; columns: {:?}", + memory_id, + rows.iter() + .map(|r| r + .columns + .iter() + .map(|c| (&c.name, &c.value)) + .collect::>()) + .collect::>() + ); +} diff --git a/crates/khive-score/src/lib.rs b/crates/khive-score/src/lib.rs index 4694a54c..96393acf 100644 --- a/crates/khive-score/src/lib.rs +++ b/crates/khive-score/src/lib.rs @@ -8,7 +8,6 @@ mod comparator; mod ops; -mod quantkey; mod score; pub use comparator::{cmp_asc_then_id, cmp_desc_then_id, Ranked}; @@ -16,5 +15,4 @@ pub use ops::{ avg_scores, avg_scores_checked, max_score, min_score, rrf_score, sum_scores, weighted_sum, ScoreError, }; -pub use quantkey::QuantKey; pub use score::DeterministicScore; diff --git a/crates/khive-score/src/ops.rs b/crates/khive-score/src/ops.rs index c94fdd16..5ab0bd08 100644 --- a/crates/khive-score/src/ops.rs +++ b/crates/khive-score/src/ops.rs @@ -111,6 +111,8 @@ pub fn rrf_score(rank: usize, k: usize) -> DeterministicScore { DeterministicScore::from_f64(1.0 / (denominator as f64)) } +const SCALE_RAW: i128 = 4_294_967_296; // 2^32 — matches DeterministicScore::SCALE + #[inline] pub fn weighted_sum( scores: &[DeterministicScore], @@ -123,14 +125,18 @@ pub fn weighted_sum( second_len: weights.len(), }); } - let mut acc = DeterministicScore::ZERO; + let mut acc = 0i128; for (index, (&score, &weight)) in scores.iter().zip(weights.iter()).enumerate() { if !weight.is_finite() { return Err(ScoreError::NonFiniteWeight { index }); } - acc = acc + score * weight; + let w = DeterministicScore::from_f64(weight); + acc += (score.to_raw() as i128 * w.to_raw() as i128) / SCALE_RAW; } - Ok(acc) + Ok(DeterministicScore::from_raw(acc.clamp( + 
DeterministicScore::NEG_INF.to_raw() as i128, + DeterministicScore::MAX.to_raw() as i128, + ) as i64)) } #[cfg(test)] diff --git a/crates/khive-score/src/quantkey.rs b/crates/khive-score/src/quantkey.rs deleted file mode 100644 index 0fed7701..00000000 --- a/crates/khive-score/src/quantkey.rs +++ /dev/null @@ -1,135 +0,0 @@ -//! Lightweight quantized score key for hot loops (8 bytes). -//! -//! Packs a 32-bit quantized score + 32-bit ID prefix into 8 bytes -//! per ADR-006. NaN → 0 (neutral), matching DeterministicScore. - -use std::cmp::Ordering; -use std::hash::{Hash, Hasher}; - -/// 8-byte packed sort key: i32 quantized score + u32 ID prefix. -/// -/// For sort-only operations where the full DeterministicScore is not needed. -/// Score descending, lower ID prefix wins ties. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct QuantKey { - q: i32, - id_prefix: u32, -} - -impl Hash for QuantKey { - fn hash(&self, state: &mut H) { - self.q.hash(state); - self.id_prefix.hash(state); - } -} - -impl QuantKey { - const SCALE: f32 = 1_000_000.0; - - #[inline] - pub fn new(score: f32, id_prefix: u32) -> Self { - let s = if score.is_nan() { 0.0 } else { score }; - let q = (s * Self::SCALE) - .round() - .clamp(i32::MIN as f32, i32::MAX as f32) as i32; - Self { q, id_prefix } - } - - #[inline] - pub fn from_f64(score: f64, id_prefix: u32) -> Self { - Self::new(score as f32, id_prefix) - } - - #[inline] - pub fn quantized_score(&self) -> i32 { - self.q - } - - #[inline] - pub fn score(&self) -> f32 { - self.q as f32 / Self::SCALE - } - - #[inline] - pub fn id_prefix(&self) -> u32 { - self.id_prefix - } -} - -impl Ord for QuantKey { - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - self.q - .cmp(&other.q) - .then_with(|| other.id_prefix.cmp(&self.id_prefix)) - } -} - -impl PartialOrd for QuantKey { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use 
std::collections::BinaryHeap; - - #[test] - fn size_is_8_bytes() { - assert_eq!(std::mem::size_of::(), 8); - } - - #[test] - fn precision() { - let a = QuantKey::new(0.123456, 1); - let b = QuantKey::new(0.123457, 2); - assert_ne!(a.quantized_score(), b.quantized_score()); - } - - #[test] - fn heap_order() { - let mut heap: BinaryHeap = BinaryHeap::new(); - heap.push(QuantKey::new(0.95, 3)); - heap.push(QuantKey::new(0.95, 1)); - heap.push(QuantKey::new(0.95, 2)); - heap.push(QuantKey::new(0.87, 4)); - - assert_eq!(heap.pop().unwrap().id_prefix(), 1); - assert_eq!(heap.pop().unwrap().id_prefix(), 2); - assert_eq!(heap.pop().unwrap().id_prefix(), 3); - assert_eq!(heap.pop().unwrap().id_prefix(), 4); - } - - #[test] - fn nan_maps_to_zero() { - let nan_key = QuantKey::new(f32::NAN, 1); - let zero_key = QuantKey::new(0.0, 1); - assert_eq!(nan_key.quantized_score(), zero_key.quantized_score()); - } - - #[test] - fn clamp_high_score() { - let key = QuantKey::new(f32::MAX, 0); - assert_eq!(key.quantized_score(), i32::MAX); - } - - #[test] - fn clamp_low_score() { - let key = QuantKey::new(f32::MIN, 0); - assert_eq!(key.quantized_score(), i32::MIN); - } - - #[test] - fn from_f64_roundtrip_approx() { - let key = QuantKey::from_f64(0.5, 7); - assert!( - (key.score() - 0.5_f32).abs() < 1e-5, - "score was {}", - key.score() - ); - assert_eq!(key.id_prefix(), 7); - } -} diff --git a/crates/khive-score/src/score.rs b/crates/khive-score/src/score.rs index 5ed47180..bb17f3ef 100644 --- a/crates/khive-score/src/score.rs +++ b/crates/khive-score/src/score.rs @@ -19,6 +19,13 @@ impl DeterministicScore { const SCALE: f64 = 4_294_967_296.0; // 2^32 pub const MAX: Self = Self(i64::MAX); + /// Reserved raw sentinel at `i64::MIN`. Public arithmetic and float conversion + /// never produce this value — see `NEG_INF` for the lowest reachable score. + /// Lean proof: `MIN` is the reserved NaN sentinel; runtime values are + /// `RuntimeValid` (NEG_INF ≤ x ≤ MAX) and disjoint from `MIN`. 
+ pub const MIN: Self = Self(i64::MIN); + /// Lowest reachable runtime score (= `i64::MIN + 1`). Underflow clamps here, + /// `-Infinity` maps here. Distinct from `MIN`, which is reserved. pub const NEG_INF: Self = Self(i64::MIN + 1); pub const ZERO: Self = Self(0); @@ -67,12 +74,14 @@ impl DeterministicScore { #[inline] pub const fn is_infinite(self) -> bool { - self.0 == i64::MAX || self.0 == Self::NEG_INF.0 + self.0 == Self::MAX.0 || self.0 == Self::NEG_INF.0 } + /// Saturating arithmetic clamps to `[NEG_INF, MAX]`. Per the Lean proof, + /// the reserved `MIN` (i64::MIN) sentinel is never produced. #[inline] fn from_arithmetic_raw(raw: i128) -> Self { - if raw >= i64::MAX as i128 { + if raw >= Self::MAX.0 as i128 { Self::MAX } else if raw <= Self::NEG_INF.0 as i128 { Self::NEG_INF @@ -81,6 +90,8 @@ impl DeterministicScore { } } + /// Float conversion: NaN → ZERO, +Inf → MAX, -Inf → NEG_INF, finite → clamped + /// to `[NEG_INF, MAX]`. Reserved `MIN` is never produced. #[inline] fn from_rounded_arithmetic(raw: f64) -> Self { if raw.is_nan() { @@ -89,9 +100,9 @@ impl DeterministicScore { Self::MAX } else if !raw.is_finite() { Self::NEG_INF - } else if raw >= i64::MAX as f64 { + } else if raw >= Self::MAX.0 as f64 { Self::MAX - } else if raw <= i64::MIN as f64 { + } else if raw <= Self::NEG_INF.0 as f64 { Self::NEG_INF } else { Self(raw as i64) @@ -341,4 +352,37 @@ mod tests { let s = DeterministicScore::from_f64(1.0); assert_eq!(s * f64::NAN, DeterministicScore::ZERO); } + + // NEG_INF = i64::MIN + 1; MIN (i64::MIN) is reserved sentinel (Lean: `MIN`) + #[test] + fn neg_inf_is_i64_min_plus_one() { + assert_eq!(DeterministicScore::NEG_INF.to_raw(), i64::MIN + 1); + } + + #[test] + fn min_sentinel_is_i64_min() { + assert_eq!(DeterministicScore::MIN.to_raw(), i64::MIN); + } + + #[test] + fn min_sentinel_distinct_from_neg_inf() { + assert_ne!(DeterministicScore::MIN, DeterministicScore::NEG_INF); + assert!(DeterministicScore::MIN < DeterministicScore::NEG_INF); + } + + 
#[test] + fn neg_infinity_maps_to_neg_inf() { + assert_eq!( + DeterministicScore::from_f64(f64::NEG_INFINITY), + DeterministicScore::NEG_INF + ); + } + + #[test] + fn underflow_clamps_to_neg_inf_not_min() { + // Arithmetic must clamp at NEG_INF (= i64::MIN + 1), never produce MIN. + let result = DeterministicScore::from_raw(i64::MIN + 1) - DeterministicScore::from_raw(1); + assert_eq!(result, DeterministicScore::NEG_INF); + assert_ne!(result, DeterministicScore::MIN); + } } diff --git a/crates/khive-storage/Cargo.toml b/crates/khive-storage/Cargo.toml index c23d79cf..3a0653d4 100644 --- a/crates/khive-storage/Cargo.toml +++ b/crates/khive-storage/Cargo.toml @@ -12,8 +12,8 @@ categories.workspace = true [dependencies] async-trait = { workspace = true } chrono = { workspace = true } -khive-score = { version = "0.2.0", path = "../khive-score" } -khive-types = { version = "0.2.0", path = "../khive-types", features = ["serde"] } +khive-score = { version = "0.2.1", path = "../khive-score" } +khive-types = { version = "0.2.1", path = "../khive-types", features = ["serde"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } thiserror = { workspace = true } diff --git a/crates/khive-storage/src/capability.rs b/crates/khive-storage/src/capability.rs index 500d0c29..66338dd8 100644 --- a/crates/khive-storage/src/capability.rs +++ b/crates/khive-storage/src/capability.rs @@ -5,10 +5,10 @@ pub enum StorageCapability { Sql, Notes, + Entities, + Graph, + Events, Vectors, + Sparse, Text, - Graph, - Event, - Entities, - Admin, } diff --git a/crates/khive-storage/src/entity.rs b/crates/khive-storage/src/entity.rs index 00951d5f..98058e24 100644 --- a/crates/khive-storage/src/entity.rs +++ b/crates/khive-storage/src/entity.rs @@ -14,6 +14,8 @@ pub struct Entity { pub id: Uuid, pub namespace: String, pub kind: String, + /// Pack-governed subtype token. Maps to `entities.entity_type` column. 
+ pub entity_type: Option, pub name: String, pub description: Option, pub properties: Option, @@ -21,6 +23,10 @@ pub struct Entity { pub created_at: i64, pub updated_at: i64, pub deleted_at: Option, + /// When this entity was tombstoned by a merge, the `into` entity's ID. + pub merged_into: Option, + /// Opaque event ID for the merge that tombstoned this entity. + pub merge_event_id: Option, } impl Entity { @@ -34,6 +40,7 @@ impl Entity { id: Uuid::new_v4(), namespace: namespace.into(), kind: kind.into(), + entity_type: None, name: name.into(), description: None, properties: None, @@ -41,9 +48,16 @@ impl Entity { created_at: now, updated_at: now, deleted_at: None, + merged_into: None, + merge_event_id: None, } } + pub fn with_entity_type(mut self, t: Option>) -> Self { + self.entity_type = t.map(Into::into); + self + } + pub fn with_description(mut self, d: impl Into) -> Self { self.description = Some(d.into()); self @@ -65,6 +79,8 @@ impl Entity { pub struct EntityFilter { pub ids: Vec, pub kinds: Vec, + /// Filter by exact `entity_type` value. Multiple values are ORed. + pub entity_types: Vec, pub name_prefix: Option, pub tags_any: Vec, } diff --git a/crates/khive-storage/src/event.rs b/crates/khive-storage/src/event.rs index 9bfc3587..44b86060 100644 --- a/crates/khive-storage/src/event.rs +++ b/crates/khive-storage/src/event.rs @@ -5,12 +5,12 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use uuid::Uuid; -use khive_types::{EventOutcome, SubstrateKind}; +use khive_types::{EventKind, EventOutcome, SubstrateKind}; use crate::types::{BatchWriteSummary, Page, PageRequest, StorageResult}; /// Storage-level event record. Every verb execution produces one. -/// Immutable once appended — no update or soft-delete. +/// Immutable once appended; projection rows are written beside it at append time. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Event { pub id: Uuid, @@ -18,10 +18,16 @@ pub struct Event { pub verb: String, pub substrate: SubstrateKind, pub actor: String, + pub kind: EventKind, pub outcome: EventOutcome, - pub data: Option, + pub payload: Value, + pub payload_schema_version: u32, + pub profile_state_version: Option, pub duration_us: i64, pub target_id: Option, + pub session_id: Option, + pub aggregate_kind: Option, + pub aggregate_id: Option, pub created_at: i64, } @@ -29,6 +35,7 @@ impl Event { pub fn new( namespace: impl Into, verb: impl Into, + kind: EventKind, substrate: SubstrateKind, actor: impl Into, ) -> Self { @@ -38,10 +45,16 @@ impl Event { verb: verb.into(), substrate, actor: actor.into(), + kind, outcome: EventOutcome::Success, - data: None, + payload: Value::Object(Default::default()), + payload_schema_version: 1, + profile_state_version: None, duration_us: 0, target_id: None, + session_id: None, + aggregate_kind: None, + aggregate_id: None, created_at: chrono::Utc::now().timestamp_micros(), } } @@ -51,8 +64,18 @@ impl Event { self } - pub fn with_data(mut self, d: Value) -> Self { - self.data = Some(d); + pub fn with_payload(mut self, payload: Value) -> Self { + self.payload = payload; + self + } + + pub fn with_payload_schema_version(mut self, version: u32) -> Self { + self.payload_schema_version = version; + self + } + + pub fn with_profile_state_version(mut self, version: u64) -> Self { + self.profile_state_version = Some(version); self } @@ -65,18 +88,84 @@ impl Event { self.target_id = Some(id); self } + + pub fn with_session_id(mut self, id: Uuid) -> Self { + self.session_id = Some(id); + self + } + + pub fn with_aggregate(mut self, kind: impl Into, id: Uuid) -> Self { + self.aggregate_kind = Some(kind.into()); + self.aggregate_id = Some(id); + self + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ReferentKind { + Entity, + Note, 
+} + +impl ReferentKind { + pub const fn name(self) -> &'static str { + match self { + Self::Entity => "entity", + Self::Note => "note", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ObservationRole { + Candidate, + Selected, + Target, + Signal, +} + +impl ObservationRole { + pub const fn name(self) -> &'static str { + match self { + Self::Candidate => "candidate", + Self::Selected => "selected", + Self::Target => "target", + Self::Signal => "signal", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EventObservation { + pub event_id: Uuid, + pub entity_id: Uuid, + pub referent_kind: ReferentKind, + pub role: ObservationRole, + pub position: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EventView { + pub event: Event, + pub observations: Vec, } -/// Filter for querying events. +/// Filter for querying events. Namespace is implicit in the scoped EventStore. 
#[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct EventFilter { pub ids: Vec, + pub kinds: Vec, pub verbs: Vec, pub substrates: Vec, pub actors: Vec, - pub namespaces: Vec, pub after: Option, pub before: Option, + pub session_id: Option, + pub observed: Vec, + pub selected: Vec, + pub payload_proposal_id: Option, } #[async_trait] diff --git a/crates/khive-storage/src/graph.rs b/crates/khive-storage/src/graph.rs index 7c802a49..16360eaa 100644 --- a/crates/khive-storage/src/graph.rs +++ b/crates/khive-storage/src/graph.rs @@ -4,7 +4,7 @@ use async_trait::async_trait; use uuid::Uuid; use crate::types::{ - BatchWriteSummary, Edge, EdgeFilter, EdgeSortField, GraphPath, LinkId, NeighborHit, + BatchWriteSummary, DeleteMode, Edge, EdgeFilter, EdgeSortField, GraphPath, LinkId, NeighborHit, NeighborQuery, Page, PageRequest, SortOrder, StorageResult, TraversalRequest, }; @@ -13,7 +13,7 @@ pub trait GraphStore: Send + Sync + 'static { async fn upsert_edge(&self, edge: Edge) -> StorageResult<()>; async fn upsert_edges(&self, edges: Vec) -> StorageResult; async fn get_edge(&self, id: LinkId) -> StorageResult>; - async fn delete_edge(&self, id: LinkId) -> StorageResult; + async fn delete_edge(&self, id: LinkId, mode: DeleteMode) -> StorageResult; async fn query_edges( &self, filter: EdgeFilter, diff --git a/crates/khive-storage/src/lib.rs b/crates/khive-storage/src/lib.rs index c1f0d4c6..652339cd 100644 --- a/crates/khive-storage/src/lib.rs +++ b/crates/khive-storage/src/lib.rs @@ -16,6 +16,7 @@ pub mod error; pub mod event; pub mod graph; pub mod note; +pub mod sparse; pub mod sql; pub mod text; pub mod types; @@ -25,9 +26,12 @@ pub use capability::StorageCapability; pub use entity::{Entity, EntityFilter, EntityStore}; pub use error::StorageError; -pub use event::{Event, EventFilter, EventStore}; +pub use event::{ + Event, EventFilter, EventObservation, EventStore, EventView, ObservationRole, ReferentKind, +}; pub use graph::GraphStore; pub use note::{Note, 
NoteStore}; +pub use sparse::SparseStore; pub use sql::{SqlAccess, SqlReader, SqlTransaction, SqlWriter}; pub use text::TextSearch; pub use types::StorageResult; @@ -35,11 +39,13 @@ pub use vectors::VectorStore; pub use types::{ BatchWriteSummary, DeleteMode, Direction, Edge, EdgeFilter, EdgeSortField, GraphPath, - IndexRebuildScope, LinkId, NeighborHit, NeighborQuery, Page, PageRequest, PathNode, - SortDirection, SortOrder, SqlIsolation, SqlRow, SqlStatement, SqlTxOptions, SqlValue, - TextDocument, TextFilter, TextIndexStats, TextQueryMode, TextSearchHit, TextSearchRequest, - TimeRange, TraversalOptions, TraversalRequest, VectorIndexKind, VectorMetadataFilter, - VectorRecord, VectorSearchHit, VectorSearchRequest, VectorStoreCapabilities, VectorStoreInfo, + IndexRebuildScope, LinkId, NeighborHit, NeighborQuery, OrphanSweepConfig, OrphanSweepResult, + Page, PageRequest, PathNode, PropertyFilter, PropertyOp, SortDirection, SortOrder, + SparseRecord, SparseSearchHit, SparseSearchRequest, SparseVector, SqlIsolation, SqlRow, + SqlStatement, SqlTxOptions, SqlValue, TextDocument, TextFilter, TextIndexStats, TextQueryMode, + TextSearchHit, TextSearchRequest, TimeRange, TraversalOptions, TraversalRequest, + VectorIndexKind, VectorMetadataFilter, VectorRecord, VectorSearchHit, VectorSearchRequest, + VectorStoreCapabilities, VectorStoreInfo, }; pub use khive_types::{EdgeCategory, EdgeRelation, EventOutcome, SubstrateKind}; diff --git a/crates/khive-storage/src/note.rs b/crates/khive-storage/src/note.rs index 87825b2c..2e373a4a 100644 --- a/crates/khive-storage/src/note.rs +++ b/crates/khive-storage/src/note.rs @@ -13,10 +13,11 @@ pub struct Note { pub id: Uuid, pub namespace: String, pub kind: String, + pub status: String, pub name: Option, pub content: String, - pub salience: f64, - pub decay_factor: f64, + pub salience: Option, + pub decay_factor: Option, pub expires_at: Option, pub properties: Option, pub created_at: i64, @@ -35,10 +36,11 @@ impl Note { id: Uuid::new_v4(), 
namespace: namespace.into(), kind: kind.into(), + status: "active".to_string(), name: None, content: content.into(), - salience: 0.5, - decay_factor: 0.0, + salience: None, + decay_factor: None, expires_at: None, properties: None, created_at: now, @@ -53,12 +55,12 @@ impl Note { } pub fn with_salience(mut self, s: f64) -> Self { - self.salience = s.clamp(0.0, 1.0); + self.salience = Some(s.clamp(0.0, 1.0)); self } pub fn with_decay(mut self, d: f64) -> Self { - self.decay_factor = d.max(0.0); + self.decay_factor = Some(d.max(0.0)); self } @@ -91,13 +93,4 @@ pub trait NoteStore: Send + Sync + 'static { } Ok(out) } - - async fn upsert_note_if_below_quota(&self, note: Note, max_notes: u64) -> StorageResult { - let count = self.count_notes(¬e.namespace, None).await?; - if count >= max_notes { - return Ok(false); - } - self.upsert_note(note).await?; - Ok(true) - } } diff --git a/crates/khive-storage/src/sparse.rs b/crates/khive-storage/src/sparse.rs new file mode 100644 index 00000000..618e6c0e --- /dev/null +++ b/crates/khive-storage/src/sparse.rs @@ -0,0 +1,34 @@ +//! Sparse vector storage and lexical-semantic search capability (ADR-031). 
+ +use async_trait::async_trait; +use uuid::Uuid; + +use khive_types::SubstrateKind; + +use crate::types::{ + BatchWriteSummary, SparseRecord, SparseSearchHit, SparseSearchRequest, SparseVector, + StorageResult, +}; + +#[async_trait] +pub trait SparseStore: Send + Sync + 'static { + async fn insert_sparse( + &self, + subject_id: Uuid, + kind: SubstrateKind, + namespace: &str, + field: &str, + vector: SparseVector, + ) -> StorageResult<()>; + + async fn insert_batch(&self, records: Vec) -> StorageResult; + + async fn delete(&self, subject_id: Uuid) -> StorageResult; + + async fn search_sparse( + &self, + request: SparseSearchRequest, + ) -> StorageResult>; + + async fn count(&self) -> StorageResult; +} diff --git a/crates/khive-storage/src/types.rs b/crates/khive-storage/src/types.rs index 0a37b259..70430009 100644 --- a/crates/khive-storage/src/types.rs +++ b/crates/khive-storage/src/types.rs @@ -108,7 +108,7 @@ pub enum VectorIndexKind { Flat, } -/// Backend capability declaration for vector stores (ADR-041). +/// Backend capability declaration for vector stores (ADR-041, ADR-044). /// /// Returned by [`VectorStore::capabilities`]. Higher-level retrieval policy /// (hybrid search, HyDE fan-out, etc.) introspects this struct at construction @@ -123,50 +123,138 @@ pub struct VectorStoreCapabilities { pub supports_quantization: bool, /// Supports in-place update without a delete+insert round-trip. pub supports_update: bool, + /// Supports orphan sweep (deleting vectors with no live subject). + pub supports_orphan_sweep: bool, + /// Supports multiple named fields per subject (e.g. `entity.title` and + /// `entity.body` stored as separate vectors). sqlite-vec backends use a + /// `subject_id PRIMARY KEY` table and therefore only support one vector + /// per subject per namespace — this field is `false` for those backends. + #[serde(default)] + pub supports_multi_field: bool, /// Maximum supported embedding dimension, or `None` if unbounded. 
pub max_dimensions: Option, /// Index algorithms available in this backend. pub index_kinds: Vec, } -/// A typed predicate for backend-pushable metadata filtering (ADR-041). -/// -/// Intentionally minimal: namespace isolation and kind scoping cover the v0.2 -/// hybrid-search cases. Range predicates and compound logic are deferred to a -/// future retrieval ADR. Adding fields is non-breaking (serde defaults); removing -/// fields is not. +/// A typed predicate for backend-pushable metadata filtering (ADR-041, ADR-044). #[derive(Clone, Debug, Default, Serialize, Deserialize)] pub struct VectorMetadataFilter { /// Restrict to these namespaces. pub namespaces: Vec, /// Restrict to these substrate kinds. pub kinds: Vec, - /// Arbitrary key=value metadata predicates (equality only). - pub properties: Vec<(String, serde_json::Value)>, + /// Typed property predicates (ADR-044). + pub property_filters: Vec, } impl VectorMetadataFilter { /// Returns `true` when no predicates are set (filter is a no-op). pub fn is_empty(&self) -> bool { - self.namespaces.is_empty() && self.kinds.is_empty() && self.properties.is_empty() + self.namespaces.is_empty() && self.kinds.is_empty() && self.property_filters.is_empty() } } +/// A single typed metadata predicate (ADR-044). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PropertyFilter { + pub key: String, + pub op: PropertyOp, + pub value: serde_json::Value, +} + +/// Comparison operators for [`PropertyFilter`] (ADR-044). +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PropertyOp { + Eq, + Ne, + In, + Range, + Exists, +} + #[derive(Clone, Debug, Serialize, Deserialize)] pub struct VectorRecord { pub subject_id: Uuid, pub kind: SubstrateKind, pub namespace: String, - pub embedding: Vec, + /// Which embedding field this record represents (e.g. `"entity.body"`). + pub field: String, + /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`. 
+ pub vectors: Vec>, pub updated_at: DateTime, } #[derive(Clone, Debug, Serialize, Deserialize)] pub struct VectorSearchRequest { - pub query_embedding: Vec, + /// One or many query vectors; sqlite-vec backends enforce `query_vectors.len() == 1`. + pub query_vectors: Vec>, pub top_k: u32, pub namespace: Option, pub kind: Option, + /// Optional metadata filter for backends that support pushdown. + pub filter: Option, + /// Backend-specific hints (opaque JSON blob, ignored by default). + pub backend_hints: Option, +} + +/// Configuration for an orphan-sweep pass (ADR-044). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct OrphanSweepConfig { + /// Optional allowlist of subject IDs to check. `None` = scan all rows. + /// `Some(ids)` restricts the sweep to only those IDs; rows not in the list + /// are untouched even if orphaned (ADR-044 §5). + pub subject_id_allowlist: Option>, + pub namespaces: Vec, + pub substrate_kinds: Vec, + pub max_delete: u32, + pub dry_run: bool, +} + +/// Result of an orphan-sweep pass (ADR-044). +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct OrphanSweepResult { + pub scanned: u64, + pub deleted: u64, + pub would_delete: u64, + pub max_delete_hit: bool, +} + +// -- Sparse vector types (ADR-031) -- + +/// A sparse vector represented as parallel indices and values arrays. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct SparseVector { + /// Dimension indices (must be strictly increasing). + pub indices: Vec, + /// Corresponding non-zero values (must be finite). 
+ pub values: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SparseRecord { + pub subject_id: Uuid, + pub kind: SubstrateKind, + pub namespace: String, + pub field: String, + pub vector: SparseVector, + pub updated_at: DateTime, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SparseSearchRequest { + pub query: SparseVector, + pub top_k: u32, + pub namespace: Option, + pub kind: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SparseSearchHit { + pub subject_id: Uuid, + pub score: khive_score::DeterministicScore, + pub rank: u32, } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -297,12 +385,16 @@ impl fmt::Display for LinkId { #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Edge { pub id: LinkId, + pub namespace: String, pub source_id: Uuid, pub target_id: Uuid, pub relation: EdgeRelation, pub weight: f64, pub created_at: DateTime, + pub updated_at: DateTime, + pub deleted_at: Option>, pub metadata: Option, + pub target_backend: Option, } #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)] diff --git a/crates/khive-storage/src/vectors.rs b/crates/khive-storage/src/vectors.rs index 9ee18bde..95bf1161 100644 --- a/crates/khive-storage/src/vectors.rs +++ b/crates/khive-storage/src/vectors.rs @@ -10,8 +10,9 @@ use khive_types::SubstrateKind; use crate::capability::StorageCapability; use crate::error::StorageError; use crate::types::{ - BatchWriteSummary, IndexRebuildScope, StorageResult, VectorIndexKind, VectorMetadataFilter, - VectorRecord, VectorSearchHit, VectorSearchRequest, VectorStoreCapabilities, VectorStoreInfo, + BatchWriteSummary, IndexRebuildScope, OrphanSweepConfig, OrphanSweepResult, StorageResult, + VectorIndexKind, VectorMetadataFilter, VectorRecord, VectorSearchHit, VectorSearchRequest, + VectorStoreCapabilities, VectorStoreInfo, }; #[async_trait] @@ -23,7 +24,8 @@ pub trait VectorStore: Send + Sync + 'static { subject_id: Uuid, kind: SubstrateKind, 
namespace: &str, - embedding: Vec, + field: &str, + vectors: Vec>, ) -> StorageResult<()>; async fn insert_batch(&self, records: Vec) -> StorageResult; async fn delete(&self, subject_id: Uuid) -> StorageResult; @@ -47,6 +49,8 @@ pub trait VectorStore: Send + Sync + 'static { supports_batch_search: false, supports_quantization: false, supports_update: false, + supports_orphan_sweep: false, + supports_multi_field: false, // sqlite-vec 0.1.9 enforces SQLITE_VEC_VEC0_MAX_DIMENSIONS = 8192. // The baseline uses the same value so generic callers that have not // overridden capabilities() report the correct ceiling. @@ -64,33 +68,40 @@ pub trait VectorStore: Send + Sync + 'static { /// /// Callers must check `capabilities().supports_filter` before calling; the /// runtime layer is responsible for post-filtering when native pushdown is absent. + /// + /// A backend that claims `supports_filter = true` but does not override this + /// method will trigger a `debug_assert` at runtime (ADR-044 §4). async fn search_with_filter( &self, - request: VectorSearchRequest, - filter: VectorMetadataFilter, + request: &VectorSearchRequest, + filter: &VectorMetadataFilter, ) -> StorageResult> { if filter.is_empty() { - return self.search(request).await; + return self.search(request.clone()).await; } + debug_assert!( + !self.capabilities().supports_filter, + "backend claims supports_filter=true but did not override search_with_filter" + ); Err(StorageError::Unsupported { capability: StorageCapability::Vectors, operation: "search_with_filter".into(), - message: "filter pushdown not supported by this backend".into(), + message: "filter pushdown not supported; set supports_filter=true only when overriding this method".into(), }) } /// Search with N query vectors in one round-trip (HyDE fan-out, multi-query). /// - /// Default: sequential calls to [`search`]. 
Backends that support native batch - /// search (amortising index-walk overhead) should override this and set - /// `supports_batch_search = true` in their [`VectorStoreCapabilities`]. + /// Default: sequential calls to [`search`], isolating per-query errors so one + /// bad request does not abort the batch. Backends that support native batch + /// search should override this and set `supports_batch_search = true`. async fn search_batch( &self, - requests: Vec, - ) -> StorageResult>> { + requests: &[VectorSearchRequest], + ) -> StorageResult>>> { let mut out = Vec::with_capacity(requests.len()); for req in requests { - out.push(self.search(req).await?); + out.push(self.search(req.clone()).await); } Ok(out) } @@ -105,10 +116,25 @@ pub trait VectorStore: Send + Sync + 'static { subject_id: Uuid, kind: SubstrateKind, namespace: &str, - embedding: Vec, + field: &str, + vectors: Vec>, ) -> StorageResult<()> { self.delete(subject_id).await?; - self.insert(subject_id, kind, namespace, embedding).await + self.insert(subject_id, kind, namespace, field, vectors) + .await + } + + /// Remove vectors with no live subject (orphan sweep, ADR-044). + /// + /// Default returns [`StorageError::Unsupported`]. Backends that implement + /// deletion must set `supports_orphan_sweep = true` and override this method. 
+ async fn orphan_sweep(&self, config: &OrphanSweepConfig) -> StorageResult { + let _ = config; + Err(StorageError::Unsupported { + capability: StorageCapability::Vectors, + operation: "orphan_sweep".into(), + message: "this backend does not support orphan sweep".into(), + }) } } @@ -123,8 +149,8 @@ mod tests { use super::*; use crate::error::StorageError; use crate::types::{ - BatchWriteSummary, IndexRebuildScope, VectorIndexKind, VectorMetadataFilter, - VectorSearchHit, VectorSearchRequest, VectorStoreInfo, + BatchWriteSummary, IndexRebuildScope, OrphanSweepConfig, VectorIndexKind, + VectorMetadataFilter, VectorSearchHit, VectorSearchRequest, VectorStoreInfo, }; // -- Minimal test fake -- @@ -170,7 +196,8 @@ mod tests { _subject_id: Uuid, _kind: SubstrateKind, _namespace: &str, - _embedding: Vec, + _field: &str, + _vectors: Vec>, ) -> StorageResult<()> { self.insert_called.store(true, Ordering::SeqCst); if self.fail_insert.load(Ordering::SeqCst) { @@ -248,6 +275,7 @@ mod tests { assert!(!caps.supports_batch_search); assert!(!caps.supports_quantization); assert!(!caps.supports_update); + assert!(!caps.supports_orphan_sweep); // Baseline reports the sqlite-vec hard limit (SQLITE_VEC_VEC0_MAX_DIMENSIONS = 8192). 
assert_eq!(caps.max_dimensions, Some(8192)); assert_eq!(caps.index_kinds, vec![VectorIndexKind::SqliteVec]); @@ -269,17 +297,21 @@ mod tests { ); } + // -- Test cases -- + #[tokio::test] async fn search_with_filter_empty_filter_delegates_to_search() { let store = TestVectorStore::new(); let req = VectorSearchRequest { - query_embedding: vec![0.1, 0.2, 0.3, 0.4], + query_vectors: vec![vec![0.1, 0.2, 0.3, 0.4]], top_k: 5, namespace: None, kind: None, + filter: None, + backend_hints: None, }; let filter = VectorMetadataFilter::default(); // all fields empty - let result = store.search_with_filter(req, filter).await; + let result = store.search_with_filter(&req, &filter).await; assert!(result.is_ok()); let hits = result.unwrap(); // search() on TestVectorStore returns exactly one hit @@ -290,17 +322,19 @@ mod tests { async fn search_with_filter_non_empty_filter_returns_unsupported() { let store = TestVectorStore::new(); let req = VectorSearchRequest { - query_embedding: vec![0.1, 0.2, 0.3, 0.4], + query_vectors: vec![vec![0.1, 0.2, 0.3, 0.4]], top_k: 5, namespace: None, kind: None, + filter: None, + backend_hints: None, }; let filter = VectorMetadataFilter { namespaces: vec!["ns:agent".into()], kinds: vec![], - properties: vec![], + property_filters: vec![], }; - let result = store.search_with_filter(req, filter).await; + let result = store.search_with_filter(&req, &filter).await; assert!(result.is_err()); let err = result.unwrap_err(); assert!( @@ -314,31 +348,40 @@ mod tests { let store = TestVectorStore::new(); let requests = vec![ VectorSearchRequest { - query_embedding: vec![0.1, 0.2, 0.3, 0.4], + query_vectors: vec![vec![0.1, 0.2, 0.3, 0.4]], top_k: 3, namespace: None, kind: None, + filter: None, + backend_hints: None, }, VectorSearchRequest { - query_embedding: vec![0.5, 0.6, 0.7, 0.8], + query_vectors: vec![vec![0.5, 0.6, 0.7, 0.8]], top_k: 3, namespace: None, kind: None, + filter: None, + backend_hints: None, }, ]; - let result = 
store.search_batch(requests).await; + let result = store.search_batch(&requests).await; assert!(result.is_ok()); let batched = result.unwrap(); assert_eq!(batched.len(), 2, "should return one result set per request"); - for hits in &batched { - assert_eq!(hits.len(), 1, "each result set should have one hit"); + for inner in &batched { + assert!(inner.is_ok(), "each inner result should be Ok"); + assert_eq!( + inner.as_ref().unwrap().len(), + 1, + "each Ok should have one hit" + ); } } #[tokio::test] - async fn search_batch_propagates_search_error() { - // TestVectorStore.search always succeeds; inject failure via fail_insert - // trick — instead use a custom store that fails on search. + async fn search_batch_isolates_per_query_errors() { + // A store that always fails search — the outer Ok must still be returned, + // and the failed inner result must carry the error. struct FailingSearch; #[async_trait] @@ -348,7 +391,8 @@ mod tests { _: Uuid, _: SubstrateKind, _: &str, - _: Vec, + _: &str, + _: Vec>, ) -> StorageResult<()> { Ok(()) } @@ -385,13 +429,37 @@ mod tests { let store = FailingSearch; let requests = vec![VectorSearchRequest { - query_embedding: vec![0.1], + query_vectors: vec![vec![0.1]], top_k: 1, namespace: None, kind: None, + filter: None, + backend_hints: None, }]; - let result = store.search_batch(requests).await; - assert!(result.is_err()); + // Outer result is Ok; the error is in the inner vec. 
+ let result = store.search_batch(&requests).await; + assert!(result.is_ok(), "outer result must be Ok for batch"); + let batched = result.unwrap(); + assert_eq!(batched.len(), 1); + assert!(batched[0].is_err(), "inner result must carry the error"); + } + + #[tokio::test] + async fn orphan_sweep_default_returns_unsupported() { + let store = TestVectorStore::new(); + let config = OrphanSweepConfig { + subject_id_allowlist: None, + namespaces: vec![], + substrate_kinds: vec![], + max_delete: 100, + dry_run: true, + }; + let result = store.orphan_sweep(&config).await; + assert!( + matches!(result, Err(StorageError::Unsupported { .. })), + "expected Unsupported, got {result:?}" + ); + assert!(!store.capabilities().supports_orphan_sweep); } #[tokio::test] @@ -399,7 +467,13 @@ mod tests { let store = TestVectorStore::new(); let id = Uuid::new_v4(); let result = store - .update(id, SubstrateKind::Entity, "ns:test", vec![0.1, 0.2]) + .update( + id, + SubstrateKind::Entity, + "ns:test", + "body", + vec![vec![0.1, 0.2]], + ) .await; assert!(result.is_ok()); assert!( @@ -417,7 +491,13 @@ mod tests { let store = TestVectorStore::with_fail_delete(); let id = Uuid::new_v4(); let result = store - .update(id, SubstrateKind::Entity, "ns:test", vec![0.1, 0.2]) + .update( + id, + SubstrateKind::Entity, + "ns:test", + "body", + vec![vec![0.1, 0.2]], + ) .await; assert!(result.is_err()); assert!( @@ -435,7 +515,13 @@ mod tests { let store = TestVectorStore::with_fail_insert(); let id = Uuid::new_v4(); let result = store - .update(id, SubstrateKind::Entity, "ns:test", vec![0.1, 0.2]) + .update( + id, + SubstrateKind::Entity, + "ns:test", + "body", + vec![vec![0.1, 0.2]], + ) .await; assert!(result.is_err()); assert!( @@ -443,4 +529,27 @@ mod tests { "insert must be attempted" ); } + + #[tokio::test] + async fn vector_metadata_filter_is_empty_with_property_filters() { + let empty = VectorMetadataFilter::default(); + assert!(empty.is_empty()); + + let with_ns = VectorMetadataFilter { + 
namespaces: vec!["ns".into()], + ..Default::default() + }; + assert!(!with_ns.is_empty()); + + use crate::types::{PropertyFilter, PropertyOp}; + let with_prop = VectorMetadataFilter { + property_filters: vec![PropertyFilter { + key: "k".into(), + op: PropertyOp::Eq, + value: serde_json::Value::Bool(true), + }], + ..Default::default() + }; + assert!(!with_prop.is_empty()); + } } diff --git a/crates/khive-types/Cargo.toml b/crates/khive-types/Cargo.toml index b0f6b568..cafea223 100644 --- a/crates/khive-types/Cargo.toml +++ b/crates/khive-types/Cargo.toml @@ -10,12 +10,17 @@ keywords.workspace = true categories.workspace = true [features] -default = ["serde", "std"] +# std is included in default so the crate is usable without explicit feature selection. +# serde is optional — removed from default so `khive-types` is truly no_std-optional. +# All internal dependents that need serde must declare `features = ["serde"]` explicitly. +default = ["std"] serde = ["dep:serde"] std = [] +blake3 = ["dep:blake3"] [dependencies] serde = { workspace = true, optional = true, features = ["derive"] } +blake3 = { version = "1", optional = true, default-features = false } [dev-dependencies] serde_json = { workspace = true } diff --git a/crates/khive-types/src/edge.rs b/crates/khive-types/src/edge.rs index 423c53a4..7638eff2 100644 --- a/crates/khive-types/src/edge.rs +++ b/crates/khive-types/src/edge.rs @@ -8,7 +8,7 @@ use core::str::FromStr; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -/// The 6 structural categories that group the 13 canonical edge relations. +/// The 8 structural categories that group the 15 canonical edge relations. /// /// Exposed via [`EdgeRelation::category`] for query planners and UI rendering. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] @@ -19,6 +19,10 @@ pub enum EdgeCategory { Structure, /// Intellectual lineage: `extends`, `variant_of`, `introduced_by`, `supersedes` Derivation, + /// Data/artifact origin: `derived_from` + Provenance, + /// Time ordering: `precedes` + Temporal, /// Build/runtime needs: `depends_on`, `enables` Dependency, /// Code ↔ concept: `implements` @@ -29,7 +33,7 @@ pub enum EdgeCategory { Annotation, } -/// Closed set of 13 canonical edge relations (ADR-002, ADR-021). +/// Closed set of 15 canonical edge relations (ADR-002). /// /// No `Default` — every edge requires an explicit relation. /// Wire format: snake_case strings (e.g. `"part_of"`, `"introduced_by"`). @@ -46,6 +50,10 @@ pub enum EdgeRelation { VariantOf, IntroducedBy, Supersedes, + // Provenance + DerivedFrom, + // Temporal + Precedes, // Dependency DependsOn, Enables, @@ -59,8 +67,8 @@ pub enum EdgeRelation { } impl EdgeRelation { - /// All 13 canonical relations in ADR-002 table order. - pub const ALL: [Self; 13] = [ + /// All 15 canonical relations in ADR-002 table order. + pub const ALL: [Self; 15] = [ Self::Contains, Self::PartOf, Self::InstanceOf, @@ -68,6 +76,8 @@ impl EdgeRelation { Self::VariantOf, Self::IntroducedBy, Self::Supersedes, + Self::DerivedFrom, + Self::Precedes, Self::DependsOn, Self::Enables, Self::Implements, @@ -76,6 +86,30 @@ impl EdgeRelation { Self::Annotates, ]; + /// Valid snake_case names for all 15 canonical relations. + pub const VALID_NAMES: &'static [&'static str] = &[ + "contains", + "part_of", + "instance_of", + "extends", + "variant_of", + "introduced_by", + "supersedes", + "derived_from", + "precedes", + "depends_on", + "enables", + "implements", + "competes_with", + "composed_with", + "annotates", + ]; + + /// `true` for symmetric relations: edge direction has no semantic meaning. 
+ pub const fn is_symmetric(&self) -> bool { + matches!(self, Self::CompetesWith | Self::ComposedWith) + } + /// The category this relation belongs to. pub const fn category(&self) -> EdgeCategory { match self { @@ -83,6 +117,8 @@ impl EdgeRelation { Self::Extends | Self::VariantOf | Self::IntroducedBy | Self::Supersedes => { EdgeCategory::Derivation } + Self::DerivedFrom => EdgeCategory::Provenance, + Self::Precedes => EdgeCategory::Temporal, Self::DependsOn | Self::Enables => EdgeCategory::Dependency, Self::Implements => EdgeCategory::Implementation, Self::CompetesWith | Self::ComposedWith => EdgeCategory::Lateral, @@ -100,6 +136,8 @@ impl EdgeRelation { Self::VariantOf => "variant_of", Self::IntroducedBy => "introduced_by", Self::Supersedes => "supersedes", + Self::DerivedFrom => "derived_from", + Self::Precedes => "precedes", Self::DependsOn => "depends_on", Self::Enables => "enables", Self::Implements => "implements", @@ -116,22 +154,6 @@ impl fmt::Display for EdgeRelation { } } -const EDGE_RELATION_VALID: &[&str] = &[ - "contains", - "part_of", - "instance_of", - "extends", - "variant_of", - "introduced_by", - "supersedes", - "depends_on", - "enables", - "implements", - "competes_with", - "composed_with", - "annotates", -]; - impl FromStr for EdgeRelation { type Err = crate::error::UnknownVariant; @@ -156,6 +178,8 @@ impl FromStr for EdgeRelation { "variant_of" | "variantof" => Ok(Self::VariantOf), "introduced_by" | "introducedby" => Ok(Self::IntroducedBy), "supersedes" => Ok(Self::Supersedes), + "derived_from" | "derivedfrom" => Ok(Self::DerivedFrom), + "precedes" => Ok(Self::Precedes), "depends_on" | "dependson" => Ok(Self::DependsOn), "enables" => Ok(Self::Enables), "implements" => Ok(Self::Implements), @@ -165,7 +189,7 @@ impl FromStr for EdgeRelation { _ => Err(crate::error::UnknownVariant::new( "edge_relation", s, - EDGE_RELATION_VALID, + Self::VALID_NAMES, )), } } @@ -177,8 +201,20 @@ mod tests { use alloc::string::ToString; #[test] - fn 
all_has_thirteen_variants() { - assert_eq!(EdgeRelation::ALL.len(), 13); + fn all_has_fifteen_variants() { + assert_eq!(EdgeRelation::ALL.len(), 15); + } + + #[test] + fn all_eight_categories_covered() { + let mut cats = alloc::vec::Vec::new(); + for r in EdgeRelation::ALL { + let c = r.category(); + if !cats.contains(&c) { + cats.push(c); + } + } + assert_eq!(cats.len(), 8, "all 8 categories must be represented"); } #[test] @@ -243,7 +279,12 @@ mod tests { msg.contains("contains"), "error should list valid relations" ); - assert!(msg.contains("annotates"), "error should list all 13"); + assert!( + msg.contains("derived_from"), + "error should list derived_from" + ); + assert!(msg.contains("precedes"), "error should list precedes"); + assert!(msg.contains("annotates"), "error should list all 15"); } #[test] @@ -271,6 +312,12 @@ mod tests { EdgeCategory::Implementation ); + assert_eq!( + EdgeRelation::DerivedFrom.category(), + EdgeCategory::Provenance + ); + assert_eq!(EdgeRelation::Precedes.category(), EdgeCategory::Temporal); + assert_eq!(EdgeRelation::CompetesWith.category(), EdgeCategory::Lateral); assert_eq!(EdgeRelation::ComposedWith.category(), EdgeCategory::Lateral); @@ -278,15 +325,33 @@ mod tests { } #[test] - fn all_categories_covered() { - let mut cats = alloc::vec::Vec::new(); - for r in EdgeRelation::ALL { - let c = r.category(); - if !cats.contains(&c) { - cats.push(c); - } - } - assert_eq!(cats.len(), 6, "all 6 categories must be represented"); + fn from_str_new_relations() { + assert_eq!( + "derived_from".parse::().unwrap(), + EdgeRelation::DerivedFrom + ); + assert_eq!( + "derived-from".parse::().unwrap(), + EdgeRelation::DerivedFrom + ); + assert_eq!( + "derivedfrom".parse::().unwrap(), + EdgeRelation::DerivedFrom + ); + assert_eq!( + "precedes".parse::().unwrap(), + EdgeRelation::Precedes + ); + } + + #[test] + fn is_symmetric_only_for_lateral_peer_relations() { + assert!(EdgeRelation::CompetesWith.is_symmetric()); + 
assert!(EdgeRelation::ComposedWith.is_symmetric()); + assert!(!EdgeRelation::DependsOn.is_symmetric()); + assert!(!EdgeRelation::DerivedFrom.is_symmetric()); + assert!(!EdgeRelation::Precedes.is_symmetric()); + assert!(!EdgeRelation::Extends.is_symmetric()); } #[cfg(feature = "serde")] @@ -298,4 +363,14 @@ mod tests { let parsed: EdgeRelation = serde_json::from_str(&json).unwrap(); assert_eq!(parsed, rel); } + + #[cfg(feature = "serde")] + #[test] + fn serde_new_relations_roundtrip() { + for rel in [EdgeRelation::DerivedFrom, EdgeRelation::Precedes] { + let json = serde_json::to_string(&rel).unwrap(); + let parsed: EdgeRelation = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed, rel); + } + } } diff --git a/crates/khive-types/src/entity.rs b/crates/khive-types/src/entity.rs index ca56096b..41dd0cc3 100644 --- a/crates/khive-types/src/entity.rs +++ b/crates/khive-types/src/entity.rs @@ -9,18 +9,16 @@ use core::str::FromStr; use crate::{EdgeRelation, Header, Id128, Timestamp}; -/// Taxonomy for entity classification in a research knowledge graph (ADR-001). +/// 8 closed base kinds for graph-node classification (ADR-001). /// -/// 6 kinds, chosen for agent reliability: agents classify these correctly -/// with unambiguous signals. Finer distinctions (algorithm vs technique, -/// model vs architecture) live in `properties` — they don't enable useful -/// queries with the 13-relation edge ontology and cause 20-30% misclassification. +/// Governed subtype values live in `Entity::entity_type`; `properties` remain +/// metadata and must not carry ontology type strings. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] pub enum EntityKind { /// Algorithms, techniques, architectures, theories, models, research gaps. - /// The default / residual bucket. Use `properties.type` for finer grain. 
+ /// The default / residual bucket. #[default] Concept, /// Papers, preprints, technical reports, blog posts, books. @@ -36,16 +34,22 @@ pub enum EntityKind { Person, /// Labs, companies, institutions. Org, + /// Built artifacts: binaries, model checkpoints, Docker images, packages. + Artifact, + /// Running or deployable services: APIs, hosted endpoints, SaaS products. + Service, } impl EntityKind { - pub const ALL: [Self; 6] = [ + pub const ALL: [Self; 8] = [ Self::Concept, Self::Document, Self::Dataset, Self::Project, Self::Person, Self::Org, + Self::Artifact, + Self::Service, ]; pub const fn name(self) -> &'static str { @@ -56,6 +60,8 @@ impl EntityKind { Self::Project => "project", Self::Person => "person", Self::Org => "org", + Self::Artifact => "artifact", + Self::Service => "service", } } } @@ -66,7 +72,9 @@ impl fmt::Display for EntityKind { } } -const ENTITY_KIND_VALID: &[&str] = &["concept", "document", "dataset", "project", "person", "org"]; +const ENTITY_KIND_VALID: &[&str] = &[ + "concept", "document", "dataset", "project", "person", "org", "artifact", "service", +]; impl FromStr for EntityKind { type Err = crate::error::UnknownVariant; @@ -79,6 +87,8 @@ impl FromStr for EntityKind { "project" | "repo" | "crate" | "library" | "lib" => Ok(Self::Project), "person" | "author" | "researcher" => Ok(Self::Person), "org" | "organization" | "organisation" | "lab" | "company" => Ok(Self::Org), + "artifact" | "art" => Ok(Self::Artifact), + "service" | "svc" => Ok(Self::Service), other => Err(crate::error::UnknownVariant::new( "entity_kind", other, @@ -95,6 +105,9 @@ pub struct Entity { #[cfg_attr(feature = "serde", serde(flatten))] pub header: Header, pub kind: EntityKind, + /// Pack-governed subtype token (e.g. `"paper"`, `"snapshot"`). Never stored + /// raw in `properties` — queries compile this to `entities.entity_type = ?`. 
+ pub entity_type: Option, pub name: String, pub description: Option, pub properties: BTreeMap, @@ -107,11 +120,15 @@ pub struct Entity { #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Link { pub id: Id128, + pub namespace: String, pub source: Id128, pub target: Id128, pub relation: EdgeRelation, pub properties: BTreeMap, pub weight: f64, + pub created_at: Timestamp, + pub updated_at: Timestamp, + pub deleted_at: Option, } /// Property values stored on entities, links, and notes. @@ -159,10 +176,11 @@ mod tests { let entity = Entity { header: Header::new( Id128::from_u128(1), - Namespace::default(), + Namespace::local(), Timestamp::from_secs(1700000000), ), kind: EntityKind::Person, + entity_type: Some("researcher".into()), name: "Ocean".into(), description: None, properties: props, @@ -171,6 +189,7 @@ mod tests { }; assert_eq!(entity.kind, EntityKind::Person); assert_eq!(entity.kind.name(), "person"); + assert_eq!(entity.entity_type.as_deref(), Some("researcher")); assert_eq!(entity.properties.len(), 2); } @@ -199,6 +218,36 @@ mod tests { assert_eq!(EntityKind::from_str("repo").unwrap(), EntityKind::Project); assert_eq!(EntityKind::from_str("author").unwrap(), EntityKind::Person); assert_eq!(EntityKind::from_str("lab").unwrap(), EntityKind::Org); + assert_eq!(EntityKind::from_str("art").unwrap(), EntityKind::Artifact); + assert_eq!(EntityKind::from_str("svc").unwrap(), EntityKind::Service); + } + + #[test] + fn entity_kind_artifact_and_service_roundtrip() { + assert_eq!(EntityKind::Artifact.name(), "artifact"); + assert_eq!(EntityKind::Service.name(), "service"); + assert_eq!( + EntityKind::from_str("artifact").unwrap(), + EntityKind::Artifact + ); + assert_eq!( + EntityKind::from_str("service").unwrap(), + EntityKind::Service + ); + } + + #[test] + fn entity_kind_all_has_eight_variants() { + assert_eq!(EntityKind::ALL.len(), 8); + assert!(EntityKind::ALL.contains(&EntityKind::Artifact)); + 
assert!(EntityKind::ALL.contains(&EntityKind::Service)); + } + + #[test] + fn entity_kind_unknown_valid_list_includes_new_kinds() { + let err = EntityKind::from_str("gadget").unwrap_err(); + assert!(err.valid.contains(&"artifact")); + assert!(err.valid.contains(&"service")); } #[test] @@ -220,13 +269,18 @@ mod tests { #[test] fn link_construction() { + let ts = Timestamp::from_secs(1700000000); let link = Link { id: Id128::from_u128(100), + namespace: "default".into(), source: Id128::from_u128(1), target: Id128::from_u128(2), relation: EdgeRelation::Extends, properties: BTreeMap::new(), weight: 1.0, + created_at: ts, + updated_at: ts, + deleted_at: None, }; assert_eq!(link.relation, EdgeRelation::Extends); } diff --git a/crates/khive-types/src/event.rs b/crates/khive-types/src/event.rs index c165231b..496c74f2 100644 --- a/crates/khive-types/src/event.rs +++ b/crates/khive-types/src/event.rs @@ -6,6 +6,7 @@ extern crate alloc; use alloc::string::String; +use alloc::vec::Vec; use core::fmt; use crate::{Header, Id128, SubstrateKind}; @@ -16,20 +17,22 @@ use crate::{Header, Id128, SubstrateKind}; pub struct Event { #[cfg_attr(feature = "serde", serde(flatten))] pub header: Header, - /// The verb that was executed (e.g., "create", "search", "traverse"). + /// The verb that produced the event. pub verb: String, /// Which substrate type was acted upon. pub substrate: SubstrateKind, - /// Who performed the action (free-form actor string). - pub actor: String, - /// Outcome of the verb execution. - pub outcome: EventOutcome, - /// Optional verb-specific structured data (JSON in DB). - pub data: Option, - /// Duration of the verb execution in microseconds. - pub duration_us: u64, - /// ID of the substrate record that was acted upon, if applicable. - pub target_id: Option, + /// Who performed the action. Profile- or system-produced events may omit it. + pub actor: Option, + /// Typed event discriminant used by replay, projections, and workers. 
+ pub kind: EventKind, + /// Typed payload surface for known event families; raw JSON is still allowed. + pub payload: EventPayload, + /// Payload schema version interpreted per `kind`. + pub payload_schema_version: u32, + /// Brain profile state version observed when the event was emitted. + pub profile_state_version: Option, + /// Logical aggregate threaded across related event ids. + pub aggregate: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)] @@ -58,15 +61,321 @@ impl fmt::Display for EventOutcome { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] +pub enum EventKind { + Audit, + RecallExecuted, + RerankExecuted, + SearchExecuted, + LinkCreated, + EntityCreated, + EntityUpdated, + EntityDeleted, + EntityMerged, + NoteCreated, + NoteUpdated, + NoteDeleted, + EdgeUpdated, + EdgeDeleted, + TaskTransitioned, + FeedbackExplicit, + ProfileResolutionRecommended, + ProfileMerged, + EmbeddingModelChanged, + EmbeddingMigrationCompleted, + EmbeddingMigrationFailed, + EmbeddingDriftDetected, + ProposalCreated, + ProposalReviewed, + ProposalApplied, + ProposalWithdrawn, +} + +impl EventKind { + pub const ALL: [Self; 26] = [ + Self::Audit, + Self::RecallExecuted, + Self::RerankExecuted, + Self::SearchExecuted, + Self::LinkCreated, + Self::EntityCreated, + Self::EntityUpdated, + Self::EntityDeleted, + Self::EntityMerged, + Self::NoteCreated, + Self::NoteUpdated, + Self::NoteDeleted, + Self::EdgeUpdated, + Self::EdgeDeleted, + Self::TaskTransitioned, + Self::FeedbackExplicit, + Self::ProfileResolutionRecommended, + Self::ProfileMerged, + Self::EmbeddingModelChanged, + Self::EmbeddingMigrationCompleted, + Self::EmbeddingMigrationFailed, + Self::EmbeddingDriftDetected, + Self::ProposalCreated, + Self::ProposalReviewed, + Self::ProposalApplied, + Self::ProposalWithdrawn, + ]; + + pub const fn name(self) -> 
&'static str { + match self { + Self::Audit => "audit", + Self::RecallExecuted => "recall_executed", + Self::RerankExecuted => "rerank_executed", + Self::SearchExecuted => "search_executed", + Self::LinkCreated => "link_created", + Self::EntityCreated => "entity_created", + Self::EntityUpdated => "entity_updated", + Self::EntityDeleted => "entity_deleted", + Self::EntityMerged => "entity_merged", + Self::NoteCreated => "note_created", + Self::NoteUpdated => "note_updated", + Self::NoteDeleted => "note_deleted", + Self::EdgeUpdated => "edge_updated", + Self::EdgeDeleted => "edge_deleted", + Self::TaskTransitioned => "task_transitioned", + Self::FeedbackExplicit => "feedback_explicit", + Self::ProfileResolutionRecommended => "profile_resolution_recommended", + Self::ProfileMerged => "profile_merged", + Self::EmbeddingModelChanged => "embedding_model_changed", + Self::EmbeddingMigrationCompleted => "embedding_migration_completed", + Self::EmbeddingMigrationFailed => "embedding_migration_failed", + Self::EmbeddingDriftDetected => "embedding_drift_detected", + Self::ProposalCreated => "proposal_created", + Self::ProposalReviewed => "proposal_reviewed", + Self::ProposalApplied => "proposal_applied", + Self::ProposalWithdrawn => "proposal_withdrawn", + } + } +} + +impl fmt::Display for EventKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} + +const EVENT_KIND_VALID: &[&str] = &[ + "audit", + "recall_executed", + "rerank_executed", + "search_executed", + "link_created", + "entity_created", + "entity_updated", + "entity_deleted", + "entity_merged", + "note_created", + "note_updated", + "note_deleted", + "edge_updated", + "edge_deleted", + "task_transitioned", + "feedback_explicit", + "profile_resolution_recommended", + "profile_merged", + "embedding_model_changed", + "embedding_migration_completed", + "embedding_migration_failed", + "embedding_drift_detected", + "proposal_created", + "proposal_reviewed", + 
"proposal_applied", + "proposal_withdrawn", +]; + +impl core::str::FromStr for EventKind { + type Err = crate::error::UnknownVariant; + + fn from_str(s: &str) -> Result { + match s.trim().to_ascii_lowercase().as_str() { + "audit" => Ok(Self::Audit), + "recall_executed" => Ok(Self::RecallExecuted), + "rerank_executed" => Ok(Self::RerankExecuted), + "search_executed" => Ok(Self::SearchExecuted), + "link_created" => Ok(Self::LinkCreated), + "entity_created" => Ok(Self::EntityCreated), + "entity_updated" => Ok(Self::EntityUpdated), + "entity_deleted" => Ok(Self::EntityDeleted), + "entity_merged" => Ok(Self::EntityMerged), + "note_created" => Ok(Self::NoteCreated), + "note_updated" => Ok(Self::NoteUpdated), + "note_deleted" => Ok(Self::NoteDeleted), + "edge_updated" => Ok(Self::EdgeUpdated), + "edge_deleted" => Ok(Self::EdgeDeleted), + "task_transitioned" => Ok(Self::TaskTransitioned), + "feedback_explicit" => Ok(Self::FeedbackExplicit), + "profile_resolution_recommended" => Ok(Self::ProfileResolutionRecommended), + "profile_merged" => Ok(Self::ProfileMerged), + "embedding_model_changed" => Ok(Self::EmbeddingModelChanged), + "embedding_migration_completed" => Ok(Self::EmbeddingMigrationCompleted), + "embedding_migration_failed" => Ok(Self::EmbeddingMigrationFailed), + "embedding_drift_detected" => Ok(Self::EmbeddingDriftDetected), + "proposal_created" => Ok(Self::ProposalCreated), + "proposal_reviewed" => Ok(Self::ProposalReviewed), + "proposal_applied" => Ok(Self::ProposalApplied), + "proposal_withdrawn" => Ok(Self::ProposalWithdrawn), + other => Err(crate::error::UnknownVariant::new( + "event_kind", + other, + EVENT_KIND_VALID, + )), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct AggregateRef { + pub kind: String, + pub id: Id128, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr( + feature = 
"serde", + serde(tag = "kind", content = "payload", rename_all = "snake_case") +)] +pub enum EventPayload { + Json(String), + RerankExecuted(RerankExecutedPayload), + ProposalCreated(ProposalCreatedPayload), + ProposalReviewed(ProposalReviewedPayload), + ProposalApplied(ProposalAppliedPayload), + ProposalWithdrawn(ProposalWithdrawnPayload), +} + +impl Default for EventPayload { + fn default() -> Self { + Self::Json("{}".into()) + } +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct RerankExecutedPayload { + pub served_by_profile_id: Option, + pub model_id: Id128, + pub candidates: Vec, + pub reranked: Vec<(Id128, Vec<(String, f32)>)>, + pub final_scores: Vec<(Id128, f32)>, + pub latency_us: u64, + pub hook_applied: bool, + pub hook_target_match: bool, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ProposalCreatedPayload { + pub proposal_id: Id128, + pub proposer: String, + pub title: String, + pub description: String, + pub changeset: ProposalChangeset, + pub reviewers: Vec, + pub expiry: Option, + pub parent_id: Option, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "kind", rename_all = "snake_case"))] +pub enum ProposalChangeset { + AddEntity { + entity: String, + }, + UpdateEntity { + id: Id128, + patch: String, + }, + AddEdge { + source: Id128, + target: Id128, + relation: crate::EdgeRelation, + weight: Option, + }, + AddNote { + note: String, + }, + MergeEntities { + into: Id128, + from: Id128, + }, + SupersedeEntity { + old: Id128, + new: Id128, + }, + Compound { + steps: Vec, + }, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ProposalReviewedPayload { + pub proposal_id: Id128, + pub reviewer: String, 
+ pub decision: ProposalDecision, + pub comment: Option, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] +pub enum ProposalDecision { + Approve, + Reject, + Comment, + RequestChanges, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ProposalAppliedPayload { + pub proposal_id: Id128, + pub applied_at: crate::Timestamp, + pub applied_by: String, + pub result: ApplyResult, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] +pub enum ApplyResult { + Success { + created_records: Vec, + }, + Failed { + error: String, + applied_step_count: u32, + }, +} + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ProposalWithdrawnPayload { + pub proposal_id: Id128, + pub by: String, + pub reason: Option, +} + /// Builder for events. Used by the verb dispatch path. 
pub struct EventBuilder { verb: String, substrate: SubstrateKind, - actor: String, - outcome: EventOutcome, - data: Option, - duration_us: u64, - target_id: Option, + actor: Option, + kind: EventKind, + payload: EventPayload, + payload_schema_version: u32, + profile_state_version: Option, + aggregate: Option, } impl EventBuilder { @@ -78,31 +387,37 @@ impl EventBuilder { Self { verb: verb.into(), substrate, - actor: actor.into(), - outcome: EventOutcome::Success, - data: None, - duration_us: 0, - target_id: None, + actor: Some(actor.into()), + kind: EventKind::Audit, + payload: EventPayload::default(), + payload_schema_version: 1, + profile_state_version: None, + aggregate: None, } } - pub fn outcome(mut self, outcome: EventOutcome) -> Self { - self.outcome = outcome; + pub fn kind(mut self, kind: EventKind) -> Self { + self.kind = kind; + self + } + + pub fn payload(mut self, payload: EventPayload) -> Self { + self.payload = payload; self } - pub fn data(mut self, data: impl Into) -> Self { - self.data = Some(data.into()); + pub fn payload_schema_version(mut self, version: u32) -> Self { + self.payload_schema_version = version; self } - pub fn duration_us(mut self, us: u64) -> Self { - self.duration_us = us; + pub fn profile_state_version(mut self, version: u64) -> Self { + self.profile_state_version = Some(version); self } - pub fn target_id(mut self, id: Id128) -> Self { - self.target_id = Some(id); + pub fn aggregate(mut self, aggregate: AggregateRef) -> Self { + self.aggregate = Some(aggregate); self } @@ -112,48 +427,77 @@ impl EventBuilder { verb: self.verb, substrate: self.substrate, actor: self.actor, - outcome: self.outcome, - data: self.data, - duration_us: self.duration_us, - target_id: self.target_id, + kind: self.kind, + payload: self.payload, + payload_schema_version: self.payload_schema_version, + profile_state_version: self.profile_state_version, + aggregate: self.aggregate, } } } #[cfg(test)] mod tests { + extern crate alloc; + use super::*; use 
crate::{Namespace, Timestamp}; fn header() -> Header { Header::new( Id128::from_u128(1), - Namespace::default(), + Namespace::local(), Timestamp::from_secs(1700000000), ) } #[test] - fn event_builder() { - let event = EventBuilder::new("search", SubstrateKind::Note, "agent:research") - .outcome(EventOutcome::Success) - .duration_us(1500) - .target_id(Id128::from_u128(42)) + fn event_kind_parse_roundtrip() { + for kind in EventKind::ALL { + let parsed: EventKind = kind + .name() + .parse() + .expect("EventKind::name must parse back"); + assert_eq!(parsed, kind); + } + } + + #[test] + fn rerank_payload_records_served_profile() { + let payload = EventPayload::RerankExecuted(RerankExecutedPayload { + served_by_profile_id: Some("profile-a".into()), + model_id: Id128::from_u128(1), + candidates: Vec::new(), + reranked: Vec::new(), + final_scores: Vec::new(), + latency_us: 100, + hook_applied: false, + hook_target_match: false, + }); + let event = EventBuilder::new("rerank", SubstrateKind::Note, "agent:test") + .kind(EventKind::RerankExecuted) + .payload(payload) .build(header()); - assert_eq!(event.verb, "search"); - assert_eq!(event.substrate, SubstrateKind::Note); - assert_eq!(event.actor, "agent:research"); - assert_eq!(event.outcome, EventOutcome::Success); - assert_eq!(event.duration_us, 1500); - assert_eq!(event.target_id, Some(Id128::from_u128(42))); + if let EventPayload::RerankExecuted(ref p) = event.payload { + assert_eq!(p.served_by_profile_id.as_deref(), Some("profile-a")); + } else { + panic!("unexpected payload variant"); + } } #[test] - fn denied_outcome() { - let event = EventBuilder::new("create", SubstrateKind::Note, "user:ocean") - .outcome(EventOutcome::Denied) + fn proposal_payloads_are_typed() { + let payload = EventPayload::ProposalReviewed(ProposalReviewedPayload { + proposal_id: Id128::from_u128(42), + reviewer: "ocean".into(), + decision: ProposalDecision::Approve, + comment: None, + }); + let event = EventBuilder::new("review", 
SubstrateKind::Entity, "ocean") + .kind(EventKind::ProposalReviewed) + .payload(payload) .build(header()); - assert_eq!(event.outcome, EventOutcome::Denied); + assert_eq!(event.kind.name(), "proposal_reviewed"); } } diff --git a/crates/khive-types/src/hash.rs b/crates/khive-types/src/hash.rs new file mode 100644 index 00000000..83bab5f3 --- /dev/null +++ b/crates/khive-types/src/hash.rs @@ -0,0 +1,91 @@ +//! 256-bit content hash for checkpoint integrity verification. +//! +//! # Formal proof reference +//! +//! `proofs/Retrieval/HNSW.lean` — hash identity used in checkpoint +//! compatibility checks (khive.Retrieval.HNSW.checkpoint_correctness). + +use core::fmt; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +/// 256-bit (32-byte) content hash. +/// +/// Used as a content-addressed identifier for HNSW checkpoints and other +/// snapshot artifacts. The underlying algorithm is caller-defined; the type +/// carries the raw bytes without encoding assumptions. +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(transparent))] +pub struct Hash32([u8; 32]); + +impl Hash32 { + /// Zero hash (nil value). + pub const ZERO: Self = Self([0u8; 32]); + + /// Construct from raw bytes. + #[inline] + pub const fn from_bytes(bytes: [u8; 32]) -> Self { + Self(bytes) + } + + /// Return the raw byte representation. + #[inline] + pub const fn as_bytes(&self) -> &[u8; 32] { + &self.0 + } + + /// Compute a BLAKE3 hash over the given byte slice. + /// + /// Requires the `blake3` feature. + #[cfg(feature = "blake3")] + #[inline] + pub fn from_blake3(data: &[u8]) -> Self { + let hash = blake3::hash(data); + Self(*hash.as_bytes()) + } + + /// Constant-time equality check. + /// + /// Accumulates XOR over all 32 bytes without early exit so the comparison + /// takes the same number of iterations regardless of where bytes differ. 
+ /// Suitable for integrity comparisons where timing side-channels are a + /// concern. The `#[inline(never)]` attribute discourages the compiler from + /// inlining and optimising away the full-loop traversal. + #[inline(never)] + pub fn eq_ct(&self, other: &Self) -> bool { + let diff = self + .0 + .iter() + .zip(other.0.iter()) + .fold(0u8, |acc, (a, b)| acc | (a ^ b)); + diff == 0 + } +} + +impl From<[u8; 32]> for Hash32 { + #[inline] + fn from(bytes: [u8; 32]) -> Self { + Self(bytes) + } +} + +impl fmt::Debug for Hash32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Hash32(")?; + for b in &self.0 { + write!(f, "{b:02x}")?; + } + write!(f, ")") + } +} + +impl fmt::Display for Hash32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for b in &self.0 { + write!(f, "{b:02x}")?; + } + Ok(()) + } +} diff --git a/crates/khive-types/src/lib.rs b/crates/khive-types/src/lib.rs index 2ef3e6be..9b18541e 100644 --- a/crates/khive-types/src/lib.rs +++ b/crates/khive-types/src/lib.rs @@ -16,6 +16,7 @@ pub mod edge; pub mod entity; pub mod error; pub mod event; +pub mod hash; pub mod header; pub mod id; pub mod khive_error; @@ -29,13 +30,23 @@ pub mod vector; pub use edge::{EdgeCategory, EdgeRelation}; pub use entity::{Entity, EntityKind, Link, PropertyValue}; pub use error::{TypeError, UnknownVariant}; -pub use event::{Event, EventBuilder, EventOutcome}; +pub use event::{ + AggregateRef, ApplyResult, Event, EventBuilder, EventKind, EventOutcome, EventPayload, + ProposalAppliedPayload, ProposalChangeset, ProposalCreatedPayload, ProposalDecision, + ProposalReviewedPayload, ProposalWithdrawnPayload, RerankExecutedPayload, +}; +pub use hash::Hash32; pub use header::Header; pub use id::{Id128, ParseIdError}; pub use khive_error::{Details, ErrorCode, ErrorDomain, ErrorKind, KhiveError, RetryHint}; pub use namespace::Namespace; -pub use note::{Note, NoteKind, NoteStatus}; -pub use pack::{EdgeEndpointRule, EndpointKind, Pack, VerbDef}; 
+pub use note::{Note, NoteStatus}; +#[allow(deprecated)] +pub use pack::VerbDef; +pub use pack::{ + EdgeEndpointRule, EndpointKind, HandlerDef, NoteKindSpec, NoteLifecycleSpec, Pack, + PackSchemaPlan, VerbCategory, Visibility, +}; pub use substrate::{SubstrateKind, SUBSTRATE_COUNT}; pub use timestamp::Timestamp; pub use vector::DistanceMetric; diff --git a/crates/khive-types/src/namespace.rs b/crates/khive-types/src/namespace.rs index 4e8105dc..31d4a6a0 100644 --- a/crates/khive-types/src/namespace.rs +++ b/crates/khive-types/src/namespace.rs @@ -1,10 +1,10 @@ -//! Namespace — string-based scoping for substrate records. +//! Namespace — validated string-based scoping for substrate records. //! //! In khive OSS, namespace is a plain string (e.g., `"local"`, `"research"`, //! `"lattice-project"`). It groups records and supports cross-namespace //! queries via the entity graph. //! -//! Multi-tenant deployments (e.g., khive.ai hosted) add capability-based +//! Multi-tenant deployments (hosted khive deployments) add capability-based //! access controls on top in a separate crate — those are not part of the //! open-source runtime. @@ -12,24 +12,80 @@ extern crate alloc; use alloc::string::String; use core::fmt; +/// Validation error returned when a namespace string is rejected. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub enum NamespaceError { + Empty, + TooLong { max: usize }, + InvalidCharacter { ch: char }, + EmptySegment, + TrailingSeparator, +} + +impl fmt::Display for NamespaceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Empty => f.write_str("namespace must not be empty"), + Self::TooLong { max } => write!(f, "namespace exceeds {max} characters"), + Self::InvalidCharacter { ch } => { + write!(f, "namespace contains invalid character {ch:?}") + } + Self::EmptySegment => f.write_str("namespace must not contain empty path segments"), + Self::TrailingSeparator => f.write_str("namespace must not end with ':'"), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for NamespaceError {} + +fn validate_namespace(value: &str) -> Result<(), NamespaceError> { + const MAX_LEN: usize = 256; + if value.is_empty() { + return Err(NamespaceError::Empty); + } + if value.len() > MAX_LEN { + return Err(NamespaceError::TooLong { max: MAX_LEN }); + } + if value.ends_with(':') { + return Err(NamespaceError::TrailingSeparator); + } + for segment in value.split(':') { + if segment.is_empty() { + return Err(NamespaceError::EmptySegment); + } + for ch in segment.chars() { + if !ch.is_ascii_alphanumeric() && ch != '-' && ch != '_' && ch != '.' { + return Err(NamespaceError::InvalidCharacter { ch }); + } + } + } + Ok(()) +} + +/// A validated, opaque namespace identifier. +/// +/// Construct via [`Namespace::parse`] or [`Namespace::local`]. The absence of +/// `From` / `From<&str>` impls is intentional — callers must validate. #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[cfg_attr(feature = "serde", serde(transparent))] pub struct Namespace(String); impl Namespace { - /// Create a namespace from any string-like value. 
- #[inline] - pub fn new(s: impl Into) -> Self { - Self(s.into()) - } + /// The name of the default local namespace. + pub const LOCAL: &'static str = "local"; - /// The default namespace name. - pub const DEFAULT: &'static str = "local"; + /// Parse and validate a namespace string. + /// + /// Returns `Err(NamespaceError)` if the string is empty, too long, contains + /// invalid characters, has empty segments, or ends with `:`. + pub fn parse(value: &str) -> Result { + validate_namespace(value)?; + Ok(Self(String::from(value))) + } - /// Construct the default namespace. - pub fn default_ns() -> Self { - Self::new(Self::DEFAULT) + /// Construct the default `"local"` namespace (always valid; no allocation). + pub fn local() -> Self { + Self(String::from(Self::LOCAL)) } #[inline] @@ -37,22 +93,24 @@ impl Namespace { &self.0 } - /// True if `self` is a hierarchical child of `parent` - /// (e.g., `"research:lattice"` is a child of `"research"`). - pub fn is_child_of(&self, parent: &Namespace) -> bool { - self.0.len() > parent.0.len() - && self.0.starts_with(parent.as_str()) - && self.0.as_bytes().get(parent.0.len()) == Some(&b':') - } - pub fn into_inner(self) -> String { self.0 } } -impl Default for Namespace { - fn default() -> Self { - Self::default_ns() +impl core::convert::TryFrom for Namespace { + type Error = NamespaceError; + + fn try_from(value: String) -> Result { + Self::parse(&value) + } +} + +impl core::convert::TryFrom<&str> for Namespace { + type Error = NamespaceError; + + fn try_from(value: &str) -> Result { + Self::parse(value) } } @@ -69,17 +127,31 @@ impl AsRef for Namespace { } } -impl From<&str> for Namespace { - #[inline] - fn from(s: &str) -> Self { - Self::new(s) - } +/// Returns `true` if `child` is a hierarchical prefix-descendant of `parent`. +/// +/// Example: `"research:lattice"` is a prefix-child of `"research"`. 
+pub fn has_segment_prefix(child: &Namespace, parent: &Namespace) -> bool { + let c = child.as_str(); + let p = parent.as_str(); + c.len() > p.len() && c.starts_with(p) && c.as_bytes().get(p.len()) == Some(&b':') } -impl From for Namespace { - #[inline] - fn from(s: String) -> Self { - Self(s) +#[cfg(feature = "serde")] +mod serde_impl { + use super::*; + use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; + + impl Serialize for Namespace { + fn serialize(&self, s: S) -> Result { + s.serialize_str(&self.0) + } + } + + impl<'de> Deserialize<'de> for Namespace { + fn deserialize>(d: D) -> Result { + let s = String::deserialize(d)?; + Namespace::parse(&s).map_err(de::Error::custom) + } } } @@ -88,24 +160,119 @@ mod tests { use super::*; #[test] - fn construction() { - let ns = Namespace::new("research"); + fn parse_valid_namespace() { + let ns = Namespace::parse("research").unwrap(); assert_eq!(ns.as_str(), "research"); } #[test] - fn default_is_local() { - assert_eq!(Namespace::default().as_str(), "local"); + fn local_is_local() { + assert_eq!(Namespace::local().as_str(), "local"); + } + + #[test] + fn parse_hierarchical_namespace() { + let ns = Namespace::parse("research:lattice").unwrap(); + assert_eq!(ns.as_str(), "research:lattice"); } #[test] - fn is_child_of() { - let parent = Namespace::new("research"); - let child = Namespace::new("research:lattice"); - let sibling = Namespace::new("other"); + fn parse_empty_returns_error() { + assert_eq!(Namespace::parse(""), Err(NamespaceError::Empty)); + } - assert!(child.is_child_of(&parent)); - assert!(!sibling.is_child_of(&parent)); - assert!(!parent.is_child_of(&parent)); + #[test] + fn parse_trailing_separator_returns_error() { + assert_eq!( + Namespace::parse("research:"), + Err(NamespaceError::TrailingSeparator) + ); + } + + #[test] + fn parse_double_colon_returns_empty_segment() { + assert_eq!(Namespace::parse("a::b"), Err(NamespaceError::EmptySegment)); + } + + #[test] + fn 
parse_invalid_char_returns_error() { + assert!(matches!( + Namespace::parse("bad namespace"), + Err(NamespaceError::InvalidCharacter { ch: ' ' }) + )); + } + + #[test] + fn try_from_string() { + use core::convert::TryFrom; + let ns = Namespace::try_from(String::from("my-ns")).unwrap(); + assert_eq!(ns.as_str(), "my-ns"); + } + + #[test] + fn has_segment_prefix_detects_child() { + let parent = Namespace::parse("research").unwrap(); + let child = Namespace::parse("research:lattice").unwrap(); + let sibling = Namespace::parse("other").unwrap(); + + assert!(has_segment_prefix(&child, &parent)); + assert!(!has_segment_prefix(&sibling, &parent)); + assert!(!has_segment_prefix(&parent, &parent)); + } + + #[cfg(feature = "serde")] + #[test] + fn serde_roundtrip() { + let ns = Namespace::parse("proj-123").unwrap(); + let json = serde_json::to_string(&ns).unwrap(); + let back: Namespace = serde_json::from_str(&json).unwrap(); + assert_eq!(ns, back); + } + + #[cfg(feature = "serde")] + #[test] + fn serde_deserialize_rejects_invalid() { + let result: Result = serde_json::from_str("\"\""); + assert!(result.is_err()); + } + + #[test] + fn parse_slash_is_rejected() { + // Forward slashes are not in the allowed charset (alphanumeric, `-`, `_`, `.`). + assert!(matches!( + Namespace::parse("tenant/sub"), + Err(NamespaceError::InvalidCharacter { ch: '/' }) + )); + } + + #[test] + fn parse_unicode_is_rejected() { + // Only ASCII characters are allowed; non-ASCII (e.g. accented letters) must fail. + assert!(matches!( + Namespace::parse("café"), + Err(NamespaceError::InvalidCharacter { .. }) + )); + } + + #[test] + fn parse_dot_is_valid() { + // Dots are explicitly allowed to support version-style namespaces like "v1.5". + let ns = Namespace::parse("v1.5").unwrap(); + assert_eq!(ns.as_str(), "v1.5"); + } + + #[test] + fn parse_too_long_is_rejected() { + let long = "a".repeat(257); + assert!(matches!( + Namespace::parse(&long), + Err(NamespaceError::TooLong { .. 
}) + )); + } + + #[test] + fn parse_exactly_256_chars_is_valid() { + let max = "a".repeat(256); + assert!(Namespace::parse(&max).is_ok()); } } diff --git a/crates/khive-types/src/note.rs b/crates/khive-types/src/note.rs index c649af3c..0e1ca49f 100644 --- a/crates/khive-types/src/note.rs +++ b/crates/khive-types/src/note.rs @@ -1,4 +1,4 @@ -//! Note substrate — temporal-referential records (ADR-004, ADR-019). +//! Note substrate — temporal-referential records (ADR-004, ADR-013). extern crate alloc; use alloc::collections::BTreeMap; @@ -9,118 +9,62 @@ use core::fmt; use crate::entity::PropertyValue; use crate::{Header, Timestamp}; -/// Closed taxonomy for note classification (ADR-019). -/// -/// 5 kinds covering the cognitive functions an agent performs while researching. -/// Closed and exhaustive — adding a sixth requires a new ADR. -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +/// Lifecycle status of a note. Cross-cutting across all note kinds. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] -pub enum NoteKind { - /// An empirical capture — what was noticed or measured. +pub enum NoteStatus { #[default] - Observation, - /// An analytical or synthetic conclusion drawn from observations. - Insight, - /// An open inquiry, research direction, or unknown. - Question, - /// A committed choice with rationale. - Decision, - /// An external pointer with context (paper, URL, citation note). 
- Reference, + Active, + Archived, + Deleted, } -impl NoteKind { - pub const ALL: [Self; 5] = [ - Self::Observation, - Self::Insight, - Self::Question, - Self::Decision, - Self::Reference, - ]; - +impl NoteStatus { pub const fn name(self) -> &'static str { match self { - Self::Observation => "observation", - Self::Insight => "insight", - Self::Question => "question", - Self::Decision => "decision", - Self::Reference => "reference", + Self::Active => "active", + Self::Archived => "archived", + Self::Deleted => "deleted", } } } -impl fmt::Display for NoteKind { +impl fmt::Display for NoteStatus { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.name()) } } -const NOTE_KIND_VALID: &[&str] = &[ - "observation", - "insight", - "question", - "decision", - "reference", -]; - -impl core::str::FromStr for NoteKind { +impl core::str::FromStr for NoteStatus { type Err = crate::error::UnknownVariant; - fn from_str(s: &str) -> Result { match s.trim().to_ascii_lowercase().as_str() { - "observation" | "obs" => Ok(Self::Observation), - "insight" | "finding" => Ok(Self::Insight), - "question" | "q" => Ok(Self::Question), - "decision" | "choice" => Ok(Self::Decision), - "reference" | "ref" | "citation" => Ok(Self::Reference), + "active" => Ok(Self::Active), + "archived" => Ok(Self::Archived), + "deleted" => Ok(Self::Deleted), other => Err(crate::error::UnknownVariant::new( - "note_kind", + "note_status", other, - NOTE_KIND_VALID, + &["active", "archived", "deleted"], )), } } } -/// Lifecycle status of a note. Cross-cutting across all note kinds. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))] -pub enum NoteStatus { - #[default] - Active, - Archived, -} - -impl NoteStatus { - pub const fn name(self) -> &'static str { - match self { - Self::Active => "active", - Self::Archived => "archived", - } - } -} - -impl fmt::Display for NoteStatus { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.name()) - } -} - /// A note record — temporal-referential content plus free-form properties. #[derive(Clone, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Note { #[cfg_attr(feature = "serde", serde(flatten))] pub header: Header, - pub kind: NoteKind, + pub kind: String, pub status: NoteStatus, pub content: String, pub properties: BTreeMap, pub tags: Vec, - pub salience: f64, - pub decay_factor: f64, + pub salience: Option, + pub decay_factor: Option, pub expires_at: Option, pub deleted_at: Option, } @@ -133,78 +77,53 @@ mod tests { fn test_header() -> Header { Header::new( Id128::from_u128(1), - Namespace::default(), + Namespace::local(), Timestamp::from_secs(1700000000), ) } - #[test] - fn note_kind_all_have_names() { - for kind in NoteKind::ALL { - assert!(!kind.name().is_empty()); - } - } - - #[test] - fn note_kind_default_is_observation() { - assert_eq!(NoteKind::default(), NoteKind::Observation); - } - - #[test] - fn note_kind_display_roundtrip() { - use core::str::FromStr; - for kind in NoteKind::ALL { - let s = alloc::format!("{kind}"); - let parsed = NoteKind::from_str(&s).unwrap(); - assert_eq!(parsed, kind); - } - } - - #[test] - fn note_kind_from_str_case_insensitive() { - use core::str::FromStr; - assert_eq!( - NoteKind::from_str("OBSERVATION").unwrap(), - NoteKind::Observation - ); - assert_eq!(NoteKind::from_str("Insight").unwrap(), NoteKind::Insight); - } - - #[test] - fn 
note_kind_from_str_aliases() { - use core::str::FromStr; - assert_eq!(NoteKind::from_str("obs").unwrap(), NoteKind::Observation); - assert_eq!(NoteKind::from_str("finding").unwrap(), NoteKind::Insight); - assert_eq!(NoteKind::from_str("q").unwrap(), NoteKind::Question); - assert_eq!(NoteKind::from_str("choice").unwrap(), NoteKind::Decision); - assert_eq!(NoteKind::from_str("ref").unwrap(), NoteKind::Reference); - assert_eq!(NoteKind::from_str("citation").unwrap(), NoteKind::Reference); - } - - #[test] - fn note_kind_from_str_unknown_errors() { - use core::str::FromStr; - let err = NoteKind::from_str("garbage").unwrap_err(); - assert_eq!(err.domain, "note_kind"); - assert_eq!(err.value, "garbage"); - assert!(err.valid.contains(&"observation")); - } - #[test] fn note_construction() { let note = Note { header: test_header(), - kind: NoteKind::Decision, + kind: String::from("decision"), status: NoteStatus::Active, content: String::from("Use BGE-base for multilingual corpus"), properties: BTreeMap::new(), tags: alloc::vec!["retrieval".into()], - salience: 0.8, - decay_factor: 0.01, + salience: Some(0.8), + decay_factor: Some(0.01), expires_at: None, deleted_at: None, }; - assert_eq!(note.kind, NoteKind::Decision); + assert_eq!(note.kind, "decision"); assert_eq!(note.tags.len(), 1); } + + #[test] + fn note_construction_uses_pack_owned_kind_string() { + let note = Note { + header: test_header(), + kind: String::from("decision"), + status: NoteStatus::Active, + content: String::from("test"), + properties: BTreeMap::new(), + tags: alloc::vec![], + salience: None, + decay_factor: None, + expires_at: None, + deleted_at: None, + }; + assert_eq!(note.kind, "decision"); + } + + #[test] + fn note_status_deleted_roundtrip() { + use core::str::FromStr; + assert_eq!( + NoteStatus::from_str("deleted").unwrap(), + NoteStatus::Deleted + ); + assert_eq!(NoteStatus::Deleted.name(), "deleted"); + } } diff --git a/crates/khive-types/src/pack.rs b/crates/khive-types/src/pack.rs index 
61f53d33..81169eac 100644 --- a/crates/khive-types/src/pack.rs +++ b/crates/khive-types/src/pack.rs @@ -10,13 +10,65 @@ use crate::edge::EdgeRelation; -/// Verb metadata for discovery and documentation. +/// Visibility tier for a handler (ADR-023). +/// +/// `Verb` entries appear on the MCP wire and are invokable by agents. +/// `Subhandler` entries are internal — callable by the operator via CLI +/// but not surfaced as top-level MCP verbs. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Visibility { + /// Externally invokable via MCP `request` tool. + Verb, + /// Internal — operator-only via `kkernel call `. + Subhandler, +} + +/// Illocutionary force classification for a verb handler (ADR-025). +/// +/// Follows Searle's five speech-act categories (1976). Every `Visibility::Verb` +/// handler in the MCP surface MUST carry a category. `Subhandler` entries may +/// use the category of their parent verb or `Assertive` as a sensible default. +/// +/// The category is a documentation / introspection tag. It is NOT used for +/// permission checking, transport routing, or return-shape selection (ADR-025 §4). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VerbCategory { + /// Speaker represents a state of affairs — retrieves and presents facts. + /// Examples: `get`, `list`, `search`, `recall`. + Assertive, + /// Speaker attempts to get the hearer to do something. + /// Examples: `assign`, `transition`. + Directive, + /// Speaker commits to a persistent change. + /// Examples: `create`, `remember`, `link`, `send`. + Commissive, + /// Speaker changes institutional status by fiat. + /// Examples: `update`, `delete`, `merge`, `complete`. + Declaration, + // `Expressive` is intentionally absent — no verb currently uses it (ADR-025 §Why expressive stays empty). +} + +/// Handler metadata for discovery and documentation (ADR-023, ADR-025). +/// +/// Replaces the previous `VerbDef`. 
Every entry carries a `visibility` tag +/// so the registry can separate the MCP-exposed surface from internal handlers, +/// and a `category` that classifies the illocutionary force of the verb +/// per the speech-act taxonomy in ADR-025. #[derive(Clone, Debug, PartialEq, Eq)] -pub struct VerbDef { +pub struct HandlerDef { pub name: &'static str, pub description: &'static str, + pub visibility: Visibility, + /// Illocutionary force classification (ADR-025). Use `Assertive` for + /// `Subhandler` entries that have no external callers. + pub category: VerbCategory, } +/// Backward-compatible type alias. Existing code that names `VerbDef` still +/// compiles; new code should use `HandlerDef` directly (ADR-023). +#[deprecated(since = "0.2.0", note = "Use HandlerDef instead (ADR-023)")] +pub type VerbDef = HandlerDef; + /// Match spec for one end of an [`EdgeEndpointRule`] (ADR-031). /// /// Identifies a substrate + kind pair that the rule applies to. Note that @@ -54,6 +106,62 @@ pub struct EdgeEndpointRule { pub target: EndpointKind, } +/// Lifecycle specification for a note kind (ADR-004 §NoteKindSpec). +/// +/// Declares which field holds the kind's domain state, the initial value, +/// terminal values, and allowed transitions. The runtime uses this to +/// validate lifecycle operations at the verb boundary without hard-coding +/// kind-specific logic in the shared CRUD path. +/// +/// Phase 1 (current): packs declare the spec; the runtime records it for +/// documentation and future enforcement. +/// Phase 2 (future ADR): the runtime uses `field` to route lifecycle writes +/// to a first-class column rather than `properties`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct NoteLifecycleSpec { + /// The field name that holds the kind's lifecycle state. + /// + /// ADR-004 mandates `"kind_status"` for pack-owned lifecycle fields to + /// avoid the semantic collision with `Note.status` (NoteStatus). 
+ pub field: &'static str, + /// The value assigned when a note of this kind is first created. + pub initial: &'static str, + /// Values from which no further transitions are possible. + pub terminal: &'static [&'static str], + /// Allowed `(from, to)` transitions. `"*"` as `from` matches any state. + pub transitions: &'static [(&'static str, &'static str)], +} + +/// Kind-level schema specification for a note kind (ADR-004 §NoteKindSpec). +/// +/// Each pack-registered note kind may declare a `NoteKindSpec` to describe +/// its lifecycle semantics. The runtime collects these at boot time via +/// [`Pack::NOTE_KIND_SPECS`] for documentation, introspection, and (in future +/// ADRs) enforcement. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct NoteKindSpec { + /// The note kind string this spec governs (e.g. `"task"`). + pub kind: &'static str, + /// Alternate names this kind accepts on the wire. + pub aliases: &'static [&'static str], + /// Lifecycle state machine for this kind. + pub lifecycle: NoteLifecycleSpec, +} + +/// DDL statements the pack needs applied to the auxiliary schema (ADR-019). +/// +/// Pack-auxiliary tables use idempotent `CREATE TABLE IF NOT EXISTS`; they are +/// not part of the core versioned migration chain. The runtime applies these +/// statements once at pack registration time (or startup) against the active +/// storage backend. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct PackSchemaPlan { + /// The pack this schema plan belongs to (used for error reporting). + pub pack: &'static str, + /// Idempotent SQL statements to apply. + pub statements: &'static [&'static str], +} + /// A composable module that contributes vocabulary, verbs, and edge endpoint /// rules to the khive runtime. /// @@ -76,9 +184,12 @@ pub trait Pack { /// Entity kinds this pack contributes to the runtime vocabulary. const ENTITY_KINDS: &'static [&'static str]; - /// Verbs this pack handles. The runtime routes verb calls to the pack - /// that declares them. 
- const VERBS: &'static [VerbDef]; + /// Handlers this pack registers (ADR-023). + /// + /// The runtime routes verb calls to the pack that declares them. + /// Only entries with `visibility: Visibility::Verb` are surfaced on the + /// MCP wire; `Visibility::Subhandler` entries are internal. + const HANDLERS: &'static [HandlerDef]; /// Additional edge endpoint rules this pack contributes (ADR-031). /// @@ -92,6 +203,36 @@ pub trait Pack { /// loaded pack set before any pack is registered. Defaults to empty /// so existing packs compile without changes. const REQUIRES: &'static [&'static str] = &[]; + + /// Lifecycle and schema specs for note kinds this pack owns (ADR-004). + /// + /// Packs that introduce note kinds with explicit lifecycle semantics + /// (e.g. GTD's `task` kind) declare the spec here. The runtime collects + /// these at boot time for introspection and future enforcement. Defaults + /// to empty so existing packs compile without changes. + const NOTE_KIND_SPECS: &'static [NoteKindSpec] = &[]; + + /// Pack-auxiliary schema plan (ADR-019). + /// + /// Packs that need their own auxiliary tables (e.g. GTD's + /// `gtd_lifecycle_audit`) declare idempotent DDL statements here. + /// The runtime applies them once at registration time. Defaults to + /// `None` so packs with no auxiliary schema cost nothing. + const SCHEMA_PLAN: Option = None; + + /// Validation rule IDs contributed by this pack (ADR-034). + /// + /// Rule IDs are namespaced by pack name: `/`. + /// The runtime merges rule IDs from all packs; the actual rule + /// implementations live in `khive-runtime::validation::ValidationRule` + /// (not in `khive-types`, which stays `no_std`). This const serves as + /// the declarative catalog of rule identifiers so the validation + /// infrastructure can enumerate what rules a pack claims without + /// loading the runtime. + /// + /// Defaults to empty — packs with no domain-specific validation rules + /// can leave this unset. 
+ const VALIDATION_RULES: &'static [&'static str] = &[]; } #[cfg(test)] @@ -104,9 +245,11 @@ mod tests { const NAME: &'static str = "test"; const NOTE_KINDS: &'static [&'static str] = &["memo"]; const ENTITY_KINDS: &'static [&'static str] = &["widget"]; - const VERBS: &'static [VerbDef] = &[VerbDef { + const HANDLERS: &'static [HandlerDef] = &[HandlerDef { name: "do_thing", description: "does a thing", + visibility: Visibility::Verb, + category: VerbCategory::Commissive, }]; } @@ -115,7 +258,24 @@ mod tests { assert_eq!(TestPack::NAME, "test"); assert_eq!(TestPack::NOTE_KINDS, &["memo"]); assert_eq!(TestPack::ENTITY_KINDS, &["widget"]); - assert_eq!(TestPack::VERBS.len(), 1); - assert_eq!(TestPack::VERBS[0].name, "do_thing"); + assert_eq!(TestPack::HANDLERS.len(), 1); + assert_eq!(TestPack::HANDLERS[0].name, "do_thing"); + assert_eq!(TestPack::HANDLERS[0].visibility, Visibility::Verb); + assert_eq!(TestPack::HANDLERS[0].category, VerbCategory::Commissive); + } + + #[test] + fn verb_category_variants_exist() { + // Just ensuring the enum variants are accessible — no runtime assertion + // needed beyond confirming they exist at compile time. 
+ let _ = VerbCategory::Assertive; + let _ = VerbCategory::Directive; + let _ = VerbCategory::Commissive; + let _ = VerbCategory::Declaration; + } + + #[test] + fn pack_validation_rules_default_empty() { + assert!(TestPack::VALIDATION_RULES.is_empty()); } } diff --git a/crates/khive-vcs-adapters/Cargo.toml b/crates/khive-vcs-adapters/Cargo.toml new file mode 100644 index 00000000..cfc9d080 --- /dev/null +++ b/crates/khive-vcs-adapters/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "khive-vcs-adapters" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +homepage.workspace = true +description = "KG import/export format adapters — CSV, JSON, and future format support (ADR-036)" + +[dependencies] +khive-types = { version = "0.2.1", path = "../khive-types" } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +uuid = { workspace = true } + +[dev-dependencies] diff --git a/crates/khive-vcs-adapters/src/adapter.rs b/crates/khive-vcs-adapters/src/adapter.rs new file mode 100644 index 00000000..e94f2941 --- /dev/null +++ b/crates/khive-vcs-adapters/src/adapter.rs @@ -0,0 +1,45 @@ +// Copyright 2026 khive contributors. Licensed under Apache-2.0. +// +//! The `FormatAdapter` trait (ADR-036 §Implementation §Crate structure). +//! +//! Adapters are stateful pure transforms: they hold streaming parser state and +//! produce [`EntityRecord`]/[`EdgeRecord`] streams. They have no dependency on +//! the database layer. + +use crate::error::AdapterError; +use crate::record::{EdgeRecord, EntityRecord}; + +/// A format adapter (ADR-036 §Implementation). +/// +/// Implementations parse a source format and yield entity and edge records +/// following the ADR-020 §2 field shapes. The adapter writes no database +/// state — its output is consumed by the standard `khive kg import` pipeline. +/// +/// Both iterators return `Result<_, AdapterError>`. 
A fatal error (e.g. a +/// missing required field) stops the iterator; non-fatal warnings accumulate +/// internally and are retrievable via [`FormatAdapter::warnings`]. +pub trait FormatAdapter { + /// Short name of the format handled by this adapter (e.g. `"csv"`, `"json"`). + fn name(&self) -> &str; + + /// Iterate over entity records in the source. + /// + /// The iterator returns `Ok(EntityRecord)` for each successfully parsed + /// entity and `Err(AdapterError)` for fatal structural failures. Non-fatal + /// issues (unknown optional fields, etc.) accumulate in [`warnings`]. + /// + /// [`warnings`]: FormatAdapter::warnings + fn entities(&mut self) -> impl Iterator>; + + /// Iterate over edge records in the source. + /// + /// Same error contract as [`entities`]. + /// + /// [`entities`]: FormatAdapter::entities + fn edges(&mut self) -> impl Iterator>; + + /// Non-fatal warnings accumulated during parsing (e.g. unknown columns, + /// missing optional fields). Empty until at least one of `entities()` or + /// `edges()` has been driven to exhaustion. + fn warnings(&self) -> &[String]; +} diff --git a/crates/khive-vcs-adapters/src/error.rs b/crates/khive-vcs-adapters/src/error.rs new file mode 100644 index 00000000..32dab958 --- /dev/null +++ b/crates/khive-vcs-adapters/src/error.rs @@ -0,0 +1,44 @@ +// Copyright 2026 khive contributors. Licensed under Apache-2.0. +// +//! Adapter error type (ADR-036). + +use thiserror::Error; + +/// An error produced by a format adapter. +/// +/// Per ADR-036 §6, fatal errors (missing required fields, structural failures) +/// are non-recoverable: the adapter aborts and the caller must handle the error +/// atomically. Non-fatal issues (unknown but non-required fields) are warnings +/// reported in the import summary. +#[derive(Debug, Error)] +pub enum AdapterError { + /// A required field is missing from a record. 
+ #[error("record {index}: missing required field '{field}'")] + MissingField { index: usize, field: String }, + + /// A field has an unexpected type or value. + #[error("record {index}: invalid value for field '{field}': {reason}")] + InvalidField { + index: usize, + field: String, + reason: String, + }, + + /// The source file cannot be parsed (structural failure). + #[error("parse error: {0}")] + Parse(String), + + /// An entity kind is unknown under strict schema mode. + #[error("record {index}: unknown entity kind '{kind}'")] + UnknownKind { index: usize, kind: String }, + + /// An edge relation is not in the ADR-002 closed set. + /// + /// This is always an error regardless of `--schema-mode` (ADR-036 §4). + #[error("record {index}: unknown edge relation '{relation}'")] + UnknownRelation { index: usize, relation: String }, + + /// A deferred format was requested. + #[error("format '{format}' is not yet implemented (deferred to P1/P2)")] + NotYetImplemented { format: String }, +} diff --git a/crates/khive-vcs-adapters/src/lib.rs b/crates/khive-vcs-adapters/src/lib.rs new file mode 100644 index 00000000..cb88c5a8 --- /dev/null +++ b/crates/khive-vcs-adapters/src/lib.rs @@ -0,0 +1,31 @@ +// Copyright 2026 khive contributors. Licensed under Apache-2.0. +// +//! KG import/export format adapters (ADR-036). +//! +//! Adapters are pure transforms in the two-stage pipeline: +//! +//! ```text +//! source file +//! | adapter (pure transform — no DB access) +//! intermediate NDJSON (entities + edges, in-memory or temp file) +//! | khive kg import (validates + loads) +//! working.db +//! ``` +//! +//! P0 (shipped): [`FormatAdapter`] trait, [`EntityRecord`], [`EdgeRecord`], +//! and the [`AdapterError`] type. +//! +//! P1 (deferred): BibTeX, Turtle/N-Triples, JSON-LD adapters. +//! P2 (deferred): GraphML, GEXF, Markdown adapters. 
+ +mod error; +pub use error::AdapterError; + +mod record; +pub use record::{EdgeRecord, EntityRecord}; + +mod adapter; +pub use adapter::FormatAdapter; + +/// Phase P0: format names accepted by the v0.5 adapter registry. +pub const PHASE0_FORMATS: &[&str] = &["csv", "tsv", "json", "ndjson"]; diff --git a/crates/khive-vcs-adapters/src/record.rs b/crates/khive-vcs-adapters/src/record.rs new file mode 100644 index 00000000..73ee2cc9 --- /dev/null +++ b/crates/khive-vcs-adapters/src/record.rs @@ -0,0 +1,40 @@ +// Copyright 2026 khive contributors. Licensed under Apache-2.0. +// +//! ADR-020 §2 record shapes for adapter output. + +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Entity record shape (ADR-020 §2) produced by adapters. +/// +/// Adapters produce these; the standard `khive kg import` pipeline validates +/// and loads them into `working.db`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EntityRecord { + pub id: Uuid, + pub kind: String, + pub name: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(default)] + pub properties: serde_json::Value, + #[serde(default)] + pub tags: Vec, +} + +/// Edge record shape (ADR-020 §2) produced by adapters. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EdgeRecord { + pub edge_id: Uuid, + pub source: String, + pub target: String, + pub relation: String, + #[serde(default = "default_weight")] + pub weight: f64, + #[serde(default)] + pub properties: serde_json::Value, +} + +fn default_weight() -> f64 { + 0.7 +} diff --git a/crates/khive-vcs/Cargo.toml b/crates/khive-vcs/Cargo.toml index 52a93aeb..b85ded48 100644 --- a/crates/khive-vcs/Cargo.toml +++ b/crates/khive-vcs/Cargo.toml @@ -6,11 +6,12 @@ authors.workspace = true license.workspace = true repository.workspace = true homepage.workspace = true -description = "KG versioning — snapshots, branches, and remote sync (ADR-042)" +description = "KG versioning — git-native core types, canonical hash, and NDJSON-to-SQLite sync (ADR-010/ADR-020)" [dependencies] -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-storage = { version = "0.2.0", path = "../khive-storage" } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-types = { version = "0.2.1", path = "../khive-types" } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } @@ -19,6 +20,8 @@ chrono = { workspace = true } sha2 = "0.10" hex = "0.4" tokio = { workspace = true } +anyhow = { workspace = true } [dev-dependencies] tokio = { workspace = true, features = ["rt", "macros"] } +tempfile = "3" diff --git a/crates/khive-vcs/src/error.rs b/crates/khive-vcs/src/error.rs index 2f76f687..19fcdc95 100644 --- a/crates/khive-vcs/src/error.rs +++ b/crates/khive-vcs/src/error.rs @@ -1,6 +1,12 @@ // Copyright 2026 khive contributors. Licensed under Apache-2.0. // //! Error types for the VCS layer. +//! +//! Remote-server and custom-push/pull error variants (`RemoteUnreachable`, +//! `AuthFailed`, `NonFastForward`, `MergeRequired`) were removed per ADR-010/ +//! 
ADR-020: git is the remote protocol; there is no custom `khive-sync` server. +//! `MergeNotImplemented` was removed because the custom merge engine is +//! superseded for v1 (ADR-020 §what-adr-010-retains-this-adr-replaces). use thiserror::Error; @@ -8,37 +14,6 @@ use crate::types::SnapshotId; #[derive(Debug, Error)] pub enum VcsError { - /// A snapshot with this ID already exists in the database. - /// This should only occur on SHA-256 hash collision (computationally infeasible) - /// or if `commit()` is called twice with identical namespace state. - #[error("snapshot already exists: {0}")] - SnapshotAlreadyExists(SnapshotId), - - /// The requested snapshot archive is not in the local database. - /// Callers must `pull` from a remote to fetch it. - #[error("snapshot not found: {0}")] - SnapshotNotFound(SnapshotId), - - /// No branch with this name in the namespace. - #[error("branch not found: {namespace}/{name}")] - BranchNotFound { namespace: String, name: String }, - - /// The remote branch HEAD is not an ancestor of the local HEAD. - /// Caller must `pull`, merge, commit, then push. - #[error("non-fast-forward: local={local_head}, remote={remote_head}")] - NonFastForward { - local_head: SnapshotId, - remote_head: SnapshotId, - }, - - /// The remote khive-sync server could not be reached. - #[error("remote unreachable: {url} — {cause}")] - RemoteUnreachable { url: String, cause: String }, - - /// The remote rejected the request due to authentication failure. - #[error("authentication failed for remote: {url}")] - AuthFailed { url: String }, - /// The archive stored at the remote has a different hash than expected. /// Indicates corruption or tampering. #[error("hash mismatch: expected {expected}, actual {actual}")] @@ -47,20 +22,11 @@ pub enum VcsError { actual: SnapshotId, }, - /// The remote has diverged from local history; a merge is required. 
- #[error("merge required: remote history has diverged from local")] - MergeRequired, - /// `checkout` was blocked because there are uncommitted changes. /// Pass `force: true` to discard them. #[error("uncommitted changes: {count} entities/edges modified since last commit")] UncommittedChanges { count: usize }, - /// `merge_branch` was called but no `MergeEngine` has been registered. - /// Ships as the default until `khive-merge` is linked. - #[error("merge not implemented: link khive-merge to enable three-way merge")] - MergeNotImplemented, - /// A `SnapshotId` string failed validation. #[error("invalid snapshot id: {0}")] InvalidSnapshotId(String), @@ -77,7 +43,7 @@ pub enum VcsError { #[error("json: {0}")] Json(#[from] serde_json::Error), - /// An I/O operation failed (file system, network). + /// An I/O operation failed (file system). #[error("io: {0}")] Io(#[from] std::io::Error), diff --git a/crates/khive-vcs/src/hash.rs b/crates/khive-vcs/src/hash.rs index 1dc1448f..cde58965 100644 --- a/crates/khive-vcs/src/hash.rs +++ b/crates/khive-vcs/src/hash.rs @@ -1,11 +1,11 @@ // Copyright 2026 khive contributors. Licensed under Apache-2.0. // -//! Canonical JSON serialization and SHA-256 snapshot hashing (ADR-042 §1). +//! Canonical JSON serialization and SHA-256 snapshot hashing. //! -//! The hash algorithm: +//! Algorithm (ADR-010 §canonical-hash-algorithm, ADR-042 retained): //! 1. Collect non-soft-deleted entities; sort by UUID string ascending. //! 2. Collect edges; sort by (source, target, relation) ascending. -//! 3. Serialize as `{"entities":[...],"edges":[...]}` with fixed field order and no whitespace. +//! 3. Serialize as `{"edges":[...],"entities":[...]}` with fixed field order and no whitespace. //! 4. SHA-256 the UTF-8 bytes; prefix with `"sha256:"`. 
use serde_json::{Map, Value}; @@ -162,6 +162,7 @@ mod tests { ExportedEntity { id, kind: "concept".into(), + entity_type: None, name: name.into(), description: None, properties: None, diff --git a/crates/khive-vcs/src/lib.rs b/crates/khive-vcs/src/lib.rs index f557c5e9..316f2926 100644 --- a/crates/khive-vcs/src/lib.rs +++ b/crates/khive-vcs/src/lib.rs @@ -1,20 +1,22 @@ // Copyright 2026 khive contributors. Licensed under Apache-2.0. // -//! KG versioning — content-addressed snapshot hashing and core types. +//! KG versioning — content-addressed snapshot hashing, git-native core types, +//! and the NDJSON-to-SQLite sync library boundary. //! -//! The full snapshot/branch/merge pipeline was superseded by ADR-048 -//! (git-native KG versioning via Deno CLI). This crate retains only the -//! foundational primitives still referenced by the wider workspace. +//! v1 versioning is git-native (ADR-010, ADR-020): KG state lives as sorted +//! NDJSON files in a git repository. The legacy snapshot/branch/merge pipeline +//! (`KgSnapshot`, `KgBranch`, `RemoteConfig`, custom push/pull) was superseded +//! by ADR-020. This crate retains: //! -//! # Crate layout -//! -//! - [`types`] — `KgSnapshot`, `KgBranch`, `SnapshotId`, `RemoteConfig` +//! - [`types`] — `SnapshotId`, `SnapshotCoverage`, `VcsState` //! - [`hash`] — canonical JSON serialization + SHA-256 snapshot hashing +//! - [`sync`] — NDJSON-to-SQLite rebuild library (ADR-010/ADR-020, F106) //! - [`error`] — `VcsError` type pub mod error; pub mod hash; +pub mod sync; pub mod types; pub use error::VcsError; -pub use types::{KgBranch, KgSnapshot, RemoteAuth, RemoteConfig, SnapshotId}; +pub use types::{SnapshotCoverage, SnapshotId, VcsState, KG_V1_COVERAGE}; diff --git a/crates/khive-vcs/src/sync.rs b/crates/khive-vcs/src/sync.rs new file mode 100644 index 00000000..91ee7c09 --- /dev/null +++ b/crates/khive-vcs/src/sync.rs @@ -0,0 +1,474 @@ +// Copyright 2026 khive contributors. Licensed under Apache-2.0. +// +//! 
NDJSON-to-SQLite sync library boundary (ADR-010/ADR-020, finding F106). +//! +//! Reads `/.khive/kg/entities.ndjson` and `/.khive/kg/edges.ndjson`, +//! parses each record per the ADR-020 §2 canonical schema, and writes them into +//! a fresh SQLite database using the runtime's upsert APIs. The resulting DB +//! has the full khive schema (entities + graph_edges + FTS5 indexes + vector +//! tables) — the same schema the MCP server uses. +//! +//! ## Atomicity +//! +//! Builds into `.tmp` then renames over ``. A crash mid-build +//! leaves the previous DB intact. +//! +//! ## Consumers +//! +//! `kkernel sync` is the primary consumer. It calls [`run_sync`] and prints the +//! resulting [`SyncReport`] as JSON. Other callers (e.g. git post-checkout hooks) +//! can use this library directly. + +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Context, Result}; +use khive_runtime::{KhiveRuntime, RuntimeConfig}; +use khive_storage::types::{Edge, TextDocument}; +use khive_storage::{LinkId, SubstrateKind}; +use khive_types::EdgeRelation; +use serde::Deserialize; +use uuid::Uuid; + +/// Per-record entity shape in NDJSON sources (ADR-020 §2). +#[derive(Debug, Deserialize)] +struct NdjsonEntity { + id: Uuid, + kind: String, + name: String, + #[serde(default)] + description: Option, + #[serde(default)] + properties: Option, + #[serde(default)] + tags: Vec, + #[serde(default)] + created_at: Option, + #[serde(default)] + updated_at: Option, +} + +/// Per-record edge shape in NDJSON sources (ADR-020 §2). +#[derive(Debug, Deserialize)] +struct NdjsonEdge { + edge_id: Uuid, + source: Uuid, + target: Uuid, + relation: String, + #[serde(default = "default_weight")] + weight: f64, + // properties: accepted but not yet persisted to the storage-layer Edge + // struct. Parsed here so existing NDJSON files round-trip without warning. 
+ #[serde(default)] + #[allow(dead_code)] + properties: Option, + #[serde(default)] + created_at: Option, + #[serde(default)] + #[allow(dead_code)] + updated_at: Option, +} + +fn default_weight() -> f64 { + 1.0 +} + +/// Parse an ISO-8601 timestamp string into microseconds since epoch. +/// Returns `now` if the string is `None` or unparseable. +fn parse_ts_micros(s: Option<&str>) -> i64 { + s.and_then(|t| chrono::DateTime::parse_from_rfc3339(t).ok()) + .map(|dt| dt.timestamp_micros()) + .unwrap_or_else(|| chrono::Utc::now().timestamp_micros()) +} + +/// Summary of a completed sync run. +#[derive(Debug, serde::Serialize)] +pub struct SyncReport { + pub entities: usize, + pub edges: usize, + pub db_path: String, +} + +/// Rebuild `db_path` from `.khive/kg/{entities,edges}.ndjson` under `repo_root`. +/// +/// The operation is atomic: the database is built in a `.tmp` sibling file and +/// renamed over `db_path` only on success. A crash or error leaves the previous +/// `db_path` intact. +/// +/// `namespace` is applied to all imported records. +/// +/// Returns a [`SyncReport`] on success, or an error if NDJSON parsing or SQLite +/// upserts fail. +pub async fn run_sync(repo_root: &Path, db_path: &Path, namespace: &str) -> Result { + let entities_path = repo_root.join(".khive/kg/entities.ndjson"); + let edges_path = repo_root.join(".khive/kg/edges.ndjson"); + + let entity_records = read_entities(&entities_path) + .with_context(|| format!("reading {}", entities_path.display()))?; + let edge_records = + read_edges(&edges_path).with_context(|| format!("reading {}", edges_path.display()))?; + + let tmp_path = with_extension_suffix(db_path, ".tmp"); + let _ = std::fs::remove_file(&tmp_path); + + // Build the runtime against the tmp file. Vector embedding is disabled + // because sync runs without an embedding model loaded — vectors are + // computed lazily on access via the MCP server if needed. 
+ let ns = khive_types::Namespace::parse(namespace) + .map_err(|e| anyhow!("invalid namespace {namespace:?}: {e}"))?; + let config = RuntimeConfig { + db_path: Some(tmp_path.clone()), + default_namespace: ns, + embedding_model: None, + ..RuntimeConfig::default() + }; + let runtime = KhiveRuntime::new(config) + .with_context(|| format!("building runtime for {}", tmp_path.display()))?; + + let entity_count = upsert_entities(&runtime, namespace, entity_records).await?; + let edge_count = upsert_edges(&runtime, namespace, edge_records).await?; + + // Checkpoint the WAL so all committed writes land in the main DB file. + // Without this, `rename(tmp, target)` moves only the main file and leaves + // the -wal alongside it; opening `target` later would see only the data + // through the last auto-checkpoint (every 4000 pages). For small graphs no + // auto-checkpoint fires, so the data would silently disappear. + checkpoint_wal(&runtime) + .await + .context("checkpoint WAL before rename")?; + + // Drop the runtime so SQLite releases its file handles before rename. 
+ drop(runtime); + + if let Some(parent) = db_path.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("creating {}", parent.display()))?; + } + std::fs::rename(&tmp_path, db_path) + .with_context(|| format!("renaming {} -> {}", tmp_path.display(), db_path.display()))?; + + Ok(SyncReport { + entities: entity_count, + edges: edge_count, + db_path: db_path.to_string_lossy().into_owned(), + }) +} + +fn with_extension_suffix(p: &Path, suffix: &str) -> PathBuf { + let mut s = p.as_os_str().to_owned(); + s.push(suffix); + PathBuf::from(s) +} + +fn read_entities(path: &Path) -> Result> { + if !path.exists() { + return Ok(Vec::new()); + } + let text = std::fs::read_to_string(path)?; + let mut out = Vec::new(); + for (i, line) in text.lines().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let e: NdjsonEntity = serde_json::from_str(trimmed) + .with_context(|| format!("parsing entity at line {}", i + 1))?; + out.push(e); + } + Ok(out) +} + +fn read_edges(path: &Path) -> Result> { + if !path.exists() { + return Ok(Vec::new()); + } + let text = std::fs::read_to_string(path)?; + let mut out = Vec::new(); + for (i, line) in text.lines().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let e: NdjsonEdge = serde_json::from_str(trimmed) + .with_context(|| format!("parsing edge at line {}", i + 1))?; + out.push(e); + } + Ok(out) +} + +async fn checkpoint_wal(runtime: &KhiveRuntime) -> Result<()> { + let mut writer = runtime.backend().sql().writer().await?; + writer + .execute_script("PRAGMA wal_checkpoint(TRUNCATE);".to_string()) + .await?; + Ok(()) +} + +async fn upsert_entities( + runtime: &KhiveRuntime, + namespace: &str, + records: Vec, +) -> Result { + let ns = khive_types::Namespace::parse(namespace) + .map_err(|e| anyhow!("invalid namespace {namespace:?}: {e}"))?; + let token = runtime.authorize(ns); + let store = runtime.entities(&token).context("opening entity store")?; + let 
text = runtime.text(&token).context("opening text store")?; + let mut count = 0; + for r in records { + let created_at = parse_ts_micros(r.created_at.as_deref()); + let updated_at = parse_ts_micros(r.updated_at.as_deref()); + // Build the FTS body from name + description (same as create_entity in operations.rs). + let body = match &r.description { + Some(d) if !d.is_empty() => format!("{} {}", r.name, d), + _ => r.name.clone(), + }; + let entity = khive_storage::entity::Entity { + id: r.id, + namespace: namespace.to_string(), + kind: r.kind.clone(), + entity_type: None, + name: r.name.clone(), + description: r.description.clone(), + properties: r.properties.clone(), + tags: r.tags.clone(), + created_at, + updated_at, + deleted_at: None, + merge_event_id: None, + merged_into: None, + }; + store + .upsert_entity(entity) + .await + .with_context(|| format!("upsert entity {}", r.id))?; + // Populate FTS5 index so text search works after sync. + // Vectors are intentionally skipped: they are local-only derived state + // (ADR-035 §6) and will be computed by `kkernel kg embed` when needed. 
+ text.upsert_document(TextDocument { + subject_id: r.id, + kind: SubstrateKind::Entity, + title: Some(r.name.clone()), + body, + tags: r.tags.clone(), + namespace: namespace.to_string(), + metadata: r.properties.clone(), + updated_at: chrono::DateTime::from_timestamp_micros(updated_at) + .unwrap_or_else(chrono::Utc::now), + }) + .await + .with_context(|| format!("fts index entity {}", r.id))?; + count += 1; + } + Ok(count) +} + +async fn upsert_edges( + runtime: &KhiveRuntime, + namespace: &str, + records: Vec, +) -> Result { + let ns = khive_types::Namespace::parse(namespace) + .map_err(|e| anyhow!("invalid namespace {namespace:?}: {e}"))?; + let token = runtime.authorize(ns); + let graph = runtime.graph(&token).context("opening graph store")?; + let mut count = 0; + for r in records { + let relation: EdgeRelation = r + .relation + .parse() + .map_err(|e| anyhow!("invalid relation {:?}: {}", r.relation, e))?; + let created_at = + chrono::DateTime::from_timestamp_micros(parse_ts_micros(r.created_at.as_deref())) + .unwrap_or_else(chrono::Utc::now); + let edge = Edge { + id: LinkId::from(r.edge_id), + namespace: namespace.to_string(), + source_id: r.source, + target_id: r.target, + relation, + weight: r.weight, + created_at, + updated_at: created_at, + deleted_at: None, + metadata: None, + target_backend: None, + }; + graph + .upsert_edge(edge) + .await + .with_context(|| format!("upsert edge {}", r.edge_id))?; + count += 1; + } + Ok(count) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn write_repo(dir: &Path, entities_ndjson: &str, edges_ndjson: &str) { + let kg_dir = dir.join(".khive/kg"); + std::fs::create_dir_all(&kg_dir).unwrap(); + std::fs::write(kg_dir.join("entities.ndjson"), entities_ndjson).unwrap(); + std::fs::write(kg_dir.join("edges.ndjson"), edges_ndjson).unwrap(); + } + + #[tokio::test] + async fn 
sync_empty_ndjson_produces_real_sqlite_file() { + let tmp = TempDir::new().unwrap(); + let repo = tmp.path(); + let db_path = repo.join(".khive/state/working.db"); + write_repo(repo, "", ""); + + let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); + assert_eq!(report.entities, 0); + assert_eq!(report.edges, 0); + + let bytes = std::fs::read(&db_path).unwrap(); + assert!(!bytes.is_empty(), "DB file must be non-empty after sync"); + assert!( + bytes.starts_with(b"SQLite format 3\0"), + "DB file must start with SQLite magic header, got {:?}", + &bytes[..bytes.len().min(20)] + ); + } + + #[tokio::test] + async fn sync_imports_entities_and_edges_into_real_db() { + let tmp = TempDir::new().unwrap(); + let repo = tmp.path(); + let db_path = repo.join(".khive/state/working.db"); + + let id_a = "11111111-1111-1111-1111-111111111111"; + let id_b = "22222222-2222-2222-2222-222222222222"; + let edge_id = "33333333-3333-3333-3333-333333333333"; + + let line_a = format!( + r#"{{"id":"{id_a}","kind":"concept","name":"Alpha","properties":{{}},"tags":[]}}"# + ); + let line_b = format!( + r#"{{"id":"{id_b}","kind":"concept","name":"Beta","properties":{{}},"tags":[]}}"# + ); + let entities = format!("{line_a}\n{line_b}\n"); + let edges = format!( + r#"{{"edge_id":"{edge_id}","source":"{id_a}","target":"{id_b}","relation":"extends","weight":1.0,"properties":{{}}}}"# + ); + write_repo(repo, &entities, &edges); + + let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); + assert_eq!(report.entities, 2); + assert_eq!(report.edges, 1); + + let ns = khive_types::Namespace::parse("test-ns").unwrap(); + let config = RuntimeConfig { + db_path: Some(db_path.clone()), + default_namespace: ns.clone(), + embedding_model: None, + ..RuntimeConfig::default() + }; + let rt = KhiveRuntime::new(config).unwrap(); + let token = rt.authorize(ns); + let alpha = rt + .entities(&token) + .unwrap() + .get_entity(id_a.parse().unwrap()) + .await + .unwrap() + .expect("entity Alpha must be 
retrievable after sync"); + assert_eq!(alpha.name, "Alpha"); + assert_eq!(alpha.kind, "concept"); + } + + #[tokio::test] + async fn sync_is_atomic_via_tmp_rename() { + let tmp = TempDir::new().unwrap(); + let repo = tmp.path(); + let db_path = repo.join(".khive/state/working.db"); + std::fs::create_dir_all(db_path.parent().unwrap()).unwrap(); + std::fs::write(&db_path, b"SENTINEL").unwrap(); + + write_repo(repo, "not json\n", ""); + let err = run_sync(repo, &db_path, "test-ns").await.unwrap_err(); + assert!( + err.to_string().to_lowercase().contains("parsing entity") + || err.chain().any(|e| e.to_string().contains("expected")), + "expected parse error, got: {err}" + ); + + let after = std::fs::read(&db_path).unwrap(); + assert_eq!( + after, b"SENTINEL", + "atomic guarantee: failed sync must not replace existing DB" + ); + } + + #[tokio::test] + async fn sync_missing_ndjson_files_succeeds_with_zero_counts() { + let tmp = TempDir::new().unwrap(); + let repo = tmp.path(); + let db_path = repo.join(".khive/state/working.db"); + + let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); + assert_eq!(report.entities, 0); + assert_eq!(report.edges, 0); + } + + /// F195: verify that FTS5 is populated during sync so text search works + /// after sync without a separate `kkernel kg embed` pass (ADR-035 §5). 
+ #[tokio::test] + async fn sync_populates_fts_for_text_search() { + use khive_runtime::RuntimeConfig; + use khive_storage::types::{TextFilter, TextQueryMode, TextSearchRequest}; + + let tmp = TempDir::new().unwrap(); + let repo = tmp.path(); + let db_path = repo.join(".khive/state/working.db"); + + let id_a = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"; + let line_a = format!( + r#"{{"id":"{id_a}","kind":"concept","name":"FlashAttention","description":"Fast attention algorithm","properties":{{}},"tags":[]}}"# + ); + write_repo(repo, &line_a, ""); + + run_sync(repo, &db_path, "test-ns").await.unwrap(); + + let ns = khive_types::Namespace::parse("test-ns").unwrap(); + let config = RuntimeConfig { + db_path: Some(db_path.clone()), + default_namespace: ns.clone(), + embedding_model: None, + ..RuntimeConfig::default() + }; + let rt = KhiveRuntime::new(config).unwrap(); + let token = rt.authorize(ns); + + let hits = rt + .text(&token) + .expect("text store must be available") + .search(TextSearchRequest { + query: "FlashAttention".to_string(), + filter: Some(TextFilter { + namespaces: vec!["test-ns".to_string()], + ..Default::default() + }), + mode: TextQueryMode::Phrase, + top_k: 10, + snippet_chars: 128, + }) + .await + .expect("text search must succeed after sync"); + + assert!( + !hits.is_empty(), + "FTS search for 'FlashAttention' must return results after sync (F195)" + ); + assert_eq!( + hits[0].subject_id.to_string(), + id_a, + "FTS hit must reference the synced entity UUID" + ); + } +} diff --git a/crates/khive-vcs/src/types.rs b/crates/khive-vcs/src/types.rs index 63356fb4..675ff21b 100644 --- a/crates/khive-vcs/src/types.rs +++ b/crates/khive-vcs/src/types.rs @@ -1,6 +1,10 @@ // Copyright 2026 khive contributors. Licensed under Apache-2.0. // -//! Core versioning types: `SnapshotId`, `KgSnapshot`, `KgBranch`, `RemoteConfig`. +//! Core versioning types: `SnapshotId`, `VcsState`. +//! +//! 
Legacy types (`KgSnapshot`, `KgBranch`, `RemoteConfig`) and the `VcsState.dirty` +//! flag were removed in the ADR-010/ADR-020 alignment pass. KG branches are now +//! git branches; there is no custom remote protocol (ADR-010, ADR-020). use serde::{Deserialize, Serialize}; @@ -56,109 +60,34 @@ impl std::fmt::Display for SnapshotId { } } -// ── KgSnapshot ──────────────────────────────────────────────────────────────── +// ── SnapshotCoverage ────────────────────────────────────────────────────────── -/// Immutable point-in-time capture of a namespace's entity and edge set. +/// Records which record classes are covered by a KG snapshot. /// -/// `id` is the SHA-256 hash of the deterministically serialized archive. -/// The archive itself is stored separately in `kg_snapshot_archives`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct KgSnapshot { - /// Content hash — also the primary key in `kg_snapshots`. - pub id: SnapshotId, - /// Namespace this snapshot belongs to. - pub namespace: String, - /// Previous snapshot in this branch's history. `None` for the genesis commit. - pub parent_id: Option, - /// Human-readable description of the changes since the previous snapshot. - pub message: String, - /// Agent or user identifier for attribution. Optional. - pub author: Option, - /// Unix microseconds (i64) — compatible with the existing substrate timestamp convention. - pub created_at: i64, - /// Number of entities in this snapshot. - pub entity_count: u64, - /// Number of edges in this snapshot. - pub edge_count: u64, -} - -// ── KgBranch ───────────────────────────────────────────────────────────────── - -/// Named mutable pointer to a snapshot within a namespace. -/// -/// Composite primary key: `(namespace, name)`. -/// The default branch is `"main"`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct KgBranch { - /// Namespace this branch lives in. - pub namespace: String, - /// Branch name — alphanumeric, hyphens, underscores. 
- pub name: String, - /// The snapshot this branch currently points to. - pub head_id: SnapshotId, - /// Unix microseconds when the branch was first created. - pub created_at: i64, - /// Unix microseconds of the last HEAD update. - pub updated_at: i64, +/// v1 covers entities and edges only. Notes are excluded until note packs +/// define versioned export, import, privacy/redaction, and merge semantics +/// (ADR-010 §snapshot-coverage). +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct SnapshotCoverage { + pub entities: bool, + pub edges: bool, + pub notes: bool, } -// ── RemoteConfig ────────────────────────────────────────────────────────────── - -/// Connection parameters for a remote khive instance (for push/pull). -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct RemoteConfig { - /// Short name used in CLI commands (e.g. `"origin"`). - pub name: String, - /// Base URL of the remote khive-sync server (e.g. `"https://khive.example.com"`). - pub url: String, - /// Authentication credentials for the remote. - pub auth: RemoteAuth, - /// Optional namespace mapping: `(local_namespace, remote_namespace)`. - /// When absent, the local namespace name is used on the remote. - pub namespace_map: Option<(String, String)>, -} - -impl RemoteConfig { - /// Returns the remote namespace name for a given local namespace. - pub fn remote_namespace<'a>(&'a self, local: &'a str) -> &'a str { - match &self.namespace_map { - Some((from, to)) if from == local => to.as_str(), - _ => local, - } - } -} - -/// Authentication credentials for a remote khive instance. -#[derive(Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum RemoteAuth { - /// No authentication (anonymous access). - None, - /// Bearer token (API key). - Bearer { token: String }, - /// HTTP basic authentication. 
- Basic { user: String, password: String }, -} - -impl std::fmt::Debug for RemoteAuth { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::None => write!(f, "RemoteAuth::None"), - Self::Bearer { .. } => write!(f, "RemoteAuth::Bearer {{ token: \"[REDACTED]\" }}"), - Self::Basic { user, .. } => { - write!( - f, - "RemoteAuth::Basic {{ user: {:?}, password: \"[REDACTED]\" }}", - user - ) - } - } - } -} +/// v1 coverage constant: entities + edges, notes excluded. +pub const KG_V1_COVERAGE: SnapshotCoverage = SnapshotCoverage { + entities: true, + edges: true, + notes: false, +}; // ── VcsState ───────────────────────────────────────────────────────────────── -/// Per-namespace VCS state stored in `kg_vcs_state`. +/// Per-namespace VCS state. +/// +/// The `dirty` flag was removed per ADR-020 §7: "There is no dirty flag. The +/// diff is computed fresh on every invocation." Use `khive kg status` (DB vs +/// NDJSON diff) to determine uncommitted changes. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct VcsState { pub namespace: String, @@ -166,8 +95,6 @@ pub struct VcsState { pub current_branch: Option, /// Last committed snapshot ID. `None` if no commit has been made. pub last_committed_id: Option, - /// Whether uncommitted changes exist since the last commit. 
- pub dirty: bool, } // ── Tests ───────────────────────────────────────────────────────────────────── @@ -211,18 +138,6 @@ mod tests { assert!(matches!(err, VcsError::InvalidSnapshotId(_))); } - #[test] - fn remote_config_namespace_map() { - let cfg = RemoteConfig { - name: "origin".into(), - url: "https://example.com".into(), - auth: RemoteAuth::None, - namespace_map: Some(("local".into(), "shared".into())), - }; - assert_eq!(cfg.remote_namespace("local"), "shared"); - assert_eq!(cfg.remote_namespace("other"), "other"); - } - #[test] fn snapshot_id_from_hash_accepts_uppercase_and_normalizes() { let upper = "A".repeat(64); @@ -255,111 +170,32 @@ mod tests { assert_eq!(back, id); } - #[test] - fn kg_snapshot_serde_roundtrip() { - let hex = "e".repeat(64); - let snap = KgSnapshot { - id: SnapshotId::from_hash(&hex).unwrap(), - namespace: "test-ns".into(), - parent_id: None, - message: "initial commit".into(), - author: Some("ocean".into()), - created_at: 1_700_000_000_000_000, - entity_count: 42, - edge_count: 7, - }; - let json = serde_json::to_string(&snap).unwrap(); - let back: KgSnapshot = serde_json::from_str(&json).unwrap(); - assert_eq!(back.id, snap.id); - assert_eq!(back.namespace, snap.namespace); - assert_eq!(back.parent_id, snap.parent_id); - assert_eq!(back.entity_count, 42); - assert_eq!(back.edge_count, 7); - assert_eq!(back.author, Some("ocean".into())); - } - - #[test] - fn kg_branch_serde_roundtrip() { - let branch = KgBranch { - namespace: "test-ns".into(), - name: "main".into(), - head_id: SnapshotId::from_hash(&"f".repeat(64)).unwrap(), - created_at: 1_000_000, - updated_at: 2_000_000, - }; - let json = serde_json::to_string(&branch).unwrap(); - let back: KgBranch = serde_json::from_str(&json).unwrap(); - assert_eq!(back.namespace, branch.namespace); - assert_eq!(back.name, branch.name); - assert_eq!(back.head_id, branch.head_id); - assert_eq!(back.created_at, 1_000_000); - assert_eq!(back.updated_at, 2_000_000); - } - - #[test] - fn 
remote_auth_bearer_serde_round_trip_and_tag() { - let auth = RemoteAuth::Bearer { - token: "tok123".into(), - }; - let json = serde_json::to_string(&auth).unwrap(); - assert!(json.contains("\"type\":\"bearer\"")); - let back: RemoteAuth = serde_json::from_str(&json).unwrap(); - assert!(matches!(back, RemoteAuth::Bearer { ref token } if token == "tok123")); - } - - #[test] - fn remote_auth_debug_redacts_bearer_token() { - let auth = RemoteAuth::Bearer { - token: "super-secret".into(), - }; - let debug = format!("{:?}", auth); - assert!( - debug.contains("[REDACTED]"), - "expected [REDACTED] in: {debug}" - ); - assert!(!debug.contains("super-secret"), "secret leaked in: {debug}"); - } - - #[test] - fn remote_auth_debug_redacts_basic_password() { - let auth = RemoteAuth::Basic { - user: "alice".into(), - password: "hunter2".into(), - }; - let debug = format!("{:?}", auth); - assert!(debug.contains("alice")); - assert!( - debug.contains("[REDACTED]"), - "expected [REDACTED] in: {debug}" - ); - assert!(!debug.contains("hunter2"), "password leaked in: {debug}"); - } - - #[test] - fn remote_config_none_namespace_map_returns_local_name() { - let cfg = RemoteConfig { - name: "origin".into(), - url: "https://example.com".into(), - auth: RemoteAuth::None, - namespace_map: None, - }; - assert_eq!(cfg.remote_namespace("my-ns"), "my-ns"); - assert_eq!(cfg.remote_namespace("other-ns"), "other-ns"); - } - #[test] fn vcs_state_serde_roundtrip() { let state = VcsState { namespace: "proj".into(), current_branch: Some("main".into()), last_committed_id: Some(SnapshotId::from_hash(&"0".repeat(64)).unwrap()), - dirty: true, }; let json = serde_json::to_string(&state).unwrap(); let back: VcsState = serde_json::from_str(&json).unwrap(); assert_eq!(back.namespace, state.namespace); assert_eq!(back.current_branch, Some("main".into())); - assert!(back.dirty); assert_eq!(back.last_committed_id, state.last_committed_id); } + + #[test] + fn snapshot_coverage_v1_entities_and_edges_only() { + 
const { assert!(KG_V1_COVERAGE.entities) }; + const { assert!(KG_V1_COVERAGE.edges) }; + const { assert!(!KG_V1_COVERAGE.notes) }; + } + + #[test] + fn snapshot_coverage_serde_roundtrip() { + let cov = KG_V1_COVERAGE.clone(); + let json = serde_json::to_string(&cov).unwrap(); + let back: SnapshotCoverage = serde_json::from_str(&json).unwrap(); + assert_eq!(back, cov); + } } diff --git a/crates/khive-vcs/tests/integration.rs b/crates/khive-vcs/tests/integration.rs index 1a121b0e..d3717ea2 100644 --- a/crates/khive-vcs/tests/integration.rs +++ b/crates/khive-vcs/tests/integration.rs @@ -1,21 +1,20 @@ -//! Integration tests for `khive-vcs` (issue #88). -//! -//! The original #88 issue requested integration tests for the snapshot, -//! branch, log, and merge subsystems. Those subsystems were superseded by -//! ADR-048 (git-native KG versioning via the Deno CLI). What remains in -//! this crate is the foundational VCS surface: content-addressed snapshot -//! identifiers and canonical archive hashing. +//! Integration tests for `khive-vcs`. //! //! These tests exercise the public API end-to-end ACROSS modules — proving //! the surface composes correctly, not just that individual files compile. //! Unit tests inside `src/{hash,types}.rs` test each module in isolation; //! this file tests the composition. +//! +//! Legacy types (`KgSnapshot`, `KgBranch`, `RemoteConfig`) and the `VcsState.dirty` +//! flag were removed in the ADR-010/ADR-020 alignment pass. Tests that relied on +//! those types have been replaced with tests for `SnapshotCoverage` and the +//! git-native `VcsState`. 
use chrono::Utc; use khive_runtime::portability::{ExportedEdge, ExportedEntity, KgArchive}; use khive_storage::EdgeRelation; use khive_vcs::hash::{canonical_json, snapshot_id_for_archive}; -use khive_vcs::types::{KgBranch, KgSnapshot, RemoteAuth, RemoteConfig, SnapshotId, VcsState}; +use khive_vcs::types::{SnapshotCoverage, SnapshotId, VcsState, KG_V1_COVERAGE}; use uuid::Uuid; fn make_archive(namespace: &str) -> KgArchive { @@ -34,6 +33,7 @@ fn make_entity(id: Uuid, name: &str) -> ExportedEntity { ExportedEntity { id, kind: "concept".into(), + entity_type: None, name: name.into(), description: None, properties: None, @@ -44,9 +44,9 @@ fn make_entity(id: Uuid, name: &str) -> ExportedEntity { } #[test] -fn snapshot_id_roundtrips_through_archive_hash_into_kgsnapshot() { - // The full chain: build archive -> compute SnapshotId -> wrap in KgSnapshot - // -> serialize via serde -> deserialize -> verify id is recoverable. +fn snapshot_id_roundtrips_through_archive_hash() { + // The full chain: build archive -> compute SnapshotId -> serialize via + // serde -> deserialize -> verify id is recoverable. 
let mut archive = make_archive("test-ns"); archive .entities @@ -59,22 +59,9 @@ fn snapshot_id_roundtrips_through_archive_hash_into_kgsnapshot() { ); assert_eq!(id.hex().len(), 64, "hex digest is 64 chars"); - let snapshot = KgSnapshot { - id: id.clone(), - namespace: "test-ns".into(), - parent_id: None, - message: "genesis".into(), - author: Some("test".into()), - created_at: 0, - entity_count: archive.entities.len() as u64, - edge_count: archive.edges.len() as u64, - }; - - let json = serde_json::to_string(&snapshot).expect("serialize"); - let back: KgSnapshot = serde_json::from_str(&json).expect("deserialize"); - - assert_eq!(back.id, id, "id round-trips through serde"); - assert_eq!(back.entity_count, 1); + let json = serde_json::to_string(&id).expect("serialize"); + let back: SnapshotId = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(back, id, "id round-trips through serde"); } #[test] @@ -154,68 +141,31 @@ fn snapshot_id_from_prefixed_roundtrip() { } #[test] -fn kg_branch_holds_snapshot_id_serde_roundtrip() { - let archive = make_archive("ns"); - let head_id = snapshot_id_for_archive(&archive).unwrap(); - let branch = KgBranch { - namespace: "ns".into(), - name: "main".into(), - head_id: head_id.clone(), - created_at: 0, - updated_at: 0, - }; - let json = serde_json::to_string(&branch).unwrap(); - let back: KgBranch = serde_json::from_str(&json).unwrap(); - assert_eq!(back.head_id, head_id); - assert_eq!(back.name, "main"); -} - -#[test] -fn remote_config_redacts_bearer_token_in_debug() { - let cfg = RemoteConfig { - name: "origin".into(), - url: "https://khive.example.com".into(), - auth: RemoteAuth::Bearer { - token: "super_secret_token".into(), - }, - namespace_map: None, - }; - let dbg = format!("{:?}", cfg.auth); - assert!( - dbg.contains("REDACTED"), - "Bearer debug must REDACT the token; got: {dbg}" - ); - assert!( - !dbg.contains("super_secret_token"), - "secret must not leak through Debug; got: {dbg}" - ); -} - -#[test] -fn 
remote_config_namespace_mapping_works() { - let cfg = RemoteConfig { - name: "origin".into(), - url: "https://khive.example.com".into(), - auth: RemoteAuth::None, - namespace_map: Some(("local_ns".into(), "remote_ns".into())), - }; - assert_eq!(cfg.remote_namespace("local_ns"), "remote_ns"); - assert_eq!(cfg.remote_namespace("other"), "other"); -} - -#[test] -fn vcs_state_can_be_serialized_and_carries_snapshot_id() { +fn vcs_state_serde_roundtrip_without_dirty_flag() { let archive = make_archive("ns"); let id = snapshot_id_for_archive(&archive).unwrap(); let state = VcsState { namespace: "ns".into(), current_branch: Some("main".into()), last_committed_id: Some(id.clone()), - dirty: false, }; let json = serde_json::to_string(&state).unwrap(); let back: VcsState = serde_json::from_str(&json).unwrap(); assert_eq!(back.last_committed_id, Some(id)); assert_eq!(back.current_branch.as_deref(), Some("main")); - assert!(!back.dirty); +} + +#[test] +fn snapshot_coverage_v1_covers_entities_and_edges_not_notes() { + const { assert!(KG_V1_COVERAGE.entities) }; + const { assert!(KG_V1_COVERAGE.edges) }; + const { assert!(!KG_V1_COVERAGE.notes) }; +} + +#[test] +fn snapshot_coverage_serde_roundtrip() { + let cov = KG_V1_COVERAGE.clone(); + let json = serde_json::to_string(&cov).unwrap(); + let back: SnapshotCoverage = serde_json::from_str(&json).unwrap(); + assert_eq!(back, cov); } diff --git a/crates/kkernel/Cargo.toml b/crates/kkernel/Cargo.toml index 4c4d93aa..e7bd410a 100644 --- a/crates/kkernel/Cargo.toml +++ b/crates/kkernel/Cargo.toml @@ -11,13 +11,16 @@ categories.workspace = true description = "khive kernel — admin/management Rust binary (sync, pack introspection, db ops)" [dependencies] -khive-runtime = { version = "0.2.0", path = "../khive-runtime" } -khive-storage = { version = "0.2.0", path = "../khive-storage" } -khive-types = { version = "0.2.0", path = "../khive-types" } -khive-pack-kg = { version = "0.2.0", path = "../khive-pack-kg" } -khive-pack-gtd = { version 
= "0.2.0", path = "../khive-pack-gtd" } -khive-pack-memory = { version = "0.2.0", path = "../khive-pack-memory" } -khive-pack-brain = { version = "0.2.0", path = "../khive-pack-brain" } +khive-runtime = { version = "0.2.1", path = "../khive-runtime" } +khive-storage = { version = "0.2.1", path = "../khive-storage" } +khive-types = { version = "0.2.1", path = "../khive-types" } +khive-vcs = { version = "0.2.1", path = "../khive-vcs" } +khive-pack-kg = { version = "0.2.1", path = "../khive-pack-kg" } +khive-pack-gtd = { version = "0.2.1", path = "../khive-pack-gtd" } +khive-pack-memory = { version = "0.2.1", path = "../khive-pack-memory" } +khive-pack-brain = { version = "0.2.1", path = "../khive-pack-brain" } +khive-pack-comm = { version = "0.2.1", path = "../khive-pack-comm" } +khive-pack-schedule = { version = "0.2.1", path = "../khive-pack-schedule" } tokio = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/crates/kkernel/src/coordinator/mod.rs b/crates/kkernel/src/coordinator/mod.rs new file mode 100644 index 00000000..eb61e2f9 --- /dev/null +++ b/crates/kkernel/src/coordinator/mod.rs @@ -0,0 +1,242 @@ +//! SubstrateCoordinator — cross-backend dispatch layer (ADR-003, ADR-029). +//! +//! The coordinator lives inside `kkernel` as kernel-internal plumbing. Pack crates +//! do not depend on it (ADR-003 §anti-pattern-9). It owns: +//! +//! - Node-to-backend location cache (D2 — `Arc>`) +//! - Cross-backend `link()` mechanics (D3) +//! - Substrate-kind search fan-out with unweighted RRF (D4) +//! - Cross-backend traversal and curation semantics (D5) +//! - Partition tolerance / backend health map (D6) +//! +//! # Single-backend behaviour +//! +//! When only one backend is registered, every D1–D6 mechanism degenerates to its +//! trivial identity: no fan-out, no cross-backend routing, no health map misses. +//! Multi-backend complexity is opt-in via `khive.toml` (ADR-028). +//! +//! 
# Module structure (ADR-029 §coordinator-module-tree) +//! +//! ```text +//! kkernel::coordinator +//! mod.rs — SubstrateCoordinator + BackendRegistry (this file) +//! ``` +//! +//! Future sub-modules (`edges`, `locator`, `search`, `traversal`, `curation`, +//! `health`) are reserved per ADR-029 but are not yet implemented; they will +//! land when the corresponding features are built out. + +use std::collections::HashMap; +use std::sync::Arc; + +use khive_runtime::{BackendId, KhiveRuntime}; + +// ---- BackendRegistry ---- + +/// A registered backend entry held by the [`SubstrateCoordinator`]. +#[derive(Clone)] +pub struct BackendEntry { + /// Unique identifier for this backend (matches `[[backends.name]]` in `khive.toml`). + pub id: BackendId, + /// The runtime instance operating over this backend. + pub runtime: Arc, +} + +/// Registry of all backends known to the coordinator. +/// +/// Constructed once at boot from `khive.toml` (ADR-028) and immutable thereafter. +/// Keyed by [`BackendId`] for O(1) lookup. +#[derive(Default)] +pub struct BackendRegistry { + backends: HashMap, + primary: Option, +} + +impl BackendRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Register a backend. The first backend registered becomes the primary. + /// + /// Returns `false` if a backend with the same `id` was already registered. + pub fn register(&mut self, id: BackendId, runtime: Arc) -> bool { + let key = id.as_str().to_string(); + if self.backends.contains_key(&key) { + return false; + } + if self.primary.is_none() { + self.primary = Some(key.clone()); + } + self.backends.insert(key, BackendEntry { id, runtime }); + true + } + + /// Look up a backend by id. + pub fn get(&self, id: &BackendId) -> Option<&BackendEntry> { + self.backends.get(id.as_str()) + } + + /// The primary backend (first registered). `None` only if the registry is empty. 
+ pub fn primary(&self) -> Option<&BackendEntry> { + self.primary.as_deref().and_then(|k| self.backends.get(k)) + } + + /// Iterate over all registered backends. + pub fn iter(&self) -> impl Iterator { + self.backends.values() + } + + /// Number of registered backends. + pub fn len(&self) -> usize { + self.backends.len() + } + + /// True if no backends have been registered. + pub fn is_empty(&self) -> bool { + self.backends.is_empty() + } + + /// List all registered [`BackendId`]s. + pub fn ids(&self) -> Vec { + self.backends.keys().map(BackendId::new).collect() + } +} + +// ---- SubstrateCoordinator ---- + +/// Cross-backend dispatch layer (ADR-003 §four-invariants, ADR-029). +/// +/// The coordinator owns all cross-backend operations: +/// - Node-to-backend resolution (D2 locator cache) +/// - Cross-backend `link()` routing (D3) +/// - Substrate-kind search fan-out with RRF (D4) +/// - Cross-backend traversal (D5) +/// - Partition tolerance (D6) +/// +/// Pack handlers do NOT see the coordinator; they receive a single-backend +/// [`KhiveRuntime`] and operate within it. The coordinator routes across backends +/// above the pack layer. +/// +/// # Current implementation status +/// +/// v1 ships the `BackendRegistry`, `BackendId` concept, and the +/// `merge_entity` cross-backend guard. Full D2–D6 mechanics (locator cache, +/// fan-out search, cross-backend traversal, WAL cascade) are deferred to the +/// ADR-029 full implementation milestone. +pub struct SubstrateCoordinator { + registry: BackendRegistry, +} + +impl SubstrateCoordinator { + /// Construct from a [`BackendRegistry`]. + pub fn new(registry: BackendRegistry) -> Self { + Self { registry } + } + + /// Construct with a single backend (single-backend deployment default). + /// + /// Uses `BackendId::main()` as the backend id. The coordinator degenerates + /// to a pass-through; all cross-backend mechanisms are identity. 
+ pub fn single(runtime: Arc) -> Self { + let mut registry = BackendRegistry::new(); + registry.register(BackendId::main(), runtime); + Self { registry } + } + + /// The underlying [`BackendRegistry`]. + pub fn registry(&self) -> &BackendRegistry { + &self.registry + } + + /// Resolve which backend owns `id` by checking the locator cache, then performing + /// a parallel-fetch fallback across all backends. + /// + /// Returns `None` if no backend claims the UUID. In v1 this is a linear scan; + /// the D2 lazy cache is a follow-up when the locator module is implemented. + /// + /// For a single-backend deployment this always returns the primary backend + /// (or `None` if the UUID doesn't exist anywhere). + pub fn primary_runtime(&self) -> Option> { + self.registry.primary().map(|e| Arc::clone(&e.runtime)) + } + + /// List all registered backend ids. + pub fn backend_ids(&self) -> Vec { + self.registry.ids() + } + + /// Number of registered backends. + pub fn backend_count(&self) -> usize { + self.registry.len() + } + + /// True when this is a single-backend deployment. + /// + /// When `true`, all D1–D6 coordinator mechanisms degenerate to identity: + /// no fan-out, no cross-backend routing, no partition concerns. 
+ pub fn is_single_backend(&self) -> bool { + self.registry.len() <= 1 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use khive_runtime::KhiveRuntime; + + fn memory_runtime() -> Arc { + Arc::new(KhiveRuntime::memory().expect("memory runtime")) + } + + #[test] + fn single_coordinator_is_single_backend() { + let coord = SubstrateCoordinator::single(memory_runtime()); + assert!(coord.is_single_backend()); + assert_eq!(coord.backend_count(), 1); + assert_eq!(coord.backend_ids().len(), 1); + assert_eq!(coord.backend_ids()[0].as_str(), "main"); + } + + #[test] + fn registry_register_dedup() { + let mut reg = BackendRegistry::new(); + let rt = memory_runtime(); + assert!(reg.register(BackendId::new("main"), Arc::clone(&rt))); + assert!(!reg.register(BackendId::new("main"), Arc::clone(&rt))); + assert_eq!(reg.len(), 1); + } + + #[test] + fn registry_primary_is_first_registered() { + let mut reg = BackendRegistry::new(); + let rt1 = memory_runtime(); + let rt2 = memory_runtime(); + reg.register(BackendId::new("main"), rt1); + reg.register(BackendId::new("lore"), rt2); + assert_eq!(reg.primary().unwrap().id.as_str(), "main"); + } + + #[test] + fn multi_backend_coordinator_not_single() { + let mut registry = BackendRegistry::new(); + registry.register(BackendId::new("main"), memory_runtime()); + registry.register(BackendId::new("lore"), memory_runtime()); + let coord = SubstrateCoordinator::new(registry); + assert!(!coord.is_single_backend()); + assert_eq!(coord.backend_count(), 2); + } + + #[test] + fn backend_id_display() { + let id = BackendId::new("archive"); + assert_eq!(id.to_string(), "archive"); + assert_eq!(id.as_str(), "archive"); + } + + #[test] + fn backend_id_main_constant() { + assert_eq!(BackendId::main().as_str(), BackendId::MAIN); + } +} diff --git a/crates/kkernel/src/engine.rs b/crates/kkernel/src/engine.rs new file mode 100644 index 00000000..d16aee6c --- /dev/null +++ b/crates/kkernel/src/engine.rs @@ -0,0 +1,330 @@ +//! 
`kkernel engine` — embedding model lifecycle management (ADR-043). +//! +//! Implements: +//! - `kkernel engine list` — show all engines and their model history +//! - `kkernel engine status ` — per-engine active model and migration state +//! - `kkernel engine migrate --to ... / --resume / --abort` +//! - `kkernel engine drift-check ` — one-shot drift detection +//! +//! These commands are operator-only. No MCP verbs are exposed (ADR-043 §6). + +use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use clap::Subcommand; +use serde::Serialize; + +// ── Subcommand tree ──────────────────────────────────────────────────────────── + +#[derive(Subcommand, Debug)] +pub enum EngineCommand { + /// List all engines and their model history. + List(EngineListArgs), + + /// Show per-engine active model and migration status. + Status(EngineStatusArgs), + + /// Manage embedding model migrations for an engine. + Migrate(EngineMigrateArgs), + + /// Run a one-shot drift detection for an engine. + DriftCheck(EngineDriftCheckArgs), +} + +#[derive(clap::Parser, Debug)] +pub struct EngineListArgs { + /// Print human-readable output instead of JSON. + #[arg(long)] + pub human: bool, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +#[derive(clap::Parser, Debug)] +pub struct EngineStatusArgs { + /// Engine name to inspect (e.g. `mE5-small`). + pub engine: String, + + /// Print human-readable output instead of JSON. + #[arg(long)] + pub human: bool, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +#[derive(clap::Parser, Debug)] +pub struct EngineMigrateArgs { + /// Engine name to migrate (e.g. `mE5-small`). + pub engine: String, + + /// Target model name for a new migration. + #[arg(long, conflicts_with_all = &["resume", "abort"])] + pub to: Option, + + /// Resume a previously failed migration. 
+ #[arg(long, conflicts_with_all = &["to", "abort"])] + pub resume: bool, + + /// Abort an in-progress migration and clean up pending vectors. + #[arg(long, conflicts_with_all = &["to", "resume"])] + pub abort: bool, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +#[derive(clap::Parser, Debug)] +pub struct EngineDriftCheckArgs { + /// Engine name to inspect (e.g. `mE5-small`). + pub engine: String, + + /// Number of records to sample for drift detection (default: 1000). + #[arg(long, default_value = "1000")] + pub sample: usize, + + /// Print human-readable output instead of JSON. + #[arg(long)] + pub human: bool, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +// ── Output types ─────────────────────────────────────────────────────────────── + +#[derive(Clone, Debug, Serialize)] +pub struct EngineModelRecord { + pub engine_name: String, + pub model_id: String, + pub key_version: String, + pub dimensions: u32, + pub status: String, + pub activated_at: Option, + pub superseded_at: Option, +} + +#[derive(Debug, Serialize)] +pub struct EngineStatus { + pub engine_name: String, + pub active_model: Option, + pub migration_in_progress: bool, + pub pending_model: Option, +} + +// ── Entry point ──────────────────────────────────────────────────────────────── + +pub fn run_engine(cmd: EngineCommand) -> Result<()> { + match cmd { + EngineCommand::List(args) => cmd_engine_list(args), + EngineCommand::Status(args) => cmd_engine_status(args), + EngineCommand::Migrate(args) => cmd_engine_migrate(args), + EngineCommand::DriftCheck(args) => cmd_engine_drift_check(args), + } +} + +// ── list ────────────────────────────────────────────────────────────────────── + +fn cmd_engine_list(args: EngineListArgs) -> Result<()> { + let records = query_embedding_models(args.db.as_deref(), None)?; + + if args.human { + for r in &records { + println!( + " {:<20} model={:<30} status={} 
key_version={} dim={}", + r.engine_name, r.model_id, r.status, r.key_version, r.dimensions + ); + } + } else { + let json = serde_json::to_string(&records).expect("serialize EngineModelRecord[]"); + println!("{json}"); + } + Ok(()) +} + +// ── status ──────────────────────────────────────────────────────────────────── + +fn cmd_engine_status(args: EngineStatusArgs) -> Result<()> { + let all = query_embedding_models(args.db.as_deref(), Some(&args.engine))?; + + let active = all.iter().find(|r| r.status == "active").cloned(); + let pending = all.iter().find(|r| r.status == "pending").cloned(); + + let status = EngineStatus { + engine_name: args.engine.clone(), + migration_in_progress: pending.is_some(), + active_model: active, + pending_model: pending, + }; + + if args.human { + if let Some(ref m) = status.active_model { + println!("engine: {}", status.engine_name); + println!(" active model: {}", m.model_id); + println!(" key_version: {}", m.key_version); + println!(" dimensions: {}", m.dimensions); + println!(" migration_in_progress:{}", status.migration_in_progress); + } else { + println!( + "engine: {} — no active model registered", + status.engine_name + ); + } + } else { + let json = serde_json::to_string(&status).expect("serialize EngineStatus"); + println!("{json}"); + } + Ok(()) +} + +// ── migrate ─────────────────────────────────────────────────────────────────── + +fn cmd_engine_migrate(_args: EngineMigrateArgs) -> Result<()> { + Err(anyhow!( + "engine migrate is not yet implemented (ADR-043 D2-D6 — EmbedMigrationWorker deferred \ + to follow-up #380). Use 'kkernel engine list' / 'status' to inspect registered models." + )) +} + +// ── drift-check ─────────────────────────────────────────────────────────────── + +fn cmd_engine_drift_check(_args: EngineDriftCheckArgs) -> Result<()> { + Err(anyhow!( + "engine drift-check is not yet implemented (ADR-043 §5 lattice_transport integration \ + deferred). Track follow-up #380." 
+ )) +} + +// ── Internal helpers ────────────────────────────────────────────────────────── + +fn query_embedding_models( + _db: Option<&std::path::Path>, + engine_filter: Option<&str>, +) -> Result> { + // The _embedding_models table is created by the ADR-043 schema migration. + // Until that migration lands, the table may not exist; return an empty list + // with a log rather than a hard error so `kkernel engine list` is usable + // before full ADR-043 deployment. + // + // A full implementation opens the SQLite DB, queries: + // SELECT engine_name, model_id, key_version, dim, status, + // activated_at, superseded_at + // FROM _embedding_models + // [WHERE engine_name = ?] + // ORDER BY engine_name, activated_at NULLS LAST + // + // and maps rows to EngineModelRecord. + // + // This scaffold returns an empty list so the CLI compiles and tests can + // verify the command routing surface without a live database. + + if let Some(engine) = engine_filter { + tracing::debug!( + engine, + "query_embedding_models: _embedding_models not yet populated" + ); + } else { + tracing::debug!("query_embedding_models: _embedding_models not yet populated"); + } + + Ok(Vec::new()) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn engine_list_empty_ok() { + let args = EngineListArgs { + human: false, + db: None, + }; + // Should not panic even when no models are registered yet. 
+ cmd_engine_list(args).expect("engine list succeeds on empty registry"); + } + + #[test] + fn engine_status_empty_ok() { + let args = EngineStatusArgs { + engine: "mE5-small".into(), + human: false, + db: None, + }; + cmd_engine_status(args).expect("engine status succeeds on empty registry"); + } + + #[test] + fn engine_migrate_returns_not_implemented() { + let args = EngineMigrateArgs { + engine: "mE5-small".into(), + to: Some("bge-small-en-v1.5".into()), + resume: false, + abort: false, + db: None, + }; + let err = cmd_engine_migrate(args).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("not yet implemented"), + "expected 'not yet implemented' in error, got: {msg}" + ); + assert!( + msg.contains("#380"), + "expected follow-up issue reference in error, got: {msg}" + ); + } + + #[test] + fn engine_migrate_resume_returns_not_implemented() { + let args = EngineMigrateArgs { + engine: "mE5-small".into(), + to: None, + resume: true, + abort: false, + db: None, + }; + let err = cmd_engine_migrate(args).unwrap_err(); + assert!(err.to_string().contains("not yet implemented")); + } + + #[test] + fn engine_migrate_abort_returns_not_implemented() { + let args = EngineMigrateArgs { + engine: "mE5-small".into(), + to: None, + resume: false, + abort: true, + db: None, + }; + let err = cmd_engine_migrate(args).unwrap_err(); + assert!(err.to_string().contains("not yet implemented")); + } + + #[test] + fn drift_check_returns_not_implemented() { + let args = EngineDriftCheckArgs { + engine: "mE5-small".into(), + sample: 500, + human: false, + db: None, + }; + let err = cmd_engine_drift_check(args).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("not yet implemented"), + "expected 'not yet implemented' in error, got: {msg}" + ); + assert!( + msg.contains("#380"), + "expected follow-up issue reference in error, got: {msg}" + ); + } +} diff --git a/crates/kkernel/src/kg.rs b/crates/kkernel/src/kg.rs new file mode 100644 index 
00000000..72e31224 --- /dev/null +++ b/crates/kkernel/src/kg.rs @@ -0,0 +1,922 @@ +//! `kkernel kg` — KG validation, init, and hook management (ADR-034, ADR-035). +//! +//! Implements: +//! - `kkernel kg validate` — structural + rule-pass validation +//! - `kkernel kg init` — initialize `.khive/kg/` directory and `khive.toml` +//! - `kkernel kg hook` — install / uninstall / status of the pre-commit hook + +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context, Result}; +use clap::Subcommand; +use serde::Serialize; + +// ── Subcommand tree ──────────────────────────────────────────────────────────── + +#[derive(Subcommand, Debug)] +pub enum KgCommand { + /// Validate the KG in `.khive/kg/` against structural and rule-pass checks. + Validate(ValidateArgs), + + /// Initialize `.khive/kg/` and write `.khive/khive.toml` with defaults. + Init(InitArgs), + + /// Manage the pre-commit hook for KG validation. + #[command(subcommand)] + Hook(HookCommand), +} + +#[derive(clap::Parser, Debug)] +pub struct ValidateArgs { + /// Repository root containing `.khive/kg/`. + #[arg(long, default_value = ".")] + pub repo: PathBuf, + + /// Apply fixable rules and report what changed. + #[arg(long)] + pub fix: bool, + + /// Treat warnings as errors; exit 1 when warnings > 0. + #[arg(long)] + pub strict: bool, + + /// Output format. + #[arg(long, default_value = "text")] + pub format: OutputFormat, + + /// Show all violations (default: cap at 2 then `+ N more`). + #[arg(long)] + pub verbose: bool, + + /// Print summary line only. + #[arg(long)] + pub quiet: bool, + + /// Override the default `.khive/kg/rules.yaml` path. + #[arg(long)] + pub rules: Option, + + /// Run ADR-020 built-in structural checks only; skip `rules.yaml`. 
+ #[arg(long)] + pub no_rules: bool, +} + +#[derive(clap::ValueEnum, Debug, Clone, Copy)] +pub enum OutputFormat { + Text, + Json, + Github, +} + +impl std::fmt::Display for OutputFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OutputFormat::Text => write!(f, "text"), + OutputFormat::Json => write!(f, "json"), + OutputFormat::Github => write!(f, "github"), + } + } +} + +#[derive(clap::Parser, Debug)] +pub struct InitArgs { + /// Repository root to initialize. + #[arg(long, default_value = ".")] + pub repo: PathBuf, + + /// Also generate `.github/workflows/kg-validate.yml`. + #[arg(long)] + pub ci: bool, + + /// Install the pre-commit hook without reinitializing. + #[arg(long)] + pub add_hooks: bool, +} + +#[derive(Subcommand, Debug)] +pub enum HookCommand { + /// Create `.git/hooks/pre-commit` symlink pointing to the tracked hook. + Install { + #[arg(long, default_value = ".")] + repo: PathBuf, + }, + /// Remove the `.git/hooks/pre-commit` symlink. + Uninstall { + #[arg(long, default_value = ".")] + repo: PathBuf, + }, + /// Show whether the hook symlink exists and points to a valid target. 
+ Status { + #[arg(long, default_value = ".")] + repo: PathBuf, + }, +} + +// ── Output types ─────────────────────────────────────────────────────────────── + +#[derive(Debug, Serialize)] +pub struct ValidationReport { + pub rules: Vec, + pub summary: ValidationSummary, +} + +#[derive(Debug, Serialize)] +pub struct RuleResult { + pub id: String, + pub severity: &'static str, + pub passed: bool, + pub violations: Vec, +} + +#[derive(Debug, Serialize)] +pub struct Violation { + pub entity_id: Option, + pub entity_name: Option, + pub entity_kind: Option, + pub rule_id: String, + pub severity: &'static str, + pub message: String, + pub fixable: bool, +} + +#[derive(Debug, Serialize)] +pub struct ValidationSummary { + pub errors: usize, + pub warnings: usize, + pub info: usize, + pub entities: usize, + pub edges: usize, + pub passed: bool, +} + +// ── Entry points ─────────────────────────────────────────────────────────────── + +pub fn run_kg(cmd: KgCommand) -> Result<()> { + match cmd { + KgCommand::Validate(args) => cmd_validate(args), + KgCommand::Init(args) => cmd_init(args), + KgCommand::Hook(h) => cmd_hook(h), + } +} + +// ── validate ────────────────────────────────────────────────────────────────── + +fn cmd_validate(args: ValidateArgs) -> Result<()> { + let kg_dir = args.repo.join(".khive/kg"); + if !kg_dir.exists() { + bail!( + "KG directory not found: {}. Run `kkernel kg init` first.", + kg_dir.display() + ); + } + + let entities_path = kg_dir.join("entities.ndjson"); + let edges_path = kg_dir.join("edges.ndjson"); + + let entities = count_ndjson_lines(&entities_path).unwrap_or(0); + let edges = count_ndjson_lines(&edges_path).unwrap_or(0); + + let rules_path = args.rules.unwrap_or_else(|| kg_dir.join("rules.yaml")); + + // Run structural checks (ADR-020 built-ins). + let mut rule_results: Vec = structural_checks(&entities_path, &edges_path); + + // Run configurable rule pass unless --no-rules. 
+ if !args.no_rules && rules_path.exists() { + let configurable = configurable_rule_checks(&entities_path, &edges_path, &rules_path)?; + rule_results.extend(configurable); + } + + let errors: usize = rule_results + .iter() + .filter(|r| r.severity == "error" && !r.passed) + .count(); + let warnings: usize = rule_results + .iter() + .filter(|r| r.severity == "warning" && !r.passed) + .count(); + let info: usize = rule_results + .iter() + .filter(|r| r.severity == "info" && !r.passed) + .count(); + + let passed = if args.strict { + errors == 0 && warnings == 0 + } else { + errors == 0 + }; + + let summary = ValidationSummary { + errors, + warnings, + info, + entities, + edges, + passed, + }; + + let report = ValidationReport { + rules: rule_results, + summary, + }; + + match args.format { + OutputFormat::Json => { + let json = serde_json::to_string_pretty(&report).expect("serialize ValidationReport"); + println!("{json}"); + } + OutputFormat::Github => print_github_format(&report), + OutputFormat::Text => print_text_format(&report, args.verbose, args.quiet), + } + + if args.fix { + apply_fixes(&args.repo)?; + } + + if !report.summary.passed { + std::process::exit(1); + } + Ok(()) +} + +fn count_ndjson_lines(path: &Path) -> Option { + let content = std::fs::read_to_string(path).ok()?; + Some(content.lines().filter(|l| !l.trim().is_empty()).count()) +} + +fn structural_checks(entities_path: &Path, edges_path: &Path) -> Vec { + vec![ + check_no_duplicate_uuids(entities_path), + check_sort_order(entities_path, edges_path), + check_referential_integrity(entities_path, edges_path), + ] +} + +fn check_no_duplicate_uuids(entities_path: &Path) -> RuleResult { + let mut seen = std::collections::HashSet::new(); + let mut violations = Vec::new(); + + if let Ok(content) = std::fs::read_to_string(entities_path) { + for line in content.lines().filter(|l| !l.trim().is_empty()) { + if let Ok(v) = serde_json::from_str::(line) { + if let Some(id) = v.get("id").and_then(|i| i.as_str()) 
{ + if !seen.insert(id.to_string()) { + violations.push(Violation { + entity_id: Some(id.to_string()), + entity_name: v.get("name").and_then(|n| n.as_str()).map(str::to_string), + entity_kind: v.get("kind").and_then(|k| k.as_str()).map(str::to_string), + rule_id: "no-duplicate-uuids".into(), + severity: "error", + message: format!("Duplicate UUID: {id}"), + fixable: false, + }); + } + } + } + } + } + + RuleResult { + id: "no-duplicate-uuids".into(), + severity: "error", + passed: violations.is_empty(), + violations, + } +} + +fn check_sort_order(entities_path: &Path, edges_path: &Path) -> RuleResult { + let mut violations = Vec::new(); + + // Check entities.ndjson sorted by UUID. + if let Ok(content) = std::fs::read_to_string(entities_path) { + let ids: Vec = content + .lines() + .filter(|l| !l.trim().is_empty()) + .filter_map(|l| { + serde_json::from_str::(l) + .ok() + .and_then(|v| v.get("id")?.as_str().map(str::to_string)) + }) + .collect(); + let mut sorted = ids.clone(); + sorted.sort(); + if ids != sorted { + violations.push(Violation { + entity_id: None, + entity_name: None, + entity_kind: None, + rule_id: "sort-order".into(), + severity: "warning", + message: "entities.ndjson is not sorted by UUID; run `kkernel kg validate --fix`" + .into(), + fixable: true, + }); + } + } + + // Check edges.ndjson sorted by (source, target, relation). 
+ if let Ok(content) = std::fs::read_to_string(edges_path) { + let keys: Vec<(String, String, String)> = content + .lines() + .filter(|l| !l.trim().is_empty()) + .filter_map(|l| { + let v: serde_json::Value = serde_json::from_str(l).ok()?; + let s = v.get("source_id")?.as_str()?.to_string(); + let t = v.get("target_id")?.as_str()?.to_string(); + let r = v.get("relation")?.as_str()?.to_string(); + Some((s, t, r)) + }) + .collect(); + let mut sorted = keys.clone(); + sorted.sort(); + if keys != sorted { + violations.push(Violation { + entity_id: None, + entity_name: None, + entity_kind: None, + rule_id: "sort-order".into(), + severity: "warning", + message: + "edges.ndjson is not sorted by (source, target, relation); run `kkernel kg validate --fix`" + .into(), + fixable: true, + }); + } + } + + RuleResult { + id: "sort-order".into(), + severity: "warning", + passed: violations.is_empty(), + violations, + } +} + +fn check_referential_integrity(entities_path: &Path, edges_path: &Path) -> RuleResult { + let mut violations = Vec::new(); + + let entity_ids: std::collections::HashSet = + if let Ok(content) = std::fs::read_to_string(entities_path) { + content + .lines() + .filter(|l| !l.trim().is_empty()) + .filter_map(|l| { + serde_json::from_str::(l) + .ok() + .and_then(|v| v.get("id")?.as_str().map(str::to_string)) + }) + .collect() + } else { + std::collections::HashSet::new() + }; + + if let Ok(content) = std::fs::read_to_string(edges_path) { + for line in content.lines().filter(|l| !l.trim().is_empty()) { + if let Ok(v) = serde_json::from_str::(line) { + for field in &["source_id", "target_id"] { + if let Some(id) = v.get(field).and_then(|i| i.as_str()) { + if !entity_ids.contains(id) { + violations.push(Violation { + entity_id: Some(id.to_string()), + entity_name: None, + entity_kind: None, + rule_id: "referential-integrity".into(), + severity: "error", + message: format!( + "Edge {} references unknown entity: {id}", + if *field == "source_id" { + "source" + } else { 
+ "target" + } + ), + fixable: false, + }); + } + } + } + } + } + } + + RuleResult { + id: "referential-integrity".into(), + severity: "error", + passed: violations.is_empty(), + violations, + } +} + +fn configurable_rule_checks( + _entities_path: &Path, + _edges_path: &Path, + _rules_path: &Path, +) -> Result> { + // Rules.yaml loading and evaluation is deferred to the runtime library + // (ADR-034 §10 specifies schema validation with exit code 2). This stub + // returns no additional results when the rules file is present but the + // rule-evaluation runtime hasn't loaded it yet. + Ok(Vec::new()) +} + +fn apply_fixes(repo: &Path) -> Result<()> { + let kg_dir = repo.join(".khive/kg"); + fix_sort_order(&kg_dir.join("entities.ndjson"), "id")?; + fix_sort_order_edges(&kg_dir.join("edges.ndjson"))?; + eprintln!("~ sort-order: applied fix to entities.ndjson and edges.ndjson"); + Ok(()) +} + +fn fix_sort_order(path: &Path, sort_key: &str) -> Result<()> { + if !path.exists() { + return Ok(()); + } + let content = + std::fs::read_to_string(path).with_context(|| format!("read {}", path.display()))?; + let mut lines: Vec = content + .lines() + .filter(|l| !l.trim().is_empty()) + .filter_map(|l| serde_json::from_str(l).ok()) + .collect(); + lines.sort_by(|a, b| { + let ak = a.get(sort_key).and_then(|v| v.as_str()).unwrap_or(""); + let bk = b.get(sort_key).and_then(|v| v.as_str()).unwrap_or(""); + ak.cmp(bk) + }); + let out: String = lines + .iter() + .map(|v| serde_json::to_string(v).unwrap()) + .collect::>() + .join("\n"); + std::fs::write(path, out + "\n").with_context(|| format!("write {}", path.display())) +} + +fn fix_sort_order_edges(path: &Path) -> Result<()> { + if !path.exists() { + return Ok(()); + } + let content = + std::fs::read_to_string(path).with_context(|| format!("read {}", path.display()))?; + let mut lines: Vec = content + .lines() + .filter(|l| !l.trim().is_empty()) + .filter_map(|l| serde_json::from_str(l).ok()) + .collect(); + lines.sort_by(|a, b| { + 
let ak = ( + a.get("source_id").and_then(|v| v.as_str()).unwrap_or(""), + a.get("target_id").and_then(|v| v.as_str()).unwrap_or(""), + a.get("relation").and_then(|v| v.as_str()).unwrap_or(""), + ); + let bk = ( + b.get("source_id").and_then(|v| v.as_str()).unwrap_or(""), + b.get("target_id").and_then(|v| v.as_str()).unwrap_or(""), + b.get("relation").and_then(|v| v.as_str()).unwrap_or(""), + ); + ak.cmp(&bk) + }); + let out: String = lines + .iter() + .map(|v| serde_json::to_string(v).unwrap()) + .collect::>() + .join("\n"); + std::fs::write(path, out + "\n").with_context(|| format!("write {}", path.display())) +} + +fn print_text_format(report: &ValidationReport, verbose: bool, quiet: bool) { + if !quiet { + for r in &report.rules { + let symbol = if r.passed { + "\u{2713}" + } else if r.severity == "error" { + "\u{2717}" + } else { + "\u{26a0}" + }; + if r.violations.is_empty() { + println!(" {symbol} {}", r.id); + } else { + println!(" {symbol} {}: {} violation(s)", r.id, r.violations.len()); + let shown = if verbose { + r.violations.len() + } else { + 2.min(r.violations.len()) + }; + for v in &r.violations[..shown] { + println!(" - {}", v.message); + } + if !verbose && r.violations.len() > 2 { + println!(" + {} more (run with --verbose)", r.violations.len() - 2); + } + } + } + } + let s = &report.summary; + println!( + "\nSummary: {} error(s), {} warning(s), {} entities, {} edges", + s.errors, s.warnings, s.entities, s.edges + ); +} + +fn print_github_format(report: &ValidationReport) { + for r in &report.rules { + for v in &r.violations { + let level = if r.severity == "error" { + "error" + } else { + "warning" + }; + println!("::{level} ::{}", v.message); + } + } +} + +// ── init ────────────────────────────────────────────────────────────────────── + +const DEFAULT_KHIVE_TOML: &str = r#"# .khive/khive.toml — project KG configuration (ADR-035) +# Committed to git. All collaborators use these settings. 
+ +[[backends]] +name = "main" +path = "~/.khive/khive.db" +cache_mb = 256 +journal_mode = "wal" + +[[engines]] +name = "mE5-small" +dim = 384 +weight = 1.0 + +[packs.kg] +backend = "main" +engines = ["mE5-small"] + +[packs.memory] +backend = "main" +engines = ["mE5-small"] + +[packs.gtd] +backend = "main" +engines = [] + +[embed] +model = "mE5-small" +dimensions = 384 +auto_embed = true +batch_size = 64 + +[embed.fields] +include = ["name", "description"] + +[schema] +strict = true +"#; + +const GITIGNORE_CONTENT: &str = "*\n!.gitignore\n!kg/\n!kg/**\n!khive.toml\n"; + +const PRE_COMMIT_HOOK: &str = r#"#!/usr/bin/env bash +# .khive/kg/hooks/pre-commit +# Generated by kkernel kg init. +# Runs KG validation on staged NDJSON files. +# Bypass with: git commit --no-verify + +set -euo pipefail + +staged=$(git diff --cached --name-only \ + | grep -E '^\.khive/kg/(entities|edges)\.ndjson$' || true) +if [ -z "$staged" ]; then + exit 0 +fi + +kkernel kg validate +"#; + +const CI_WORKFLOW: &str = r#"name: KG Validate +on: + push: + paths: [".khive/kg/**"] + pull_request: + paths: [".khive/kg/**"] + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Validate KG + run: kkernel kg validate --format github +"#; + +fn cmd_init(args: InitArgs) -> Result<()> { + if args.add_hooks { + return hook_install(&args.repo); + } + + let khive_dir = args.repo.join(".khive"); + let kg_dir = khive_dir.join("kg"); + let hooks_dir = kg_dir.join("hooks"); + + std::fs::create_dir_all(&kg_dir).with_context(|| format!("create {}", kg_dir.display()))?; + std::fs::create_dir_all(&hooks_dir) + .with_context(|| format!("create {}", hooks_dir.display()))?; + + // Write entities.ndjson and edges.ndjson if absent. + for name in &["entities.ndjson", "edges.ndjson"] { + let path = kg_dir.join(name); + if !path.exists() { + std::fs::write(&path, "").with_context(|| format!("create {}", path.display()))?; + } + } + + // Write .khive/.gitignore. 
+ let gitignore = khive_dir.join(".gitignore"); + if !gitignore.exists() { + std::fs::write(&gitignore, GITIGNORE_CONTENT) + .with_context(|| format!("write {}", gitignore.display()))?; + } + + // Write .khive/khive.toml (do not overwrite). + let toml_path = khive_dir.join("khive.toml"); + if !toml_path.exists() { + std::fs::write(&toml_path, DEFAULT_KHIVE_TOML) + .with_context(|| format!("write {}", toml_path.display()))?; + println!(" Initialized {}", toml_path.display()); + } else { + println!(" Skipped {} (already exists)", toml_path.display()); + } + + // Write pre-commit hook script. + let hook_script = hooks_dir.join("pre-commit"); + if !hook_script.exists() { + std::fs::write(&hook_script, PRE_COMMIT_HOOK) + .with_context(|| format!("write {}", hook_script.display()))?; + // Make hook script executable. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&hook_script)?.permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&hook_script, perms)?; + } + } + + println!(" Initialized .khive/kg/ (entities.ndjson, edges.ndjson, hooks/pre-commit)"); + + if args.ci { + let workflow_dir = args.repo.join(".github/workflows"); + std::fs::create_dir_all(&workflow_dir) + .with_context(|| format!("create {}", workflow_dir.display()))?; + let workflow_path = workflow_dir.join("kg-validate.yml"); + if !workflow_path.exists() { + std::fs::write(&workflow_path, CI_WORKFLOW) + .with_context(|| format!("write {}", workflow_path.display()))?; + println!(" Generated {}", workflow_path.display()); + } + } + + Ok(()) +} + +// ── hook ────────────────────────────────────────────────────────────────────── + +#[derive(Debug, Serialize)] +pub struct HookStatus { + pub symlink_exists: bool, + pub symlink_target: Option, + pub target_valid: bool, +} + +fn cmd_hook(cmd: HookCommand) -> Result<()> { + match cmd { + HookCommand::Install { repo } => hook_install(&repo), + HookCommand::Uninstall { repo } => hook_uninstall(&repo), + 
HookCommand::Status { repo } => hook_status(&repo), + } +} + +fn hook_install(repo: &Path) -> Result<()> { + let hook_script = repo.join(".khive/kg/hooks/pre-commit"); + let git_hook = repo.join(".git/hooks/pre-commit"); + + if !hook_script.exists() { + bail!( + "Hook script not found: {}. Run `kkernel kg init` first.", + hook_script.display() + ); + } + + if let Some(parent) = git_hook.parent() { + std::fs::create_dir_all(parent).with_context(|| format!("create {}", parent.display()))?; + } + + if git_hook.exists() || git_hook.is_symlink() { + std::fs::remove_file(&git_hook) + .with_context(|| format!("remove existing {}", git_hook.display()))?; + } + + #[cfg(unix)] + { + use std::os::unix::fs::symlink; + // Use the absolute path for the symlink target. + let absolute_script = hook_script + .canonicalize() + .unwrap_or_else(|_| hook_script.clone()); + symlink(&absolute_script, &git_hook) + .with_context(|| format!("create symlink {}", git_hook.display()))?; + } + + #[cfg(not(unix))] + { + std::fs::copy(&hook_script, &git_hook) + .with_context(|| format!("copy hook to {}", git_hook.display()))?; + } + + println!( + " Installed: {} -> {}", + git_hook.display(), + hook_script.display() + ); + Ok(()) +} + +fn hook_uninstall(repo: &Path) -> Result<()> { + let git_hook = repo.join(".git/hooks/pre-commit"); + if git_hook.exists() || git_hook.is_symlink() { + std::fs::remove_file(&git_hook) + .with_context(|| format!("remove {}", git_hook.display()))?; + println!(" Uninstalled: {}", git_hook.display()); + } else { + println!(" No hook installed at {}", git_hook.display()); + } + Ok(()) +} + +fn hook_status(repo: &Path) -> Result<()> { + let git_hook = repo.join(".git/hooks/pre-commit"); + let symlink_exists = git_hook.exists() || git_hook.is_symlink(); + let symlink_target = if symlink_exists { + std::fs::read_link(&git_hook) + .ok() + .map(|p| p.display().to_string()) + } else { + None + }; + let target_valid = symlink_target + .as_deref() + .map(|t| 
Path::new(t).exists()) + .unwrap_or(false); + + let status = HookStatus { + symlink_exists, + symlink_target, + target_valid, + }; + let json = serde_json::to_string(&status).expect("serialize HookStatus"); + println!("{json}"); + Ok(()) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn make_kg_dir(tmp: &TempDir) -> PathBuf { + let kg_dir = tmp.path().join(".khive/kg"); + std::fs::create_dir_all(&kg_dir).unwrap(); + kg_dir + } + + fn write_entities(kg_dir: &Path, entities: &[(&str, &str, &str)]) { + let content: String = entities + .iter() + .map(|(id, kind, name)| format!(r#"{{"id":"{id}","kind":"{kind}","name":"{name}"}}"#)) + .collect::>() + .join("\n"); + std::fs::write(kg_dir.join("entities.ndjson"), content + "\n").unwrap(); + } + + fn write_edges(kg_dir: &Path, edges: &[(&str, &str, &str)]) { + let content: String = edges + .iter() + .map(|(src, tgt, rel)| { + format!(r#"{{"source_id":"{src}","target_id":"{tgt}","relation":"{rel}"}}"#) + }) + .collect::>() + .join("\n"); + std::fs::write(kg_dir.join("edges.ndjson"), content + "\n").unwrap(); + } + + #[test] + fn duplicate_uuid_detected() { + let tmp = TempDir::new().unwrap(); + let kg_dir = make_kg_dir(&tmp); + write_entities( + &kg_dir, + &[ + ("aaaaaaaa-0000-0000-0000-000000000001", "concept", "A"), + ("aaaaaaaa-0000-0000-0000-000000000001", "concept", "A-dup"), + ], + ); + let result = check_no_duplicate_uuids(&kg_dir.join("entities.ndjson")); + assert!(!result.passed, "duplicate UUID should fail"); + assert_eq!(result.violations.len(), 1); + } + + #[test] + fn no_duplicates_passes() { + let tmp = TempDir::new().unwrap(); + let kg_dir = make_kg_dir(&tmp); + write_entities( + &kg_dir, + &[ + ("aaaaaaaa-0000-0000-0000-000000000001", "concept", "A"), + ("bbbbbbbb-0000-0000-0000-000000000002", "concept", "B"), + ], + ); + let result = check_no_duplicate_uuids(&kg_dir.join("entities.ndjson")); + 
assert!(result.passed); + } + + #[test] + fn referential_integrity_catches_missing_target() { + let tmp = TempDir::new().unwrap(); + let kg_dir = make_kg_dir(&tmp); + write_entities( + &kg_dir, + &[("aaaaaaaa-0000-0000-0000-000000000001", "concept", "A")], + ); + write_edges( + &kg_dir, + &[( + "aaaaaaaa-0000-0000-0000-000000000001", + "bbbbbbbb-0000-0000-0000-000000000002", + "extends", + )], + ); + let result = check_referential_integrity( + &kg_dir.join("entities.ndjson"), + &kg_dir.join("edges.ndjson"), + ); + assert!(!result.passed); + assert_eq!(result.violations.len(), 1); + } + + #[test] + fn init_creates_expected_files() { + let tmp = TempDir::new().unwrap(); + let args = InitArgs { + repo: tmp.path().to_path_buf(), + ci: false, + add_hooks: false, + }; + cmd_init(args).unwrap(); + + assert!(tmp.path().join(".khive/kg/entities.ndjson").exists()); + assert!(tmp.path().join(".khive/kg/edges.ndjson").exists()); + assert!(tmp.path().join(".khive/khive.toml").exists()); + assert!(tmp.path().join(".khive/kg/hooks/pre-commit").exists()); + } + + #[test] + fn init_does_not_overwrite_existing_toml() { + let tmp = TempDir::new().unwrap(); + std::fs::create_dir_all(tmp.path().join(".khive")).unwrap(); + let toml_path = tmp.path().join(".khive/khive.toml"); + std::fs::write(&toml_path, "# custom\n").unwrap(); + + let args = InitArgs { + repo: tmp.path().to_path_buf(), + ci: false, + add_hooks: false, + }; + cmd_init(args).unwrap(); + + let content = std::fs::read_to_string(&toml_path).unwrap(); + assert_eq!(content, "# custom\n", "should not overwrite existing toml"); + } + + #[test] + fn sort_order_fix_sorts_entities() { + let tmp = TempDir::new().unwrap(); + let kg_dir = make_kg_dir(&tmp); + // Write out-of-order entities. 
+ write_entities( + &kg_dir, + &[ + ("cccccccc-0000-0000-0000-000000000003", "concept", "C"), + ("aaaaaaaa-0000-0000-0000-000000000001", "concept", "A"), + ("bbbbbbbb-0000-0000-0000-000000000002", "concept", "B"), + ], + ); + std::fs::write(kg_dir.join("edges.ndjson"), "").unwrap(); + fix_sort_order(&kg_dir.join("entities.ndjson"), "id").unwrap(); + let result = check_sort_order( + &kg_dir.join("entities.ndjson"), + &kg_dir.join("edges.ndjson"), + ); + assert!(result.passed, "sort-order should pass after fix"); + } +} diff --git a/crates/kkernel/src/lib.rs b/crates/kkernel/src/lib.rs index 1cb3903b..9d0f7aa9 100644 --- a/crates/kkernel/src/lib.rs +++ b/crates/kkernel/src/lib.rs @@ -1,28 +1,40 @@ //! kkernel — khive admin/management library. //! -//! See [ADR-076](../../docs/adr/ADR-076-kkernel-and-mcp-split.md) for the +//! See [ADR-003](../../docs/adr/ADR-003-system-architecture.md) for the //! kernel/MCP split rationale. This library exposes the building blocks that //! the `kkernel` binary composes into subcommands: //! //! - [`sync`] — build a queryable SQLite DB from NDJSON sources (issue #174). //! - [`pack_introspect`] — enumerate registered packs and their handler surface. -//! -//! Migration and other admin operations will land here as separate modules. +//! - [`kg`] — KG validation, init, and hook management (ADR-034, ADR-035). +//! - [`engine`] — embedding model lifecycle management (ADR-043). +//! - [`vector`] — vector store introspection and orphan sweep (ADR-044). +//! - [`coordinator`] — SubstrateCoordinator for cross-backend dispatch (ADR-029). +pub mod coordinator; +pub mod engine; +pub mod kg; pub mod pack_introspect; pub mod sync; +pub mod vector; // Force the pack crates into the binary so their `inventory::submit!` blocks -// run at startup. Cargo deps alone are not enough — the linker drops crates -// whose symbols aren't referenced, and `inventory` registration is one such -// dropped symbol. 
The simplest way to keep them is to re-export a marker +// run at startup (ADR-027). Cargo deps alone are not enough — the linker drops +// crates whose symbols aren't referenced, and `inventory` registration is one +// such dropped symbol. The simplest way to keep them is to reference a marker // type that the binary sees. We don't expose these in the public API; the // `#[allow(unused_imports)]` makes the intent explicit. +// +// To add a new first-party pack: (1) add its crate as a `[dependency]` in +// `kkernel/Cargo.toml`, (2) add a `use` line below referencing any public type +// — this is the force-link anchor that prevents linker dead-stripping. #[doc(hidden)] #[allow(unused_imports)] mod _pack_links { use khive_pack_brain::BrainPack as _; + use khive_pack_comm::CommPack as _; use khive_pack_gtd::GtdPack as _; use khive_pack_kg::KgPack as _; use khive_pack_memory::MemoryPack as _; + use khive_pack_schedule::SchedulePack as _; } diff --git a/crates/kkernel/src/main.rs b/crates/kkernel/src/main.rs index aa103255..052d3a78 100644 --- a/crates/kkernel/src/main.rs +++ b/crates/kkernel/src/main.rs @@ -1,12 +1,16 @@ //! `kkernel` binary — khive admin/management Rust CLI. //! -//! See [ADR-076](../../docs/adr/ADR-076-kkernel-and-mcp-split.md) for the +//! See [ADR-003](../../docs/adr/ADR-003-system-architecture.md) for the //! kernel/MCP split rationale. //! //! Subcommands: //! -//! - `sync` — build a queryable SQLite DB from NDJSON sources (issue #174) -//! - `pack` — introspect registered packs (`list`, `handler `) +//! - `sync` — build a queryable SQLite DB from NDJSON sources (issue #174) +//! - `pack` — introspect registered packs (`list`, `handler `) +//! - `kg` — KG validation, init, hook management (ADR-034, ADR-035) +//! - `engine` — embedding model lifecycle: list/status/migrate/drift-check (ADR-043) +//! - `vector` — vector store capabilities and orphan sweep (ADR-044) +//! - `backend` — inspect registered backends (`list`, `info `) //! //! 
All subcommands emit JSON on stdout by default for easy piping/parsing. //! Pass `--human` to switch to a readable table where supported. @@ -16,7 +20,8 @@ use std::path::PathBuf; use anyhow::{Context, Result}; use clap::{Parser, Subcommand}; -use kkernel::{pack_introspect, sync}; +use khive_runtime::{BackendId, KhiveRuntime, RuntimeConfig}; +use kkernel::{coordinator::BackendRegistry, engine, kg, pack_introspect, sync, vector}; #[derive(Parser, Debug)] #[command( @@ -41,6 +46,22 @@ enum Command { /// Introspect registered packs. #[command(subcommand)] Pack(PackCommand), + + /// KG validation, init, and hook management (ADR-034, ADR-035). + #[command(subcommand)] + Kg(kg::KgCommand), + + /// Embedding model lifecycle: list, status, migrate, drift-check (ADR-043). + #[command(subcommand)] + Engine(engine::EngineCommand), + + /// Vector store capabilities and orphan sweep (ADR-044). + #[command(subcommand)] + Vector(vector::VectorCommand), + + /// Inspect registered backends (ADR-009, ADR-028). + #[command(subcommand)] + Backend(BackendCommand), } #[derive(Parser, Debug)] @@ -78,6 +99,31 @@ enum PackCommand { }, } +/// Backend admin commands (ADR-003 §four-invariants, ADR-009, ADR-028). +/// +/// In the full multi-backend deployment, `kkernel backend list` reads `khive.toml` +/// and enumerates all configured `[[backends]]` entries. In the current v1 implementation, +/// it lists the single default backend constructed from `RuntimeConfig::default()`. +#[derive(Subcommand, Debug)] +enum BackendCommand { + /// List all registered backends. + List { + /// Print a human-readable table instead of JSON. + #[arg(long)] + human: bool, + }, + + /// Print information about a specific backend. + Info { + /// Backend name (e.g. `main`, `lore`, `archive`). + name: String, + + /// Print human-readable output instead of JSON. 
+ #[arg(long)] + human: bool, + }, +} + #[tokio::main] async fn main() -> Result<()> { let args = Args::parse(); @@ -86,6 +132,10 @@ async fn main() -> Result<()> { match args.command { Command::Sync(s) => cmd_sync(s).await, Command::Pack(p) => cmd_pack(p), + Command::Kg(k) => kg::run_kg(k), + Command::Engine(e) => engine::run_engine(e), + Command::Vector(v) => vector::run_vector(v), + Command::Backend(b) => cmd_backend(b), } } @@ -165,3 +215,73 @@ fn cmd_pack(cmd: PackCommand) -> Result<()> { } } } + +fn cmd_backend(cmd: BackendCommand) -> Result<()> { + // v1: enumerate backends from RuntimeConfig defaults. + // Full multi-backend implementation reads khive.toml (ADR-028); this ships + // the CLI surface so tooling can already call `kkernel backend list`. + let default_config = RuntimeConfig::default(); + let default_id = default_config.backend_id.clone(); + let default_path = default_config + .db_path + .as_ref() + .map(|p| p.display().to_string()) + .unwrap_or_else(|| ":memory:".to_string()); + + // Build a synthetic registry from the single default backend. 
+ let mut registry = BackendRegistry::new(); + let rt = KhiveRuntime::new(default_config).map_err(|e| anyhow::anyhow!("{e}"))?; + registry.register(default_id.clone(), std::sync::Arc::new(rt)); + + match cmd { + BackendCommand::List { human } => { + let ids: Vec<_> = registry.ids(); + if human { + println!("Registered backends ({}):", ids.len()); + for id in &ids { + let entry = registry.get(id).unwrap(); + let primary_marker = if registry.primary().map(|p| p.id == *id).unwrap_or(false) + { + " [primary]" + } else { + "" + }; + println!(" {}{}", id.as_str(), primary_marker); + let _ = entry; // future: print path, file_backed + } + } else { + let names: Vec<&str> = ids.iter().map(|id| id.as_str()).collect(); + let json = serde_json::json!({ + "backends": names, + "primary": registry.primary().map(|e| e.id.as_str()), + "count": ids.len(), + }); + println!("{}", serde_json::to_string(&json).expect("serialize")); + } + Ok(()) + } + BackendCommand::Info { name, human } => { + let id = BackendId::new(&name); + let entry = registry + .get(&id) + .with_context(|| format!("backend {name:?} is not registered"))?; + if human { + let is_primary = registry + .primary() + .map(|p| p.id == entry.id) + .unwrap_or(false); + println!("backend: {}", entry.id.as_str()); + println!(" primary: {is_primary}"); + println!(" path: {default_path}"); + } else { + let json = serde_json::json!({ + "name": entry.id.as_str(), + "path": default_path, + "primary": registry.primary().map(|p| p.id == entry.id).unwrap_or(false), + }); + println!("{}", serde_json::to_string(&json).expect("serialize")); + } + Ok(()) + } + } +} diff --git a/crates/kkernel/src/pack_introspect.rs b/crates/kkernel/src/pack_introspect.rs index ff8be8eb..30fc2218 100644 --- a/crates/kkernel/src/pack_introspect.rs +++ b/crates/kkernel/src/pack_introspect.rs @@ -10,15 +10,40 @@ //! consumes whatever is registered and prints it. 
use anyhow::{anyhow, Context, Result}; -use khive_runtime::pack::{PackRegistry, VerbRegistry, VerbRegistryBuilder}; +use khive_runtime::pack::{PackRegistry, VerbRegistry, VerbRegistryBuilder, Visibility}; use khive_runtime::{KhiveRuntime, RuntimeConfig}; use serde::Serialize; -/// Description of a single registered verb. +/// Visibility tier of a registered handler (ADR-017 §Visibility). +#[derive(Debug, Serialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum VerbVisibility { + /// Externally invokable — surfaced on the MCP `request` tool wire. + Verb, + /// Internal pipeline step — addressable via the DSL but NOT on the MCP wire. + Subhandler, +} + +impl From<&Visibility> for VerbVisibility { + fn from(v: &Visibility) -> Self { + match v { + Visibility::Verb => VerbVisibility::Verb, + Visibility::Subhandler => VerbVisibility::Subhandler, + } + } +} + +/// Description of a single registered handler (ADR-017 §Introspection, F126). +/// +/// Includes `visibility` and `category` alongside `name` and `description` +/// so introspection clients can distinguish MCP-exposed verbs from internal +/// subhandlers and surface speech-act classification. #[derive(Debug, Serialize)] pub struct VerbInfo { pub name: String, pub description: String, + pub visibility: VerbVisibility, + pub category: String, } /// Description of a single registered pack. @@ -37,7 +62,8 @@ pub struct PackInfo { fn build_registry() -> Result<(VerbRegistry, KhiveRuntime)> { let config = RuntimeConfig { db_path: None, - default_namespace: "kkernel-introspect".to_string(), + default_namespace: khive_runtime::Namespace::parse("kkernel-introspect") + .unwrap_or_else(|_| khive_runtime::Namespace::local()), embedding_model: None, ..RuntimeConfig::default() }; @@ -81,6 +107,8 @@ fn pack_info_from_registry(registry: &VerbRegistry, name: &str) -> Option>() ); + // F126: VerbInfo must include visibility and category fields. 
+ let create = info.verbs.iter().find(|v| v.name == "create").unwrap(); + assert_eq!( + create.visibility, + VerbVisibility::Verb, + "kg create must have Verb visibility" + ); + assert!( + !create.category.is_empty(), + "kg create must have a non-empty category" + ); + } + + #[test] + fn memory_pack_subhandlers_carry_subhandler_visibility() { + let info = pack_handler("memory") + .expect("pack_handler succeeds") + .expect("memory pack must exist"); + // recall.embed, recall.candidates, recall.fuse, recall.score are Subhandler. + let subhandlers: Vec<&VerbInfo> = info + .verbs + .iter() + .filter(|v| v.visibility == VerbVisibility::Subhandler) + .collect(); + assert!( + !subhandlers.is_empty(), + "memory pack must have subhandler entries; got none in {:?}", + info.verbs.iter().map(|v| &v.name).collect::>() + ); + // recall.embed must be a subhandler. + let embed = info + .verbs + .iter() + .find(|v| v.name == "recall.embed") + .expect("recall.embed must be in the handler list"); + assert_eq!( + embed.visibility, + VerbVisibility::Subhandler, + "recall.embed must have Subhandler visibility (F119)" + ); } #[test] diff --git a/crates/kkernel/src/sync.rs b/crates/kkernel/src/sync.rs index 6d0b18f4..6b21131e 100644 --- a/crates/kkernel/src/sync.rs +++ b/crates/kkernel/src/sync.rs @@ -1,386 +1,8 @@ -//! `kkernel sync` — build a SQLite working DB from NDJSON sources. +//! `kkernel sync` — thin wrapper around the `khive_vcs::sync` library boundary. //! -//! Reads `/.khive/kg/entities.ndjson` and `/.khive/kg/edges.ndjson`, -//! parses each record per ADR-048 §2 canonical schema, and writes them into -//! a fresh SQLite database using the runtime's upsert APIs. The resulting DB -//! has the full khive schema (entities + graph_edges + FTS5 indexes + vector -//! tables) — same as the MCP server uses. -//! -//! This is the Rust half of issue #174. The Deno CLI's `khive kg sync` shells -//! out here so the working DB is a real SQLite file, not a misleading JSON -//! 
marker pretending to be SQLite. -//! -//! ## Atomicity -//! -//! Builds into `.tmp` then renames over ``. A crash mid-build -//! leaves the previous DB intact. - -use std::path::{Path, PathBuf}; - -use anyhow::{anyhow, Context, Result}; -use khive_runtime::{KhiveRuntime, RuntimeConfig}; -use khive_storage::entity::Entity as StorageEntity; -use khive_storage::types::Edge; -use khive_storage::LinkId; -use khive_types::EdgeRelation; -use serde::Deserialize; -use uuid::Uuid; - -/// Per-record entity shape produced by the Deno exporter (ADR-048 §2). -#[derive(Debug, Deserialize)] -struct NdjsonEntity { - id: Uuid, - kind: String, - name: String, - #[serde(default)] - description: Option, - #[serde(default)] - properties: Option, - #[serde(default)] - tags: Vec, - #[serde(default)] - created_at: Option, - #[serde(default)] - updated_at: Option, -} - -/// Per-record edge shape produced by the Deno exporter (ADR-048 §2). -#[derive(Debug, Deserialize)] -struct NdjsonEdge { - edge_id: Uuid, - source: Uuid, - target: Uuid, - relation: String, - #[serde(default = "default_weight")] - weight: f64, - // properties: not yet persisted to the storage-layer Edge struct. - // Accepted but ignored so existing NDJSON files parse without warning. - #[serde(default)] - #[allow(dead_code)] - properties: Option, - #[serde(default)] - created_at: Option, - #[serde(default)] - #[allow(dead_code)] - updated_at: Option, -} - -fn default_weight() -> f64 { - 1.0 -} - -/// Parse an ISO-8601 timestamp string into microseconds since epoch. -/// Returns `now` if the string is None or unparseable. -fn parse_ts_micros(s: Option<&str>) -> i64 { - s.and_then(|t| chrono::DateTime::parse_from_rfc3339(t).ok()) - .map(|dt| dt.timestamp_micros()) - .unwrap_or_else(|| chrono::Utc::now().timestamp_micros()) -} - -/// Summary of a sync run. 
-#[derive(Debug, serde::Serialize)] -pub struct SyncReport { - pub entities: usize, - pub edges: usize, - pub db_path: String, -} - -/// Run the sync: NDJSON -> SQLite via the runtime's upsert APIs. -/// -/// `repo_root` is the directory containing `.khive/kg/{entities,edges}.ndjson`. -/// `db_path` is the target SQLite file (atomically replaced via tmp+rename). -/// `namespace` is the namespace for all imported records. -/// -/// Returns a `SyncReport` describing the build, or an error if NDJSON parsing -/// or the SQLite upserts failed. On error, the tmp file is left behind for -/// post-mortem; the original `db_path` is untouched. -pub async fn run_sync(repo_root: &Path, db_path: &Path, namespace: &str) -> Result { - let entities_path = repo_root.join(".khive/kg/entities.ndjson"); - let edges_path = repo_root.join(".khive/kg/edges.ndjson"); - - let entity_records = read_entities(&entities_path) - .with_context(|| format!("reading {}", entities_path.display()))?; - let edge_records = - read_edges(&edges_path).with_context(|| format!("reading {}", edges_path.display()))?; - - let tmp_path = with_extension_suffix(db_path, ".tmp"); - let _ = std::fs::remove_file(&tmp_path); - - // Build the runtime against the tmp file. Vector embedding is disabled - // because sync runs without an embedding model loaded — vectors are - // computed lazily on access via the MCP server if needed. - let config = RuntimeConfig { - db_path: Some(tmp_path.clone()), - default_namespace: namespace.to_string(), - embedding_model: None, - ..RuntimeConfig::default() - }; - let runtime = KhiveRuntime::new(config) - .with_context(|| format!("building runtime for {}", tmp_path.display()))?; - - let entity_count = upsert_entities(&runtime, namespace, entity_records).await?; - let edge_count = upsert_edges(&runtime, namespace, edge_records).await?; - - // Checkpoint the WAL so all committed writes land in the main DB file. 
- // Without this, `rename(tmp, target)` moves only the main file and leaves - // the -wal alongside it; opening `target` later would see only the data - // through the last auto-checkpoint (every 4000 pages — see khive-db - // pool::WAL_AUTOCHECKPOINT_PAGES). For small graphs no auto-checkpoint - // fires, so the test data would silently disappear. - checkpoint_wal(&runtime) - .await - .context("checkpoint WAL before rename")?; - - // Drop the runtime so SQLite releases its file handles before rename. - drop(runtime); - - if let Some(parent) = db_path.parent() { - std::fs::create_dir_all(parent) - .with_context(|| format!("creating {}", parent.display()))?; - } - std::fs::rename(&tmp_path, db_path) - .with_context(|| format!("renaming {} -> {}", tmp_path.display(), db_path.display()))?; - - Ok(SyncReport { - entities: entity_count, - edges: edge_count, - db_path: db_path.to_string_lossy().into_owned(), - }) -} - -fn with_extension_suffix(p: &Path, suffix: &str) -> PathBuf { - let mut s = p.as_os_str().to_owned(); - s.push(suffix); - PathBuf::from(s) -} - -fn read_entities(path: &Path) -> Result> { - if !path.exists() { - return Ok(Vec::new()); - } - let text = std::fs::read_to_string(path)?; - let mut out = Vec::new(); - for (i, line) in text.lines().enumerate() { - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - let e: NdjsonEntity = serde_json::from_str(trimmed) - .with_context(|| format!("parsing entity at line {}", i + 1))?; - out.push(e); - } - Ok(out) -} - -fn read_edges(path: &Path) -> Result> { - if !path.exists() { - return Ok(Vec::new()); - } - let text = std::fs::read_to_string(path)?; - let mut out = Vec::new(); - for (i, line) in text.lines().enumerate() { - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - let e: NdjsonEdge = serde_json::from_str(trimmed) - .with_context(|| format!("parsing edge at line {}", i + 1))?; - out.push(e); - } - Ok(out) -} - -async fn checkpoint_wal(runtime: &KhiveRuntime) -> 
Result<()> { - let mut writer = runtime.backend().sql().writer().await?; - writer - .execute_script("PRAGMA wal_checkpoint(TRUNCATE);".to_string()) - .await?; - Ok(()) -} - -async fn upsert_entities( - runtime: &KhiveRuntime, - namespace: &str, - records: Vec, -) -> Result { - let store = runtime - .entities(Some(namespace)) - .context("opening entity store")?; - let mut count = 0; - for r in records { - let created_at = parse_ts_micros(r.created_at.as_deref()); - let updated_at = parse_ts_micros(r.updated_at.as_deref()); - let entity = StorageEntity { - id: r.id, - namespace: namespace.to_string(), - kind: r.kind, - name: r.name, - description: r.description, - properties: r.properties, - tags: r.tags, - created_at, - updated_at, - deleted_at: None, - }; - store - .upsert_entity(entity) - .await - .with_context(|| format!("upsert entity {}", r.id))?; - count += 1; - } - Ok(count) -} - -async fn upsert_edges( - runtime: &KhiveRuntime, - namespace: &str, - records: Vec, -) -> Result { - let graph = runtime - .graph(Some(namespace)) - .context("opening graph store")?; - let mut count = 0; - for r in records { - let relation: EdgeRelation = r - .relation - .parse() - .map_err(|e| anyhow!("invalid relation {:?}: {}", r.relation, e))?; - let created_at = - chrono::DateTime::from_timestamp_micros(parse_ts_micros(r.created_at.as_deref())) - .unwrap_or_else(chrono::Utc::now); - let edge = Edge { - id: LinkId::from(r.edge_id), - source_id: r.source, - target_id: r.target, - relation, - weight: r.weight, - created_at, - metadata: None, - }; - graph - .upsert_edge(edge) - .await - .with_context(|| format!("upsert edge {}", r.edge_id))?; - count += 1; - } - Ok(count) -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::TempDir; - - fn write_repo(dir: &Path, entities_ndjson: &str, edges_ndjson: &str) { - let kg_dir = dir.join(".khive/kg"); - std::fs::create_dir_all(&kg_dir).unwrap(); - std::fs::write(kg_dir.join("entities.ndjson"), entities_ndjson).unwrap(); - 
std::fs::write(kg_dir.join("edges.ndjson"), edges_ndjson).unwrap(); - } - - #[tokio::test] - async fn sync_empty_ndjson_produces_real_sqlite_file() { - let tmp = TempDir::new().unwrap(); - let repo = tmp.path(); - let db_path = repo.join(".khive/state/working.db"); - write_repo(repo, "", ""); - - let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); - assert_eq!(report.entities, 0); - assert_eq!(report.edges, 0); - - // Verify the file exists, is non-empty, and starts with the SQLite - // magic header — this is the contract that #174 fixed. - let bytes = std::fs::read(&db_path).unwrap(); - assert!(!bytes.is_empty(), "DB file must be non-empty after sync"); - assert!( - bytes.starts_with(b"SQLite format 3\0"), - "DB file must start with SQLite magic header, got {:?}", - &bytes[..bytes.len().min(20)] - ); - } - - #[tokio::test] - async fn sync_imports_entities_and_edges_into_real_db() { - let tmp = TempDir::new().unwrap(); - let repo = tmp.path(); - let db_path = repo.join(".khive/state/working.db"); - - let id_a = "11111111-1111-1111-1111-111111111111"; - let id_b = "22222222-2222-2222-2222-222222222222"; - let edge_id = "33333333-3333-3333-3333-333333333333"; - - let line_a = format!( - r#"{{"id":"{id_a}","kind":"concept","name":"Alpha","properties":{{}},"tags":[]}}"# - ); - let line_b = format!( - r#"{{"id":"{id_b}","kind":"concept","name":"Beta","properties":{{}},"tags":[]}}"# - ); - let entities = format!("{line_a}\n{line_b}\n"); - let edges = format!( - r#"{{"edge_id":"{edge_id}","source":"{id_a}","target":"{id_b}","relation":"extends","weight":1.0,"properties":{{}}}}"# - ); - write_repo(repo, &entities, &edges); - - let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); - assert_eq!(report.entities, 2); - assert_eq!(report.edges, 1); - - // Re-open the DB via the runtime and verify the records persisted. 
- let config = RuntimeConfig { - db_path: Some(db_path.clone()), - default_namespace: "test-ns".into(), - embedding_model: None, - ..RuntimeConfig::default() - }; - let rt = KhiveRuntime::new(config).unwrap(); - let alpha = rt - .entities(Some("test-ns")) - .unwrap() - .get_entity(id_a.parse().unwrap()) - .await - .unwrap() - .expect("entity Alpha must be retrievable after sync"); - assert_eq!(alpha.name, "Alpha"); - assert_eq!(alpha.kind, "concept"); - } - - #[tokio::test] - async fn sync_is_atomic_via_tmp_rename() { - // Pre-create a sentinel DB at db_path. After a failed sync the - // sentinel should remain (or after a successful one, be replaced). - let tmp = TempDir::new().unwrap(); - let repo = tmp.path(); - let db_path = repo.join(".khive/state/working.db"); - std::fs::create_dir_all(db_path.parent().unwrap()).unwrap(); - std::fs::write(&db_path, b"SENTINEL").unwrap(); - - // Write malformed entities ndjson — sync should fail. - write_repo(repo, "not json\n", ""); - let err = run_sync(repo, &db_path, "test-ns").await.unwrap_err(); - assert!( - err.to_string().to_lowercase().contains("parsing entity") - || err.chain().any(|e| e.to_string().contains("expected")), - "expected parse error, got: {err}" - ); - - // Sentinel still present — sync did not clobber it. - let after = std::fs::read(&db_path).unwrap(); - assert_eq!( - after, b"SENTINEL", - "atomic guarantee: failed sync must not replace existing DB" - ); - } - - #[tokio::test] - async fn sync_missing_ndjson_files_succeeds_with_zero_counts() { - // Issue an honest sync against an empty repo (no .khive/kg/ at all). - let tmp = TempDir::new().unwrap(); - let repo = tmp.path(); - let db_path = repo.join(".khive/state/working.db"); +//! The NDJSON-to-SQLite rebuild logic lives in `khive_vcs::sync::run_sync` +//! per ADR-010/ADR-020 (finding F106). This module re-exports the types and +//! function so the `kkernel` binary CLI layer can call them with minimal +//! indirection. 
- let report = run_sync(repo, &db_path, "test-ns").await.unwrap(); - assert_eq!(report.entities, 0); - assert_eq!(report.edges, 0); - } -} +pub use khive_vcs::sync::{run_sync, SyncReport}; diff --git a/crates/kkernel/src/vector.rs b/crates/kkernel/src/vector.rs new file mode 100644 index 00000000..80c00005 --- /dev/null +++ b/crates/kkernel/src/vector.rs @@ -0,0 +1,222 @@ +//! `kkernel vector` — vector store introspection and housekeeping (ADR-044). +//! +//! Implements: +//! - `kkernel vector capabilities` — print VectorStoreCapabilities for the active backend +//! - `kkernel vector sweep` — run an orphan-sweep to remove stale vector rows + +use std::path::PathBuf; + +use anyhow::{anyhow, Result}; +use clap::Subcommand; +use serde::Serialize; + +// ── Subcommand tree ──────────────────────────────────────────────────────────── + +#[derive(Subcommand, Debug)] +pub enum VectorCommand { + /// Report the capability flags of the active vector backend. + Capabilities(VectorCapabilitiesArgs), + + /// Sweep orphan vector rows whose subject no longer exists. + Sweep(VectorSweepArgs), +} + +#[derive(clap::Parser, Debug)] +pub struct VectorCapabilitiesArgs { + /// Print human-readable output instead of JSON. + #[arg(long)] + pub human: bool, + + /// Engine name to inspect (defaults to the runtime-configured engine). + #[arg(long)] + pub engine: Option, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +#[derive(clap::Parser, Debug)] +pub struct VectorSweepArgs { + /// Namespace to sweep. May be repeated. Empty = all namespaces. + #[arg(long)] + pub namespace: Vec, + + /// Maximum rows to delete in this run (default: 1000). + #[arg(long, default_value = "1000")] + pub max_delete: u64, + + /// Dry run — report orphans without deleting. + #[arg(long)] + pub dry_run: bool, + + /// Engine name to sweep (defaults to the runtime-configured engine). 
+ #[arg(long)] + pub engine: Option, + + /// Database path (defaults to `~/.khive/khive-graph.db`). + #[arg(long)] + pub db: Option, +} + +// ── Output types ─────────────────────────────────────────────────────────────── + +/// JSON-serializable projection of [`VectorStoreCapabilities`] (ADR-044 §1). +#[derive(Debug, Serialize)] +pub struct CapabilitiesReport { + pub engine_name: String, + pub supports_filter: bool, + pub supports_batch_search: bool, + pub supports_quantization: bool, + pub supports_update: bool, + pub supports_orphan_sweep: bool, + pub supports_multi_field: bool, + pub max_dimensions: Option, + pub index_kinds: Vec, +} + +// ── Entry point ──────────────────────────────────────────────────────────────── + +pub fn run_vector(cmd: VectorCommand) -> Result<()> { + match cmd { + VectorCommand::Capabilities(args) => cmd_vector_capabilities(args), + VectorCommand::Sweep(args) => cmd_vector_sweep(args), + } +} + +// ── capabilities ────────────────────────────────────────────────────────────── + +fn cmd_vector_capabilities(args: VectorCapabilitiesArgs) -> Result<()> { + let engine_name = args.engine.unwrap_or_else(|| "default".to_string()); + + // Emit the sqlite-vec baseline capabilities (ADR-044 §1). + // A full implementation instantiates the backend via KhiveRuntime, calls + // `VectorStore::capabilities()`, and serialises the returned + // `&'static VectorStoreCapabilities`. The static values below match the + // `SqliteVecStore::capabilities()` OnceLock initialiser in + // `khive-db/src/stores/vectors.rs`. 
+ let report = CapabilitiesReport { + engine_name: engine_name.clone(), + supports_filter: false, + supports_batch_search: false, + supports_quantization: false, + supports_update: false, + supports_orphan_sweep: false, + supports_multi_field: false, + // sqlite-vec 0.1.9: SQLITE_VEC_VEC0_MAX_DIMENSIONS = 8192 + max_dimensions: Some(8192), + index_kinds: vec!["sqlite_vec".into()], + }; + + if args.human { + println!("engine: {}", report.engine_name); + println!("supports_filter: {}", report.supports_filter); + println!("supports_batch_search: {}", report.supports_batch_search); + println!("supports_quantization: {}", report.supports_quantization); + println!("supports_update: {}", report.supports_update); + println!("supports_orphan_sweep: {}", report.supports_orphan_sweep); + println!("supports_multi_field: {}", report.supports_multi_field); + println!( + "max_dimensions: {}", + report + .max_dimensions + .map_or("unlimited".into(), |d| d.to_string()) + ); + println!("index_kinds: {}", report.index_kinds.join(", ")); + } else { + let json = serde_json::to_string(&report).expect("serialize CapabilitiesReport"); + println!("{json}"); + } + Ok(()) +} + +// ── sweep ───────────────────────────────────────────────────────────────────── + +fn cmd_vector_sweep(_args: VectorSweepArgs) -> Result<()> { + Err(anyhow!( + "vector sweep is not yet implemented (ADR-044 backend orphan-sweep deferred to \ + follow-up #381). SqliteVecStore returns Unsupported per the ADR." + )) +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn capabilities_json_output_has_expected_fields() { + let args = VectorCapabilitiesArgs { + human: false, + engine: Some("mE5-small".into()), + db: None, + }; + // Verify the command completes without error. 
+ cmd_vector_capabilities(args).expect("capabilities command succeeds"); + } + + #[test] + fn capabilities_default_engine() { + let args = VectorCapabilitiesArgs { + human: false, + engine: None, + db: None, + }; + cmd_vector_capabilities(args).expect("capabilities with default engine succeeds"); + } + + #[test] + fn capabilities_report_baseline_matches_sqlite_vec_store() { + // Verify the baseline values match what SqliteVecStore::capabilities() returns. + let report = CapabilitiesReport { + engine_name: "mE5-small".into(), + supports_filter: false, + supports_batch_search: false, + supports_quantization: false, + supports_update: false, + supports_orphan_sweep: false, + supports_multi_field: false, + max_dimensions: Some(8192), + index_kinds: vec!["sqlite_vec".into()], + }; + assert!(!report.supports_filter); + assert!(!report.supports_orphan_sweep); + assert_eq!(report.max_dimensions, Some(8192)); + assert_eq!(report.index_kinds, vec!["sqlite_vec"]); + } + + #[test] + fn sweep_returns_not_implemented() { + let args = VectorSweepArgs { + namespace: vec![], + max_delete: 100, + dry_run: true, + engine: None, + db: None, + }; + let err = cmd_vector_sweep(args).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("not yet implemented"), + "expected 'not yet implemented' in error, got: {msg}" + ); + assert!( + msg.contains("#381"), + "expected follow-up issue reference in error, got: {msg}" + ); + } + + #[test] + fn sweep_with_namespaces_returns_not_implemented() { + let args = VectorSweepArgs { + namespace: vec!["local".into(), "research".into()], + max_delete: 500, + dry_run: false, + engine: Some("mE5-small".into()), + db: None, + }; + let err = cmd_vector_sweep(args).unwrap_err(); + assert!(err.to_string().contains("not yet implemented")); + } +} diff --git a/docs/adr/ADR-006-deterministic-scoring.md b/docs/adr/ADR-006-deterministic-scoring.md index 094df824..f7c71e04 100644 --- a/docs/adr/ADR-006-deterministic-scoring.md +++ 
b/docs/adr/ADR-006-deterministic-scoring.md @@ -38,39 +38,37 @@ SQL storage: INTEGER (i64, native SQLite affinity) Ordering: standard integer comparison (no float comparison edge cases) ``` -Arithmetic is saturating: overflow clamps to `i64::MAX`, underflow clamps to `i64::MIN`. -NaN and infinity inputs to `from_f32`/`from_f64` are mapped to deterministic sentinel -values (NaN → 0, +inf → `i64::MAX`, -inf → `i64::MIN`). +Arithmetic is saturating: overflow clamps to `MAX` (= `i64::MAX`), underflow clamps to +`NEG_INF` (= `i64::MIN + 1`). The raw value `i64::MIN` is a reserved sentinel (`MIN`) +that is not produced by any public arithmetic or float-conversion path. This makes +runtime-reachable scores disjoint from the sentinel — see the `DeterministicScore` total-order +formal proof at `lean-proofs/Score/DeterministicScore.lean` (`MIN` vs `NEG_INF`, `RuntimeValid`). -### Canonical implementation: `ruvector-core` +NaN and infinity inputs to `from_f32`/`from_f64` are mapped to deterministic sentinel +values (NaN → `ZERO`, `+∞` → `MAX`, `-∞` → `NEG_INF`). -`ruvector-core` is the authoritative owner of `DeterministicScore` and related deterministic -fusion primitives. `khive-score` is a compatibility crate that re-exports the canonical -types and functions. It contains no independent scoring implementation. +### Canonical implementation (current phase) -```rust -// khive-score/src/lib.rs — re-export shim only -pub use ruvector_core::{ - DeterministicScore, - deterministic_rrf, - deterministic_rrf_with_k, - weighted_sum, - Ranked, -}; -``` +`khive-score` is the current canonical owner of `DeterministicScore` and the related +deterministic fusion primitives (`deterministic_rrf`, `weighted_sum`, `Ranked`, +`DistanceMetric`, `similarity_from_distance`). It is a self-contained Rust crate. -This prevents drift between two byte-identical implementations. Changes to the scoring -contract are made in `ruvector-core` and flow to khive through the re-export. 
+The long-term plan is to host these primitives in `ruvector-core` upstream so multiple +ecosystems share a single implementation. That migration is deferred until `ruvector-core` +ships our contributions; `khive-score` will become a re-export shim at that point. Until +then, the formal contract is the one defined in this ADR and proved in +`lean-proofs/Score/DeterministicScore.lean`. ### Normative invariants The implementation MUST satisfy: 1. **Total order**: antisymmetry, transitivity, totality over all `DeterministicScore` values. -2. **Saturating arithmetic**: add, subtract, and accumulation saturate at `i64::MIN`/`i64::MAX`. - No wrapping, no panic. -3. **Deterministic NaN/infinity handling**: `from_f32(NaN) == from_f64(NaN) == DeterministicScore(0)`. - Positive infinity maps to `i64::MAX`, negative infinity to `i64::MIN`. +2. **Saturating arithmetic**: add, subtract, and accumulation saturate at `NEG_INF` + (= `i64::MIN + 1`) and `MAX` (= `i64::MAX`). No wrapping, no panic. The reserved + `MIN` (= `i64::MIN`) sentinel is never produced by public arithmetic. +3. **Deterministic NaN/infinity handling**: `from_f32(NaN) == from_f64(NaN) == ZERO`. + `+∞` maps to `MAX`, `-∞` maps to `NEG_INF`. `MIN` is never produced. 4. **SQL INTEGER bit-exact round-trip**: `DeterministicScore(x).to_sql().from_sql() == DeterministicScore(x)`. 5. **Metric-aware f32 conversion**: distance-to-similarity conversion at vector search result boundaries uses the metric-specific monotonic transform defined below. @@ -159,15 +157,17 @@ add/subtract/accumulation safely. This is an implementation detail, not a normat requirement. Other implementations may use another method if they preserve the same saturating semantics. -### `QuantKey` deprecation +### `QuantKey` removal -`QuantKey` is not part of the deterministic scoring contract. 
It uses a different scale and -width than `DeterministicScore` and is not safe for persistent score storage, SQL cache keys, -cross-backend result exchange, or public ranking APIs. +`QuantKey` was an 8-byte packed sort-key optimization (i32 quantized score + u32 ID prefix) +intended for hot-loop sorting. It is **not** part of the deterministic scoring contract +(different scale, lossy precision, not safe for storage or cross-backend exchange) and is +not modelled in the Lean proof. -Existing `QuantKey` code is deprecated from the public contract. Future use requires a -performance ADR with benchmarks showing material speedup over `Ranked` / -`DeterministicScore` sorting on representative khive retrieval workloads. +`QuantKey` has been **removed entirely** from `khive-score`. There is no deprecation +period. If a future workload demonstrates a material speedup over `Ranked` / +`DeterministicScore` sorting on representative retrieval traces, a new optimization can +be introduced behind a fresh ADR. ## Rationale @@ -213,12 +213,14 @@ value used in production. The explicit override API (`deterministic_rrf_with_k`) tuning for specific workloads. Callers experimenting with alternative K values must document the rationale. -### Why deprecate QuantKey? +### Why remove QuantKey? -`QuantKey` is a relative-order optimization for hot-loop sorting. It does not preserve -absolute score values and uses a different scale than `DeterministicScore`. Exposing it as a -public scoring primitive risks callers persisting or comparing `QuantKey` values across -contexts where only `DeterministicScore` is correct. +`QuantKey` was a relative-order optimization for hot-loop sorting. It did not preserve +absolute score values and used a different scale than `DeterministicScore`. Keeping it as +deprecated code added a second sort-key concept readers had to learn before reaching for +the one that matters. 
khive is early enough that a clean delete is preferable to a +deprecation period; reintroduce as a private optimization (or a new ADR) only if a real +workload demonstrates need. ## Consequences @@ -232,9 +234,11 @@ contexts where only `DeterministicScore` is correct. ### Negative -- khive gains a dependency on `ruvector-core`. Acceptable given RuVector is the canonical - vector substrate. -- `QuantKey` deprecation may require updating hot-path sorting in retrieval code. +- `khive-score` remains a self-contained Rust crate in this phase. The ruvector-core + migration is deferred until upstream ships and is not a blocker for code aligned to + this ADR. +- `QuantKey` was removed; any hot-path retrieval sort that used it now uses `Ranked` + / `DeterministicScore` ordering directly. - K = 60 is the standard default. Callers who need a different K must use the explicit `deterministic_rrf_with_k` API and document the rationale. @@ -246,9 +250,19 @@ contexts where only `DeterministicScore` is correct. ## Implementation -- `ruvector-core`: canonical `DeterministicScore`, `deterministic_rrf`, - `deterministic_rrf_with_k`, `weighted_sum`, `Ranked`, `DistanceMetric`, - `similarity_from_distance`. -- `khive-score/src/lib.rs`: `pub use ruvector_core::*` re-exports only. +- `khive-score`: self-contained canonical implementation of `DeterministicScore`, + `deterministic_rrf`, `deterministic_rrf_with_k`, `weighted_sum`, `Ranked`, + `DistanceMetric`, `similarity_from_distance`. Constants: `MAX` (i64::MAX), `NEG_INF` + (i64::MIN + 1), `ZERO` (0), `MIN` (i64::MIN, reserved sentinel). - SQL column type: `INTEGER` (i64). No schema migration needed. -- `QuantKey`: marked `#[deprecated]` with note pointing to this ADR. +- `QuantKey`: removed (file deleted, all re-exports dropped). Use `Ranked` and + `DeterministicScore` ordering for sort hot paths. +- Formal model: `lean-proofs/Score/DeterministicScore.lean` (51 theorems, complete). 
+ Future Rust changes must preserve the proven invariants or amend both the ADR and + the Lean proof in the same PR. + +### Future: ruvector-core migration + +When `ruvector-core` ships with our contributions, `khive-score` will become a +re-export shim of those types. That migration is its own ADR / PR and is out of scope +here. The Lean proof remains the source of truth across the migration. diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index f0a9cfa3..a08ef9fc 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -28,16 +28,35 @@ mechanism that: The canonical ledger of database schema migration versions. Migration versions are assigned in ledger order; they are NOT required to match ADR number order. -| Version | Owning ADR | Migration name | Status | -| ------: | ---------- | ---------------------------------- | ------- | -| V1 | (initial) | initial_schema | shipped | -| V2 | (initial) | add_name_to_notes | shipped | -| V3 | (initial) | add_events_namespace_created_index | shipped | -| V4 | (initial) | dedupe_graph_edge_triples | shipped | -| V5 | ADR-043 | embedding_pipeline_extensions | v1 | -| V6 | ADR-046 | event_sourced_proposals_index | v1 | -| V7 | ADR-041 | event_observations_and_session_id | v1 | -| V8 | ADR-022 | events_namespace_ts_id_idx | v1 | +| Version | Owning ADR | Migration name | Status | +| ------: | ----------- | ------------------------------------------------- | ------- | +| V1 | (initial) | initial_schema | shipped | +| V2 | (initial) | add_name_to_notes | shipped | +| V3 | (initial) | add_events_namespace_created_index | shipped | +| V4 | (initial) | dedupe_graph_edge_triples | shipped | +| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | +| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | +| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | +| V8 | (no-op) | 
reserved_adr041_event_observations_and_session_id | shipped | +| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | +| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | +| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | +| V12 | c04/ADR-019 | nullable_note_metrics | shipped | +| V13 | c06/ADR-041 | event_observability_provenance | shipped | +| V14 | c20/ADR-043 | embedding_model_registry | shipped | +| V15 | c22/ADR-046 | proposals_open | shipped | + +> **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what +> actually shipped on `integration/v1-adr-alignment` after parallel cluster landings c01, c03, +> c04, c06, c20, and c22. The original ledger (V5–V8 reserved for ADR-043/046/041/022 +> respectively, V9 for ADR-004/029) was pre-v1 planning that did not survive contact with +> concurrent PRs. The concrete migrations from c01 (entity_type) landed at V5; c03 (edge +> lifecycle) landed at V9; c04 (note storage + curation) landed at V10–V12; c06 (event +> observability) was originally collapsed into V5 in its own PR then relocated to V13 during +> integration merge. c20 (embedding model registry per ADR-043) landed at V14 — the same ADR +> the V6 reservation originally anticipated, hence V6 remains a no-op slot. c22 (proposals_open +> projection per ADR-046) landed at V15. V6–V8 are no-op placeholder slots to maintain +> contiguity. Versions V1–V15 are production schema and are frozen. > **Invariant**: ADR number order and migration version order are independent. Migration versions reflect schema ledger assignment order. A migration may only depend on schema created by earlier versions. 
diff --git a/docs/adr/ADR-020-git-native-kg-implementation.md b/docs/adr/ADR-020-git-native-kg-implementation.md index 5a66c50a..75eaad10 100644 --- a/docs/adr/ADR-020-git-native-kg-implementation.md +++ b/docs/adr/ADR-020-git-native-kg-implementation.md @@ -46,7 +46,7 @@ primitives (`kkernel sync`, `kkernel export`, `kkernel import`, `kkernel validat `kkernel db migrate`, etc.). The split: - **`khive`** — git workflows, file scaffolding, hook installation, user CLI ergonomics, - network pack install, and (future) khive.ai product features. + network pack install, and (future) hosted product features. - **`kkernel`** — storage, validation primitives, pack registry, coordinator, MCP server, schema migrations. Pure Rust; no Deno or product UX. diff --git a/npm/README.md b/npm/README.md index d2e99037..5928950b 100644 --- a/npm/README.md +++ b/npm/README.md @@ -23,4 +23,3 @@ khive kg status # Show entity/edge counts and uncommitted changes ## Documentation - [GitHub](https://github.com/ohdearquant/khive) -- [khive.ai](https://khive.ai) diff --git a/npm/bin/khive b/npm/bin/khive old mode 100644 new mode 100755 index cfb0d5e0..09003324 --- a/npm/bin/khive +++ b/npm/bin/khive @@ -1,43 +1,187 @@ #!/usr/bin/env node +// khive — per-platform binary shim (ADR-026) +// +// Resolves the host platform to the matching @khive/kernel-{platform} +// optional dependency and execs the kkernel binary from its bin/ directory. +// Falls back to a local cargo build directory for monorepo development. +// +// NOTE: npm/bin/khive-mcp is a sibling shim that resolves khive-mcp using +// the same logic. Both shims carry their own copy of `getBinaryPath()` — keep them in sync. + +"use strict"; + const { execFileSync } = require("child_process"); const path = require("path"); const fs = require("fs"); const os = require("os"); +// Map os.platform()+os.arch() → @khive/kernel-{platform} package name suffix. +// Follows the naming established in ADR-026. +// +// NOTE: linux-arm64 is glibc-only in v1. 
Musl arm64 is not yet in the matrix. +// If musl is detected on arm64, `resolvePlatformSuffix()` returns null and we error +// with a clear "unsupported" message — see getBinaryPath() below. const PLATFORM_MAP = { - "darwin-arm64": "khive-darwin-arm64", - "darwin-x64": "khive-darwin-x64", - "linux-arm64": "khive-linux-arm64", - "linux-x64": "khive-linux-x64", - "win32-x64": "khive-win32-x64.exe", + "darwin-arm64": "darwin-arm64", + "darwin-x64": "darwin-x64", + "linux-arm64": null, // resolved dynamically: glibc=linux-arm64, musl=unsupported + "linux-x64": null, // resolved dynamically by resolvePlatformSuffix() + "win32-x64": "win32-x64", }; -function getBinaryPath() { - const key = `${os.platform()}-${os.arch()}`; - const binaryName = PLATFORM_MAP[key]; - if (!binaryName) { - console.error(`Unsupported platform: ${key}`); - console.error(`Supported: ${Object.keys(PLATFORM_MAP).join(", ")}`); +/** + * Detect whether the Linux runtime links against glibc or musl. + * Detection order (most-reliable first): + * 1. `ldd --version` — invokes the actual system linker + * 2. `/lib/ld-musl-*` glob — fast filesystem check + * Returns "gnu" or "musl". Defaults to "gnu" if detection is inconclusive. + * + * NOTE: cli/lib/kernel.ts uses the same ordered detection. Keep them in sync. 
+ */ +function detectLibc() { + try { + const ldd = require("child_process") + .execFileSync("ldd", ["--version"], { encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }) + .toLowerCase(); + if (ldd.includes("musl")) return "musl"; + return "gnu"; + } catch (_) { + // ldd not available or returned non-zero — check /lib/ld-musl-* presence + try { + const libs = fs.readdirSync("/lib"); + if (libs.some((f) => f.startsWith("ld-musl-"))) return "musl"; + } catch (_) {} + return "gnu"; + } +} + +const SUPPORTED_PLATFORMS = [ + "darwin-arm64", + "darwin-x64", + "linux-x64-gnu", + "linux-x64-musl", + "linux-arm64", + "win32-x64", +]; + +function getPlatformKey() { + return `${os.platform()}-${os.arch()}`; +} + +/** + * Resolve the platform suffix for the @khive/kernel-{platform} subpackage. + * Returns null if the platform is recognized but unsupported (musl arm64). + * Returns undefined if the platform is entirely unknown. + */ +function resolvePlatformSuffix() { + const platformKey = getPlatformKey(); + if (platformKey === "linux-x64") { + const libc = detectLibc(); + return libc === "musl" ? "linux-x64-musl" : "linux-x64-gnu"; + } + if (platformKey === "linux-arm64") { + const libc = detectLibc(); + if (libc === "musl") { + // musl arm64 is not in the v1 matrix — emit a clear error instead of + // silently falling back to the glibc arm64 binary (which will fail with + // a cryptic ENOENT when the glibc loader is absent). + return null; // caller treats null as "unsupported but recognized" + } + return "linux-arm64"; + } + return PLATFORM_MAP[platformKey]; // darwin-arm64, darwin-x64, win32-x64 +} + +/** + * Locate the named binary using the platform subpackage resolution strategy: + * 1. KKERNEL_BINARY env var override (development / CI) + * 2. Package-manager-agnostic resolution via require.resolve (works with + * npm, yarn, and pnpm including isolated-store layouts) + * 3. 
Dev fallback: cargo build directory inside the monorepo + * + * `binaryName` is "kkernel" or "khive-mcp" (without .exe; added for Windows). + */ +function getBinaryPath(binaryName) { + const isWindows = os.platform() === "win32"; + const exe = isWindows ? `${binaryName}.exe` : binaryName; + + // 1. Explicit override env var. For kkernel the conventional var is + // KKERNEL_BINARY; for khive-mcp we accept KHIVE_MCP_BINARY. + const envVar = binaryName === "kkernel" ? "KKERNEL_BINARY" : "KHIVE_MCP_BINARY"; + const override = process.env[envVar]; + if (override && fs.existsSync(override)) return override; + + const platformSuffix = resolvePlatformSuffix(); + + // null means recognized-but-unsupported platform (musl arm64) + if (platformSuffix === null) { + const platformKey = getPlatformKey(); + const libc = detectLibc(); + console.error(`khive: unsupported platform: ${platformKey} (libc: ${libc})`); + console.error("linux-arm64 with musl is not in the v1 release matrix."); + console.error("Supported: " + SUPPORTED_PLATFORMS.join(", ")); + console.error( + "File an issue at https://github.com/ohdearquant/khive/issues if you need this target.", + ); process.exit(1); } - // Check for binary in the package's bin directory - const localPath = path.join(__dirname, binaryName); - if (fs.existsSync(localPath)) return localPath; + // undefined means completely unknown platform + if (platformSuffix === undefined) { + const platformKey = getPlatformKey(); + console.error(`khive: unsupported platform: ${platformKey}`); + console.error("Supported: " + SUPPORTED_PLATFORMS.join(", ")); + console.error( + "File an issue at https://github.com/ohdearquant/khive/issues if you need this target.", + ); + process.exit(1); + } - // Check if installed globally or via npx - const globalPath = path.join(__dirname, "..", "bin", binaryName); - if (fs.existsSync(globalPath)) return globalPath; + const pkgName = `@khive/kernel-${platformSuffix}`; + + // 2. Package-manager-agnostic resolution. 
`require.resolve` honours npm, + // yarn, AND pnpm's isolated-store layout (node_modules/.pnpm/...). + // Walk-up approaches break under pnpm's default shamefully-hoist=false. + try { + const pkgJsonPath = require.resolve(`${pkgName}/package.json`); + const pkgDir = path.dirname(pkgJsonPath); + const candidate = path.join(pkgDir, "bin", exe); + if (fs.existsSync(candidate)) return candidate; + } catch (_) { + // Package not installed — fall through to dev fallback. + } + + // 3. Dev fallback: look for a cargo build in typical monorepo locations. + const devCandidates = []; + let search = path.join(__dirname, ".."); + for (let i = 0; i < 8; i++) { + const cratesDir = path.join(search, "crates"); + if (fs.existsSync(cratesDir)) { + devCandidates.push(path.join(cratesDir, "target", "release", exe)); + devCandidates.push(path.join(cratesDir, "target", "debug", exe)); + break; + } + const parent = path.dirname(search); + if (parent === search) break; + search = parent; + } + for (const c of devCandidates) { + if (fs.existsSync(c)) return c; + } - console.error(`Binary not found: ${binaryName}`); - console.error("Run 'npm install khive' to download platform binaries."); + console.error(`khive: ${pkgName} not installed or ${binaryName} binary not found.`); + console.error(`Expected: ${pkgName}/bin/${exe}`); + console.error( + "Run 'npm install -g khive' to install platform binaries, or set " + + `${envVar} to point to a local build.`, + ); process.exit(1); } try { - const binary = getBinaryPath(); - const result = execFileSync(binary, process.argv.slice(2), { + const binary = getBinaryPath("kkernel"); + execFileSync(binary, process.argv.slice(2), { stdio: "inherit", env: process.env, }); diff --git a/npm/bin/khive-mcp b/npm/bin/khive-mcp new file mode 100755 index 00000000..dd8922e8 --- /dev/null +++ b/npm/bin/khive-mcp @@ -0,0 +1,161 @@ +#!/usr/bin/env node + +// khive-mcp — per-platform binary shim for the MCP stdio server (ADR-026) +// +// Resolves the host platform to 
the matching @khive/kernel-{platform} +// optional dependency and execs the khive-mcp binary from its bin/ directory. +// Falls back to a local cargo build directory for monorepo development. +// +// This shim is the companion to npm/bin/khive. Both use the same +// `getBinaryPath()` logic — see npm/bin/khive for comments and rationale. +// Keep detection order and platform mapping in sync between the two shims. +// +// Users configure this binary in Claude Code's MCP config: +// {"mcpServers": {"khive": {"command": "khive-mcp"}}} + +"use strict"; + +const { execFileSync } = require("child_process"); +const path = require("path"); +const fs = require("fs"); +const os = require("os"); + +/** + * Detect whether the Linux runtime links against glibc or musl. + * Detection order (most-reliable first): + * 1. `ldd --version` — invokes the actual system linker + * 2. `/lib/ld-musl-*` glob — fast filesystem check + * Returns "gnu" or "musl". Defaults to "gnu" if detection is inconclusive. + * + * NOTE: npm/bin/khive and cli/lib/kernel.ts use the same detection order. + * Keep all three in sync. + */ +function detectLibc() { + try { + const ldd = require("child_process") + .execFileSync("ldd", ["--version"], { encoding: "utf8", stdio: ["ignore", "pipe", "pipe"] }) + .toLowerCase(); + if (ldd.includes("musl")) return "musl"; + return "gnu"; + } catch (_) { + try { + const libs = fs.readdirSync("/lib"); + if (libs.some((f) => f.startsWith("ld-musl-"))) return "musl"; + } catch (_) {} + return "gnu"; + } +} + +const SUPPORTED_PLATFORMS = [ + "darwin-arm64", + "darwin-x64", + "linux-x64-gnu", + "linux-x64-musl", + "linux-arm64", + "win32-x64", +]; + +function getPlatformKey() { + return `${os.platform()}-${os.arch()}`; +} + +function resolvePlatformSuffix() { + const platformKey = getPlatformKey(); + if (platformKey === "linux-x64") { + const libc = detectLibc(); + return libc === "musl" ? 
"linux-x64-musl" : "linux-x64-gnu"; + } + if (platformKey === "linux-arm64") { + const libc = detectLibc(); + if (libc === "musl") return null; // unsupported, caller emits clear error + return "linux-arm64"; + } + const PLATFORM_MAP = { + "darwin-arm64": "darwin-arm64", + "darwin-x64": "darwin-x64", + "win32-x64": "win32-x64", + }; + return PLATFORM_MAP[platformKey]; +} + +function getBinaryPath(binaryName) { + const isWindows = os.platform() === "win32"; + const exe = isWindows ? `${binaryName}.exe` : binaryName; + + const envVar = binaryName === "kkernel" ? "KKERNEL_BINARY" : "KHIVE_MCP_BINARY"; + const override = process.env[envVar]; + if (override && fs.existsSync(override)) return override; + + const platformSuffix = resolvePlatformSuffix(); + + if (platformSuffix === null) { + const platformKey = getPlatformKey(); + const libc = detectLibc(); + console.error(`khive-mcp: unsupported platform: ${platformKey} (libc: ${libc})`); + console.error("linux-arm64 with musl is not in the v1 release matrix."); + console.error("Supported: " + SUPPORTED_PLATFORMS.join(", ")); + console.error( + "File an issue at https://github.com/ohdearquant/khive/issues if you need this target.", + ); + process.exit(1); + } + + if (platformSuffix === undefined) { + const platformKey = getPlatformKey(); + console.error(`khive-mcp: unsupported platform: ${platformKey}`); + console.error("Supported: " + SUPPORTED_PLATFORMS.join(", ")); + console.error( + "File an issue at https://github.com/ohdearquant/khive/issues if you need this target.", + ); + process.exit(1); + } + + const pkgName = `@khive/kernel-${platformSuffix}`; + + // Package-manager-agnostic resolution (npm, yarn, pnpm isolated-store). 
+ try { + const pkgJsonPath = require.resolve(`${pkgName}/package.json`); + const pkgDir = path.dirname(pkgJsonPath); + const candidate = path.join(pkgDir, "bin", exe); + if (fs.existsSync(candidate)) return candidate; + } catch (_) {} + + // Dev fallback: cargo build directory inside the monorepo. + const devCandidates = []; + let search = path.join(__dirname, ".."); + for (let i = 0; i < 8; i++) { + const cratesDir = path.join(search, "crates"); + if (fs.existsSync(cratesDir)) { + devCandidates.push(path.join(cratesDir, "target", "release", exe)); + devCandidates.push(path.join(cratesDir, "target", "debug", exe)); + break; + } + const parent = path.dirname(search); + if (parent === search) break; + search = parent; + } + for (const c of devCandidates) { + if (fs.existsSync(c)) return c; + } + + console.error(`khive-mcp: ${pkgName} not installed or ${binaryName} binary not found.`); + console.error(`Expected: ${pkgName}/bin/${exe}`); + console.error( + "Run 'npm install -g khive' to install platform binaries, or set " + + `${envVar} to point to a local build.`, + ); + process.exit(1); +} + +try { + const binary = getBinaryPath("khive-mcp"); + execFileSync(binary, process.argv.slice(2), { + stdio: "inherit", + env: process.env, + }); +} catch (err) { + if (err.status !== undefined) { + process.exit(err.status); + } + throw err; +} diff --git a/npm/kernel-darwin-arm64/bin/.gitkeep b/npm/kernel-darwin-arm64/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-darwin-arm64/package.json b/npm/kernel-darwin-arm64/package.json new file mode 100644 index 00000000..92f1fbcb --- /dev/null +++ b/npm/kernel-darwin-arm64/package.json @@ -0,0 +1,18 @@ +{ + "name": "@khive/kernel-darwin-arm64", + "version": "0.2.1", + "description": "khive Rust binaries for macOS Apple Silicon (arm64)", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ohdearquant/khive" + }, + "os": ["darwin"], + "cpu": ["arm64"], + "files": 
[ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/kernel-darwin-x64/bin/.gitkeep b/npm/kernel-darwin-x64/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-darwin-x64/package.json b/npm/kernel-darwin-x64/package.json new file mode 100644 index 00000000..802d670f --- /dev/null +++ b/npm/kernel-darwin-x64/package.json @@ -0,0 +1,18 @@ +{ + "name": "@khive/kernel-darwin-x64", + "version": "0.2.1", + "description": "khive Rust binaries for macOS Intel (x64)", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ohdearquant/khive" + }, + "os": ["darwin"], + "cpu": ["x64"], + "files": [ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/kernel-linux-arm64/bin/.gitkeep b/npm/kernel-linux-arm64/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-linux-arm64/package.json b/npm/kernel-linux-arm64/package.json new file mode 100644 index 00000000..d254ef14 --- /dev/null +++ b/npm/kernel-linux-arm64/package.json @@ -0,0 +1,19 @@ +{ + "name": "@khive/kernel-linux-arm64", + "version": "0.2.1", + "description": "khive Rust binaries for Linux ARM64 glibc", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ohdearquant/khive" + }, + "os": ["linux"], + "cpu": ["arm64"], + "libc": ["glibc"], + "files": [ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/kernel-linux-x64-gnu/bin/.gitkeep b/npm/kernel-linux-x64-gnu/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-linux-x64-gnu/package.json b/npm/kernel-linux-x64-gnu/package.json new file mode 100644 index 00000000..e0293b8c --- /dev/null +++ b/npm/kernel-linux-x64-gnu/package.json @@ -0,0 +1,19 @@ +{ + "name": "@khive/kernel-linux-x64-gnu", + "version": "0.2.1", + "description": "khive Rust binaries for Linux x86_64 glibc", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": 
"https://github.com/ohdearquant/khive" + }, + "os": ["linux"], + "cpu": ["x64"], + "libc": ["glibc"], + "files": [ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/kernel-linux-x64-musl/bin/.gitkeep b/npm/kernel-linux-x64-musl/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-linux-x64-musl/package.json b/npm/kernel-linux-x64-musl/package.json new file mode 100644 index 00000000..4fd93a34 --- /dev/null +++ b/npm/kernel-linux-x64-musl/package.json @@ -0,0 +1,19 @@ +{ + "name": "@khive/kernel-linux-x64-musl", + "version": "0.2.1", + "description": "khive Rust binaries for Linux x86_64 musl (Alpine etc.)", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ohdearquant/khive" + }, + "os": ["linux"], + "cpu": ["x64"], + "libc": ["musl"], + "files": [ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/kernel-win32-x64/bin/.gitkeep b/npm/kernel-win32-x64/bin/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/npm/kernel-win32-x64/package.json b/npm/kernel-win32-x64/package.json new file mode 100644 index 00000000..2de6d35c --- /dev/null +++ b/npm/kernel-win32-x64/package.json @@ -0,0 +1,18 @@ +{ + "name": "@khive/kernel-win32-x64", + "version": "0.2.1", + "description": "khive Rust binaries for Windows x86_64", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ohdearquant/khive" + }, + "os": ["win32"], + "cpu": ["x64"], + "files": [ + "bin/" + ], + "engines": { + "node": ">=18" + } +} diff --git a/npm/package.json b/npm/package.json index 4aa805d1..cecb0d8d 100644 --- a/npm/package.json +++ b/npm/package.json @@ -1,16 +1,17 @@ { "name": "khive", - "version": "0.1.0", + "version": "0.2.1", "description": "Research knowledge graph CLI — git-native KG versioning", "license": "Apache-2.0", "repository": { "type": "git", "url": "https://github.com/ohdearquant/khive" }, - "homepage": "https://khive.ai", + 
"homepage": "https://github.com/ohdearquant", "keywords": ["knowledge-graph", "research", "git", "ndjson", "cli"], "bin": { - "khive": "bin/khive" + "khive": "bin/khive", + "khive-mcp": "bin/khive-mcp" }, "files": [ "bin/", @@ -20,5 +21,13 @@ "cpu": ["arm64", "x64"], "engines": { "node": ">=18" + }, + "optionalDependencies": { + "@khive/kernel-darwin-arm64": "0.2.1", + "@khive/kernel-darwin-x64": "0.2.1", + "@khive/kernel-linux-x64-gnu": "0.2.1", + "@khive/kernel-linux-x64-musl": "0.2.1", + "@khive/kernel-linux-arm64": "0.2.1", + "@khive/kernel-win32-x64": "0.2.1" } } diff --git a/scripts/migrate_notes.py b/scripts/migrate_notes.py deleted file mode 100644 index a1a32a0d..00000000 --- a/scripts/migrate_notes.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python3 -"""Migrate notes from internal khive DB to OSS khive-graph DB. - -Copies all live (non-deleted) notes from the internal backup into the OSS -substrate. Populates the FTS index. Vector embeddings are left to the -runtime (generated on first search). 
- -Usage: - uv run python scripts/migrate_notes.py [--dry-run] - uv run python scripts/migrate_notes.py ~/.khive/khive.db.backup --dry-run -""" -import json -import sqlite3 -import sys -from pathlib import Path - -OSS_DB = Path.home() / ".khive" / "khive-graph.db" - - -def migrate(source_db: Path, dry_run: bool = False): - if not source_db.exists(): - print(f"ERROR: source DB not found at {source_db}") - sys.exit(1) - if not OSS_DB.exists(): - print(f"ERROR: OSS DB not found at {OSS_DB}") - sys.exit(1) - - src = sqlite3.connect(str(source_db)) - dst = sqlite3.connect(str(OSS_DB)) - dst.execute("PRAGMA journal_mode=WAL") - dst.execute("PRAGMA foreign_keys=OFF") - - src_cursor = src.execute( - "SELECT id, namespace, kind, content, salience, decay_factor, " - "expires_at, properties, created_at, updated_at " - "FROM notes WHERE deleted_at IS NULL" - ) - - inserted = 0 - skipped = 0 - by_kind: dict[str, int] = {} - - for row in src_cursor: - note_id, namespace, kind, content, salience, decay_factor, \ - expires_at, properties, created_at, updated_at = row - - existing = dst.execute( - "SELECT 1 FROM notes WHERE id = ?", (note_id,) - ).fetchone() - if existing: - skipped += 1 - continue - - if dry_run: - inserted += 1 - by_kind[kind] = by_kind.get(kind, 0) + 1 - continue - - dst.execute( - "INSERT INTO notes (id, namespace, kind, name, content, salience, " - "decay_factor, expires_at, properties, created_at, updated_at) " - "VALUES (?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)", - (note_id, namespace, kind, content, salience, decay_factor, - expires_at, properties, created_at, updated_at), - ) - - props = {} - if properties: - try: - props = json.loads(properties) - except (json.JSONDecodeError, TypeError): - pass - - tags_str = ",".join(props.get("tags", [])) if isinstance(props.get("tags"), list) else "" - title = "" - body = content or "" - - dst.execute( - "INSERT INTO fts_notes_local (subject_id, kind, title, body, tags, " - "namespace, metadata, updated_at) " - "VALUES (?, 
?, ?, ?, ?, ?, ?, ?)", - (note_id, kind, title, body, tags_str, namespace, properties or "", updated_at), - ) - - inserted += 1 - by_kind[kind] = by_kind.get(kind, 0) + 1 - - if not dry_run: - dst.commit() - - src.close() - dst.close() - - mode = "DRY RUN" if dry_run else "MIGRATED" - print(f"\n{mode}: {inserted} notes inserted, {skipped} skipped (already exist)") - print("By kind:") - for kind, count in sorted(by_kind.items(), key=lambda x: -x[1]): - print(f" {kind}: {count}") - - -if __name__ == "__main__": - args = [a for a in sys.argv[1:] if not a.startswith("--")] - if not args: - print("Usage: uv run python scripts/migrate_notes.py [--dry-run]") - sys.exit(1) - dry_run = "--dry-run" in sys.argv - migrate(Path(args[0]), dry_run=dry_run) diff --git a/tests/contract_test.py b/tests/contract_test.py index 76586abf..d8f53fdd 100644 --- a/tests/contract_test.py +++ b/tests/contract_test.py @@ -87,10 +87,17 @@ def _recv(proc: subprocess.Popen) -> dict: def _request_raw(proc: subprocess.Popen, ops_string: str) -> dict: """Call the single `request` MCP tool and return the parsed response body. + Uses ``presentation: "verbose"`` so test assertions receive full canonical + UUIDs and timestamps (ADR-045 — scripted/CI callers default to Verbose). + Returns {"_rpc_error": {...}} if the server replied with a JSON-RPC error (i.e. the DSL itself was rejected — malformed input). """ - _send(proc, "tools/call", {"name": "request", "arguments": {"ops": ops_string}}) + _send( + proc, + "tools/call", + {"name": "request", "arguments": {"ops": ops_string, "presentation": "verbose"}}, + ) resp = _recv(proc) if "error" in resp: return {"_rpc_error": resp["error"]} @@ -411,7 +418,7 @@ def test_gql_property_projection(proc: subprocess.Popen) -> None: ) # Error must contain the compiler's fixed-format valid-column list. If the # columns change, this assertion will catch the drift. 
- assert "Valid: id, name, kind, namespace, description, properties, created_at, updated_at" in err_text, ( + assert "Valid: id, name, kind, entity_type, namespace, description, properties, created_at, updated_at" in err_text, ( f"Error text must contain the full valid-column list emitted by the compiler: {err_text!r}" ) @@ -435,8 +442,10 @@ def test_edge_cascade_hard_delete(proc: subprocess.Popen) -> None: e1 = _tool(proc, "link", { "source_id": hub["id"], "target_id": spoke1["id"], "relation": "extends", }) + # ADR-002: depends_on is restricted to Project/Service/Artifact endpoints, not Concept. + # Use `enables` (valid concept-to-concept) for this contract. e2 = _tool(proc, "link", { - "source_id": spoke2["id"], "target_id": hub["id"], "relation": "depends_on", + "source_id": spoke2["id"], "target_id": hub["id"], "relation": "enables", }) e1_id = e1["id"] e2_id = e2["id"] @@ -448,7 +457,7 @@ def test_edge_cascade_hard_delete(proc: subprocess.Popen) -> None: ) # Hard-delete the hub - del_result = _tool(proc, "delete", {"id": hub["id"], "hard": True}) + del_result = _tool(proc, "delete", {"id": hub["id"], "kind": "entity", "hard": True}) assert del_result["deleted"] is True, f"Hard delete should return deleted=true: {del_result}" # Both incident edges must be gone — assert via get() AND via list() so the @@ -491,7 +500,7 @@ def test_edge_cascade_hard_delete(proc: subprocess.Popen) -> None: }) e_soft_id = e_soft["id"] - del_soft = _tool(proc, "delete", {"id": hub_soft["id"]}) # hard=False by default + del_soft = _tool(proc, "delete", {"id": hub_soft["id"], "kind": "entity"}) # hard=False by default assert del_soft["deleted"] is True # Edge should still be retrievable after soft delete @@ -673,10 +682,12 @@ def test_merge_semantics(proc: subprocess.Popen) -> None: # Create edges incident on "gone": # third → gone (inbound edge to gone) # gone → kept (outbound edge from gone, which would become a self-loop after merge — should be dropped) + # ADR-002: depends_on is 
restricted to Project/Service/Artifact endpoints, not Concept. + # Use `enables` (valid concept-to-concept) for this contract. e_inbound = _tool(proc, "link", { "source_id": third["id"], "target_id": gone["id"], - "relation": "depends_on", + "relation": "enables", "weight": 0.7, }) e_self_loop = _tool(proc, "link", { @@ -822,7 +833,7 @@ def test_annotates_source_must_be_note(proc: subprocess.Popen) -> None: ) # ---- Hard-delete the target entity cascades the annotates edge ---- - del_result = _tool(proc, "delete", {"id": concept["id"], "hard": True}) + del_result = _tool(proc, "delete", {"id": concept["id"], "kind": "entity", "hard": True}) assert del_result["deleted"] is True err_edge = _expect_rpc_error(proc, "get", {"id": edge_id}) diff --git a/tests/smoke_test.py b/tests/smoke_test.py index 392bc724..ffd55a78 100644 --- a/tests/smoke_test.py +++ b/tests/smoke_test.py @@ -186,13 +186,15 @@ def main(): assert edge1["relation"] == "variant_of" print(f" [ok] link — QLoRA variant_of LoRA") + # ADR-002: introduced_by direction is concept → document (a concept + # was introduced by a paper). Reverse the source/target accordingly. call_verb(proc, "link", { - "source_id": paper_id, - "target_id": lora_id, + "source_id": lora_id, + "target_id": paper_id, "relation": "introduced_by", "weight": 1.0, }) - print(f" [ok] link — paper introduced_by LoRA") + print(f" [ok] link — LoRA introduced_by paper") # 7. Get edge via get (auto-detects kind) edge_id = edge1["id"] @@ -200,27 +202,34 @@ def main(): assert fetched_edge["kind"] == "edge", f"expected kind=edge, got: {fetched_edge}" print(f" [ok] get edge — wrapped response kind={fetched_edge['kind']}") - # 8. Neighbors - nbrs = call_verb(proc, "neighbors", { + # 8. Neighbors — LoRA has 1 inbound (QLoRA variant_of) and 1 outbound + # (LoRA introduced_by paper, per ADR-002 direction). 
+ nbrs_in = call_verb(proc, "neighbors", { "node_id": lora_id, "direction": "in", }) - assert len(nbrs) == 2, f"expected 2 inbound neighbors, got {len(nbrs)}" - print(f" [ok] neighbors — {len(nbrs)} inbound to LoRA") + assert len(nbrs_in) == 1, f"expected 1 inbound neighbor, got {len(nbrs_in)}" + nbrs_out = call_verb(proc, "neighbors", { + "node_id": lora_id, + "direction": "out", + }) + assert len(nbrs_out) == 1, f"expected 1 outbound neighbor, got {len(nbrs_out)}" + print(f" [ok] neighbors — 1 inbound + 1 outbound to LoRA") # 9. Edge list edges = call_verb(proc, "list", {"kind": "edge", "source_id": qlora_id}) assert len(edges) == 1 print(f" [ok] list edges") - # 10. Edge update (auto-detects kind from UUID) - updated_edge = call_verb(proc, "update", {"id": edge_id, "weight": 0.95}) + # 10. Edge update + updated_edge = call_verb(proc, "update", {"id": edge_id, "kind": "edge", "weight": 0.95}) assert abs(updated_edge["weight"] - 0.95) < 0.01 print(f" [ok] update edge weight") - # 11. Entity update (auto-detects kind from UUID) + # 11. Entity update patched = call_verb(proc, "update", { "id": lora_id, + "kind": "entity", "description": "Low-Rank Adaptation of LLMs", }) assert patched["description"] == "Low-Rank Adaptation of LLMs" @@ -297,17 +306,17 @@ def main(): print(f" [ok] merge entity") # 19. Entity delete - del_result = call_verb(proc, "delete", {"id": qlora_id}) + del_result = call_verb(proc, "delete", {"id": qlora_id, "kind": "entity"}) assert del_result["deleted"] is True print(f" [ok] delete entity") # 20. Edge delete - del_edge = call_verb(proc, "delete", {"id": edge_id}) + del_edge = call_verb(proc, "delete", {"id": edge_id, "kind": "edge"}) assert del_edge["deleted"] is True print(f" [ok] delete edge") # 21. Note delete - del_note = call_verb(proc, "delete", {"id": note_id}) + del_note = call_verb(proc, "delete", {"id": note_id, "kind": "note"}) assert del_note["deleted"] is True print(f" [ok] delete note")