vouchdev · plind-junior · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,13 @@ All notable changes to vouch are documented here. Format follows
 ## [Unreleased]
 
 ### Added
+- `kb.synthesize` — answer-mode retrieval over the review-gated KB. Answers a
+  query in prose from approved claims only, with an inline `[claim_id]`
+  citation behind every sentence, an explicit `gaps` block listing query
+  topics no approved claim covered, and a `synthesis_confidence` grade derived
+  from the cited claims' lifecycle status. Deterministic in v1 (no LLM in the
+  loop). Exposed across the CLI (`vouch synthesize`), MCP (`kb_synthesize`),
+  and JSONL (`kb.synthesize`) surfaces (#222).
 - Entity-salience retrieval reflex: a per-session, in-memory ring buffer of
   recent caller queries drives a zero-LLM substring/FTS entity pass that
   attaches top-K matched claim candidates as `_meta.vouch_salience` on

diff --git a/PR_BODY.md b/PR_BODY.md
@@ -0,0 +1,79 @@
+# feat(synthesize): `kb.synthesize` answer-mode retrieval over the review-gated KB
+
+## What changed
+
+Adds `kb.synthesize` — an answer-mode counterpart to `kb.context`. Where
+`kb.context` returns a *ranked list* of relevant items, `kb.synthesize`
+answers a query in prose, but strictly from **approved (durable) claims**,
+with an inline `[claim_id]` citation behind every sentence.
+
+New surface, wired across all three transports that the capabilities test
+keeps in sync:
+
+- `src/vouch/synthesize.py` — `synthesize(store, *, query, depth=3,
+  max_chars=4000, llm=False)`. Walks `build_context_pack(... limit=depth)`,
+  keeps only `claim` items that resolve to a durable claim via
+  `store.get_claim`, and composes a deterministic answer: one short,
+  single-clause sentence per claim, each carrying at least one `[claim_id]`
+  citation. No sentence is emitted that isn't traceable to a claim id.
+  `max_chars` truncates by dropping trailing claims (never by cutting a
+  citation). Returns
+  `{"query", "answer", "claims", "gaps", "_meta": {"synthesis_confidence"}}`.
+  `gaps` lists the query's salient terms that appear in the text of no cited
+  claim; when nothing matched, `answer` is empty and `gaps` carries every
+  salient term. `synthesis_confidence` is graded in order of precedence: `low`
+  when any cited claim is `contested`, otherwise `medium` when any is
+  `working`/`actionable`, otherwise `high` when every cited claim is `stable`.
+  `llm=True` raises `ValueError` (reserved for an opt-in generative backend;
+  deterministic synthesis is the v1 default).
+- `src/vouch/capabilities.py` — `kb.synthesize` appended to `METHODS`.
+- `src/vouch/jsonl_server.py` — `_h_synthesize` handler + `HANDLERS` entry.
+- `src/vouch/server.py` — `@mcp.tool() kb_synthesize(query, depth=3,
+  max_chars=4000)`.
+- `src/vouch/cli.py` — `vouch synthesize "<query>" [--depth N] [--max-chars N]`.
+- `CHANGELOG.md` — `### Added` bullet under `## [Unreleased]`.
+
+## Why / root cause
+
+`kb.context` is a retrieval primitive: it ranks and budgets items but leaves
+answer composition (and the discipline of *only* using approved knowledge) to
+the caller. There was no first-class way to ask the KB a question and get a
+prose answer whose every clause is provably backed by a reviewed claim, with
+the uncovered parts of the question surfaced rather than silently dropped.
+`kb.synthesize` fills that gap deterministically — citation-gated by
+construction, so it cannot fabricate an unbacked sentence — and grades its own
+confidence from the lifecycle status of the claims it actually cited.
+
+## Test plan
+
+`tests/test_synthesize.py` covers:
+
+- 3 approved `auth` claims → non-empty answer citing all 3 ids by `[id]`,
+  confidence `high`.
+- A query the KB doesn't cover → `answer == ""`, `claims == []`, `gaps`
+  populated with the query's salient terms.
+- Fuzz/traceability: every sentence in a non-empty answer carries at least one
+  `[id]` citation whose id is in `claims` and resolves via `store.get_claim`.
+- `max_chars` drops trailing claims without cutting a citation
+  (citation count == cited-claim count).
+- Confidence reflects claim status (`working` → medium, `contested` → low).
+- `llm=True` raises the reserved-backend `ValueError`.
+- `kb.synthesize` is in `capabilities().methods` and in the JSONL `HANDLERS`,
+  and is callable via `handle_request` end-to-end.
+
+Verification gate (fresh venv, editable install of this worktree):
+
+```
+$ ./.venv/bin/ruff check src tests
+All checks passed!
+
+$ ./.venv/bin/mypy src
+Success: no issues found in 30 source files
+
+$ ./.venv/bin/python -m pytest -q
+94 passed, 6 skipped in 0.81s
+```
+
+(The 6 skips are pre-existing numpy/embedding-optional tests, unrelated to this
+change.)
+
+Closes #222
diff --git a/src/vouch/capabilities.py b/src/vouch/capabilities.py
@@ -19,6 +19,7 @@
     "kb.stats",
     "kb.search",
     "kb.context",
+    "kb.synthesize",
     "kb.read_page",
     "kb.read_claim",
     "kb.read_entity",

diff --git a/src/vouch/cli.py b/src/vouch/cli.py
@@ -31,6 +31,7 @@
 from . import sessions as sess_mod
 from . import stats as stats_mod
 from . import sync as sync_mod
+from . import synthesize as synth
 from . import vault_sync as vault_sync_mod
 from . import verify as verify_mod
 from .capabilities import capabilities as build_caps
@@ -1378,6 +1379,20 @@ def context(
     _emit_json(pack)
 
 
+@cli.command()
+@click.argument("query")
+@click.option("--depth", default=3, show_default=True, type=int)
+@click.option("--max-chars", default=4000, show_default=True, type=int)
+def synthesize(query: str, depth: int, max_chars: int) -> None:
+    """Answer a query from approved claims only, with inline citations."""
+    # NOTE: click uses the docstring above as the command's help text, so it
+    # is user-facing output rather than a plain comment.
+    kb = _load_store()
+    options = {"query": query, "depth": depth, "max_chars": max_chars}
+    # Only the synthesis call runs under the CLI error handler; the result
+    # is emitted as JSON afterwards.
+    with _cli_errors():
+        payload = synth.synthesize(kb, **options)
+    _emit_json(payload)
+
+
 @cli.command()
 def index() -> None:
     """Rebuild state.db from durable files."""

diff --git a/src/vouch/jsonl_server.py b/src/vouch/jsonl_server.py
@@ -26,6 +26,8 @@
 from pathlib import Path
 from typing import Any
 
+import yaml
+
 from . import audit, bundle, health, volunteer_context
 from . import lifecycle as life
 from . import salience as salience_mod
@@ -54,6 +56,7 @@
     KBStore,
     discover_root,
 )
+from .synthesize import synthesize
 
 # Per-request actor override. The HTTP transport sets this from the
 # X-Vouch-Agent header so audit attribution is correct without mutating
@@ -189,6 +192,16 @@ def _h_context(p: dict) -> dict:
     return salience_mod.attach_salience(result, store, session_id, cfg)
 
 
+def _h_synthesize(p: dict) -> dict:
+    """Handle a `kb.synthesize` request described by the params dict `p`.
+
+    `query` is required (a missing key raises `KeyError`). `depth`,
+    `max_chars` and `llm` default to 3, 4000 and False and are coerced with
+    `int()` / `bool()` before being handed to `synthesize`.
+    """
+    # Resolve the store and the required key first, then the optional
+    # params, so the order in which bad input raises stays fixed.
+    store = _store()
+    query = p["query"]
+    depth = int(p.get("depth", 3))
+    max_chars = int(p.get("max_chars", 4000))
+    llm = bool(p.get("llm", False))
+    return synthesize(
+        store, query=query, depth=depth, max_chars=max_chars, llm=llm,
+    )
+
+
 def _h_read_page(p: dict) -> dict:
     return _store().get_page(p["page_id"]).model_dump(mode="json")
 
@@ -601,6 +614,7 @@ def _h_provenance_rebuild(_: dict) -> dict:
     "kb.stats": _h_stats,
     "kb.search": _h_search,
     "kb.context": _h_context,
+    "kb.synthesize": _h_synthesize,
     "kb.read_page": _h_read_page,
     "kb.read_claim": _h_read_claim,
     "kb.read_entity": _h_read_entity,

diff --git a/src/vouch/server.py b/src/vouch/server.py
@@ -48,6 +48,7 @@
     KBStore,
     discover_root,
 )
+from .synthesize import synthesize
 
 mcp = FastMCP("vouch")
 
@@ -210,6 +211,21 @@ def kb_context(
     return salience_mod.attach_salience(result, store, session_id, cfg)
 
 
+@mcp.tool()
+def kb_synthesize(
+    query: str,
+    depth: int = 3,
+    max_chars: int = 4000,
+) -> dict[str, Any]:
+    """Answer a query from approved claims only, with inline `[claim_id]`
+    citations, an explicit gaps block, and a synthesis_confidence grade.
+
+    Unlike `kb_context` (a ranked list), this returns prose where every
+    sentence is traceable to an approved claim.
+    """
+    # `llm` is not forwarded, so `synthesize` runs with its own default.
+    kb = _store()
+    result = synthesize(kb, query=query, depth=depth, max_chars=max_chars)
+    return result
+
+
 @mcp.tool()
 def kb_read_page(page_id: str) -> dict[str, Any]:
     """Return a page (title, body, claim ids)."""

diff --git a/src/vouch/sessions.py b/src/vouch/sessions.py
@@ -12,7 +12,7 @@
 import uuid
 from datetime import UTC, datetime
 
-from . import audit, index_db, volunteer_context
+from . import audit, index_db, salience, volunteer_context
 from .models import Page, PageType, ProposalStatus, Session
 from .proposals import approve
 from .storage import KBStore

diff --git a/src/vouch/synthesize.py b/src/vouch/synthesize.py
@@ -0,0 +1,140 @@
+"""Answer-mode synthesis over the review-gated KB.
+
+`kb.context` returns a *ranked list* of relevant items; `kb.synthesize`
+answers a query in prose, but only from APPROVED (durable) claims, with an
+inline `[claim_id]` citation behind every sentence. It never invents a
+sentence that isn't traceable to a claim, reports the query topics it found
+no claim for in an explicit `gaps` block, and grades its own confidence from
+the lifecycle status of the claims it cited.
+
+The synthesis is deterministic in v1 — there is no LLM in the loop. The
+`llm` flag is reserved so the wire shape is stable when an opt-in generative
+backend lands; passing `llm=True` raises rather than silently degrading.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Literal
+
+from .context import build_context_pack
+from .models import Claim, ClaimStatus
+from .storage import ArtifactNotFoundError, KBStore
+
+# Values `_confidence` can return for `_meta.synthesis_confidence`.
+Confidence = Literal["high", "medium", "low"]
+
+# Function words that never count as a salient query term.
+_STOPWORDS = frozenset(
+    {
+        "a", "an", "and", "are", "as", "at", "be", "by", "do", "does", "for",
+        "from", "how", "in", "into", "is", "it", "its", "of", "on", "or",
+        "the", "their", "them", "then", "there", "these", "this", "to", "was",
+        "were", "what", "when", "where", "which", "who", "why", "will", "with",
+        "you", "your",
+    }
+)
+
+
+def _salient_terms(query: str) -> list[str]:
+    """Lowercased, de-duplicated, order-preserving content words of the query.
+
+    The query is split on every non-alphanumeric character, so punctuation
+    and joiners separate words instead of fusing them: "rate-limiting/auth"
+    yields "rate", "limiting", "auth" rather than one unmatchable token.
+    Tokens shorter than three characters and stopwords are dropped.
+    """
+    # Turn every non-alphanumeric character into a space before splitting;
+    # merely deleting punctuation would glue neighbouring words together.
+    normalized = "".join(ch if ch.isalnum() else " " for ch in query.lower())
+    seen: set[str] = set()
+    terms: list[str] = []
+    for token in normalized.split():
+        if len(token) < 3 or token in _STOPWORDS or token in seen:
+            continue
+        seen.add(token)
+        terms.append(token)
+    return terms
+
+
+def _clause(text: str) -> str:
+    """Reduce a claim's text to one short clause.
+
+    Only the first line of the stripped text is used. It is then cut at
+    ". ", "; ", " — " and " - " in turn (a cut that would leave nothing is
+    skipped), and trailing ".", ";" and "," characters are removed.
+    """
+    first_line, _, _ = text.strip().partition("\n")
+    result = first_line.strip()
+    for delimiter in (". ", "; ", " — ", " - "):
+        prefix, _, _ = result.partition(delimiter)
+        # An empty prefix means the text starts with the delimiter; keep
+        # the text whole rather than reducing it to nothing.
+        result = prefix or result
+    return result.rstrip(".;,")
+
+
+def _covers(term: str, *claims: Claim) -> bool:
+    """Return True when `term` is a substring of any claim's lowercased text.
+
+    `term` is compared as given (it is not lowercased here), and calling
+    with no claims returns False.
+    """
+    for claim in claims:
+        if term in claim.text.lower():
+            return True
+    return False
+
+
+def _confidence(statuses: list[ClaimStatus]) -> Confidence:
+    """Grade an answer from the lifecycle statuses of its cited claims.
+
+    Checked in this order:
+      * nothing cited            -> "low" (no claim backs the answer)
+      * any CONTESTED            -> "low"
+      * any WORKING / ACTIONABLE -> "medium"
+      * every status STABLE      -> "high"
+      * any other mix            -> "medium"
+    """
+    # An answer with no citations has no evidence behind it; do not let it
+    # fall through to the "medium" catch-all below.
+    if not statuses:
+        return "low"
+    if any(s == ClaimStatus.CONTESTED for s in statuses):
+        return "low"
+    if any(s in (ClaimStatus.WORKING, ClaimStatus.ACTIONABLE) for s in statuses):
+        return "medium"
+    if all(s == ClaimStatus.STABLE for s in statuses):
+        return "high"
+    # Statuses outside the four handled above are graded conservatively.
+    return "medium"
+
+
+def synthesize(
+    store: KBStore,
+    *,
+    query: str,
+    depth: int = 3,
+    max_chars: int = 4000,
+    llm: bool = False,
+) -> dict[str, Any]:
+    """Answer `query` from approved claims only, with inline citations.
+
+    Args:
+        store: KB store handed to `build_context_pack` and used to resolve
+            each candidate id through `store.get_claim`.
+        query: The question to answer.
+        depth: Forwarded to `build_context_pack` as `limit`.
+        max_chars: Upper bound on the length of `answer`. Composition stops
+            at the first sentence that would exceed it, so a citation is
+            never cut in half.
+        llm: Reserved for a generative backend; must be falsy.
+
+    Returns:
+        A dict with `query`, `answer` (citation-bearing prose, possibly
+        empty), `claims` (the cited claim ids, in answer order), `gaps`
+        (salient query terms found in no cited claim's text) and
+        `_meta.synthesis_confidence`.
+
+    Raises:
+        ValueError: If `llm` is true.
+    """
+    if llm:
+        raise ValueError(
+            "llm synthesis backend not configured; "
+            "deterministic synthesis is the default"
+        )
+
+    pack = build_context_pack(store, query=query, limit=depth)
+    # The pack and its items are accepted as either dicts or objects.
+    items = pack["items"] if isinstance(pack, dict) else pack.items
+
+    # Resolve pack items to claims, skipping non-claim items, ids already
+    # resolved, and ids the store cannot find.
+    approved: list[Claim] = []
+    seen_ids: set[str] = set()
+    for item in items:
+        as_dict = isinstance(item, dict)
+        if (item["type"] if as_dict else item.type) != "claim":
+            continue
+        cid = item["id"] if as_dict else item.id
+        if cid in seen_ids:
+            continue
+        try:
+            claim = store.get_claim(cid)
+        except ArtifactNotFoundError:
+            continue
+        seen_ids.add(cid)
+        approved.append(claim)
+
+    # One sentence per claim, in pack order. Track the cited claims
+    # themselves so ids, statuses and gap coverage all derive from one list.
+    sentences: list[str] = []
+    cited_claims: list[Claim] = []
+    used = 0
+    for claim in approved:
+        sentence = f"{_clause(claim.text)} [{claim.id}]."
+        # +1 accounts for the joining space before every sentence but the first.
+        projected = used + len(sentence) + (1 if sentences else 0)
+        if projected > max_chars:
+            break
+        sentences.append(sentence)
+        cited_claims.append(claim)
+        used = projected
+
+    # With nothing cited, `_covers` is False for every term, so all salient
+    # terms are reported as gaps.
+    gaps = [
+        term
+        for term in _salient_terms(query)
+        if not _covers(term, *cited_claims)
+    ]
+    statuses = [claim.status for claim in cited_claims]
+
+    return {
+        "query": query,
+        "answer": " ".join(sentences),
+        "claims": [claim.id for claim in cited_claims],
+        "gaps": gaps,
+        "_meta": {"synthesis_confidence": _confidence(statuses)},
+    }