From 552eeec4bded77e0494be79ee60d4333e0ad11ea Mon Sep 17 00:00:00 2001 From: jsdevninja Date: Tue, 9 Jun 2026 06:26:24 -0500 Subject: [PATCH] feat(retrieval): add kb.neighbors and graph expansion for context packs --- src/vouch/capabilities.py | 1 + src/vouch/cli.py | 31 +++++ src/vouch/context.py | 72 ++++++++++- src/vouch/graph.py | 249 ++++++++++++++++++++++++++++++++++++++ src/vouch/jsonl_server.py | 17 +++ src/vouch/server.py | 22 ++++ tests/test_graph.py | 164 +++++++++++++++++++++++++ 7 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 src/vouch/graph.py create mode 100644 tests/test_graph.py diff --git a/src/vouch/capabilities.py b/src/vouch/capabilities.py index a5bad38..498b8b3 100644 --- a/src/vouch/capabilities.py +++ b/src/vouch/capabilities.py @@ -18,6 +18,7 @@ "kb.status", "kb.stats", "kb.search", + "kb.neighbors", "kb.context", "kb.read_page", "kb.read_claim", diff --git a/src/vouch/cli.py b/src/vouch/cli.py index 5157bad..427a4fa 100644 --- a/src/vouch/cli.py +++ b/src/vouch/cli.py @@ -1309,6 +1309,27 @@ def search( click.echo(f"{k}/{i}\t{snip} ({used})") +@cli.command() +@click.argument("node_id") +@click.option("--depth", default=1, show_default=True, type=int) +@click.option("--rel-type", "rel_types", multiple=True, + help="Filter to relation types (repeatable).") +@click.option("--max-nodes", default=50, show_default=True, type=int) +def neighbors(node_id: str, depth: int, rel_types: tuple[str, ...], + max_nodes: int) -> None: + """List graph neighbors of a claim, page, entity, or source.""" + from .graph import find_neighbors + + store = _load_store() + with _cli_errors(): + result = find_neighbors( + store, node_id, depth=depth, + rel_types=list(rel_types) or None, + max_nodes=max_nodes, + ) + _emit_json(result) + + @cli.command() @click.argument("task") @click.option("--limit", default=10, show_default=True, type=int) @@ -1317,6 +1338,10 @@ def search( @click.option("--min-items", default=0, type=int) @click.option("--project", default=None, help="Viewer project for scope filtering.") @click.option("--agent", default=None, help="Viewer agent for scope filtering.") +@click.option("--expand-graph", is_flag=True, + help="Include 1-hop graph neighbors of search hits.") +@click.option("--graph-depth", default=1, show_default=True, type=int) +@click.option("--graph-limit", default=20, show_default=True, type=int) def context( task: str, limit: int, @@ -1325,6 +1350,9 @@ def context( min_items: int, project: str | None, agent: str | None, + expand_graph: bool, + graph_depth: int, + graph_limit: int, ) -> None: """Build a ContextPack ready to inject into an agent prompt.""" store = _load_store() @@ -1337,6 +1365,9 @@ def context( require_citations=require_citations, project=project, agent=agent, + expand_graph=expand_graph, + graph_depth=graph_depth, + graph_limit=graph_limit, ) _emit_json(pack) diff --git a/src/vouch/context.py b/src/vouch/context.py index 2f87782..423c2fa 100644 --- a/src/vouch/context.py +++ b/src/vouch/context.py @@ -18,7 +18,7 @@ import yaml -from . import index_db +from . import graph, index_db from .models import ClaimStatus, ContextItem, ContextPack, ContextQuality from .scoping import ( ViewerContext, @@ -138,6 +138,65 @@ def _enrich_summary(store: KBStore, kind: str, artifact_id: str, summary: str) - return summary +def _append_graph_neighbors( + store: KBStore, + items: list[ContextItem], + *, + depth: int, + limit: int, + rel_types: list[str] | None, +) -> list[str]: + """Expand `items` with 1-hop (or deeper) graph neighbors. Returns warnings.""" + warnings: list[str] = [] + if not items: + return warnings + seed_scores = {it.id: it.score for it in items} + neighbors = graph.graph_neighbors_for_seeds( + store, + [it.id for it in items], + depth=depth, + rel_types=rel_types, + max_nodes=limit, + ) + existing = {it.id for it in items} + added = 0 + for node in neighbors: + nid = node["id"] + if nid in existing: + continue + kind = node["kind"] + cites: list[str] = [] + if kind == "claim": + try: + claim = store.get_claim(nid) + except ArtifactNotFoundError: + continue + if claim.status in _RETRACTED_CLAIM_STATUSES: + continue + cites = list(claim.evidence) + via = node.get("via", "") + parent_score = seed_scores.get(via, 0.5) + distance = int(node.get("distance", 1)) + score = parent_score * (0.8 ** distance) + summary = node.get("summary") or _enrich_summary(store, kind, nid, "") + items.append( + ContextItem( + id=nid, + type=cast(ContextItemKind, kind), + summary=summary, + score=score, + backend="graph", + citations=cites, + freshness="unknown", + ) + ) + existing.add(nid) + added += 1 + if added: + warnings.append(f"graph expansion added {added} neighbor(s)") + return warnings + + def build_context_pack( store: KBStore, *, @@ -151,6 +210,10 @@ def build_context_pack( explain: bool = False, project: str | None = None, agent: str | None = None, + expand_graph: bool = False, + graph_depth: int = 1, + graph_limit: int = 20, + graph_rel_types: list[str] | None = None, ) -> ContextPack | dict[str, Any]: viewer = viewer_from( config_path=store.config_path, @@ -184,6 +247,13 @@ def build_context_pack( ) warnings: list[str] = [] + if expand_graph: + warnings.extend( + _append_graph_neighbors( + store, items, depth=graph_depth, limit=graph_limit, + rel_types=graph_rel_types, + ) + ) failed: list[str] = [] uncited: list[str] = [] budget_truncated = False diff --git a/src/vouch/graph.py b/src/vouch/graph.py new file mode 100644 index 0000000..e05e7c1 --- /dev/null +++ b/src/vouch/graph.py @@ -0,0 +1,249 @@ +"""Graph traversal — `kb.neighbors` / `vouch neighbors`. + +Walks Relation edges plus structural links on claims, pages, and entities +(supersedes, contradicts, mentions, includes). Used by context expansion +to pull related knowledge into a ContextPack after the initial search hits. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Literal + +from .models import ClaimStatus, Relation +from .storage import ArtifactNotFoundError, KBStore + +_RETRACTED_CLAIM_STATUSES = frozenset({ + ClaimStatus.ARCHIVED, + ClaimStatus.SUPERSEDED, + ClaimStatus.REDACTED, +}) + +NodeKind = Literal["claim", "page", "entity", "source"] + + +@dataclass(frozen=True) +class _Edge: + source: str + target: str + relation: str + relation_id: str | None = None + + +def _node_kind(store: KBStore, node_id: str) -> NodeKind: + if store._claim_path(node_id).exists(): + return "claim" + if store._page_path(node_id).exists(): + return "page" + if store._entity_path(node_id).exists(): + return "entity" + if (store._source_dir(node_id) / "meta.yaml").exists(): + return "source" + raise ArtifactNotFoundError(f"node {node_id}") + + +def _summary_for(store: KBStore, kind: str, node_id: str) -> str: + try: + if kind == "claim": + return store.get_claim(node_id).text + if kind == "page": + p = store.get_page(node_id) + return p.title or p.body[:200] + if kind == "entity": + e = store.get_entity(node_id) + return e.name or (e.description or "")[:200] + if kind == "source": + s = store.get_source(node_id) + return s.title or s.locator or node_id + except ArtifactNotFoundError: + pass + return "" + + +def _claim_is_retrievable(store: KBStore, claim_id: str) -> bool: + try: + claim = store.get_claim(claim_id) + except ArtifactNotFoundError: + return False + return claim.status not in _RETRACTED_CLAIM_STATUSES + + +def _relation_allowed(rel: Relation, rel_types: frozenset[str] | None) -> bool: + if rel_types is None: + return True + return rel.relation.value in rel_types + + +def _structural_edges(store: KBStore, node_id: str, kind: NodeKind) -> list[_Edge]: + edges: list[_Edge] = [] + if kind == "claim": + claim = store.get_claim(node_id) + for eid in claim.entities: + if store._entity_path(eid).exists(): + edges.append(_Edge(node_id, eid, "mentions")) + for cid in claim.supersedes: + if store._claim_path(cid).exists(): + edges.append(_Edge(node_id, cid, "supersedes")) + if claim.superseded_by and store._claim_path(claim.superseded_by).exists(): + edges.append(_Edge(claim.superseded_by, node_id, "supersedes")) + for cid in claim.contradicts: + if store._claim_path(cid).exists(): + edges.append(_Edge(node_id, cid, "contradicts")) + elif kind == "page": + page = store.get_page(node_id) + for cid in page.claims: + if store._claim_path(cid).exists(): + edges.append(_Edge(node_id, cid, "includes_claim")) + for eid in page.entities: + if store._entity_path(eid).exists(): + edges.append(_Edge(node_id, eid, "mentions")) + for sid in page.sources: + if (store._source_dir(sid) / "meta.yaml").exists(): + edges.append(_Edge(node_id, sid, "references")) + elif kind == "entity": + entity = store.get_entity(node_id) + if entity.page and store._page_path(entity.page).exists(): + edges.append(_Edge(node_id, entity.page, "described_by")) + return edges + + +def _edges_from_node( + store: KBStore, + node_id: str, + *, + rel_types: frozenset[str] | None, +) -> list[_Edge]: + edges: list[_Edge] = [] + seen: set[tuple[str, str, str]] = set() + + def _add(edge: _Edge) -> None: + key = (edge.source, edge.target, edge.relation) + if key in seen: + return + if rel_types is not None and edge.relation not in rel_types: + return + seen.add(key) + edges.append(edge) + + for rel in store.relations_from(node_id): + if _relation_allowed(rel, rel_types): + _add(_Edge(rel.source, rel.target, rel.relation.value, rel.id)) + for rel in store.relations_to(node_id): + if _relation_allowed(rel, rel_types): + _add(_Edge(rel.source, rel.target, rel.relation.value, rel.id)) + + try: + kind = _node_kind(store, node_id) + except ArtifactNotFoundError: + return edges + + for edge in _structural_edges(store, node_id, kind): + _add(edge) + return edges + + +def _neighbor_ok(store: KBStore, node_id: str, kind: NodeKind) -> bool: + if kind == "claim": + return _claim_is_retrievable(store, node_id) + return store._node_exists(node_id) + + +def find_neighbors( + store: KBStore, + node_id: str, + *, + depth: int = 1, + rel_types: list[str] | None = None, + max_nodes: int = 50, +) -> dict[str, Any]: + """Return nodes and edges reachable within `depth` hops of `node_id`.""" + if depth < 1: + raise ValueError("depth must be >= 1") + if max_nodes < 1: + raise ValueError("max_nodes must be >= 1") + + root_kind = _node_kind(store, node_id) + rel_filter = frozenset(rel_types) if rel_types else None + + visited: set[str] = {node_id} + nodes: list[dict[str, Any]] = [] + edges_out: list[dict[str, Any]] = [] + seen_edges: set[tuple[str, str, str]] = set() + frontier = [node_id] + + for dist in range(1, depth + 1): + next_frontier: list[str] = [] + for current in frontier: + for edge in _edges_from_node(store, current, rel_types=rel_filter): + other = edge.target if edge.source == current else edge.source + ekey = (edge.source, edge.target, edge.relation) + if ekey not in seen_edges: + seen_edges.add(ekey) + edges_out.append({ + "source": edge.source, + "target": edge.target, + "relation": edge.relation, + "relation_id": edge.relation_id, + }) + if other in visited: + continue + try: + kind = _node_kind(store, other) + except ArtifactNotFoundError: + continue + if not _neighbor_ok(store, other, kind): + continue + visited.add(other) + next_frontier.append(other) + nodes.append({ + "id": other, + "kind": kind, + "distance": dist, + "via": current, + "relation": edge.relation, + "summary": _summary_for(store, kind, other), + }) + if len(nodes) >= max_nodes: + break + if len(nodes) >= max_nodes: + break + if len(nodes) >= max_nodes: + break + frontier = next_frontier + + return { + "node_id": node_id, + "kind": root_kind, + "depth": depth, + "nodes": nodes, + "edges": edges_out, + } + + +def graph_neighbors_for_seeds( + store: KBStore, + seed_ids: list[str], + *, + depth: int = 1, + rel_types: list[str] | None = None, + max_nodes: int = 20, +) -> list[dict[str, Any]]: + """Collect unique neighbor nodes for several seed ids (context expansion).""" + seen: set[str] = set(seed_ids) + out: list[dict[str, Any]] = [] + for seed in seed_ids: + try: + result = find_neighbors( + store, seed, depth=depth, rel_types=rel_types, max_nodes=max_nodes, + ) + except ArtifactNotFoundError: + continue + for node in result["nodes"]: + nid = node["id"] + if nid in seen: + continue + seen.add(nid) + out.append(node) + if len(out) >= max_nodes: + return out + return out diff --git a/src/vouch/jsonl_server.py b/src/vouch/jsonl_server.py index b6eca5b..04cc14f 100644 --- a/src/vouch/jsonl_server.py +++ b/src/vouch/jsonl_server.py @@ -157,6 +157,18 @@ def _h_search(p: dict) -> dict: +def _h_neighbors(p: dict) -> dict: + from .graph import find_neighbors + + return find_neighbors( + _store(), + p["node_id"], + depth=int(p.get("depth", 1)), + rel_types=p.get("rel_types"), + max_nodes=int(p.get("max_nodes", 50)), + ) + + def _h_context(p: dict) -> dict: return build_context_pack( # type: ignore[return-value] _store(), @@ -169,6 +181,10 @@ def _h_context(p: dict) -> dict: fail_on_budget_truncation=bool(p.get("fail_on_budget_truncation", False)), project=p.get("project"), agent=p.get("agent"), + expand_graph=bool(p.get("expand_graph", False)), + graph_depth=int(p.get("graph_depth", 1)), + graph_limit=int(p.get("graph_limit", 20)), + graph_rel_types=p.get("graph_rel_types"), ) @@ -567,6 +583,7 @@ def _h_provenance_rebuild(_: dict) -> dict: "kb.status": _h_status, "kb.stats": _h_stats, "kb.search": _h_search, + "kb.neighbors": _h_neighbors, "kb.context": _h_context, "kb.read_page": _h_read_page, "kb.read_claim": _h_read_claim, diff --git a/src/vouch/server.py b/src/vouch/server.py index fae5485..ad9c019 100644 --- a/src/vouch/server.py +++ b/src/vouch/server.py @@ -169,6 +169,24 @@ def _to_dicts(h: list[tuple[str, str, str, float]], used: str) -> dict[str, Any] raise ValueError(f"unknown backend: {backend}") +@mcp.tool() +def kb_neighbors( + node_id: str, + depth: int = 1, + rel_types: list[str] | None = None, + max_nodes: int = 50, +) -> dict[str, Any]: + """Return graph neighbors of a claim, page, entity, or source.""" + from .graph import find_neighbors + + try: + return find_neighbors( + _store(), node_id, depth=depth, rel_types=rel_types, max_nodes=max_nodes, + ) + except ArtifactNotFoundError as e: + raise ValueError(str(e)) from e + + @mcp.tool() def kb_context( task: str, @@ -178,12 +196,16 @@ def kb_context( require_citations: bool = False, project: str | None = None, agent: str | None = None, + expand_graph: bool = False, + graph_depth: int = 1, + graph_limit: int = 20, ) -> dict[str, Any]: """Build a ContextPack ready to inject into an agent prompt.""" return build_context_pack( # type: ignore[return-value] _store(), query=task, limit=limit, max_chars=max_chars, min_items=min_items, require_citations=require_citations, project=project, agent=agent, + expand_graph=expand_graph, graph_depth=graph_depth, graph_limit=graph_limit, ) diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..8130dc5 --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,164 @@ +"""Graph traversal — neighbors and context expansion.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from vouch import context, graph, health, lifecycle +from vouch.models import ( + Claim, + Entity, + EntityType, + Page, + PageType, + Relation, + RelationType, +) +from vouch.storage import ArtifactNotFoundError, KBStore + + +@pytest.fixture +def store(tmp_path: Path) -> KBStore: + return KBStore.init(tmp_path) + + +def test_find_neighbors_via_relation(store: KBStore) -> None: + store.put_entity(Entity(id="auth", name="Auth", type=EntityType.SYSTEM)) + store.put_entity(Entity(id="jwt", name="JWT", type=EntityType.CONCEPT)) + store.put_relation(Relation( + id="auth-uses-jwt", + source="auth", + relation=RelationType.USES, + target="jwt", + )) + result = graph.find_neighbors(store, "auth", depth=1) + assert result["kind"] == "entity" + assert {n["id"] for n in result["nodes"]} == {"jwt"} + assert result["edges"][0]["relation"] == "uses" + + +def test_find_neighbors_depth_two(store: KBStore) -> None: + for eid in ("a", "b", "c"): + store.put_entity(Entity(id=eid, name=eid.upper(), type=EntityType.CONCEPT)) + store.put_relation(Relation( + id="a-b", source="a", relation=RelationType.DEPENDS_ON, target="b", + )) + store.put_relation(Relation( + id="b-c", source="b", relation=RelationType.DEPENDS_ON, target="c", + )) + one_hop = graph.find_neighbors(store, "a", depth=1) + assert {n["id"] for n in one_hop["nodes"]} == {"b"} + + two_hop = graph.find_neighbors(store, "a", depth=2) + assert {n["id"] for n in two_hop["nodes"]} == {"b", "c"} + + +def test_find_neighbors_rel_type_filter(store: KBStore) -> None: + store.put_entity(Entity(id="a", name="A", type=EntityType.CONCEPT)) + store.put_entity(Entity(id="b", name="B", type=EntityType.CONCEPT)) + store.put_entity(Entity(id="c", name="C", type=EntityType.CONCEPT)) + store.put_relation(Relation( + id="uses", source="a", relation=RelationType.USES, target="b", + )) + store.put_relation(Relation( + id="blocks", source="a", relation=RelationType.BLOCKS, target="c", + )) + result = graph.find_neighbors( + store, "a", depth=1, rel_types=["uses"], + ) + assert {n["id"] for n in result["nodes"]} == {"b"} + + +def test_find_neighbors_claim_structural_links(store: KBStore) -> None: + src = store.put_source(b"e") + store.put_entity(Entity(id="auth-svc", name="Auth", type=EntityType.SYSTEM)) + store.put_claim(Claim( + id="jwt-fact", + text="Auth uses JWT", + evidence=[src.id], + entities=["auth-svc"], + )) + result = graph.find_neighbors(store, "jwt-fact", depth=1) + assert {n["id"] for n in result["nodes"]} == {"auth-svc"} + assert result["edges"][0]["relation"] == "mentions" + + +def test_find_neighbors_excludes_superseded_claims(store: KBStore) -> None: + src = store.put_source(b"e") + store.put_claim(Claim(id="old", text="v1", evidence=[src.id])) + store.put_claim(Claim(id="new", text="v2", evidence=[src.id])) + lifecycle.supersede(store, old_claim_id="old", new_claim_id="new", actor="r") + result = graph.find_neighbors(store, "new", depth=1) + assert {n["id"] for n in result["nodes"]} == set() + assert "old" not in {n["id"] for n in result["nodes"]} + + +def test_find_neighbors_unknown_node_raises(store: KBStore) -> None: + with pytest.raises(ArtifactNotFoundError): + graph.find_neighbors(store, "missing", depth=1) + + +def test_context_expand_graph_adds_neighbors(store: KBStore) -> None: + src = store.put_source(b"e") + store.put_entity(Entity(id="auth", name="Auth", type=EntityType.SYSTEM)) + store.put_claim(Claim( + id="jwt-claim", + text="JWT tokens secure the API", + evidence=[src.id], + )) + store.put_relation(Relation( + id="claim-uses-auth", + source="jwt-claim", + relation=RelationType.REFERENCES, + target="auth", + )) + health.rebuild_index(store) + + pack = context.build_context_pack( + store, query="JWT tokens", limit=5, expand_graph=True, + ) + ids = {it["id"] for it in pack["items"]} + assert "jwt-claim" in ids + assert "auth" in ids + assert any(it["backend"] == "graph" for it in pack["items"]) + assert any("graph expansion" in w for w in pack["warnings"]) + + +def test_context_expand_graph_includes_page_claims(store: KBStore) -> None: + src = store.put_source(b"e") + store.put_claim(Claim(id="c1", text="detail fact", evidence=[src.id])) + store.put_page(Page( + id="overview", + title="Overview", + type=PageType.CONCEPT, + body="Summary", + claims=["c1"], + )) + health.rebuild_index(store) + + pack = context.build_context_pack( + store, query="Overview", limit=5, expand_graph=True, + ) + ids = {it["id"] for it in pack["items"]} + assert "overview" in ids + assert "c1" in ids + + +def test_jsonl_kb_neighbors(store: KBStore, monkeypatch: pytest.MonkeyPatch) -> None: + from vouch.jsonl_server import handle_request + + monkeypatch.chdir(store.root) + store.put_entity(Entity(id="x", name="X", type=EntityType.CONCEPT)) + store.put_entity(Entity(id="y", name="Y", type=EntityType.CONCEPT)) + store.put_relation(Relation( + id="x-y", source="x", relation=RelationType.USES, target="y", + )) + resp = handle_request({ + "id": "n1", + "method": "kb.neighbors", + "params": {"node_id": "x", "depth": 1}, + }) + assert resp["ok"] is True + assert {n["id"] for n in resp["result"]["nodes"]} == {"y"}