From d89342c83770a0ccc41540f2c877d10b1938994e Mon Sep 17 00:00:00 2001 From: Srinath Siddamsetty Date: Fri, 12 Jun 2026 23:44:19 -0400 Subject: [PATCH 1/2] fix(security): close secret-scrubbing bypass paths in all MCP write routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem `stripPrivateData` was only applied inside the observe pipeline (`src/hooks/prompt-submit.ts`). Seven write paths that accept content directly via MCP tools bypassed that pipeline entirely, so secrets passed through those routes were stored in plaintext: - `mem::remember` — explicit memory saves - `mem::lesson-save` — lessons (content + context fields) - `mem::slot-create` — slot initial content - `mem::slot-append` — appended text - `mem::slot-replace` — replaced content - `mem::sketch-create` — sketch/action title and description - `mem::team-share` — shared item at the audience-widening boundary - `mem::compress` — LLM summary output (model can echo seen secrets) - `mem::import` — imported dumps bypass capture entirely ## Changes ### src/functions/privacy.ts - Add regex for PEM private key blocks (`-----BEGIN * PRIVATE KEY-----`) - Add regex for DB connection URLs with embedded credentials (`postgres://`, `mysql://`, `mongodb+srv://`, `redis://`, etc.) - Export new `scrubRecord` — a recursive `T→T` walker that applies `stripPrivateData` to every string in an arbitrary nested object/array. Use this where the shape is unknown (imports, LLM output, team shares); call `stripPrivateData` directly for known string fields. ### src/functions/remember.ts - Call `stripPrivateData(data.content)` before the Jaccard similarity check so dedup compares and stores the already-scrubbed form. ### src/functions/lessons.ts - Scrub `content` and `context` before fingerprinting, so dedup keys match the scrubbed form. Applies to both manual saves and output from `mem::crystallize`. ### src/functions/slots.ts - Scrub content in `slot-create`, text in `slot-append`, and content in `slot-replace`. ### src/functions/sketches.ts - Scrub `title` and `description` in both `sketch-create` and the inline action-create path. ### src/functions/team.ts - Apply `scrubRecord` at the share boundary before writing the `TeamSharedItem`. Defense-in-depth: catches rows written before a newer pattern was added and shares widen audience from one user to the team. ### src/functions/compress.ts - Apply `scrubRecord` to the LLM-parsed compression output before it is spread into `CompressedObservation`. An LLM can echo a secret from its context window into a generated summary. ### src/functions/export-import.ts - Apply `scrubRecord` to all content-bearing collections (sessions, memories, summaries, lessons, insights, semanticMemories, crystals, sketches, observations) before any row is written during import. Imports arrive from outside the capture pipeline and may have been created by an older install with fewer patterns. ### test/scrubbing-bypass.test.ts (new) - Unit tests for all seven bypass paths: remember, lesson-save, slot-create, slot-append, slot-replace, sketch-create, team-share, compress (LLM output), and import. - Fixtures cover every secret family: Anthropic key, GitHub PAT, DB URL with embedded password, and PEM private key block. ### pnpm-workspace.yaml (new) - `allowBuilds` for esbuild, onnxruntime-node, protobufjs, sharp so `pnpm build` and `pnpm start` are not blocked by the ERR_PNPM_IGNORED_BUILDS check on developer machines. --- docs/E2E_ARCHITECTURE.md | 990 +++++++++++++++++++++++++++++++++ pnpm-workspace.yaml | 5 + src/functions/compress.ts | 8 +- src/functions/export-import.ts | 25 + src/functions/lessons.ts | 7 + src/functions/privacy.ts | 20 + src/functions/remember.ts | 6 + src/functions/sketches.ts | 9 +- src/functions/slots.ts | 8 +- src/functions/team.ts | 9 +- test/scrubbing-bypass.test.ts | 501 +++++++++++++++++ 11 files changed, 1580 insertions(+), 8 deletions(-) create mode 100644 docs/E2E_ARCHITECTURE.md create mode 100644 pnpm-workspace.yaml create mode 100644 test/scrubbing-bypass.test.ts diff --git a/docs/E2E_ARCHITECTURE.md b/docs/E2E_ARCHITECTURE.md new file mode 100644 index 000000000..8aa81e6c3 --- /dev/null +++ b/docs/E2E_ARCHITECTURE.md @@ -0,0 +1,990 @@ +# AgentMemory — End-to-End Architecture + +> **Version:** 0.9.27 | **Updated:** 2026-06-12 + +--- + +## Table of Contents + +1. [System Overview](#1-system-overview) +2. [Component Map](#2-component-map) +3. [End-to-End Flow](#3-end-to-end-flow) + - 3.1 [Session Lifecycle](#31-session-lifecycle) + - 3.2 [Observation Pipeline](#32-observation-pipeline) + - 3.3 [Memory Retrieval Pipeline](#33-memory-retrieval-pipeline) + - 3.4 [Memory Consolidation Pipeline](#34-memory-consolidation-pipeline) +4. [Component Deep-Dives](#4-component-deep-dives) + - 4.1 [Hooks Layer](#41-hooks-layer) + - 4.2 [REST / MCP Transport Layer](#42-rest--mcp-transport-layer) + - 4.3 [Business Logic (Functions)](#43-business-logic-functions) + - 4.4 [Storage Layer (StateKV)](#44-storage-layer-statekv) + - 4.5 [Search Architecture](#45-search-architecture) + - 4.6 [LLM Provider Abstraction](#46-llm-provider-abstraction) + - 4.7 [Knowledge Graph](#47-knowledge-graph) + - 4.8 [Orchestration Primitives](#48-orchestration-primitives) +5. [Data Model](#5-data-model) +6. [Configuration & Feature Flags](#6-configuration--feature-flags) +7. [Multi-Agent & Team Memory](#7-multi-agent--team-memory) +8. [Security Model](#8-security-model) +9. [Observability](#9-observability) +10. [Deployment Topology](#10-deployment-topology) + +--- + +## 1. System Overview + +AgentMemory is a **persistent, queryable memory layer** for AI coding agents. It attaches to agents (Claude Code, Cursor, GitHub Copilot, etc.) via lightweight hooks and gives them long-term memory across sessions. + +**What it does:** + +- Captures every tool call an agent makes (reads, writes, bash commands) as a *raw observation*. +- Compresses observations into structured summaries via LLM (title, facts, narrative, file references, importance score). +- Indexes all memories in a triple-stream search backend (BM25 keyword + dense vector + knowledge graph). +- Injects relevant context back into the agent at the start of each new session or tool call. +- Runs a nightly consolidation pipeline that distils session observations into long-lived *memories*. + +**What it is NOT:** + +- Not a vector database — it manages its own BM25 + vector indexes in-process. +- Not a cloud service — all state is local SQLite (via iii-engine), with optional team sync. +- Not a proxy — it hooks into agent lifecycle events, it does not intercept LLM calls. + +--- + +## 2. Component Map + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ AI CODING AGENT │ +│ (Claude Code / Cursor / GitHub Copilot / etc.) │ +└────────────┬────────────────────────────────────────────────────────────┘ + │ Lifecycle hooks (stdin/stdout JSON, fire-and-forget HTTP) + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ HOOKS LAYER (src/hooks/) │ +│ session-start · pre-tool-use · post-tool-use · post-tool-failure │ +│ stop · session-end · pre-compact · prompt-submit · notification │ +│ post-commit · subagent-start · subagent-stop │ +└────────────┬────────────────────────────────────────────────────────────┘ + │ HTTP (fire-and-forget fetch, 800–1500 ms timeout) + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ TRANSPORT LAYER (src/mcp/ + src/triggers/) │ +│ │ +│ REST API (128 endpoints) MCP Server (53 tools) │ +│ src/triggers/api.ts src/mcp/server.ts │ +│ Port: AGENTMEMORY_PORT src/mcp/tools-registry.ts │ +│ src/mcp/transport.ts │ +└────────────┬────────────────────────────────────────────────────────────┘ + │ iii-sdk trigger() calls + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ BUSINESS LOGIC LAYER (src/functions/) │ +│ │ +│ Observation: observe · compress · synthetic-compress │ +│ Search: search · smart-search · query-expansion · sliding-window│ +│ Memory: remember · patterns · lessons · crystallize │ +│ Consolidation: consolidate · consolidation-pipeline · reflect │ +│ Graph: graph · graph-retrieval · graph-extract · relations │ +│ Orchestration: actions · frontier · leases · routines · signals │ +│ checkpoints · mesh · sketches │ +│ Management: export-import · audit · snapshots · governance │ +└────────────┬────────────────────────────────────────────────────────────┘ + │ + ┌───────┴──────────┐ + ▼ ▼ +┌─────────────┐ ┌─────────────────────────────────────────────────────┐ +│ LLM LAYER │ │ STORAGE LAYER │ +│(src/providers│ │ (src/state/) │ +│ │ │ │ +│ anthropic │ │ StateKV (iii-engine SQLite) ←─── schema.ts │ +│ openai │ │ BM25 Index (in-memory) ←─── search-index.ts │ +│ gemini │ │ Vector Index (in-memory) ←─── vector-index.ts │ +│ openrouter │ │ HybridSearch ←─── hybrid-search.ts│ +│ minimax │ │ Reranker ←─── reranker.ts │ +│ agent-sdk │ │ IndexPersistence (disk) ←─── index-persist.. │ +│ noop │ └─────────────────────────────────────────────────────┘ +└─────────────┘ + + Background Timers (src/index.ts) + ├─ Auto-forget sweep (hourly) + ├─ Lesson decay (daily) + ├─ Consolidation pipeline (every 2h) + └─ Recent-search TTL sweep (hourly) + + Viewer Server (src/viewer/) + └─ Web UI on port N+2 (default 3113) +``` + +--- + +## 3. End-to-End Flow + +### 3.1 Session Lifecycle + +``` +AGENT STARTS + │ + ▼ +[Hook: session-start.ts] + POST /agentmemory/session/start + Payload: { cwd, project, agentId, hookEventId } + │ + ▼ +[Function: mem::session-start] + 1. Create Session record → mem:sessions + 2. IF AGENTMEMORY_INJECT_CONTEXT=true: + a. Run mem::smart-search for project context + b. Fetch recent session summaries + c. Build system-prompt injection string + 3. Return context to hook (written to stdout) + 4. Agent receives context via stdin + │ + ▼ + ... agent runs tools ... + │ + ▼ +[Hook: stop.ts OR session-end.ts] + POST /agentmemory/session/end + Payload: { sessionId, reason } + │ + ▼ +[Function: mem::session-end] + 1. Mark Session as "completed" → mem:sessions + 2. IF AGENTMEMORY_REFLECT=true: + Run mem::reflect → synthesize insights + 3. IF CONSOLIDATION_ENABLED=true: + Run mem::consolidation-pipeline → create Memories + 4. IF GRAPH_EXTRACTION_ENABLED=true: + Run mem::graph-extract → update graph nodes/edges + 5. Persist BM25 + vector indexes to disk + +AGENT STOPS +``` + +--- + +### 3.2 Observation Pipeline + +This is the hot path — runs after every tool call. + +``` +AGENT EXECUTES A TOOL + │ + ▼ +[Hook: post-tool-use.ts] (or post-tool-failure.ts) + POST /agentmemory/observe + Payload: { + sessionId, hookType, toolName, + toolInput, toolOutput, + durationMs, exitCode + } + │ + ▼ +[Function: mem::observe] + 1. Deduplication check (DedupMap on toolInput hash + sessionId) + └─ Discard if duplicate within 30s window + 2. Create RawObservation → mem:obs:{sessionId} + 3. Increment session.observationCount + │ + ▼ (if AGENTMEMORY_AUTO_COMPRESS=true) +[Function: mem::compress] + 1. IF toolOutput.length > COMPRESS_THRESHOLD: + a. Build prompt from raw observation + b. Call LLM provider (anthropic / openai / ...) + c. Parse response into CompressedObservation: + { title, facts[], narrative, concepts[], files[], importance } + 2. ELSE (small output): + Run mem::synthetic-compress (regex + heuristics, no LLM) + 3. Replace RawObservation with CompressedObservation + │ + ├─── IF embedding provider configured: + │ [mem::embed] + │ 1. Call provider.embed(narrative + facts) + │ 2. Store Float32Array → mem:embeddings:{obsId} + │ 3. Update VectorIndex (in-memory) + │ + ├─── Update BM25 index (always) + │ SearchIndex.add(obsId, tokens) + │ + └─── Broadcast via WebSocket (viewer + MCP stream subscribers) + +OBSERVATION STORED & INDEXED +``` + +**Deduplication strategy:** +- Hash = SHA256(toolName + JSON.stringify(toolInput)) +- DedupMap holds `hash → timestamp` with 30s TTL +- Prevents identical tool re-runs (e.g. repeated `cat file.ts`) from flooding storage + +**Synthetic compression** (no LLM, ~1ms): +- Reads: extracts file path, line count +- Writes: extracts file path, diff stats +- Bash: extracts command, exit code, first 200 chars of output +- Importance heuristic: error keywords → 0.9, success → 0.5 + +--- + +### 3.3 Memory Retrieval Pipeline + +Triggered by: MCP tool `memory_recall`, MCP tool `memory_smart_search`, or pre-tool-use hook. + +``` +AGENT INVOKES MEMORY TOOL + │ + ▼ +[Function: mem::smart-search] + Input: { query, sessionId?, limit?, tokenBudget? } + + STEP 1 — Query Expansion (optional, 1 LLM call) + IF query mentions time ("recently", "last week"): + Rewrite with absolute timestamps + IF query is vague ("the button"): + Expand to related terms ("CSS button", "onClick handler") + IF expansion fails → use original query + + STEP 2 — Triple-Stream Search + ┌──────────────────────────────────────────────────────┐ + │ Stream A: BM25 Search │ + │ ├─ Tokenize + stem query │ + │ ├─ Lookup inverted index │ + │ └─ TF-IDF ranking → top-100 candidates │ + │ │ + │ Stream B: Vector Search (if embeddings enabled) │ + │ ├─ Embed query → Float32Array │ + │ ├─ Cosine similarity against all stored vectors │ + │ └─ Top-100 by similarity score │ + │ │ + │ Stream C: Graph Search │ + │ ├─ Extract entities from query (regex + NER) │ + │ ├─ Lookup graph nodes by name │ + │ ├─ Traverse edges (1-hop BFS) │ + │ └─ Degree-weighted ranking → top-100 │ + └──────────────────────────────────────────────────────┘ + + STEP 3 — Reciprocal Rank Fusion (RRF) + score(doc) = Σ 1/(k + rank_in_stream) k=60 + Merge all three streams, deduplicate by id + → Unified ranked list + + STEP 4 — Re-ranking (optional, if RERANK_ENABLED=true) + ├─ Cross-encoder model scores (query, passage) pairs + └─ Final score = 0.7 * rerank + 0.3 * rrf + + STEP 5 — Sliding-Window Context + IF results span multiple sessions: + Add ±N surrounding observations for context continuity + + STEP 6 — Token Budget Trimming + While total_tokens > TOKEN_BUDGET: + Drop lowest-score result + Return remaining results + + │ + ▼ +RESULTS RETURNED AS STRUCTURED CONTEXT +(title, facts, narrative, files, importance, session, timestamp) +``` + +--- + +### 3.4 Memory Consolidation Pipeline + +Runs on session end (if enabled) and on the 2-hour background timer. + +``` +[Function: mem::consolidation-pipeline] + Input: completed session observations + + PHASE 1 — Grouping + ├─ Load all CompressedObservations for session + ├─ Group by: file path, concept cluster, time window + └─ Filter: importance >= CONSOLIDATION_THRESHOLD (default 0.4) + + PHASE 2 — Per-Group Summarization (LLM) + FOR each group: + Prompt: "Synthesize these N observations into a long-term memory" + Extract: Memory { title, content, type, strength } + Types: pattern | preference | architecture | bug | workflow | fact + + PHASE 3 — Deduplication Against Existing Memories + ├─ Search existing memories (hybrid search) + ├─ IF similarity > 0.85: merge (update existing) + └─ IF similarity < 0.85: insert new Memory → mem:memories + + PHASE 4 — Crystallization (optional, AGENTMEMORY_CRYSTALLIZE=true) + [Function: mem::crystallize] + ├─ Scan all memories for reinforcement patterns + ├─ Memories appearing 3+ times → increase strength + └─ Weak memories (strength < 0.2) → schedule for deletion + + PHASE 5 — Graph Update (optional, GRAPH_EXTRACTION_ENABLED=true) + [Function: mem::graph-extract] + ├─ Extract entities (files, functions, concepts, people) + ├─ Extract relations (uses, modifies, imports, fixes) + ├─ Upsert GraphNode → mem:graph:nodes + └─ Upsert GraphEdge → mem:graph:edges (with frequency bump) +``` + +--- + +## 4. Component Deep-Dives + +### 4.1 Hooks Layer + +**Location:** `src/hooks/` +**Runtime:** Standalone Node.js scripts, no iii-sdk dependency. +**Contract:** Read JSON from `stdin`, POST to REST API, optionally write context to `stdout`, exit within timeout. + +| Hook | Trigger | Direction | Key Action | +|------|---------|-----------|------------| +| `session-start.ts` | Agent starts | Sync (reads stdout) | Create session, inject stored context | +| `pre-tool-use.ts` | Before each tool | Sync (reads stdout) | Inject relevant memories into prompt | +| `post-tool-use.ts` | After each tool | Fire-and-forget | Capture observation (observe + compress) | +| `post-tool-failure.ts` | Tool error | Fire-and-forget | Capture failure observation | +| `pre-compact.ts` | Before Claude context compress | Sync (reads stdout) | Inject summary to preserve across compression | +| `prompt-submit.ts` | User submits a prompt | Fire-and-forget | Telemetry + optional recall | +| `stop.ts` | Agent session ends | Fire-and-forget | Trigger session-end pipeline | +| `session-end.ts` | Full session end | Fire-and-forget | Consolidation + graph extraction | +| `notification.ts` | Agent sends notification | Fire-and-forget | Log notification event | +| `post-commit.ts` | Git commit made | Fire-and-forget | Capture commit context as observation | +| `subagent-start.ts` | Subagent spawned | Fire-and-forget | Register child agent in mesh | +| `subagent-stop.ts` | Subagent completes | Fire-and-forget | Merge subagent memories into parent | + +**Timeout budget:** Hooks that return context to stdout (session-start, pre-tool-use, pre-compact) have a hard 1500ms timeout. Fire-and-forget hooks use 800ms. Both use `AbortSignal` on the fetch call. + +--- + +### 4.2 REST / MCP Transport Layer + +**REST API** — `src/triggers/api.ts` +- 128 endpoints registered via iii-engine's trigger system +- Auth: Bearer token (`AGENTMEMORY_SECRET`), timing-safe `crypto.timingSafeEqual` comparison +- No framework (Express/Fastify) — direct iii-engine HTTP handlers + +Key endpoint groups: + +| Group | Prefix | Endpoints | +|-------|--------|-----------| +| Session | `/session/` | start, end, list, get | +| Observations | `/observe`, `/observations/` | create, list, get, delete | +| Search | `/search`, `/smart-search` | keyword, hybrid, graph query | +| Memories | `/memories/` | list, get, save, delete | +| Patterns | `/patterns` | extract, list | +| Management | `/health`, `/export`, `/import`, `/cleanup` | admin ops | + +**MCP Server** — `src/mcp/` +- 53 tools registered in `tools-registry.ts` (8 core + 45 advanced) +- Transport: WebSocket (`src/mcp/transport.ts`) + optional REST proxy (`src/mcp/rest-proxy.ts`) +- Standalone MCP shim mode: `agentmemory mcp` (no iii-engine needed) + +Core MCP tools: + +| Tool | Purpose | +|------|---------| +| `memory_recall` | Primary retrieval — hybrid search with context | +| `memory_save` | Explicitly save a memory | +| `memory_patterns` | Surface recurring patterns across sessions | +| `memory_sessions` | Browse session history | +| `memory_file_history` | All operations on a specific file | +| `memory_smart_search` | Progressive-disclosure search with expansion | +| `memory_compress_file` | Summarize a file's history | +| `memory_vision_search` | Search by screenshot/image description | + +--- + +### 4.3 Business Logic (Functions) + +**Location:** `src/functions/` — 67 TypeScript files, each a pure function registered on the iii-engine bus. + +Function naming convention: `mem::{name}` (internal), `api::{name}` (REST handlers), `mcp::{name}` (MCP handlers), `middleware::{name}` (cross-cutting). + +**Observation functions:** + +| Function | Input | Output | LLM? | +|----------|-------|--------|------| +| `mem::observe` | Raw hook payload | `RawObservation` stored | No | +| `mem::compress` | `RawObservation` | `CompressedObservation` stored | Yes (optional) | +| `mem::synthetic-compress` | `RawObservation` | `CompressedObservation` stored | No | + +**Search functions:** + +| Function | Input | Output | LLM? | +|----------|-------|--------|------| +| `mem::search` | Query string + filters | `SearchResult[]` | No | +| `mem::smart-search` | Query + context | `SearchResult[]` + narrative | Yes (expansion) | +| `mem::query-expansion` | Raw query | Expanded queries | Yes | +| `mem::sliding-window` | Result list | Result list + context neighbors | No | + +**Memory management functions:** + +| Function | Input | Output | LLM? | +|----------|-------|--------|------| +| `mem::remember` | Session observations | `Memory[]` saved | Yes | +| `mem::consolidate` | Observation group | `Memory` | Yes | +| `mem::consolidation-pipeline` | Session ID | Memories created | Yes | +| `mem::crystallize` | All memories | Strength updates | Yes | +| `mem::reflect` | Session observations | Insight summary | Yes | +| `mem::patterns` | Query | Pattern list | Yes | + +**Graph functions:** + +| Function | Input | Output | LLM? | +|----------|-------|--------|------| +| `mem::graph-extract` | Observations | `GraphNode[]` + `GraphEdge[]` | Yes | +| `mem::graph-retrieval` | Entity names | Subgraph | No | +| `mem::relations` | Node ID | Related nodes + edges | No | + +--- + +### 4.4 Storage Layer (StateKV) + +**Location:** `src/state/` +**Engine:** iii-engine `StateModule` — file-based SQLite at `~/.agentmemory/state_store.db` + +All reads/writes go through `src/state/kv.ts` (`StateKV` wrapper): +```typescript +kv.get(scope: string, key: string): Promise +kv.set(scope: string, key: string, value: T): Promise +kv.list(scope: string, prefix?: string): Promise +kv.delete(scope: string, key: string): Promise +``` + +**Storage scopes** (from `src/state/schema.ts`): + +| Scope Key | Contents | Access Pattern | +|-----------|----------|----------------| +| `mem:sessions` | `Session[]` | list by status, get by id | +| `mem:obs:{sessionId}` | `RawObservation | CompressedObservation[]` | list all for session | +| `mem:memories` | `Memory[]` | list all, get by id | +| `mem:embeddings:{obsId}` | `Float32Array` serialized | get by obs id | +| `mem:index:bm25` | BM25 inverted index snapshot | single key, loaded on boot | +| `mem:graph:nodes` | `GraphNode[]` | list all | +| `mem:graph:edges` | `GraphEdge[]` | list all | +| `mem:graph:name-index` | `{ name → nodeId }` | get by entity name | +| `mem:audit` | `AuditEntry[]` | append-only log | +| `mem:leases` | `Lease[]` | distributed lock primitives | +| `mem:routines` | `Routine[]` | recurring task definitions | +| `mem:signals` | `Signal[]` | inter-agent signals | +| `mem:checkpoints` | `Checkpoint[]` | save/restore points | +| `mem:slots` | `Slot[]` | pinned editable memory slots | +| `mem:global-slots` | `Slot[]` | cross-session global slots | +| `mem:team:{teamId}:shared` | `SharedMemory[]` | team memory pool | + +**Concurrency safety:** All writes use keyed mutexes (`Map`). Same-key writes queue up; different-key writes run in parallel. + +**Index Persistence** (`src/state/index-persistence.ts`): +- BM25 and vector indexes live in-memory during runtime +- Debounced flush to disk every 5 seconds (configurable) +- On boot: load from disk, validate embedding dimensions, backfill any missing entries +- On SIGINT/SIGTERM: synchronous flush before exit + +--- + +### 4.5 Search Architecture + +**Location:** `src/state/hybrid-search.ts`, `search-index.ts`, `vector-index.ts`, `reranker.ts` + +``` + ┌──────────────────────┐ + │ HybridSearch.query │ + └──────────┬───────────┘ + │ + ┌──────────────────┼──────────────────┐ + ▼ ▼ ▼ + ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ + │ BM25 Search │ │ Vector Search │ │ Graph Search │ + │ │ │ │ │ │ + │ SearchIndex │ │ VectorIndex │ │ GraphIndex │ + │ (inverted) │ │ (Float32Array │ │ (adjacency │ + │ │ │ cosine sim) │ │ list BFS) │ + └───────┬───────┘ └───────┬───────┘ └───────┬───────┘ + │ │ │ + └──────────────────┼──────────────────┘ + │ + ┌───────────▼──────────┐ + │ RRF Score Fusion │ + │ score = Σ 1/(60+r) │ + └───────────┬──────────┘ + │ + ┌───────────▼──────────┐ + │ Reranker (optional) │ + │ cross-encoder model │ + └───────────┬──────────┘ + │ + ┌───────────▼──────────┐ + │ Token Budget Trim │ + │ top-K by score │ + └──────────────────────┘ +``` + +**BM25 Details:** +- Tokenizer: whitespace split + lowercase + stemmer +- IDF weighting: `log((N - df + 0.5) / (df + 0.5) + 1)` +- TF saturation: `k1=1.2, b=0.75` +- Index size: grows linearly with observations (fully in-memory) + +**Vector Details:** +- Dimensions: 768 (jina/voyage), 1536 (openai), 384 (local) +- Similarity: cosine (dot product of normalized vectors) +- Storage: `Float32Array` per observation, loaded fully into RAM +- Embedding providers: Jina AI, Voyage AI, OpenAI, local ONNX + +**Graph Details:** +- Nodes: files, functions, concepts, people, errors +- Edges: `uses | modifies | imports | fixes | relates_to | caused_by` +- BFS traversal: max 2 hops, degree-weighted scoring +- Node score = `log(1 + occurrences) * edge_weight_sum` + +**Reciprocal Rank Fusion:** +``` +Combined score = Σ_stream 1 / (60 + rank_in_stream) +``` +RRF is robust to score scale differences — BM25, vector, and graph scores are not on the same scale, but ranks are. + +--- + +### 4.6 LLM Provider Abstraction + +**Location:** `src/providers/` + +All LLM calls go through the `MemoryProvider` interface: +```typescript +interface MemoryProvider { + complete(prompt: string, options?: CompletionOptions): Promise + stream(prompt: string, options?: CompletionOptions): AsyncGenerator + embed(text: string): Promise + embedBatch(texts: string[]): Promise + describeImage(base64: string, prompt: string): Promise +} +``` + +**Provider chain:** + +``` +ResilientProvider (circuit-breaker wrapper) + └─ FallbackChainProvider (try A → B → C → noop) + ├─ AnthropicProvider (claude-haiku-4-5 / sonnet-4 by default) + ├─ OpenAIProvider (gpt-4o-mini / gpt-4o) + ├─ GeminiProvider (gemini-flash-2.0) + ├─ OpenRouterProvider (any model via openrouter.ai) + ├─ MinimaxProvider (minimax-text-01) + ├─ AgentSDKProvider (uses ambient agent's LLM) + └─ NoopProvider (returns empty, no LLM needed) +``` + +**Circuit Breaker:** +- Opens after 3 consecutive failures within 60s +- Half-open probe after 30s backoff +- Fail-open: falls through to next provider in chain, never blocks the observation pipeline + +**Model selection by task:** +| Task | Preferred Model | Rationale | +|------|----------------|-----------| +| Compression | `claude-haiku-4-5` | Fast, cheap, good at summarization | +| Consolidation | `claude-sonnet-4-6` | Better reasoning for synthesis | +| Graph extraction | `claude-haiku-4-5` | Entity/relation extraction is structured | +| Smart search expansion | `claude-haiku-4-5` | Query rewrite is a simple task | +| Reflection | `claude-sonnet-4-6` | Nuanced insight generation | + +--- + +### 4.7 Knowledge Graph + +**Location:** `src/functions/graph.ts`, `graph-retrieval.ts`, `graph-extract.ts` + +The knowledge graph is an **entity-relationship graph** extracted from session observations. It answers queries like "what files does `AuthService` touch?" or "what functions does `useAuth` call?" + +**Schema:** +```typescript +GraphNode { + id: string // SHA256 of (type + name) + type: "file" | "function" | "concept" | "person" | "error" | "package" + name: string // e.g. "src/auth/service.ts", "getUserById", "CORS" + occurrences: number // how many times seen across sessions + lastSeen: string // ISO timestamp + metadata: Record +} + +GraphEdge { + id: string // SHA256 of (source + target + type) + source: string // GraphNode.id + target: string // GraphNode.id + type: "uses" | "modifies" | "imports" | "fixes" | "relates_to" | "caused_by" + weight: number // 0–1, decays over time + frequency: number // raw occurrence count + lastSeen: string +} +``` + +**Extraction flow (per session end):** +1. Concatenate all compressed narratives + facts for the session +2. LLM prompt: extract entities and relations as JSON +3. Upsert nodes (increment `occurrences`, update `lastSeen`) +4. Upsert edges (increment `frequency`, recalculate `weight`) +5. Rebuild `name-index` for O(1) entity lookup + +**Graph search integration:** +- Query entities extracted via regex + optional NER +- Node lookup: `name-index[entity]` → `nodeId` +- BFS from matching nodes, accumulate neighbor node IDs +- Fetch CompressedObservations that mention those node IDs +- Score by degree: `log(1 + node.occurrences) * Σ(edge.weight)` + +--- + +### 4.8 Orchestration Primitives + +These are the system-level building blocks for multi-session and multi-agent workflows. + +**Leases** (`src/functions/leases.ts`) — distributed locks +``` +Lease { id, resource, holder, expiresAt, renewedAt } +Usage: prevent concurrent consolidation runs on same session +``` + +**Routines** (`src/functions/routines.ts`) — recurring tasks +``` +Routine { id, name, intervalMs, lastRunAt, enabled, fn } +Registered routines: auto-forget, lesson-decay, consolidation, search-TTL-sweep +``` + +**Signals** (`src/functions/signals.ts`) — inter-agent messages +``` +Signal { id, from, to, type, payload, sentAt, readAt } +Usage: subagent → parent communication, team coordination +``` + +**Checkpoints** (`src/functions/checkpoints.ts`) — save/restore state +``` +Checkpoint { id, sessionId, name, state, createdAt } +Usage: long-running tasks that can be paused and resumed +``` + +**Mesh** (`src/functions/mesh.ts`) — multi-agent memory sharing +``` +Mesh { agents: AgentRef[], syncStrategy: "broadcast" | "on-demand" } +Usage: multiple Claude Code instances sharing one memory pool +``` + +**Sketches** (`src/functions/sketches.ts`) — draft memories +``` +Sketch { id, content, confidence, promotedAt } +Usage: tentative memory fragments that crystallize once reinforced +``` + +**Frontier** (`src/functions/frontier.ts`) — work queue +``` +FrontierItem { id, type, priority, payload, claimedAt } +Usage: background processing queue for consolidation tasks +``` + +--- + +## 5. Data Model + +### Session +```typescript +interface Session { + id: string // UUID v4 + project: string // cwd-derived project identifier + cwd: string // working directory + agentId?: string // agent identifier (for multi-agent) + startedAt: string // ISO timestamp + endedAt?: string // ISO timestamp (when complete) + status: "active" | "completed" | "abandoned" + observationCount: number // running total + summary?: string // LLM-generated session summary +} +``` + +### RawObservation +```typescript +interface RawObservation { + id: string // UUID v4 + sessionId: string + timestamp: string // ISO + hookType: "post-tool-use" | "post-tool-failure" | "post-commit" | ... + toolName: string // e.g. "Read", "Bash", "Edit" + toolInput: Record // raw tool arguments + toolOutput: string // raw tool result + durationMs?: number + exitCode?: number + raw: string // full JSON payload from hook + compressed: false +} +``` + +### CompressedObservation +```typescript +interface CompressedObservation { + id: string // same as RawObservation.id + sessionId: string + timestamp: string + hookType: string + toolName: string + type: "code-read" | "code-write" | "bash" | "error" | "search" | "other" + title: string // one-line summary + facts: string[] // bullet-point facts + narrative: string // prose summary (searchable) + concepts: string[] // extracted topics/keywords + files: string[] // file paths mentioned + importance: number // 0.0–1.0 + compressedBy: "llm" | "synthetic" + compressed: true +} +``` + +### Memory +```typescript +interface Memory { + id: string // UUID v4 + type: "pattern" | "preference" | "architecture" | "bug" | "workflow" | "fact" + title: string + content: string // full memory prose + sessionIds: string[] // source sessions + strength: number // 0.0–1.0, decays over time + isLatest: boolean // false if superseded + forgetAfter?: string // ISO timestamp for TTL + createdAt: string + updatedAt: string + tags: string[] +} +``` + +### GraphNode / GraphEdge +```typescript +// See Section 4.7 above +``` + +--- + +## 6. Configuration & Feature Flags + +### Required environment variables + +| Variable | Purpose | Default | +|----------|---------|---------| +| (one of) `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY` | LLM provider | — | + +### Optional environment variables + +| Variable | Purpose | Default | +|----------|---------|---------| +| `AGENTMEMORY_URL` | Server URL (hooks use this) | `http://localhost:3111` | +| `AGENTMEMORY_SECRET` | Bearer auth token | — (auth disabled) | +| `AGENTMEMORY_PORT` | HTTP server port | `3111` | +| `DATA_DIR` | Storage root | `~/.agentmemory` | +| `EMBEDDING_PROVIDER` | `jina \| voyage \| openai \| local` | `none` | +| `TOKEN_BUDGET` | Max tokens in recall response | `4096` | + +### Feature flags + +| Flag | Enables | Cost | +|------|---------|------| +| `AGENTMEMORY_AUTO_COMPRESS=true` | LLM compression of observations | ~1 LLM call / tool call | +| `AGENTMEMORY_INJECT_CONTEXT=true` | Inject memories at session start | ~1 search / session | +| `CONSOLIDATION_ENABLED=true` | Run consolidation pipeline on session end | ~N LLM calls / session | +| `GRAPH_EXTRACTION_ENABLED=true` | Extract knowledge graph from sessions | ~1 LLM call / session | +| `AGENTMEMORY_REFLECT=true` | Generate insight reflection on session end | ~1 LLM call / session | +| `AGENTMEMORY_CRYSTALLIZE=true` | Reinforce and prune memories over time | ~1 LLM call / day | +| `AGENTMEMORY_SLOTS=true` | Enable pinned editable memory slots | No LLM cost | +| `RERANK_ENABLED=true` | Cross-encoder reranking on search | ~1 model call / search | + +**Minimal setup (no LLM compression, keyword search only):** +```env +ANTHROPIC_API_KEY=sk-ant-... +AGENTMEMORY_AUTO_COMPRESS=false +CONSOLIDATION_ENABLED=false +GRAPH_EXTRACTION_ENABLED=false +``` + +**Full setup (all features):** +```env +ANTHROPIC_API_KEY=sk-ant-... +EMBEDDING_PROVIDER=jina +JINA_API_KEY=jina_... +AGENTMEMORY_AUTO_COMPRESS=true +CONSOLIDATION_ENABLED=true +GRAPH_EXTRACTION_ENABLED=true +AGENTMEMORY_INJECT_CONTEXT=true +AGENTMEMORY_REFLECT=true +AGENTMEMORY_CRYSTALLIZE=true +AGENTMEMORY_SLOTS=true +RERANK_ENABLED=true +``` + +--- + +## 7. Multi-Agent & Team Memory + +### Subagent isolation + +When Claude Code spawns a subagent (via `Agent` tool), agentmemory handles it transparently: + +``` +Parent Agent Subagent + │ │ + ├─ [subagent-start hook] │ + │ Register subagent in mesh │ + │ Share relevant context → ─┤ + │ │ + │ ... runs ... │ + │ │ + ├─ [subagent-stop hook] │ + │ Merge subagent memories ←─┤ + └─ into parent session │ +``` + +Each subagent gets its own `Session` with `agentId` set, linked to the parent via `parentSessionId`. + +### Team memory + +When `TEAM_ID` and `TEAM_MODE` are set, memories can be shared across agents on the same team: + +``` +Agent A (writes) Agent B (reads) + │ │ + ▼ ▼ +mem:team:{teamId}:shared ←→ mem:team:{teamId}:shared +``` + +Sync modes: +- `broadcast`: writes are immediately visible to all team members +- `on-demand`: explicit `memory_team_share` MCP call required + +--- + +## 8. Security Model + +| Concern | Mechanism | +|---------|-----------| +| Authentication | Bearer token in `Authorization: Bearer {AGENTMEMORY_SECRET}` header | +| Token comparison | `crypto.timingSafeEqual` (prevents timing attacks) | +| Hook input validation | Zod schema validation on all hook payloads | +| Content Security Policy | Viewer server sets strict CSP headers | +| Data isolation | All storage is local; no external calls except configured LLM providers | +| Secret scrubbing | Tool outputs are scanned for common secret patterns before storage | +| Abort signals | All outbound fetch() calls have AbortSignal (800–1500ms) | + +--- + +## 9. Observability + +### Health endpoint + +`GET /health` returns: +```json +{ + "status": "ok", + "version": "0.9.27", + "sessions": { "active": 2, "completed": 1247 }, + "observations": { "total": 48210, "indexed": 48210 }, + "memories": { "total": 312 }, + "graph": { "nodes": 892, "edges": 2341 }, + "provider": "anthropic", + "embedding": "jina", + "uptime": 86400 +} +``` + +### Audit log + +Every state-changing operation appends to `mem:audit`: +```typescript +AuditEntry { ts, op, scope, key, actor, sessionId } +``` +Query via `memory_audit` MCP tool or `GET /audit`. + +### OpenTelemetry metrics (`src/telemetry/`) + +| Metric | Type | Labels | +|--------|------|--------| +| `agentmemory.observations.total` | Counter | hookType, sessionId | +| `agentmemory.compress.duration_ms` | Histogram | compressedBy, toolName | +| `agentmemory.search.duration_ms` | Histogram | stream (bm25, vector, graph) | +| `agentmemory.llm.calls.total` | Counter | provider, task | +| `agentmemory.llm.errors.total` | Counter | provider, errorType | + +### Web viewer (`src/viewer/`) + +Visual memory browser running on port `AGENTMEMORY_PORT + 2` (default: 3113): +- Session list with observation counts and timelines +- Memory browser with search +- Graph visualization +- Slot editor +- Live observation stream (WebSocket) + +--- + +## 10. Deployment Topology + +### Local (default) + +``` +Agent (Claude Code) + ↓ hooks (HTTP to localhost:3111) +agentmemory worker (Node.js process) + ↓ reads/writes +~/.agentmemory/state_store.db (SQLite) +~/.agentmemory/bm25.json (BM25 index snapshot) +~/.agentmemory/vectors.json (Vector index snapshot) +``` + +Start: `agentmemory start` (or `npx agentmemory start`) + +### Docker + +```yaml +# docker-compose.yml +services: + agentmemory: + image: agentmemory:latest + ports: + - "3111:3111" # REST API + MCP + - "3113:3113" # Web viewer + volumes: + - ~/.agentmemory:/data + env_file: .env +``` + +### Multi-machine (team mode) + +``` +Developer A (machine 1) Developer B (machine 2) + agentmemory worker agentmemory worker + │ │ + └─────── TEAM_ID sync ──────────┘ + (via shared AGENTMEMORY_URL + pointing to a central instance) +``` + +One team member runs a central agentmemory instance. All others point their `AGENTMEMORY_URL` at it. The `mem:team:{teamId}:shared` scope is the shared pool. + +### MCP standalone mode + +When embedding into an MCP-capable host that manages the server lifecycle: + +``` +Claude.app / Cursor + │ MCP protocol (stdio or SSE) +agentmemory MCP shim (agentmemory mcp) + │ HTTP +agentmemory worker (separate process) +``` + +```json +// .claude/mcp_servers.json +{ + "agentmemory": { + "command": "agentmemory", + "args": ["mcp"], + "env": { "AGENTMEMORY_URL": "http://localhost:3111" } + } +} +``` + +--- + +*Document generated from codebase analysis of agentmemory v0.9.27.* +*Source: `src/index.ts`, `src/types.ts`, `src/state/schema.ts`, `src/hooks/`, `src/functions/`, `src/mcp/`, `src/providers/`* diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml new file mode 100644 index 000000000..db936feb7 --- /dev/null +++ b/pnpm-workspace.yaml @@ -0,0 +1,5 @@ +allowBuilds: + esbuild: true + onnxruntime-node: true + protobufjs: true + sharp: true diff --git a/src/functions/compress.ts b/src/functions/compress.ts index 0569555e0..754576ff3 100644 --- a/src/functions/compress.ts +++ b/src/functions/compress.ts @@ -16,6 +16,7 @@ import { import { VISION_DESCRIPTION_PROMPT } from "../prompts/vision.js"; import { getXmlTag, getXmlChildren } from "../prompts/xml.js"; import { getSearchIndex, vectorIndexAddGuarded } from "./search.js"; +import { scrubRecord } from "./privacy.js"; import { CompressOutputSchema } from "../eval/schemas.js"; import { validateOutput } from "../eval/validator.js"; import { scoreCompression } from "../eval/quality.js"; @@ -156,11 +157,16 @@ export function registerCompressFunction( const qualityScore = scoreCompression(parsed); + // The raw input was scrubbed at capture, but the LLM can echo a + // secret it saw elsewhere in its context into the summary — scrub + // the model output too before it is persisted and indexed. + const scrubbedParsed = scrubRecord(parsed); + const compressed: CompressedObservation = { id: data.observationId, sessionId: data.sessionId, timestamp: data.raw.timestamp, - ...parsed, + ...scrubbedParsed, confidence: qualityScore / 100, ...(hasImage ? { modality: data.raw.modality } : {}), ...(imageDescription ? { imageDescription } : {}), diff --git a/src/functions/export-import.ts b/src/functions/export-import.ts index 327117b26..2bb52ec60 100644 --- a/src/functions/export-import.ts +++ b/src/functions/export-import.ts @@ -29,6 +29,7 @@ import { KV } from "../state/schema.js"; import { StateKV } from "../state/kv.js"; import { VERSION } from "../version.js"; import { recordAudit } from "./audit.js"; +import { scrubRecord } from "./privacy.js"; import { logger } from "../logger.js"; export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void { @@ -263,6 +264,30 @@ export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void { skipped: 0, }; + // Imports arrive from outside the observe pipeline (hand-edited dumps, + // exports from machines running older pattern lists), so re-scrub the + // content-bearing collections before any row is written. Collections + // keyed only by ids/numbers are skipped — they carry no free text. + for (const key of [ + "sessions", + "memories", + "summaries", + "lessons", + "insights", + "semanticMemories", + "crystals", + "sketches", + ] as const) { + const collection = importData[key]; + if (Array.isArray(collection)) { + (importData as unknown as Record)[key] = + scrubRecord(collection); + } + } + for (const [sessionId, obs] of Object.entries(importData.observations)) { + importData.observations[sessionId] = scrubRecord(obs); + } + if (strategy === "replace") { const existing = await kv.list(KV.sessions); for (const session of existing) { diff --git a/src/functions/lessons.ts b/src/functions/lessons.ts index 9e69f464f..115f4df4c 100644 --- a/src/functions/lessons.ts +++ b/src/functions/lessons.ts @@ -3,6 +3,7 @@ import type { StateKV } from "../state/kv.js"; import { KV, fingerprintId } from "../state/schema.js"; import type { Lesson } from "../types.js"; import { recordAudit } from "./audit.js"; +import { stripPrivateData } from "./privacy.js"; function reinforceLesson(lesson: Lesson): void { const now = new Date().toISOString(); @@ -30,6 +31,12 @@ export function registerLessonsFunctions(sdk: ISdk, kv: StateKV): void { return { success: false, error: "content is required" }; } + // Scrub before fingerprinting so the dedup key reflects the stored + // (scrubbed) form — lessons arrive from crystallize output and manual + // saves, neither of which passes through the observe pipeline. + data.content = stripPrivateData(data.content); + if (data.context) data.context = stripPrivateData(data.context); + const fp = fingerprintId("lsn", data.content.trim().toLowerCase()); const existing = await kv.get(KV.lessons, fp); diff --git a/src/functions/privacy.ts b/src/functions/privacy.ts index f28edd3e2..56e2dfe17 100644 --- a/src/functions/privacy.ts +++ b/src/functions/privacy.ts @@ -17,6 +17,8 @@ const SECRET_PATTERN_SOURCES = [ /npm_[A-Za-z0-9]{36}/g, /glpat-[A-Za-z0-9\-_]{20,}/g, /dop_v1_[A-Za-z0-9]{64}/g, + /-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY(?: BLOCK)?-----[\s\S]*?-----END (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY(?: BLOCK)?-----/g, + /(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqps?|mssql):\/\/[^\/\s:@"']+:[^@\s"']+@/gi, ]; export function stripPrivateData(input: string): string { @@ -28,6 +30,24 @@ export function stripPrivateData(input: string): string { return result; } +/** + * Apply stripPrivateData to every string in an arbitrary record, walking + * nested objects and arrays. Use this where the content shape is unknown + * (imports, team shares, parsed LLM output); for known string fields call + * stripPrivateData directly. Returns a scrubbed copy; non-string leaves are + * passed through unchanged, so structure can never be corrupted. + */ +export function scrubRecord(record: T): T { + if (typeof record === "string") return stripPrivateData(record) as T; + if (Array.isArray(record)) return record.map(scrubRecord) as T; + if (typeof record === "object" && record !== null) { + const out: Record = {}; + for (const [k, v] of Object.entries(record)) out[k] = scrubRecord(v); + return out as T; + } + return record; +} + export function registerPrivacyFunction(sdk: ISdk): void { sdk.registerFunction("mem::privacy", async (data: { input?: unknown } | undefined) => { diff --git a/src/functions/remember.ts b/src/functions/remember.ts index 5735b4f23..a346b6bb3 100644 --- a/src/functions/remember.ts +++ b/src/functions/remember.ts @@ -6,6 +6,7 @@ import { withKeyedLock } from "../state/keyed-mutex.js"; import { memoryToObservation } from "../state/memory-utils.js"; import { deleteAccessLog } from "./access-tracker.js"; import { recordAudit } from "./audit.js"; +import { stripPrivateData } from "./privacy.js"; import { getSearchIndex, vectorIndexAddGuarded, vectorIndexRemove, flushIndexSave } from "./search.js"; import { getAgentId } from "../config.js"; import { logger } from "../logger.js"; @@ -50,6 +51,11 @@ export function registerRememberFunction(sdk: ISdk, kv: StateKV): void { ? (data.type as Memory["type"]) : "fact"; + // Explicit saves bypass the observe pipeline, so they need the same + // secret scrubbing hook payloads get. Scrub before the similarity + // check so dedup compares the stored (scrubbed) form. + data.content = stripPrivateData(data.content); + const now = new Date().toISOString(); // Normalize project early so every subsequent comparison and storage // operation uses the same cleaned value. Raw data.project must not be diff --git a/src/functions/sketches.ts b/src/functions/sketches.ts index c617a2981..3ab5b5bea 100644 --- a/src/functions/sketches.ts +++ b/src/functions/sketches.ts @@ -4,6 +4,7 @@ import { KV, generateId } from "../state/schema.js"; import { withKeyedLock } from "../state/keyed-mutex.js"; import type { Action, ActionEdge, Sketch } from "../types.js"; import { safeAudit } from "./audit.js"; +import { stripPrivateData } from "./privacy.js"; export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void { sdk.registerFunction("mem::sketch-create", @@ -21,8 +22,8 @@ export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void { const expiresInMs = data.expiresInMs || 3600000; const sketch: Sketch = { id: generateId("sk"), - title: data.title.trim(), - description: (data.description || "").trim(), + title: stripPrivateData(data.title.trim()), + description: stripPrivateData((data.description || "").trim()), status: "active", actionIds: [], project: data.project, @@ -66,8 +67,8 @@ export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void { const now = new Date().toISOString(); const action: Action = { id: generateId("act"), - title: data.title.trim(), - description: (data.description || "").trim(), + title: stripPrivateData(data.title.trim()), + description: stripPrivateData((data.description || "").trim()), status: "pending", priority: Math.max(1, Math.min(10, data.priority || 5)), createdAt: now, diff --git a/src/functions/slots.ts b/src/functions/slots.ts index 47b49d496..b2b29c12d 100644 --- a/src/functions/slots.ts +++ b/src/functions/slots.ts @@ -4,6 +4,7 @@ import { KV } from "../state/schema.js"; import { StateKV } from "../state/kv.js"; import { withKeyedLock } from "../state/keyed-mutex.js"; import { recordAudit } from "./audit.js"; +import { stripPrivateData } from "./privacy.js"; import { getEnvVar } from "../config.js"; import { logger } from "../logger.js"; @@ -243,7 +244,8 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void { if (sizeLimit === null) { return { success: false, error: "sizeLimit must be an integer between 1 and 20000" }; } - const content = typeof data?.content === "string" ? data.content : ""; + const content = + typeof data?.content === "string" ? stripPrivateData(data.content) : ""; if (content.length > sizeLimit) { return { success: false, error: `content exceeds sizeLimit (${content.length} > ${sizeLimit})` }; } @@ -282,7 +284,8 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void { async (data: { label?: string; text?: string }) => { const label = validateLabel(data?.label); if (!label) return { success: false, error: "label required" }; - const text = typeof data?.text === "string" ? data.text : ""; + const text = + typeof data?.text === "string" ? stripPrivateData(data.text) : ""; if (!text) return { success: false, error: "text required" }; return withKeyedLock(`slot:${label}`, async () => { const { slot, scope } = await readSlot(kv, label); @@ -316,6 +319,7 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void { const label = validateLabel(data?.label); if (!label) return { success: false, error: "label required" }; if (typeof data?.content !== "string") return { success: false, error: "content required (string)" }; + data.content = stripPrivateData(data.content); return withKeyedLock(`slot:${label}`, async () => { const { slot, scope } = await readSlot(kv, label); if (!slot) return { success: false, error: "slot not found (use mem::slot-create first)" }; diff --git a/src/functions/team.ts b/src/functions/team.ts index a6461fe61..e6f0b0cfb 100644 --- a/src/functions/team.ts +++ b/src/functions/team.ts @@ -8,6 +8,7 @@ import type { import { KV, generateId } from "../state/schema.js"; import type { StateKV } from "../state/kv.js"; import { recordAudit } from "./audit.js"; +import { scrubRecord } from "./privacy.js"; import { logger } from "../logger.js"; const VALID_ITEM_TYPES = new Set(["memory", "pattern", "observation"]); @@ -50,12 +51,18 @@ export function registerTeamFunction( return { success: false, error: "Item not found" }; } + // Sharing widens the audience from one machine to the whole team. + // Re-scrub at this boundary as defense-in-depth: it catches rows + // written before a newer secret pattern existed, for the items that + // actually get shared. + const scrubbedContent = scrubRecord(content); + const shared: TeamSharedItem = { id: generateId("ts"), sharedBy: config.userId, sharedAt: new Date().toISOString(), type: data.itemType, - content, + content: scrubbedContent, project: data.project || "", visibility: "shared", }; diff --git a/test/scrubbing-bypass.test.ts b/test/scrubbing-bypass.test.ts new file mode 100644 index 000000000..ebc2fd0b2 --- /dev/null +++ b/test/scrubbing-bypass.test.ts @@ -0,0 +1,501 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; + +vi.mock("../src/logger.js", () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }, +})); + +// remember.ts touches the BM25/vector indexes after save; neither exists in +// this harness, so stub them the same way multimodal.test.ts does. +vi.mock("../src/functions/search.js", () => ({ + getSearchIndex: () => ({ add: vi.fn(), remove: vi.fn() }), + vectorIndexAddGuarded: vi.fn().mockResolvedValue(false), + vectorIndexRemove: vi.fn().mockResolvedValue(undefined), + flushIndexSave: vi.fn().mockResolvedValue(undefined), +})); + +import { stripPrivateData, scrubRecord } from "../src/functions/privacy.js"; +import { registerRememberFunction } from "../src/functions/remember.js"; +import { registerLessonsFunctions } from "../src/functions/lessons.js"; +import { registerSlotsFunctions } from "../src/functions/slots.js"; +import { registerTeamFunction } from "../src/functions/team.js"; +import { registerSketchesFunction } from "../src/functions/sketches.js"; +import { registerExportImportFunction } from "../src/functions/export-import.js"; +import { registerCompressFunction } from "../src/functions/compress.js"; +import { KV } from "../src/state/schema.js"; +import type { + Memory, + Lesson, + MemorySlot, + Sketch, + TeamSharedItem, + CompressedObservation, + ExportData, + MemoryProvider, +} from "../src/types.js"; + +// --------------------------------------------------------------------------- +// Fixtures: one representative secret per pattern family under test +// --------------------------------------------------------------------------- +const GH_TOKEN = "ghp_" + "A".repeat(36); +const ANTHROPIC_KEY = "sk-ant-abcdefghij0123456789xyz"; +const DB_URL = "postgres://admin:hunter2secret@db.internal:5432/app"; +const PEM_KEY = + "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA7bq=\n-----END RSA PRIVATE KEY-----"; + +function expectClean(text: string) { + expect(text).not.toContain(GH_TOKEN); + expect(text).not.toContain(ANTHROPIC_KEY); + expect(text).not.toContain("hunter2secret"); + expect(text).not.toContain("BEGIN RSA PRIVATE KEY"); +} + +// --------------------------------------------------------------------------- +// Shared harness (same shape as lessons.test.ts / multimodal.test.ts) +// --------------------------------------------------------------------------- +function mockKV() { + const store = new Map>(); + return { + get: async (scope: string, key: string): Promise => { + return (store.get(scope)?.get(key) as T) ?? null; + }, + set: async (scope: string, key: string, data: T): Promise => { + if (!store.has(scope)) store.set(scope, new Map()); + store.get(scope)!.set(key, data); + return data; + }, + delete: async (scope: string, key: string): Promise => { + store.get(scope)?.delete(key); + }, + list: async (scope: string): Promise => { + if (!store.has(scope)) return []; + return Array.from(store.get(scope)!.values()) as T[]; + }, + }; +} + +function mockSdk() { + const handlers = new Map Promise>(); + return { + handlers, + registerFunction: ( + idOrOpts: string | { id: string }, + cb: (data: unknown) => Promise, + ) => { + const id = typeof idOrOpts === "string" ? idOrOpts : idOrOpts.id; + handlers.set(id, cb); + }, + registerTrigger: () => {}, + trigger: async ( + input: string | { function_id: string; payload: unknown }, + data?: unknown, + ) => { + const id = typeof input === "string" ? input : input.function_id; + const payload = typeof input === "string" ? data : input.payload; + const fn = handlers.get(id); + // Side-channel triggers (streams, cascade) are out of scope here. + if (!fn) return undefined; + return fn(payload); + }, + }; +} + +type Harness = { + kv: ReturnType; + sdk: ReturnType; + call: (fn: string, data: unknown) => Promise>; +}; + +function wire(register: (sdk: never, kv: never) => void): Harness { + const kv = mockKV(); + const sdk = mockSdk(); + register(sdk as never, kv as never); + const call = async (fn: string, data: unknown) => { + const handler = sdk.handlers.get(fn); + if (!handler) throw new Error(`No handler registered: ${fn}`); + return (await handler(data)) as Record; + }; + return { kv, sdk, call }; +} + +// --------------------------------------------------------------------------- +// 1. New patterns in privacy.ts +// --------------------------------------------------------------------------- +describe("privacy.ts — new patterns and scrubRecord", () => { + it("redacts PEM private key blocks", () => { + const out = stripPrivateData(`config had\n${PEM_KEY}\nin it`); + expect(out).toContain("[REDACTED_SECRET]"); + expect(out).not.toContain("BEGIN RSA PRIVATE KEY"); + }); + + it("redacts OpenSSH private key blocks", () => { + const key = + "-----BEGIN OPENSSH PRIVATE KEY-----\nb3BlbnNzaA==\n-----END OPENSSH PRIVATE KEY-----"; + expect(stripPrivateData(key)).toBe("[REDACTED_SECRET]"); + }); + + it("redacts DB connection-string credentials", () => { + const out = stripPrivateData(`DATABASE_URL=${DB_URL}`); + expect(out).not.toContain("hunter2secret"); + // host/db suffix survives — only the credential part is removed + expect(out).toContain("db.internal:5432/app"); + }); + + it("redacts mongodb+srv and mysql URLs", () => { + expect( + stripPrivateData("mongodb+srv://user:p4ss@cluster.mongodb.net/db"), + ).not.toContain("p4ss"); + expect( + stripPrivateData("mysql://root:rootpw@localhost:3306/x"), + ).not.toContain("rootpw"); + }); + + it("does not redact credential-less URLs", () => { + const url = "postgres://db.internal:5432/app"; + expect(stripPrivateData(url)).toBe(url); + }); + + it("scrubRecord walks nested objects and arrays, preserving non-strings", () => { + const output = scrubRecord({ + title: `uses ${GH_TOKEN}`, + nested: { facts: [`db is ${DB_URL}`, "clean fact"] }, + count: 7, + flag: true, + }); + expect(output.title).toBe("uses [REDACTED_SECRET]"); + expect(output.nested.facts[0]).not.toContain("hunter2secret"); + expect(output.nested.facts[1]).toBe("clean fact"); + expect(output.count).toBe(7); + expect(output.flag).toBe(true); + }); + + it("scrubRecord leaves clean records semantically identical", () => { + const record = { a: "hello", b: [1, 2], c: null }; + expect(scrubRecord(record)).toEqual(record); + }); +}); + +// --------------------------------------------------------------------------- +// 2. mem::remember (explicit memory_save path) +// --------------------------------------------------------------------------- +describe("mem::remember scrubs explicit saves", () => { + let h: Harness; + beforeEach(() => { + h = wire(registerRememberFunction as never); + }); + + it("scrubs secrets from content before persisting", async () => { + const res = await h.call("mem::remember", { + content: `Deploy needs ${GH_TOKEN} and connects via ${DB_URL}`, + type: "workflow", + }); + expect(res["success"]).toBe(true); + + const stored = await h.kv.list(KV.memories); + expect(stored).toHaveLength(1); + expectClean(stored[0].content); + expectClean(stored[0].title); + expect(stored[0].content).toContain("[REDACTED_SECRET]"); + }); + + it("scrubs PEM keys pasted into memory content", async () => { + await h.call("mem::remember", { + content: `Cert setup:\n${PEM_KEY}\ndone`, + }); + const stored = await h.kv.list(KV.memories); + expectClean(stored[0].content); + }); +}); + +// --------------------------------------------------------------------------- +// 3. mem::lesson-save +// --------------------------------------------------------------------------- +describe("mem::lesson-save scrubs lessons", () => { + let h: Harness; + beforeEach(() => { + h = wire(registerLessonsFunctions as never); + }); + + it("scrubs content and context", async () => { + const res = await h.call("mem::lesson-save", { + content: `Auth header must be Bearer ${ANTHROPIC_KEY}`, + context: `discovered while debugging ${DB_URL}`, + }); + expect(res["success"]).toBe(true); + const lesson = res["lesson"] as Lesson; + expectClean(lesson.content); + expectClean(lesson.context); + + const stored = await h.kv.list(KV.lessons); + expectClean(stored[0].content); + }); + + it("dedups on scrubbed content (two different tokens → one lesson)", async () => { + const first = await h.call("mem::lesson-save", { + content: `rotate token ghp_${"A".repeat(36)} monthly`, + }); + const second = await h.call("mem::lesson-save", { + content: `rotate token ghp_${"B".repeat(36)} monthly`, + }); + expect(first["action"]).toBe("created"); + // Both scrub to the same string, so the fingerprint matches and the + // second save reinforces instead of storing a second secret-bearing row. + expect(second["action"]).toBe("strengthened"); + expect(await h.kv.list(KV.lessons)).toHaveLength(1); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Slots: create / append / replace +// --------------------------------------------------------------------------- +describe("slots scrub content writes", () => { + let h: Harness; + beforeEach(() => { + h = wire(registerSlotsFunctions as never); + }); + + it("slot-create scrubs initial content", async () => { + const res = await h.call("mem::slot-create", { + label: "deploy_notes", + content: `staging db: ${DB_URL}`, + }); + expect(res["success"]).toBe(true); + expectClean((res["slot"] as MemorySlot).content); + }); + + it("slot-append scrubs appended text", async () => { + await h.call("mem::slot-create", { label: "scratch", content: "start" }); + const res = await h.call("mem::slot-append", { + label: "scratch", + text: `new key is ${GH_TOKEN}`, + }); + expect(res["success"]).toBe(true); + const slot = res["slot"] as MemorySlot; + expect(slot.content).toContain("start"); + expectClean(slot.content); + }); + + it("slot-replace scrubs replacement content", async () => { + await h.call("mem::slot-create", { label: "scratch2", content: "x" }); + const res = await h.call("mem::slot-replace", { + label: "scratch2", + content: `creds:\n${PEM_KEY}`, + }); + expect(res["success"]).toBe(true); + expectClean((res["slot"] as MemorySlot).content); + }); +}); + +// --------------------------------------------------------------------------- +// 5. mem::team-share re-scrubs at the sharing boundary +// --------------------------------------------------------------------------- +describe("mem::team-share scrubs shared content", () => { + it("scrubs a legacy memory row that still contains a secret", async () => { + const kv = mockKV(); + const sdk = mockSdk(); + registerTeamFunction(sdk as never, kv as never, { + teamId: "team1", + userId: "user1", + mode: "shared", + }); + + // Simulate a pre-existing row written before a pattern was added. + await kv.set(KV.memories, "mem_legacy", { + id: "mem_legacy", + content: `prod db is ${DB_URL}`, + title: `uses ${GH_TOKEN}`, + }); + + const handler = sdk.handlers.get("mem::team-share")!; + const res = (await handler({ + itemId: "mem_legacy", + itemType: "memory", + })) as { success: boolean; sharedItem: TeamSharedItem }; + + expect(res.success).toBe(true); + const shared = res.sharedItem.content as { content: string; title: string }; + expectClean(shared.content); + expectClean(shared.title); + + const stored = await kv.list(KV.teamShared("team1")); + expectClean(JSON.stringify(stored[0].content)); + }); +}); + +// --------------------------------------------------------------------------- +// 6. Sketches: create + add +// --------------------------------------------------------------------------- +describe("sketches scrub titles and descriptions", () => { + let h: Harness; + beforeEach(() => { + h = wire(registerSketchesFunction as never); + }); + + it("sketch-create scrubs title and description", async () => { + const res = await h.call("mem::sketch-create", { + title: `migrate off ${DB_URL}`, + description: `old token: ${GH_TOKEN}`, + }); + expect(res["success"]).toBe(true); + const sketch = res["sketch"] as Sketch; + expectClean(sketch.title); + expectClean(sketch.description); + }); + + it("sketch-add scrubs action title and description", async () => { + const created = await h.call("mem::sketch-create", { title: "plan" }); + const sketchId = (created["sketch"] as Sketch).id; + const res = await h.call("mem::sketch-add", { + sketchId, + title: `rotate ${ANTHROPIC_KEY}`, + description: `currently ${DB_URL}`, + }); + expect(res["success"]).toBe(true); + const action = res["action"] as { title: string; description: string }; + expectClean(action.title); + expectClean(action.description); + }); +}); + +// --------------------------------------------------------------------------- +// 7. mem::import scrubs imported dumps +// --------------------------------------------------------------------------- +describe("mem::import scrubs imported data", () => { + it("scrubs memories and observations from an imported dump", async () => { + const h = wire(registerExportImportFunction as never); + + const exportData: ExportData = { + version: "0.9.27", + exportedAt: new Date().toISOString(), + sessions: [ + { + id: "sess1", + project: "demo", + cwd: "/tmp", + startedAt: new Date().toISOString(), + status: "completed", + observationCount: 1, + firstPrompt: `set ANTHROPIC_API_KEY=${ANTHROPIC_KEY}`, + }, + ], + observations: { + sess1: [ + { + id: "obs1", + sessionId: "sess1", + timestamp: new Date().toISOString(), + type: "command_run", + title: "ran deploy", + facts: [`pushed with ${GH_TOKEN}`], + narrative: `deployed using ${DB_URL}`, + concepts: ["deploy"], + files: [], + importance: 0.5, + }, + ], + }, + memories: [ + { + id: "mem1", + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + type: "fact", + title: "db location", + content: `database lives at ${DB_URL}`, + concepts: [], + files: [], + sessionIds: ["sess1"], + strength: 5, + version: 1, + isLatest: true, + }, + ], + summaries: [], + }; + + const res = await h.call("mem::import", { exportData }); + expect(res["success"]).toBe(true); + + const memories = await h.kv.list(KV.memories); + expectClean(memories[0].content); + + const obs = await h.kv.list( + KV.observations("sess1"), + ); + expectClean(obs[0].narrative); + expectClean(JSON.stringify(obs[0].facts)); + + const sessions = await h.kv.list<{ firstPrompt?: string }>(KV.sessions); + expectClean(sessions[0].firstPrompt ?? ""); + }); + + it("imports a clean dump without error", async () => { + const h = wire(registerExportImportFunction as never); + const res = await h.call("mem::import", { + exportData: { + version: "0.9.27", + exportedAt: new Date().toISOString(), + sessions: [], + observations: {}, + memories: [], + summaries: [], + } satisfies ExportData, + }); + expect(res["success"]).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// 8. mem::compress scrubs LLM output +// --------------------------------------------------------------------------- +describe("mem::compress scrubs model output", () => { + it("redacts a secret the model echoed into the summary", async () => { + const kv = mockKV(); + const sdk = mockSdk(); + + // Model echoes a secret from its context into the narrative — the raw + // observation was scrubbed at capture, but this output was not, until now. + const xmlWithSecret = `command_run +Configured deploy token +token setup +set token ${GH_TOKEN} +The agent configured CI using ${GH_TOKEN} against ${DB_URL} +deploy + +5`; + + const provider: MemoryProvider = { + name: "mock", + compress: async () => xmlWithSecret, + summarize: async () => "", + }; + + registerCompressFunction(sdk as never, kv as never, provider); + + const handler = sdk.handlers.get("mem::compress")!; + const res = (await handler({ + observationId: "obs_x", + sessionId: "sess_x", + raw: { + id: "obs_x", + sessionId: "sess_x", + timestamp: new Date().toISOString(), + hookType: "post_tool_use", + toolName: "Bash", + toolInput: "deploy.sh", + toolOutput: "ok", + raw: {}, + }, + })) as { success: boolean; compressed: CompressedObservation }; + + expect(res.success).toBe(true); + expectClean(res.compressed.narrative); + expectClean(JSON.stringify(res.compressed.facts)); + expectClean(res.compressed.title); + + const stored = await kv.get( + KV.observations("sess_x"), + "obs_x", + ); + expectClean(JSON.stringify(stored)); + }); +}); From 915c526190b5679498c928169d7851331050e961 Mon Sep 17 00:00:00 2001 From: Srinath Siddamsetty Date: Sat, 13 Jun 2026 02:38:26 -0400 Subject: [PATCH 2/2] fix(docs,refactor): resolve CodeRabbit findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix truncated reference 'index-persist..' → 'index-persistence.ts' in architecture diagram - Escape pipe in table cell for RawObservation | CompressedObservation[] - Add parentSessionId field to Session interface to align with subagent contract - Refactor slot-replace to use local const instead of mutating input parameter Co-Authored-By: Claude Haiku 4.5 --- docs/E2E_ARCHITECTURE.md | 5 +++-- src/functions/slots.ts | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/E2E_ARCHITECTURE.md b/docs/E2E_ARCHITECTURE.md index 8aa81e6c3..4a09e2709 100644 --- a/docs/E2E_ARCHITECTURE.md +++ b/docs/E2E_ARCHITECTURE.md @@ -102,7 +102,7 @@ AgentMemory is a **persistent, queryable memory layer** for AI coding agents. It │ gemini │ │ Vector Index (in-memory) ←─── vector-index.ts │ │ openrouter │ │ HybridSearch ←─── hybrid-search.ts│ │ minimax │ │ Reranker ←─── reranker.ts │ -│ agent-sdk │ │ IndexPersistence (disk) ←─── index-persist.. │ +│ agent-sdk │ │ IndexPersistence (disk) ←─── index-persistence.ts │ │ noop │ └─────────────────────────────────────────────────────┘ └─────────────┘ @@ -455,7 +455,7 @@ kv.delete(scope: string, key: string): Promise | Scope Key | Contents | Access Pattern | |-----------|----------|----------------| | `mem:sessions` | `Session[]` | list by status, get by id | -| `mem:obs:{sessionId}` | `RawObservation | CompressedObservation[]` | list all for session | +| `mem:obs:{sessionId}` | `RawObservation \| CompressedObservation[]` | list all for session | | `mem:memories` | `Memory[]` | list all, get by id | | `mem:embeddings:{obsId}` | `Float32Array` serialized | get by obs id | | `mem:index:bm25` | BM25 inverted index snapshot | single key, loaded on boot | @@ -687,6 +687,7 @@ Usage: background processing queue for consolidation tasks ```typescript interface Session { id: string // UUID v4 + parentSessionId?: string // UUID v4 (set for subagent sessions) project: string // cwd-derived project identifier cwd: string // working directory agentId?: string // agent identifier (for multi-agent) diff --git a/src/functions/slots.ts b/src/functions/slots.ts index b2b29c12d..9df6322ab 100644 --- a/src/functions/slots.ts +++ b/src/functions/slots.ts @@ -319,26 +319,26 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void { const label = validateLabel(data?.label); if (!label) return { success: false, error: "label required" }; if (typeof data?.content !== "string") return { success: false, error: "content required (string)" }; - data.content = stripPrivateData(data.content); + const content = stripPrivateData(data.content); return withKeyedLock(`slot:${label}`, async () => { const { slot, scope } = await readSlot(kv, label); if (!slot) return { success: false, error: "slot not found (use mem::slot-create first)" }; if (slot.readOnly) return { success: false, error: "slot is read-only" }; - if (data.content.length > slot.sizeLimit) { + if (content.length > slot.sizeLimit) { return { success: false, - error: `content exceeds sizeLimit (${data.content.length} > ${slot.sizeLimit})`, + error: `content exceeds sizeLimit (${content.length} > ${slot.sizeLimit})`, sizeLimit: slot.sizeLimit, }; } - const updated: MemorySlot = { ...slot, content: data.content, updatedAt: nowIso() }; + const updated: MemorySlot = { ...slot, content, updatedAt: nowIso() }; await kv.set(scopeKv(scope), label, updated); await recordAudit(kv, "slot_replace", "mem::slot-replace", [label], { scope, before: slot.content.length, - after: data.content.length, + after: content.length, }); - return { success: true, slot: updated, size: data.content.length }; + return { success: true, slot: updated, size: content.length }; }); }, );