Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
991 changes: 991 additions & 0 deletions docs/E2E_ARCHITECTURE.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions pnpm-workspace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
allowBuilds:
esbuild: true
onnxruntime-node: true
protobufjs: true
sharp: true
8 changes: 7 additions & 1 deletion src/functions/compress.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
import { VISION_DESCRIPTION_PROMPT } from "../prompts/vision.js";
import { getXmlTag, getXmlChildren } from "../prompts/xml.js";
import { getSearchIndex, vectorIndexAddGuarded } from "./search.js";
import { scrubRecord } from "./privacy.js";
import { CompressOutputSchema } from "../eval/schemas.js";
import { validateOutput } from "../eval/validator.js";
import { scoreCompression } from "../eval/quality.js";
Expand Down Expand Up @@ -156,11 +157,16 @@ export function registerCompressFunction(

const qualityScore = scoreCompression(parsed);

// The raw input was scrubbed at capture, but the LLM can echo a
// secret it saw elsewhere in its context into the summary — scrub
// the model output too before it is persisted and indexed.
const scrubbedParsed = scrubRecord(parsed);

const compressed: CompressedObservation = {
id: data.observationId,
sessionId: data.sessionId,
timestamp: data.raw.timestamp,
...parsed,
...scrubbedParsed,
confidence: qualityScore / 100,
...(hasImage ? { modality: data.raw.modality } : {}),
...(imageDescription ? { imageDescription } : {}),
Expand Down
25 changes: 25 additions & 0 deletions src/functions/export-import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { KV } from "../state/schema.js";
import { StateKV } from "../state/kv.js";
import { VERSION } from "../version.js";
import { recordAudit } from "./audit.js";
import { scrubRecord } from "./privacy.js";
import { logger } from "../logger.js";

export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void {
Expand Down Expand Up @@ -263,6 +264,30 @@ export function registerExportImportFunction(sdk: ISdk, kv: StateKV): void {
skipped: 0,
};

// Imports arrive from outside the observe pipeline (hand-edited dumps,
// exports from machines running older pattern lists), so re-scrub the
// content-bearing collections before any row is written. Collections
// keyed only by ids/numbers are skipped — they carry no free text.
for (const key of [
"sessions",
"memories",
"summaries",
"lessons",
"insights",
"semanticMemories",
"crystals",
"sketches",
] as const) {
const collection = importData[key];
if (Array.isArray(collection)) {
(importData as unknown as Record<string, unknown>)[key] =
scrubRecord(collection);
}
}
for (const [sessionId, obs] of Object.entries(importData.observations)) {
importData.observations[sessionId] = scrubRecord(obs);
}

if (strategy === "replace") {
const existing = await kv.list<Session>(KV.sessions);
for (const session of existing) {
Expand Down
7 changes: 7 additions & 0 deletions src/functions/lessons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { StateKV } from "../state/kv.js";
import { KV, fingerprintId } from "../state/schema.js";
import type { Lesson } from "../types.js";
import { recordAudit } from "./audit.js";
import { stripPrivateData } from "./privacy.js";

function reinforceLesson(lesson: Lesson): void {
const now = new Date().toISOString();
Expand Down Expand Up @@ -30,6 +31,12 @@ export function registerLessonsFunctions(sdk: ISdk, kv: StateKV): void {
return { success: false, error: "content is required" };
}

// Scrub before fingerprinting so the dedup key reflects the stored
// (scrubbed) form — lessons arrive from crystallize output and manual
// saves, neither of which passes through the observe pipeline.
data.content = stripPrivateData(data.content);
if (data.context) data.context = stripPrivateData(data.context);

const fp = fingerprintId("lsn", data.content.trim().toLowerCase());
const existing = await kv.get<Lesson>(KV.lessons, fp);

Expand Down
20 changes: 20 additions & 0 deletions src/functions/privacy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ const SECRET_PATTERN_SOURCES = [
/npm_[A-Za-z0-9]{36}/g,
/glpat-[A-Za-z0-9\-_]{20,}/g,
/dop_v1_[A-Za-z0-9]{64}/g,
/-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY(?: BLOCK)?-----[\s\S]*?-----END (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY(?: BLOCK)?-----/g,
/(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqps?|mssql):\/\/[^\/\s:@"']+:[^@\s"']+@/gi,
];

export function stripPrivateData(input: string): string {
Expand All @@ -28,6 +30,24 @@ export function stripPrivateData(input: string): string {
return result;
}

/**
 * Apply stripPrivateData to every string in an arbitrary record, walking
 * nested arrays and plain objects. Use this where the content shape is
 * unknown (imports, team shares, parsed LLM output); for known string fields
 * call stripPrivateData directly.
 *
 * Only string *values* are scrubbed; property keys are copied verbatim.
 *
 * @param record - Any value: a string, an array, a plain object, or a leaf.
 * @returns A scrubbed copy for strings, arrays and plain objects. Everything
 *   else (numbers, booleans, null, undefined, and non-plain objects such as
 *   Date, Map or Set instances) is returned unchanged, by reference, so
 *   values that cannot be rebuilt from their enumerable entries are never
 *   collapsed into an empty object.
 */
export function scrubRecord<T>(record: T): T {
  if (typeof record === "string") return stripPrivateData(record) as T;
  if (Array.isArray(record)) {
    return record.map((item: unknown) => scrubRecord(item)) as T;
  }
  if (typeof record !== "object" || record === null) return record;

  // Only plain objects (object literals, JSON.parse output, null-prototype
  // dictionaries) are safe to rebuild from Object.entries. A Date or Map has
  // no enumerable own entries and would otherwise come back as `{}`.
  const proto: unknown = Object.getPrototypeOf(record);
  if (proto !== Object.prototype && proto !== null) return record;

  // Object.fromEntries defines own data properties, so an own "__proto__" key
  // (which JSON.parse happily produces from an imported dump) is copied as
  // data. Plain `out[key] = value` assignment would instead invoke the
  // __proto__ setter and silently swap the copy's prototype.
  return Object.fromEntries(
    Object.entries(record).map(([key, value]) => [key, scrubRecord(value)]),
  ) as T;
}

export function registerPrivacyFunction(sdk: ISdk): void {
sdk.registerFunction("mem::privacy",
async (data: { input?: unknown } | undefined) => {
Expand Down
6 changes: 6 additions & 0 deletions src/functions/remember.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { withKeyedLock } from "../state/keyed-mutex.js";
import { memoryToObservation } from "../state/memory-utils.js";
import { deleteAccessLog } from "./access-tracker.js";
import { recordAudit } from "./audit.js";
import { stripPrivateData } from "./privacy.js";
import { getSearchIndex, vectorIndexAddGuarded, vectorIndexRemove, flushIndexSave } from "./search.js";
import { getAgentId } from "../config.js";
import { logger } from "../logger.js";
Expand Down Expand Up @@ -50,6 +51,11 @@ export function registerRememberFunction(sdk: ISdk, kv: StateKV): void {
? (data.type as Memory["type"])
: "fact";

// Explicit saves bypass the observe pipeline, so they need the same
// secret scrubbing hook payloads get. Scrub before the similarity
// check so dedup compares the stored (scrubbed) form.
data.content = stripPrivateData(data.content);

const now = new Date().toISOString();
// Normalize project early so every subsequent comparison and storage
// operation uses the same cleaned value. Raw data.project must not be
Expand Down
9 changes: 5 additions & 4 deletions src/functions/sketches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { KV, generateId } from "../state/schema.js";
import { withKeyedLock } from "../state/keyed-mutex.js";
import type { Action, ActionEdge, Sketch } from "../types.js";
import { safeAudit } from "./audit.js";
import { stripPrivateData } from "./privacy.js";

export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void {
sdk.registerFunction("mem::sketch-create",
Expand All @@ -21,8 +22,8 @@ export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void {
const expiresInMs = data.expiresInMs || 3600000;
const sketch: Sketch = {
id: generateId("sk"),
title: data.title.trim(),
description: (data.description || "").trim(),
title: stripPrivateData(data.title.trim()),
description: stripPrivateData((data.description || "").trim()),
status: "active",
actionIds: [],
project: data.project,
Expand Down Expand Up @@ -66,8 +67,8 @@ export function registerSketchesFunction(sdk: ISdk, kv: StateKV): void {
const now = new Date().toISOString();
const action: Action = {
id: generateId("act"),
title: data.title.trim(),
description: (data.description || "").trim(),
title: stripPrivateData(data.title.trim()),
description: stripPrivateData((data.description || "").trim()),
status: "pending",
priority: Math.max(1, Math.min(10, data.priority || 5)),
createdAt: now,
Expand Down
18 changes: 11 additions & 7 deletions src/functions/slots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { KV } from "../state/schema.js";
import { StateKV } from "../state/kv.js";
import { withKeyedLock } from "../state/keyed-mutex.js";
import { recordAudit } from "./audit.js";
import { stripPrivateData } from "./privacy.js";
import { getEnvVar } from "../config.js";
import { logger } from "../logger.js";

Expand Down Expand Up @@ -243,7 +244,8 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void {
if (sizeLimit === null) {
return { success: false, error: "sizeLimit must be an integer between 1 and 20000" };
}
const content = typeof data?.content === "string" ? data.content : "";
const content =
typeof data?.content === "string" ? stripPrivateData(data.content) : "";
if (content.length > sizeLimit) {
return { success: false, error: `content exceeds sizeLimit (${content.length} > ${sizeLimit})` };
}
Expand Down Expand Up @@ -282,7 +284,8 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void {
async (data: { label?: string; text?: string }) => {
const label = validateLabel(data?.label);
if (!label) return { success: false, error: "label required" };
const text = typeof data?.text === "string" ? data.text : "";
const text =
typeof data?.text === "string" ? stripPrivateData(data.text) : "";
if (!text) return { success: false, error: "text required" };
return withKeyedLock(`slot:${label}`, async () => {
const { slot, scope } = await readSlot(kv, label);
Expand Down Expand Up @@ -316,25 +319,26 @@ export function registerSlotsFunctions(sdk: ISdk, kv: StateKV): void {
const label = validateLabel(data?.label);
if (!label) return { success: false, error: "label required" };
if (typeof data?.content !== "string") return { success: false, error: "content required (string)" };
const content = stripPrivateData(data.content);
return withKeyedLock(`slot:${label}`, async () => {
const { slot, scope } = await readSlot(kv, label);
if (!slot) return { success: false, error: "slot not found (use mem::slot-create first)" };
if (slot.readOnly) return { success: false, error: "slot is read-only" };
if (data.content.length > slot.sizeLimit) {
if (content.length > slot.sizeLimit) {
return {
success: false,
error: `content exceeds sizeLimit (${data.content.length} > ${slot.sizeLimit})`,
error: `content exceeds sizeLimit (${content.length} > ${slot.sizeLimit})`,
sizeLimit: slot.sizeLimit,
};
}
const updated: MemorySlot = { ...slot, content: data.content, updatedAt: nowIso() };
const updated: MemorySlot = { ...slot, content, updatedAt: nowIso() };
await kv.set(scopeKv(scope), label, updated);
await recordAudit(kv, "slot_replace", "mem::slot-replace", [label], {
scope,
before: slot.content.length,
after: data.content.length,
after: content.length,
});
return { success: true, slot: updated, size: data.content.length };
return { success: true, slot: updated, size: content.length };
});
},
);
Expand Down
9 changes: 8 additions & 1 deletion src/functions/team.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import type {
import { KV, generateId } from "../state/schema.js";
import type { StateKV } from "../state/kv.js";
import { recordAudit } from "./audit.js";
import { scrubRecord } from "./privacy.js";
import { logger } from "../logger.js";

const VALID_ITEM_TYPES = new Set(["memory", "pattern", "observation"]);
Expand Down Expand Up @@ -50,12 +51,18 @@ export function registerTeamFunction(
return { success: false, error: "Item not found" };
}

// Sharing widens the audience from one machine to the whole team.
// Re-scrub at this boundary as defense-in-depth: it catches rows
// written before a newer secret pattern existed, for the items that
// actually get shared.
const scrubbedContent = scrubRecord(content);

const shared: TeamSharedItem = {
id: generateId("ts"),
sharedBy: config.userId,
sharedAt: new Date().toISOString(),
type: data.itemType,
content,
content: scrubbedContent,
project: data.project || "",
visibility: "shared",
};
Expand Down
Loading