diff --git a/FOR_CLAUDE.md b/FOR_CLAUDE.md
index 891534f..da25e44 100644
--- a/FOR_CLAUDE.md
+++ b/FOR_CLAUDE.md
@@ -141,6 +141,8 @@ You can also use the `recall` CLI directly via Bash:
```bash
recall search "deployment pipeline" # Search memory
recall search "database choice" --bias-type decisions # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
recall stats # Database statistics
recall loa list # Browse curated knowledge
recall dump "Session title" # Capture current session
diff --git a/FOR_OPENCODE.md b/FOR_OPENCODE.md
index af48a50..02e6164 100644
--- a/FOR_OPENCODE.md
+++ b/FOR_OPENCODE.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via Bash tool:
```bash
recall search "deployment pipeline" # Search memory
recall search "database choice" --bias-type decisions # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
recall stats # Database statistics
recall loa list # Browse curated knowledge
recall onboard # Interactive L0 identity setup (run once per user)
diff --git a/FOR_PI.md b/FOR_PI.md
index 58bddaa..2bf23ab 100644
--- a/FOR_PI.md
+++ b/FOR_PI.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via shell commands:
```bash
recall search "deployment pipeline" # Search memory
recall search "database choice" --bias-type decisions # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
recall stats # Database statistics
recall loa list # Browse curated knowledge
recall onboard # Interactive L0 identity setup (run once per user)
diff --git a/commands/Recall/search.md b/commands/Recall/search.md
index 8c9c774..5a1882b 100644
--- a/commands/Recall/search.md
+++ b/commands/Recall/search.md
@@ -18,6 +18,7 @@ recall search "$1"
- `-t <table>` — Hard-filter to one table: messages, loa, decisions, learnings, breadcrumbs
- `--bias-type <type>` — Softly boost one table without filtering other matches. Same values as `-t`.
- `-l <n>` — Max results (default: 20)
+- `--show-provenance` — Show Record Provenance for every result (by default only unknown provenance is flagged)
## Examples
diff --git a/docs/architecture.md b/docs/architecture.md
index 93d241b..a7d859e 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -78,6 +78,16 @@ tables (`messages`, `decisions`, `learnings`, `loa_entries`). It controls L1
tier ranking at session start. Manage manually with `recall pin` / `recall unpin`
or backfill from confidence signals with `recall importance backfill`.
+The `provenance` column was added in schema migration 8→9 on all five memory
+tables (`messages`, `decisions`, `learnings`, `breadcrumbs`, `loa_entries`).
+It declares how each record was created — `verbatim`, `user_authored`,
+`extracted`, or `derived` — and is stamped automatically by every write path,
+never accepted from callers (see
+`docs/adr/0001-record-provenance-automatic-write-path-metadata.md`). Legacy
+rows stay `NULL` (unknown) until classified with
+`recall provenance backfill`, which only acts on deterministic write-path
+evidence and never guesses.
+
## Tiered RecallStart (v0.7.0+)
The `RecallStart` hook injects two tiers at the top of every session:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 2742894..43b47da 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -16,6 +16,7 @@ recall search "query" # FTS5 search with options
recall search "query" -t decisions # Hard-filter to decisions only
recall search "query" --bias-type decisions # Prefer decisions, still show other matching tables
recall search "query" -p myproject # Filter by project
+recall search "query" --show-provenance # Show provenance for every result
recall semantic "query" # Semantic search (explicit)
recall hybrid "query" # Hybrid search (explicit)
```
@@ -43,6 +44,8 @@ FTS5 supports boolean operators and prefix matching:
- `auth*` — prefix match (authz, authentication, etc.)
- `"vpn config"` — exact phrase
+By default, search output stays quiet about [Record Provenance](#record-provenance) when a record carries a known value, and visibly flags records whose provenance is unknown (legacy rows that predate the provenance column). Pass `--show-provenance` to display the provenance of every result.
+
---
## Capture
@@ -209,6 +212,30 @@ recall unpin decisions 42 # Reset to table default (5, or 8 for Lo
LoA entries have a write-time floor of 5; `recall pin` will not drop them below that.
+## Record Provenance
+
+The `provenance` column on `messages`, `decisions`, `learnings`, `breadcrumbs`,
+and `loa_entries` declares how each record was created: `verbatim` (exact source
+text), `user_authored` (directly authored via a user or agent command),
+`extracted` (generated from source material, possibly lossy), or `derived`
+(mechanically produced from existing memory records). Provenance is **automatic
+write-path metadata** — every write path stamps it; there is no flag or MCP
+parameter to set it (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
+Legacy rows that predate the column have no declared provenance (`NULL`,
+reported as `unknown`). The backfill classifies them conservatively — only
+where the source table or a write-path marker gives deterministic evidence;
+everything else stays unknown rather than being guessed:
+
+```bash
+recall provenance backfill # Dry-run report (default)
+recall provenance backfill --execute # Apply the classification
+recall provenance backfill --execute -t loa_entries # Limit to one table
+```
+
+Allowed `-t/--table` values: `messages`, `decisions`, `learnings`,
+`breadcrumbs`, `loa_entries`, `all` (default).
+
## Benchmarks
Phase 2 benchmark harness for measuring context efficiency.
diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md
index f36e1c3..31eab0d 100644
--- a/docs/mcp-tools.md
+++ b/docs/mcp-tools.md
@@ -22,7 +22,7 @@ Use `table` when you need a **hard filter** to one record type. Use `bias_type`
| bias_type | string | no | — | Softly boost one table type in ranking without filtering other matches. Same allowed values as `table`; prefer `table` when you need only one type. |
| limit | number | no | 10 | Maximum number of results to return |
-**Returns:** Array of matching records with table name, id, content, project, and snippet highlighting.
+**Returns:** Array of matching records with table name, id, content, project, snippet highlighting, and Record Provenance (`verbatim`, `user_authored`, `extracted`, `derived`, or `unknown` for legacy rows that predate provenance).
```js
// Only decisions
@@ -48,7 +48,7 @@ Combined keyword + semantic search using Reciprocal Rank Fusion. Best for natura
| project | string | no | — | Filter results to a specific project name |
| limit | number | no | 10 | Maximum number of results to return |
-**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores.
+**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores, each with its Record Provenance.
```js
memory_hybrid_search({ query: "how did we handle rate limiting", project: "my-app" })
@@ -67,7 +67,7 @@ Get recent context — LoA entries, decisions, and breadcrumbs. Good for orienti
| limit | number | no | 5 | Number of recent entries to return per category |
| project | string | no | — | Filter results to a specific project name |
-**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs.
+**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs — each annotated with its Record Provenance.
```js
memory_recall({ limit: 5, project: "my-app" })
@@ -112,6 +112,8 @@ Add structured records during a session. Use this to capture decisions, learning
**Returns:** Confirmation with the new record's id and table.
+Records created through `memory_add` are automatically stamped with Record Provenance `user_authored`. There is intentionally no provenance parameter — provenance is write-path metadata, not a caller claim (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
```js
memory_add({ type: "decision", content: "Use PostgreSQL over MySQL", detail: "Better JSON support and JSONB indexing" })
memory_add({ type: "learning", content: "bun:sqlite uses $param syntax", detail: "Not :param like better-sqlite3", tags: "bun,sqlite" })
diff --git a/docs/slash-commands.md b/docs/slash-commands.md
index 9213e55..50343ab 100644
--- a/docs/slash-commands.md
+++ b/docs/slash-commands.md
@@ -27,6 +27,7 @@ Searches messages, LoA entries, decisions, learnings, and breadcrumbs. The slash
- `/Recall:search database choice -t decisions` — hard-filter to decisions only
- `/Recall:search database choice --bias-type decisions` — prefer decisions first, while still returning matching learnings/messages/LoA/breadcrumbs
+- `/Recall:search database choice --show-provenance` — show Record Provenance for every result (by default only unknown provenance is flagged)
Rule of thumb: use `-t` when you want only one table; use `--bias-type` when you want one table first without hiding other context.
diff --git a/hooks/RecallPreCompact.ts b/hooks/RecallPreCompact.ts
index 4460306..a1ae8de 100644
--- a/hooks/RecallPreCompact.ts
+++ b/hooks/RecallPreCompact.ts
@@ -358,10 +358,13 @@ export function flushConversation(convPath: string, cwd: string): FlushResult {
// Insert messages. importance defaults to 5 — these are mid-session
// captures, not curated, and the Stop hook may later promote a subset
- // to LoA at importance 8.
+ // to LoA at importance 8. Raw transcript capture is verbatim
+ // (ADR-0001); the column guard keeps pre-provenance DBs working.
+ const hasProvenance = (db.prepare('PRAGMA table_info(messages)').all() as Array<{ name: string }>)
+ .some((c) => c.name === 'provenance');
const insertMessage = db.prepare(`
- INSERT INTO messages (session_id, timestamp, role, content, project, importance)
- VALUES (?, ?, ?, ?, ?, 5)
+ INSERT INTO messages (session_id, timestamp, role, content, project, importance${hasProvenance ? ', provenance' : ''})
+ VALUES (?, ?, ?, ?, ?, 5${hasProvenance ? ", 'verbatim'" : ''})
`);
const tx = db.transaction((rows: ParsedMessage[]) => {
diff --git a/hooks/lib/sqlite-writers.ts b/hooks/lib/sqlite-writers.ts
index afc5880..3802911 100644
--- a/hooks/lib/sqlite-writers.ts
+++ b/hooks/lib/sqlite-writers.ts
@@ -42,6 +42,16 @@ function columnExists(db: Database, table: string, column: string): boolean {
}
}
+// ADR-0001: all writers in this file are extraction paths, so the rows they
+// insert are stamped provenance = 'extracted'. The helper returns two SQL
+// text fragments — one to append to an INSERT column list, one to append to
+// its VALUES list — using a SQL literal rather than a bind parameter so the
+// legacy-DB guard is a plain string switch. When the table has no provenance
+// column both fragments are empty and the INSERT is left unchanged.
+function provenanceFragment(db: Database, table: string): { col: string; val: string } {
+  if (!columnExists(db, table, 'provenance')) {
+    return { col: '', val: '' };
+  }
+  return { col: ', provenance', val: ", 'extracted'" };
+}
+
// ---------------------------------------------------------------------------
// extraction_sessions
// ---------------------------------------------------------------------------
@@ -105,11 +115,12 @@ export function writeDecisionsBatch(dbPath: string, items: DecisionInput[]): num
try {
if (!tableExists(db, 'decisions')) return 0;
const hasConfidence = columnExists(db, 'decisions', 'confidence');
+ const provenance = provenanceFragment(db, 'decisions');
const sql = hasConfidence
- ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance)
- VALUES (?, ?, ?, ?, 'active', ?, ?)`
- : `INSERT INTO decisions (session_id, category, project, decision, status, importance)
- VALUES (?, ?, ?, ?, 'active', ?)`;
+ ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance${provenance.col})
+ VALUES (?, ?, ?, ?, 'active', ?, ?${provenance.val})`
+ : `INSERT INTO decisions (session_id, category, project, decision, status, importance${provenance.col})
+ VALUES (?, ?, ?, ?, 'active', ?${provenance.val})`;
const stmt = db.prepare(sql);
const insertMany = db.transaction((batch: DecisionInput[]) => {
let n = 0;
@@ -165,11 +176,12 @@ export function writeLearningsBatch(dbPath: string, items: LearningInput[]): num
try {
if (!tableExists(db, 'learnings')) return 0;
const hasConfidence = columnExists(db, 'learnings', 'confidence');
+ const provenance = provenanceFragment(db, 'learnings');
const sql = hasConfidence
- ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
- : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
+ ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance${provenance.col})
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
+ : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance${provenance.col})
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`;
const stmt = db.prepare(sql);
const insertMany = db.transaction((batch: LearningInput[]) => {
let n = 0;
@@ -227,9 +239,10 @@ export function writeBreadcrumbsBatch(dbPath: string, items: BreadcrumbInput[]):
const db = openDb(dbPath);
try {
if (!tableExists(db, 'breadcrumbs')) return 0;
+ const provenance = provenanceFragment(db, 'breadcrumbs');
const stmt = db.prepare(
- `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at)
- VALUES (?, ?, ?, ?, ?, ?)`
+ `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at${provenance.col})
+ VALUES (?, ?, ?, ?, ?, ?${provenance.val})`
);
const insertMany = db.transaction((batch: BreadcrumbInput[]) => {
let n = 0;
@@ -273,11 +286,12 @@ export function writeLoaEntryFromExtraction(dbPath: string, entry: LoaInput): nu
if (!tableExists(db, 'loa_entries')) return 0;
// LoA importance is floored at 5 (curated tier guardrail).
const importance = Math.max(5, clampImportance(entry.importance, 8));
+ const provenance = provenanceFragment(db, 'loa_entries');
const result = db
.prepare(
`INSERT INTO loa_entries
- (title, description, fabric_extract, session_id, project, tags, message_count, importance)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
+ (title, description, fabric_extract, session_id, project, tags, message_count, importance${provenance.col})
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
)
.run(
entry.title,
diff --git a/src/commands/add.ts b/src/commands/add.ts
index c4befb9..62e65c3 100644
--- a/src/commands/add.ts
+++ b/src/commands/add.ts
@@ -21,7 +21,9 @@ export function runAddBreadcrumb(content: string, options: AddBreadcrumbOptions)
content,
project,
category: options.category,
- importance: options.importance ?? 5
+ importance: options.importance ?? 5,
+ // ADR-0001: provenance is stamped from the write path, never a CLI flag.
+ provenance: 'user_authored'
});
console.log(`✓ Added breadcrumb #${id}${project ? ` [${project}]` : ''}`);
@@ -51,7 +53,8 @@ export function runAddDecision(decision: string, options: AddDecisionOptions): v
reasoning: options.why,
alternatives: options.alternatives,
status: 'active',
- confidence
+ confidence,
+ provenance: 'user_authored'
});
console.log(`✓ Added decision #${id}${project ? ` [${project}]` : ''} (${confidence})`);
@@ -78,7 +81,8 @@ export function runAddLearning(problem: string, solution: string, options: AddLe
project,
category: options.category,
prevention: options.prevention,
- tags: options.tags
+ tags: options.tags,
+ provenance: 'user_authored'
});
console.log(`✓ Added learning #${id}${project ? ` [${project}]` : ''}`);
diff --git a/src/commands/dump.ts b/src/commands/dump.ts
index d87caec..dae54bb 100644
--- a/src/commands/dump.ts
+++ b/src/commands/dump.ts
@@ -387,7 +387,8 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
summary: `Dumped: ${title}`
});
- const importedCount = addMessagesBatch(session.messages);
+ // Raw conversation capture is verbatim (ADR-0001).
+ const importedCount = addMessagesBatch(session.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
// Get imported message IDs for LoA
const db = getDb();
@@ -429,7 +430,10 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
parent_loa_id: options.continues,
project: options.project || session.project,
tags: options.tags,
- message_count: importedMessages.length
+ message_count: importedMessages.length,
+ // Fabric output and the basic-summary fallback are both generated from
+ // the session messages — extracted either way (ADR-0001).
+ provenance: 'extracted'
});
await autoEmbedLoaEntry(loaId, title, fabricExtract);
diff --git a/src/commands/import-legacy.ts b/src/commands/import-legacy.ts
index 6d8b5ac..fc8cf98 100644
--- a/src/commands/import-legacy.ts
+++ b/src/commands/import-legacy.ts
@@ -164,7 +164,10 @@ export function runImportLegacy(options: ImportLegacyOptions): void {
message_range_start: undefined,
message_range_end: undefined,
message_count: undefined,
- tags: 'legacy,imported'
+ tags: 'legacy,imported',
+ // DISTILLED.md / HOT_RECALL.md content is prior extraction output —
+ // the record stays honest as extracted (ADR-0001).
+ provenance: 'extracted'
});
// Update the created_at to match the original date
diff --git a/src/commands/loa.ts b/src/commands/loa.ts
index 94d0f36..932920a 100644
--- a/src/commands/loa.ts
+++ b/src/commands/loa.ts
@@ -115,7 +115,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise
process.exit(1);
}
- // Create LoA entry
+ // Create LoA entry — Fabric extract_wisdom output is generated from the
+ // session messages, so the record is extracted (ADR-0001).
const id = createLoaEntry({
title,
description: `Captured ${messages.length} messages`,
@@ -125,7 +126,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise
parent_loa_id: options.continues,
project,
tags: options.tags,
- message_count: messages.length
+ message_count: messages.length,
+ provenance: 'extracted'
});
console.log(`\n✓ LoA #${id} captured: "${title}"`);
diff --git a/src/commands/provenance.ts b/src/commands/provenance.ts
new file mode 100644
index 0000000..db0cde1
--- /dev/null
+++ b/src/commands/provenance.ts
@@ -0,0 +1,148 @@
+// recall provenance — conservative backfill for the Record Provenance column.
+//
+// Background (ADR-0001, CONTEXT.md, issue #42):
+// Migration 8→9 added a nullable `provenance` column to messages/decisions/
+// learnings/breadcrumbs/loa_entries. Write paths stamp provenance going
+// forward; legacy rows are NULL ("unknown"). This command classifies legacy
+// rows — and ONLY where the source table or a write-path marker gives
+// deterministic evidence.
+//
+// Binding rules:
+// - NEVER guess. A row with no deterministic evidence stays NULL and is
+// reported as unknown.
+// - NEVER overwrite. Only rows with provenance IS NULL are touched.
+// - `user_authored` is never assigned by backfill: nothing in the data
+// distinguishes a CLI/MCP-authored row from an extraction row that was
+// given a custom category.
+//
+// Evidence table:
+// - messages → 'verbatim' — every message writer that has ever existed
+// (JSONL import, conversation import, dump, PreCompact flush)
+// captures raw transcript text without semantic rewriting.
+// - loa_entries → 'extracted' — every LoA writer stores machine-generated
+// content (Fabric/Haiku extracts, basic-summary fallback, or
+// prior DISTILLED.md extraction output via import-legacy).
+// - decisions → 'extracted' iff category = 'auto-extracted' (the marker the
+// extraction writers stamp). Other rows: unknown.
+// - learnings → 'extracted' iff category = 'auto-extracted'. Else unknown.
+// - breadcrumbs → 'extracted' iff category = 'extracted-idea'. Else unknown.
+//
+// Bind-count note (see src/lib/chunk.ts): every statement here binds zero
+// variables — bulk UPDATEs with literal predicates — so no chunking applies.
+
+import { getDb } from '../db/connection.js';
+
+// Tables the backfill may target. runProvenanceBackfill validates its `table`
+// option against this list, and the BackfillTable union is derived from it.
+const BACKFILL_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'] as const;
+type BackfillTable = typeof BACKFILL_TABLES[number];
+
+// Options accepted by runProvenanceBackfill.
+export interface ProvenanceBackfillOptions {
+ // Report only, without writing. runProvenanceBackfill treats an omitted
+ // value as true.
+ dryRun?: boolean;
+ // One table to process, or 'all'. runProvenanceBackfill treats an omitted
+ // value as 'all'.
+ table?: BackfillTable | 'all';
+}
+
+// One classification rule: which table it applies to, which provenance value
+// it assigns, and the evidence that justifies the assignment.
+interface TableRule {
+ // Table whose NULL-provenance rows this rule may classify.
+ table: BackfillTable;
+ // Provenance value written to matching rows.
+ value: 'verbatim' | 'extracted';
+ // SQL predicate (beyond provenance IS NULL) that constitutes the
+ // deterministic evidence; undefined = the whole table qualifies.
+ evidenceWhere?: string;
+ // Human-readable description of the evidence, printed in the report.
+ evidence: string;
+}
+
+// Backfill rules, processed in array order by runProvenanceBackfill. Rules
+// without `evidenceWhere` (messages, loa_entries) classify every row whose
+// provenance IS NULL; the others classify only rows whose category matches
+// the listed marker. The predicates are fixed literals, never caller input,
+// and are interpolated directly into the SQL text.
+const RULES: TableRule[] = [
+ {
+ table: 'messages',
+ value: 'verbatim',
+ evidence: 'raw conversation capture is the only historical write path',
+ },
+ {
+ table: 'loa_entries',
+ value: 'extracted',
+ evidence: 'all historical LoA writers store machine-generated extracts',
+ },
+ {
+ table: 'decisions',
+ value: 'extracted',
+ evidenceWhere: "category = 'auto-extracted'",
+ evidence: "category = 'auto-extracted' (extraction-writer marker)",
+ },
+ {
+ table: 'learnings',
+ value: 'extracted',
+ evidenceWhere: "category = 'auto-extracted'",
+ evidence: "category = 'auto-extracted' (extraction-writer marker)",
+ },
+ {
+ table: 'breadcrumbs',
+ value: 'extracted',
+ evidenceWhere: "category = 'extracted-idea'",
+ evidence: "category = 'extracted-idea' (extraction-writer marker)",
+ },
+];
+
+// Per-table outcome returned (and printed) by runProvenanceBackfill.
+export interface ProvenanceBackfillResult {
+ // Table the rule was applied to.
+ table: string;
+ // Provenance value the rule assigns.
+ value: string;
+ // Rows with provenance IS NULL, counted before any update.
+ unknownBefore: number;
+ // Rows matching the rule's evidence clause, counted before any update
+ // (in dry-run mode these are the rows that would be set).
+ classified: number;
+ // unknownBefore minus classified.
+ remainingUnknown: number;
+ // Human-readable evidence description copied from the rule.
+ evidence: string;
+}
+
+/**
+ * Classify legacy rows whose provenance is NULL, using the fixed RULES table.
+ *
+ * For each selected rule the function counts the NULL-provenance rows, counts
+ * the subset matching the rule's evidence clause, and — only when not in
+ * dry-run mode and at least one row matches — issues a single UPDATE limited
+ * to `provenance IS NULL` rows. A per-table report is printed to stdout.
+ *
+ * @param options.dryRun  Report only; defaults to true when omitted.
+ * @param options.table   One table name or 'all'; defaults to 'all'.
+ * @returns One result per processed rule. An unrecognised table name prints
+ *          an error, sets process.exitCode to 1 and returns an empty array.
+ */
+export function runProvenanceBackfill(options: ProvenanceBackfillOptions = {}): ProvenanceBackfillResult[] {
+  const isDryRun = options.dryRun ?? true;
+  const requested = options.table ?? 'all';
+
+  // Reject anything that is neither 'all' nor a known backfill table.
+  if (requested !== 'all' && !(BACKFILL_TABLES as readonly string[]).includes(requested)) {
+    console.error(`Unknown table: ${requested}. Use one of: ${BACKFILL_TABLES.join(', ')}, all`);
+    process.exitCode = 1;
+    return [];
+  }
+
+  const db = getDb();
+
+  // Row count for a table under a literal WHERE predicate.
+  const countWhere = (table: BackfillTable, where: string): number => {
+    const row = db.prepare(`SELECT COUNT(*) as count FROM ${table} WHERE ${where}`).get() as { count: number };
+    return row.count;
+  };
+
+  const selectedRules = RULES.filter((rule) => requested === 'all' || requested === rule.table);
+  const results: ProvenanceBackfillResult[] = [];
+
+  for (const { table, value, evidenceWhere, evidence } of selectedRules) {
+    const nullOnly = 'provenance IS NULL';
+    const unknownBefore = countWhere(table, nullOnly);
+
+    // Rules without an extra predicate classify every NULL row in the table.
+    const evidenceClause = evidenceWhere ? `${nullOnly} AND ${evidenceWhere}` : nullOnly;
+    const classified = countWhere(table, evidenceClause);
+
+    const shouldWrite = !isDryRun && classified > 0;
+    if (shouldWrite) {
+      db.prepare(`UPDATE ${table} SET provenance = '${value}' WHERE ${evidenceClause}`).run();
+    }
+
+    results.push({
+      table,
+      value,
+      unknownBefore,
+      classified,
+      remainingUnknown: unknownBefore - classified,
+      evidence,
+    });
+  }
+
+  // Report
+  const banner = isDryRun ? '[DRY RUN — no changes written]\n' : '[LIVE — changes written]\n';
+  console.log(banner);
+
+  const verb = isDryRun ? 'would set' : 'set';
+  results.forEach((entry) => {
+    console.log(`${entry.table}: ${entry.unknownBefore} unknown — ${verb} ${entry.classified} to ${entry.value}`);
+    console.log(` evidence: ${entry.evidence}`);
+    if (entry.remainingUnknown > 0) {
+      console.log(` ${entry.remainingUnknown} left unknown (no deterministic evidence — staying NULL)`);
+    }
+    console.log('');
+  });
+
+  if (isDryRun) {
+    console.log('Re-run with --execute to apply changes.');
+  }
+
+  return results;
+}
diff --git a/src/commands/search.ts b/src/commands/search.ts
index 6dc5092..2c13579 100644
--- a/src/commands/search.ts
+++ b/src/commands/search.ts
@@ -7,6 +7,7 @@ interface SearchOptions {
table?: string;
biasType?: string;
limit?: number;
+ showProvenance?: boolean;
}
export function runSearch(query: string, options: SearchOptions): void {
@@ -40,7 +41,16 @@ export function runSearch(query: string, options: SearchOptions): void {
const projectTag = result.project ? ` [${result.project}]` : '';
const date = result.created_at.split('T')[0];
- console.log(`[${result.table}#${result.id}]${projectTag} ${date}`);
+ // Display contract (issue #42): known provenance stays quiet by default;
+ // unknown (NULL) is always flagged. --show-provenance shows every value.
+ let provenanceTag = '';
+ if (options.showProvenance) {
+ provenanceTag = ` [provenance: ${result.provenance ?? 'unknown'}]`;
+ } else if (!result.provenance) {
+ provenanceTag = ' ⚠ [provenance: unknown]';
+ }
+
+ console.log(`[${result.table}#${result.id}]${projectTag} ${date}${provenanceTag}`);
console.log(` ${preview.replace(/\n/g, ' ')}`);
console.log('');
}
diff --git a/src/db/migrations.ts b/src/db/migrations.ts
index f567c03..415e1a4 100644
--- a/src/db/migrations.ts
+++ b/src/db/migrations.ts
@@ -179,6 +179,25 @@ export const MIGRATIONS: Migration[] = [
db.prepare('CREATE INDEX IF NOT EXISTS idx_learnings_importance ON learnings(importance)').run();
db.prepare('CREATE INDEX IF NOT EXISTS idx_loa_importance ON loa_entries(importance)').run();
},
+
+ // Migration 8 → 9: Record Provenance (ADR-0001, issue #42).
+ // Additive nullable column on all memory tables. Provenance is automatic
+ // write-path metadata; legacy rows stay NULL ("unknown") until explicitly
+ // backfilled via `recall provenance backfill` — never guessed, no default.
+ // The CHECK constraint passes for NULL (IN() evaluates to NULL → allowed),
+ // so unknown remains representable.
+ //
+ // Only SQLite's "duplicate column name" error is tolerated (fresh installs
+ // already have the column from the CREATE TABLE schema). Any other ALTER
+ // TABLE failure is rethrown rather than silently ignored, because later
+ // writes name the provenance column and would fail if it were missing.
+ (db) => {
+   const tables = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'];
+   for (const table of tables) {
+     try {
+       db.prepare(
+         `ALTER TABLE ${table} ADD COLUMN provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived'))`
+       ).run();
+     } catch (err) {
+       // Column already exists — safe to ignore (fresh install case).
+       const message = err instanceof Error ? err.message : String(err);
+       if (!/duplicate column name/i.test(message)) {
+         throw err;
+       }
+     }
+   }
+ },
];
// ---------------------------------------------------------------------------
diff --git a/src/db/schema.ts b/src/db/schema.ts
index 3da02e8..7f60912 100644
--- a/src/db/schema.ts
+++ b/src/db/schema.ts
@@ -25,6 +25,7 @@ CREATE TABLE IF NOT EXISTS messages (
content TEXT NOT NULL,
project TEXT,
importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+ provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
FOREIGN KEY (session_id) REFERENCES sessions(session_id)
);
@@ -40,6 +41,7 @@ CREATE TABLE IF NOT EXISTS decisions (
alternatives TEXT,
status TEXT DEFAULT 'active' CHECK (status IN ('active', 'superseded', 'reverted')),
importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+ provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
FOREIGN KEY (session_id) REFERENCES sessions(session_id)
);
@@ -55,6 +57,7 @@ CREATE TABLE IF NOT EXISTS learnings (
prevention TEXT,
tags TEXT,
importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+ provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
FOREIGN KEY (session_id) REFERENCES sessions(session_id)
);
@@ -67,6 +70,7 @@ CREATE TABLE IF NOT EXISTS breadcrumbs (
category TEXT,
project TEXT,
importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+ provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
expires_at DATETIME,
FOREIGN KEY (session_id) REFERENCES sessions(session_id)
);
@@ -92,6 +96,7 @@ CREATE TABLE IF NOT EXISTS loa_entries (
tags TEXT,
message_count INTEGER,
importance INTEGER DEFAULT 8 CHECK (importance BETWEEN 1 AND 10),
+ provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
FOREIGN KEY (parent_loa_id) REFERENCES loa_entries(id),
FOREIGN KEY (message_range_start) REFERENCES messages(id),
FOREIGN KEY (message_range_end) REFERENCES messages(id)
diff --git a/src/index.ts b/src/index.ts
index 2a0ab42..e36e94b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -24,6 +24,7 @@ import { runCluster } from './commands/cluster.js';
import { runEmbedBackfill, runSemanticSearch, runEmbedStats, runHybridSearch } from './commands/embed.js';
import { runDoctor } from './commands/doctor.js';
import { runImportanceBackfill, runPin, runUnpin } from './commands/importance.js';
+import { runProvenanceBackfill } from './commands/provenance.js';
import { runBenchmark, listBenchmarks, reportLatestBenchmark } from './commands/benchmark.js';
import { runOnboard } from './commands/onboard.js';
import { runMigrate } from './commands/migrate.js';
@@ -177,12 +178,14 @@ program
.option('-t, --table ', 'Hard-filter to one table (messages, loa, decisions, learnings, breadcrumbs)')
.option('--bias-type ', 'Softly boost one table without filtering others (messages, loa, decisions, learnings, breadcrumbs)')
.option('-l, --limit ', 'Max results', '20')
+ .option('--show-provenance', 'Show provenance for every result (default: only unknown provenance is flagged)')
.action((query, options) => {
runSearch(query, {
project: options.project,
table: options.table,
biasType: options.biasType,
- limit: parseInt(options.limit, 10)
+ limit: parseInt(options.limit, 10),
+ showProvenance: options.showProvenance
});
closeDb();
});
@@ -535,6 +538,27 @@ importanceCmd
closeDb();
});
+// recall provenance — conservative backfill for Record Provenance (ADR-0001).
+// Provenance is automatic write-path metadata: there is intentionally no
+// flag to set it on add commands; this maintenance path only classifies
+// legacy NULL rows where deterministic evidence exists.
+const provenanceCmd = program
+  .command('provenance')
+  .description('Manage Record Provenance metadata on memory records');
+
+// `recall provenance backfill` — dry-run unless --execute is passed. The
+// -t/--table option declares a required <table> value so that
+// `-t loa_entries` is parsed as the option's argument, not as a bare flag.
+provenanceCmd
+  .command('backfill')
+  .description('Classify legacy rows with unknown provenance using deterministic write-path evidence (dry-run by default; never guesses)')
+  .option('--execute', 'Apply changes (default is dry-run)')
+  .option('-t, --table <table>', 'Target table: messages, decisions, learnings, breadcrumbs, loa_entries, all', 'all')
+  .action((options) => {
+    runProvenanceBackfill({
+      dryRun: !options.execute,
+      table: options.table
+    });
+    closeDb();
+  });
+
+
// recall pin [importance] — force a record to a high importance (default 10)
program
.command('pin [importance]')
@@ -620,7 +644,7 @@ program
.option('-k, --keyword', 'Use keyword search only (FTS5)')
.option('-v, --vector', 'Use vector search only (semantic)')
.action(async (query, options) => {
- if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path'].includes(query)) {
+ if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'provenance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path'].includes(query)) {
if (options.keyword) {
// FTS5 only
runSearch(query, {
diff --git a/src/lib/conversation-import.ts b/src/lib/conversation-import.ts
index 382b790..8e1c70c 100644
--- a/src/lib/conversation-import.ts
+++ b/src/lib/conversation-import.ts
@@ -567,7 +567,9 @@ export async function importConversations(
source: session.source,
});
- const count = addMessagesBatch(session.messages);
+ // Raw conversation capture is verbatim (ADR-0001); the structured
+ // extraction below stamps its own records as extracted.
+ const count = addMessagesBatch(session.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
result.sessionsImported++;
result.messagesImported += count;
diff --git a/src/lib/import.ts b/src/lib/import.ts
index 66d9785..dd7c930 100644
--- a/src/lib/import.ts
+++ b/src/lib/import.ts
@@ -184,8 +184,8 @@ export function importAllSessions(options?: { dryRun?: boolean; verbose?: boolea
summary: `Imported from ${basename(file)}`
});
- // Insert messages in batch
- const count = addMessagesBatch(parsed.messages);
+ // Insert messages in batch — raw conversation capture is verbatim (ADR-0001)
+ const count = addMessagesBatch(parsed.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
result.sessionsImported++;
result.messagesImported += count;
diff --git a/src/lib/memory.ts b/src/lib/memory.ts
index f3695a5..eb89ce3 100644
--- a/src/lib/memory.ts
+++ b/src/lib/memory.ts
@@ -2,7 +2,7 @@
import { getDb, getDbPath } from '../db/connection.js';
import { existsSync, statSync } from 'fs';
-import type { Session, Message, Decision, Learning, Breadcrumb, LoaEntry, Stats, SearchResult } from '../types/index.js';
+import type { Session, Message, Decision, Learning, Breadcrumb, LoaEntry, Stats, SearchResult, Provenance } from '../types/index.js';
// ============ Sessions ============
@@ -48,8 +48,8 @@ export function endSession(sessionId: string, summary?: string): void {
export function addMessage(message: Omit): number {
const db = getDb();
const stmt = db.prepare(`
- INSERT INTO messages (session_id, timestamp, role, content, project, importance)
- VALUES ($session_id, $timestamp, $role, $content, $project, $importance)
+ INSERT INTO messages (session_id, timestamp, role, content, project, importance, provenance)
+ VALUES ($session_id, $timestamp, $role, $content, $project, $importance, $provenance)
`);
const result = stmt.run({
$session_id: message.session_id,
@@ -57,7 +57,8 @@ export function addMessage(message: Omit): number {
$role: message.role,
$content: message.content,
$project: message.project || null,
- $importance: clampImportance(message.importance, 5)
+ $importance: clampImportance(message.importance, 5),
+ $provenance: message.provenance ?? null
});
return result.lastInsertRowid as number;
}
@@ -65,8 +66,8 @@ export function addMessage(message: Omit): number {
export function addMessagesBatch(messages: Omit[]): number {
const db = getDb();
const stmt = db.prepare(`
- INSERT INTO messages (session_id, timestamp, role, content, project, importance)
- VALUES ($session_id, $timestamp, $role, $content, $project, $importance)
+ INSERT INTO messages (session_id, timestamp, role, content, project, importance, provenance)
+ VALUES ($session_id, $timestamp, $role, $content, $project, $importance, $provenance)
`);
const insertMany = db.transaction((msgs: Omit[]) => {
@@ -78,7 +79,8 @@ export function addMessagesBatch(messages: Omit[]): number {
$role: msg.role,
$content: msg.content,
$project: msg.project || null,
- $importance: clampImportance(msg.importance, 5)
+ $importance: clampImportance(msg.importance, 5),
+ $provenance: msg.provenance ?? null
});
count++;
}
@@ -113,8 +115,8 @@ export function pinRecord(table: 'decisions' | 'learnings' | 'breadcrumbs' | 'lo
export function addDecision(decision: Omit): number {
const db = getDb();
const stmt = db.prepare(`
- INSERT INTO decisions (session_id, category, project, decision, reasoning, alternatives, status, confidence, importance)
- VALUES ($session_id, $category, $project, $decision, $reasoning, $alternatives, $status, $confidence, $importance)
+ INSERT INTO decisions (session_id, category, project, decision, reasoning, alternatives, status, confidence, importance, provenance)
+ VALUES ($session_id, $category, $project, $decision, $reasoning, $alternatives, $status, $confidence, $importance, $provenance)
`);
const result = stmt.run({
$session_id: decision.session_id || null,
@@ -125,7 +127,8 @@ export function addDecision(decision: Omit): numb
$alternatives: decision.alternatives || null,
$status: decision.status || 'active',
$confidence: decision.confidence || 'medium',
- $importance: clampImportance(decision.importance, 5)
+ $importance: clampImportance(decision.importance, 5),
+ $provenance: decision.provenance ?? null
});
return result.lastInsertRowid as number;
}
@@ -207,8 +210,8 @@ export function findSimilarDecisions(text: string, limit = 3): Decision[] {
export function addLearning(learning: Omit): number {
const db = getDb();
const stmt = db.prepare(`
- INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance)
- VALUES ($session_id, $category, $project, $problem, $solution, $prevention, $tags, $confidence, $importance)
+ INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance, provenance)
+ VALUES ($session_id, $category, $project, $problem, $solution, $prevention, $tags, $confidence, $importance, $provenance)
`);
const result = stmt.run({
$session_id: learning.session_id || null,
@@ -219,7 +222,8 @@ export function addLearning(learning: Omit): numb
$prevention: learning.prevention || null,
$tags: learning.tags || null,
$confidence: learning.confidence || 'medium',
- $importance: clampImportance(learning.importance, 5)
+ $importance: clampImportance(learning.importance, 5),
+ $provenance: learning.provenance ?? null
});
return result.lastInsertRowid as number;
}
@@ -234,8 +238,8 @@ export function getLearning(id: number): Learning | undefined {
export function addBreadcrumb(breadcrumb: Omit): number {
const db = getDb();
const stmt = db.prepare(`
- INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at)
- VALUES ($session_id, $content, $category, $project, $importance, $expires_at)
+ INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at, provenance)
+ VALUES ($session_id, $content, $category, $project, $importance, $expires_at, $provenance)
`);
const result = stmt.run({
$session_id: breadcrumb.session_id || null,
@@ -243,7 +247,8 @@ export function addBreadcrumb(breadcrumb: Omit)
$category: breadcrumb.category || null,
$project: breadcrumb.project || null,
$importance: breadcrumb.importance ?? 5,
- $expires_at: breadcrumb.expires_at || null
+ $expires_at: breadcrumb.expires_at || null,
+ $provenance: breadcrumb.provenance ?? null
});
return result.lastInsertRowid as number;
}
@@ -304,7 +309,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
switch (table) {
case 'messages':
sql = `
- SELECT m.id, m.content, m.project, m.timestamp as created_at, f.rank
+ SELECT m.id, m.content, m.project, m.timestamp as created_at, m.provenance, f.rank
FROM messages_fts f
JOIN messages m ON m.id = f.rowid
WHERE messages_fts MATCH ?
@@ -315,7 +320,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
break;
case 'decisions':
sql = `
- SELECT d.id, d.decision as content, d.project, d.created_at, f.rank
+ SELECT d.id, d.decision as content, d.project, d.created_at, d.provenance, f.rank
FROM decisions_fts f
JOIN decisions d ON d.id = f.rowid
WHERE decisions_fts MATCH ?
@@ -327,7 +332,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
break;
case 'learnings':
sql = `
- SELECT l.id, l.problem as content, l.project, l.created_at, f.rank
+ SELECT l.id, l.problem as content, l.project, l.created_at, l.provenance, f.rank
FROM learnings_fts f
JOIN learnings l ON l.id = f.rowid
WHERE learnings_fts MATCH ?
@@ -338,7 +343,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
break;
case 'breadcrumbs':
sql = `
- SELECT b.id, b.content, b.project, b.created_at, f.rank
+ SELECT b.id, b.content, b.project, b.created_at, b.provenance, f.rank
FROM breadcrumbs_fts f
JOIN breadcrumbs b ON b.id = f.rowid
WHERE breadcrumbs_fts MATCH ?
@@ -349,7 +354,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
break;
case 'loa':
sql = `
- SELECT l.id, l.title || ': ' || SUBSTR(l.fabric_extract, 1, 200) as content, l.project, l.created_at, f.rank
+ SELECT l.id, l.title || ': ' || SUBSTR(l.fabric_extract, 1, 200) as content, l.project, l.created_at, l.provenance, f.rank
FROM loa_fts f
JOIN loa_entries l ON l.id = f.rowid
WHERE loa_fts MATCH ?
@@ -373,6 +378,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
content: string;
project: string | null;
created_at: string;
+ provenance: Provenance | null;
rank: number;
}>;
@@ -383,6 +389,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
content: row.content,
project: row.project || undefined,
created_at: row.created_at,
+ provenance: row.provenance ?? null,
rank: row.rank
});
}
@@ -453,8 +460,8 @@ export function createLoaEntry(entry: Omit): numb
// so a careless caller cannot demote curated knowledge below neutral.
const importance = Math.max(5, clampImportance(entry.importance, 8));
const stmt = db.prepare(`
- INSERT INTO loa_entries (title, description, fabric_extract, message_range_start, message_range_end, parent_loa_id, session_id, project, tags, message_count, importance)
- VALUES ($title, $description, $fabric_extract, $message_range_start, $message_range_end, $parent_loa_id, $session_id, $project, $tags, $message_count, $importance)
+ INSERT INTO loa_entries (title, description, fabric_extract, message_range_start, message_range_end, parent_loa_id, session_id, project, tags, message_count, importance, provenance)
+ VALUES ($title, $description, $fabric_extract, $message_range_start, $message_range_end, $parent_loa_id, $session_id, $project, $tags, $message_count, $importance, $provenance)
`);
const result = stmt.run({
$title: entry.title,
@@ -467,7 +474,8 @@ export function createLoaEntry(entry: Omit): numb
$project: entry.project || null,
$tags: entry.tags || null,
$message_count: entry.message_count || null,
- $importance: importance
+ $importance: importance,
+ $provenance: entry.provenance ?? null
});
return result.lastInsertRowid as number;
}
diff --git a/src/lib/structured-extraction.ts b/src/lib/structured-extraction.ts
index f262870..c314dfe 100644
--- a/src/lib/structured-extraction.ts
+++ b/src/lib/structured-extraction.ts
@@ -164,6 +164,7 @@ function writeLoa(ctx: StructuredExtractionContext): number {
project: ctx.project,
tags: ctx.topics.join(','),
message_count: ctx.messageCount ?? range.count,
+ provenance: 'extracted',
});
}
@@ -194,6 +195,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
decision: item.decision,
status: 'active',
confidence: item.confidence,
+ provenance: 'extracted',
});
result.decisions++;
}
@@ -211,6 +213,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
solution: item.solution,
tags: ctx.sessionLabel,
confidence: 'medium',
+ provenance: 'extracted',
});
result.learnings++;
}
@@ -226,6 +229,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
project: ctx.project,
content,
importance: 5,
+ provenance: 'extracted',
});
result.breadcrumbs++;
}
diff --git a/src/mcp-server.ts b/src/mcp-server.ts
index 60d53a2..958d001 100644
--- a/src/mcp-server.ts
+++ b/src/mcp-server.ts
@@ -66,8 +66,15 @@ import {
reciprocalRankFusion,
checkEmbeddingService,
} from "./lib/embeddings.js";
+import type { Provenance } from "./types/index.js";
import { existsSync } from "fs";
+// Formats the Record Provenance tag appended to displayed results (ADR-0001).
+// A missing value (legacy NULL or undefined) is shown as "unknown", never guessed.
+function provenanceLabel(provenance: Provenance | null | undefined): string {
+  return "provenance: " + (provenance ?? "unknown");
+}
+
/**
* Hybrid search combining FTS5 + vector embeddings with RRF fusion
* Used by context_for_agent and memory_hybrid_search
@@ -82,6 +89,7 @@ async function hybridSearch(
content: string;
score: number;
source: "fts" | "vec" | "both";
+ provenance: Provenance | null;
}>;
embeddingsAvailable: boolean;
}> {
@@ -159,6 +167,7 @@ async function hybridSearch(
content: string;
score: number;
source: "fts" | "vec" | "both";
+ provenance: Provenance | null;
}
>();
@@ -171,6 +180,7 @@ async function hybridSearch(
content: r.content,
score,
source: "fts",
+ provenance: r.provenance ?? null,
});
}
@@ -182,25 +192,29 @@ async function hybridSearch(
} else {
// Need to fetch content
let content = "";
+ let provenance: Provenance | null = null;
if (r.source_table === "loa_entries") {
const loa = db
.prepare(
- "SELECT title, fabric_extract FROM loa_entries WHERE id = ?",
+ "SELECT title, fabric_extract, provenance FROM loa_entries WHERE id = ?",
)
.get(r.source_id) as any;
content = loa
? `${loa.title}: ${loa.fabric_extract?.slice(0, 200)}`
: "";
+ provenance = loa?.provenance ?? null;
} else if (r.source_table === "decisions") {
const dec = db
- .prepare("SELECT decision FROM decisions WHERE id = ?")
+ .prepare("SELECT decision, provenance FROM decisions WHERE id = ?")
.get(r.source_id) as any;
content = dec?.decision || "";
+ provenance = dec?.provenance ?? null;
} else if (r.source_table === "messages") {
const msg = db
- .prepare("SELECT content FROM messages WHERE id = ?")
+ .prepare("SELECT content, provenance FROM messages WHERE id = ?")
.get(r.source_id) as any;
content = msg?.content?.slice(0, 200) || "";
+ provenance = msg?.provenance ?? null;
}
resultMap.set(key, {
@@ -209,6 +223,7 @@ async function hybridSearch(
content,
score: fusedScores.get(key) || 0,
source: "vec",
+ provenance,
});
}
}
@@ -229,6 +244,7 @@ async function hybridSearch(
content: r.content,
score: r.rank || 0,
source: "fts" as const,
+ provenance: r.provenance ?? null,
}))
.slice(0, limit),
embeddingsAvailable: false,
@@ -288,7 +304,7 @@ server.tool(
r.content.length > 200
? r.content.slice(0, 200) + "..."
: r.content;
- return `[${r.table}#${r.id}] ${r.project || "no-project"} | ${r.created_at}\n${preview}`;
+ return `[${r.table}#${r.id}] ${r.project || "no-project"} | ${r.created_at} | ${provenanceLabel(r.provenance)}\n${preview}`;
})
.join("\n\n---\n\n");
@@ -356,7 +372,7 @@ server.tool(
? r.content.slice(0, 200) + "..."
: r.content;
const score = (r.score * 100).toFixed(1);
- return `${score}% ${sourceTag} [${r.table}#${r.id}]\n${preview}`;
+ return `${score}% ${sourceTag} [${r.table}#${r.id}] | ${provenanceLabel(r.provenance)}\n${preview}`;
})
.join("\n\n---\n\n");
@@ -405,7 +421,7 @@ server.tool(
output += "### Library of Alexandria (Curated Knowledge)\n";
for (const e of loa) {
const preview = e.fabric_extract.slice(0, 300).replace(/\n/g, " ");
- output += `- **LoA #${e.id}** [${e.project || "no-project"}] ${e.created_at?.split("T")[0]}: ${e.title}\n ${preview}...\n`;
+ output += `- **LoA #${e.id}** [${e.project || "no-project"}] ${e.created_at?.split("T")[0]} (${provenanceLabel(e.provenance)}): ${e.title}\n ${preview}...\n`;
}
output += "\n";
}
@@ -413,7 +429,7 @@ server.tool(
if (decisions.length > 0) {
output += "### Recent Decisions\n";
for (const d of decisions) {
- output += `- **#${d.id}** [${d.project || "no-project"}]: ${d.decision}${d.reasoning ? ` (${d.reasoning})` : ""}\n`;
+ output += `- **#${d.id}** [${d.project || "no-project"}] (${provenanceLabel(d.provenance)}): ${d.decision}${d.reasoning ? ` (${d.reasoning})` : ""}\n`;
}
output += "\n";
}
@@ -421,7 +437,7 @@ server.tool(
if (breadcrumbs.length > 0) {
output += "### Breadcrumbs\n";
for (const b of breadcrumbs) {
- output += `- **#${b.id}** [${b.project || "no-project"}]: ${b.content}\n`;
+ output += `- **#${b.id}** [${b.project || "no-project"}] (${provenanceLabel(b.provenance)}): ${b.content}\n`;
}
output += "\n";
}
@@ -555,6 +571,9 @@ server.tool(
}
}
+ // ADR-0001: provenance is stamped from the write path. memory_add
+ // deliberately exposes no provenance parameter — agents must not
+ // be able to launder extracted content as something else.
id = addDecision({
decision: content,
reasoning: detail,
@@ -562,6 +581,7 @@ server.tool(
status: "active",
confidence: confidence || "medium",
importance,
+ provenance: "user_authored",
});
let resultText = `Added decision #${id}: ${content}`;
@@ -583,6 +603,7 @@ server.tool(
tags,
confidence: confidence || "medium",
importance,
+ provenance: "user_authored",
});
return {
content: [
@@ -595,6 +616,7 @@ server.tool(
content,
project,
importance: importance ?? 5,
+ provenance: "user_authored",
});
return {
content: [
diff --git a/src/types/index.ts b/src/types/index.ts
index 3601818..e7635b3 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -1,5 +1,14 @@
// Core types for RECALL
+// Record Provenance (ADR-0001, CONTEXT.md): the declared origin and
+// transformation level of a memory record. It is automatic write-path
+// metadata — never a public MCP parameter or CLI classification input.
+// Survivor-order vocabulary: user_authored > verbatim > extracted > derived.
+// Legacy unknown is stored as NULL/absent and is never guessed. `derived` is
+// reserved for future paths that mechanically produce records from existing ones.
+export const PROVENANCE_VALUES = ['user_authored', 'verbatim', 'extracted', 'derived'] as const; // runtime list of the allowed values
+export type Provenance = typeof PROVENANCE_VALUES[number]; // string-literal union derived from the list above
+
export interface Session {
id?: number;
session_id: string;
@@ -21,6 +30,7 @@ export interface Message {
content: string;
project?: string;
importance?: number;
+ provenance?: Provenance | null;
}
export interface Decision {
@@ -35,6 +45,7 @@ export interface Decision {
status: 'active' | 'superseded' | 'reverted';
confidence?: 'high' | 'medium' | 'low';
importance?: number;
+ provenance?: Provenance | null;
}
export interface Learning {
@@ -49,6 +60,7 @@ export interface Learning {
tags?: string;
confidence?: 'high' | 'medium' | 'low';
importance?: number;
+ provenance?: Provenance | null;
}
export interface Breadcrumb {
@@ -60,6 +72,7 @@ export interface Breadcrumb {
project?: string;
importance: number;
expires_at?: string;
+ provenance?: Provenance | null;
}
export interface LoaEntry {
@@ -76,6 +89,7 @@ export interface LoaEntry {
tags?: string;
message_count?: number;
importance?: number;
+ provenance?: Provenance | null;
}
export interface SearchResult {
@@ -85,6 +99,7 @@ export interface SearchResult {
project?: string;
created_at: string;
rank?: number;
+ provenance?: Provenance | null;
}
export interface Stats {
diff --git a/tests/commands/provenance.test.ts b/tests/commands/provenance.test.ts
new file mode 100644
index 0000000..d98bbc5
--- /dev/null
+++ b/tests/commands/provenance.test.ts
@@ -0,0 +1,169 @@
+// recall provenance backfill — conservative legacy classification (issue #42, ADR-0001).
+//
+// Binding rules under test:
+// - dry-run is the default and writes nothing
+// - --execute only sets provenance where deterministic evidence exists
+// - rows without evidence stay NULL (unknown), never guessed
+// - rows that already have provenance are never overwritten
+// - user_authored is never assigned by backfill
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import { Database } from 'bun:sqlite';
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { runProvenanceBackfill } from '../../src/commands/provenance';
+import {
+ createSession,
+ addMessage,
+ addDecision,
+ addLearning,
+ addBreadcrumb,
+ createLoaEntry,
+} from '../../src/lib/memory';
+
+let dbPath: string; // set per test from setupTestDb(); used by readDb() below
+const originalLog = console.log; // saved so afterEach can restore the real logger
+
+beforeEach(() => {
+ dbPath = setupTestDb();
+ console.log = () => {}; // backfill prints a report; keep test output clean
+});
+
+afterEach(() => {
+ console.log = originalLog; // undo the silencing installed in beforeEach
+ teardownTestDb();
+});
+
+function readDb(): Database { // opens a separate read-only handle on the test DB; callers close it
+ return new Database(dbPath, { readonly: true });
+}
+
+/** Seeds legacy (NULL-provenance) rows across all five tables, plus one decision pre-stamped as user_authored. */
+function seedLegacyRows(): void {
+ createSession({ session_id: 's1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+ // messages: all legacy rows are deterministic 'verbatim'
+ addMessage({ session_id: 's1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'legacy message' });
+
+ // decisions: evidence marker is category = 'auto-extracted'
+ addDecision({ session_id: 's1', decision: 'extracted decision', category: 'auto-extracted', status: 'active' });
+ addDecision({ session_id: 's1', decision: 'unmarked decision', category: 'manual', status: 'active' });
+ addDecision({ session_id: 's1', decision: 'already stamped', status: 'active', provenance: 'user_authored' }); // not legacy: the tests expect this value to survive backfill
+
+ // learnings: evidence marker is category = 'auto-extracted'
+ addLearning({ session_id: 's1', problem: 'extracted problem', solution: 'fix', category: 'auto-extracted' });
+ addLearning({ session_id: 's1', problem: 'unmarked problem', solution: 'fix', category: 'other' });
+
+ // breadcrumbs: evidence marker is category = 'extracted-idea'
+ addBreadcrumb({ session_id: 's1', content: 'extracted idea', category: 'extracted-idea', importance: 5 });
+ addBreadcrumb({ session_id: 's1', content: 'unmarked note', category: 'note', importance: 5 });
+
+ // loa_entries: all legacy rows are deterministic 'extracted'
+ createLoaEntry({ title: 'legacy loa', fabric_extract: 'extract body', session_id: 's1' });
+}
+
+describe('runProvenanceBackfill — dry run (default)', () => {
+ test('reports classifications without writing anything', () => {
+ seedLegacyRows();
+
+ const results = runProvenanceBackfill({}); // no dryRun option passed: exercises the default
+
+ expect(results.length).toBe(5); // one result per memory table
+ const byTable = Object.fromEntries(results.map(r => [r.table, r]));
+ expect(byTable.messages.classified).toBe(1);
+ expect(byTable.messages.value).toBe('verbatim');
+ expect(byTable.loa_entries.classified).toBe(1);
+ expect(byTable.loa_entries.value).toBe('extracted');
+ // only the evidence-marked rows qualify; pre-stamped row is not "unknown"
+ expect(byTable.decisions.unknownBefore).toBe(2);
+ expect(byTable.decisions.classified).toBe(1);
+ expect(byTable.decisions.remainingUnknown).toBe(1);
+ expect(byTable.learnings.classified).toBe(1);
+ expect(byTable.breadcrumbs.classified).toBe(1);
+
+ // Nothing was written: NULL counts still equal what seedLegacyRows() inserted
+ const db = readDb();
+ const nullCount = (table: string) =>
+ (db.prepare(`SELECT COUNT(*) AS c FROM ${table} WHERE provenance IS NULL`).get() as { c: number }).c;
+ expect(nullCount('messages')).toBe(1);
+ expect(nullCount('decisions')).toBe(2); // the third decision was seeded already stamped
+ expect(nullCount('learnings')).toBe(2);
+ expect(nullCount('breadcrumbs')).toBe(2);
+ expect(nullCount('loa_entries')).toBe(1);
+ db.close();
+ });
+});
+
+describe('runProvenanceBackfill — execute', () => {
+ test('classifies only evidence-backed rows; the rest stay NULL', () => {
+ seedLegacyRows();
+
+ runProvenanceBackfill({ dryRun: false }); // apply the classification for real
+
+ const db = readDb();
+ const provenanceOf = (table: string, where: string) => // provenance of the first row matching `where`
+ (db.prepare(`SELECT provenance FROM ${table} WHERE ${where}`).get() as any)?.provenance;
+
+ expect(provenanceOf('messages', "content = 'legacy message'")).toBe('verbatim');
+ expect(provenanceOf('loa_entries', "title = 'legacy loa'")).toBe('extracted');
+
+ expect(provenanceOf('decisions', "decision = 'extracted decision'")).toBe('extracted');
+ expect(provenanceOf('decisions', "decision = 'unmarked decision'")).toBeNull();
+ // never overwritten, and user_authored is never assigned by backfill
+ expect(provenanceOf('decisions', "decision = 'already stamped'")).toBe('user_authored');
+
+ expect(provenanceOf('learnings', "problem = 'extracted problem'")).toBe('extracted');
+ expect(provenanceOf('learnings', "problem = 'unmarked problem'")).toBeNull();
+
+ expect(provenanceOf('breadcrumbs', "content = 'extracted idea'")).toBe('extracted');
+ expect(provenanceOf('breadcrumbs', "content = 'unmarked note'")).toBeNull();
+ db.close();
+ });
+
+ test('is idempotent: a second execute classifies nothing new', () => {
+ seedLegacyRows();
+ runProvenanceBackfill({ dryRun: false }); // first pass stamps every evidence-backed row
+
+ const second = runProvenanceBackfill({ dryRun: false });
+ for (const r of second) {
+ expect(r.classified).toBe(0);
+ }
+ });
+
+ test('table filter limits the run to one table', () => {
+ seedLegacyRows();
+
+ const results = runProvenanceBackfill({ dryRun: false, table: 'decisions' });
+
+ expect(results.length).toBe(1); // only the requested table is reported
+ expect(results[0].table).toBe('decisions');
+
+ const db = readDb();
+ // messages untouched by a decisions-only run
+ const msg = db.prepare("SELECT provenance FROM messages WHERE content = 'legacy message'").get() as any;
+ expect(msg.provenance).toBeNull();
+ db.close();
+ });
+});
+
+describe('runProvenanceBackfill — input validation', () => {
+ const originalExitCode = process.exitCode; // the rejected call is expected to set exitCode; restored below
+ const originalError = console.error; // replaced inside the test to capture the error message
+
+ afterEach(() => {
+ process.exitCode = originalExitCode ?? 0; // falls back to 0 when no exit code was set beforehand
+ console.error = originalError;
+ });
+
+ test('rejects an unknown table', () => {
+ let errorOutput = '';
+ console.error = (msg?: unknown) => {
+ errorOutput += String(msg);
+ };
+
+ const results = runProvenanceBackfill({ table: 'sessions' as any }); // 'sessions' is not one of the five provenance tables
+
+ expect(results).toEqual([]);
+ expect(errorOutput).toContain('Unknown table: sessions');
+ expect(process.exitCode).toBe(1);
+ });
+});
diff --git a/tests/commands/search.test.ts b/tests/commands/search.test.ts
index 9a02b0e..226fe8c 100644
--- a/tests/commands/search.test.ts
+++ b/tests/commands/search.test.ts
@@ -1,4 +1,4 @@
-import { describe, test, expect, afterEach } from 'bun:test';
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { runSearch } from '../../src/commands/search.js';
describe('runSearch --bias-type guard', () => {
@@ -23,3 +23,54 @@ describe('runSearch --bias-type guard', () => {
expect(process.exitCode).toBe(1);
});
});
+
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { createSession, addDecision, addBreadcrumb } from '../../src/lib/memory';
+
+describe('runSearch provenance display contract (issue #42)', () => {
+ const originalLog = console.log;
+ let output: string; // accumulates everything runSearch prints via console.log
+
+ beforeEach(() => {
+ setupTestDb();
+ output = '';
+ console.log = (msg?: unknown) => {
+ output += `${String(msg ?? '')}\n`;
+ };
+
+ createSession({ session_id: 'disp-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+ addDecision({ session_id: 'disp-1', decision: 'quizzacious known decision', status: 'active', provenance: 'user_authored' });
+ addBreadcrumb({ session_id: 'disp-1', content: 'quizzacious legacy crumb', importance: 5 }); // provenance NULL
+ });
+
+ afterEach(() => {
+ console.log = originalLog;
+ teardownTestDb();
+ });
+
+ test('default display stays quiet for known provenance and flags unknown', () => {
+ runSearch('quizzacious', {}); // the made-up word matches only the two rows seeded above
+
+ const lines = output.split('\n');
+ const knownLine = lines.find(l => l.includes('decisions#'));
+ const unknownLine = lines.find(l => l.includes('breadcrumbs#'));
+
+ expect(knownLine).toBeDefined();
+ expect(knownLine).not.toContain('provenance');
+
+ expect(unknownLine).toBeDefined();
+ expect(unknownLine).toContain('⚠');
+ expect(unknownLine).toContain('provenance: unknown');
+ });
+
+ test('--show-provenance shows every provenance value', () => {
+ runSearch('quizzacious', { showProvenance: true });
+
+ const lines = output.split('\n');
+ const knownLine = lines.find(l => l.includes('decisions#'));
+ const unknownLine = lines.find(l => l.includes('breadcrumbs#'));
+
+ expect(knownLine).toContain('provenance: user_authored');
+ expect(unknownLine).toContain('provenance: unknown');
+ });
+});
diff --git a/tests/db/migrations.test.ts b/tests/db/migrations.test.ts
index 43020e0..8a52d55 100644
--- a/tests/db/migrations.test.ts
+++ b/tests/db/migrations.test.ts
@@ -110,10 +110,85 @@ describe('migration failure handling', () => {
});
});
+describe('provenance migration (8 to 9)', () => {
+ const PROVENANCE_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries']; // every table expected to gain the column
+
+ test('all memory tables have provenance column after migrations', () => {
+ applyMigrations(db);
+ for (const table of PROVENANCE_TABLES) {
+ const cols = db.prepare(`PRAGMA table_info(${table})`).all() as any[];
+ expect(cols.map((c: any) => c.name)).toContain('provenance');
+ }
+ });
+
+ test('upgrade path: ALTER adds provenance to a legacy table without it', () => {
+ // Simulate a pre-provenance install: legacy table shape, version 8.
+ const legacyDir = mkdtempSync(join(tmpdir(), 'recall-legacy-test-'));
+ const legacyDb = new Database(join(legacyDir, 'legacy.db'));
+ try {
+ legacyDb.exec(`
+ CREATE TABLE messages (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ session_id TEXT NOT NULL,
+ timestamp DATETIME NOT NULL,
+ role TEXT NOT NULL,
+ content TEXT NOT NULL,
+ project TEXT,
+ importance INTEGER DEFAULT 5
+ );
+ CREATE TABLE decisions (id INTEGER PRIMARY KEY AUTOINCREMENT, decision TEXT NOT NULL);
+ CREATE TABLE learnings (id INTEGER PRIMARY KEY AUTOINCREMENT, problem TEXT NOT NULL);
+ CREATE TABLE breadcrumbs (id INTEGER PRIMARY KEY AUTOINCREMENT, content TEXT NOT NULL);
+ CREATE TABLE loa_entries (id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, fabric_extract TEXT NOT NULL);
+ `);
+ legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content) VALUES (?, ?, ?, ?)')
+ .run('s1', '2026-01-01T00:00:00Z', 'user', 'legacy row');
+ legacyDb.prepare('PRAGMA user_version = 8').run();
+
+ const result = applyMigrations(legacyDb); // expected to start from version 8 and run the remaining migration(s)
+ expect(result.from).toBe(8);
+ expect(getMigrationVersion(legacyDb)).toBe(MIGRATIONS.length);
+
+ for (const table of PROVENANCE_TABLES) {
+ const cols = legacyDb.prepare(`PRAGMA table_info(${table})`).all() as any[];
+ expect(cols.map((c: any) => c.name)).toContain('provenance');
+ }
+
+ // Legacy rows stay NULL — unknown is never laundered into a value.
+ const row = legacyDb.prepare('SELECT provenance FROM messages WHERE session_id = ?').get('s1') as any;
+ expect(row.provenance).toBeNull();
+
+ // CHECK on the ALTERed column enforces the vocabulary but allows NULL.
+ expect(() => {
+ legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content, provenance) VALUES (?, ?, ?, ?, ?)')
+ .run('s1', '2026-01-01T00:00:01Z', 'user', 'bad', 'guessed');
+ }).toThrow();
+ legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content, provenance) VALUES (?, ?, ?, ?, ?)')
+ .run('s1', '2026-01-01T00:00:02Z', 'user', 'ok', 'verbatim');
+ } finally {
+ legacyDb.close();
+ rmSync(legacyDir, { recursive: true, force: true }); // remove the temp dir whether or not the assertions passed
+ }
+ });
+
+ test('CHECK constraint enforces vocabulary on fresh-install DDL', () => {
+ applyMigrations(db);
+ const insert = (provenance: string | null) =>
+ db.prepare('INSERT INTO breadcrumbs (content, provenance) VALUES (?, ?)').run('x', provenance);
+
+ for (const valid of ['verbatim', 'user_authored', 'extracted', 'derived', null]) {
+ expect(() => insert(valid)).not.toThrow();
+ }
+ expect(() => insert('unknown')).toThrow(); // "unknown" is not a storable value; it is represented by NULL
+ expect(() => insert('VERBATIM')).toThrow(); // the vocabulary check is case-sensitive
+ });
+});
+
describe('MIGRATIONS array', () => {
test('has expected number of migrations', () => {
// 7 → 8: importance column on messages/decisions/learnings/loa_entries (Sprint #4)
- expect(MIGRATIONS.length).toBe(8);
+ // 8 → 9: provenance column on all five memory tables (issue #42)
+ expect(MIGRATIONS.length).toBe(9);
});
test('all entries are functions', () => {
diff --git a/tests/hooks/recall-precompact.test.ts b/tests/hooks/recall-precompact.test.ts
index 13052de..4921d58 100644
--- a/tests/hooks/recall-precompact.test.ts
+++ b/tests/hooks/recall-precompact.test.ts
@@ -274,3 +274,35 @@ describe('RecallPreCompact — flushConversation', () => {
expect(hookSource).not.toMatch(/extractWithClaude|extractWithOllama|fetch\(|http\.request/);
});
});
+
+describe('RecallPreCompact — Record Provenance (ADR-0001, issue #42)', () => {
+  test('stamps flushed messages verbatim when the DB has the provenance column', async () => {
+    // Give the fixture DB the migrated shape: messages gains a provenance column.
+    const migrated = new Database(dbPath);
+    migrated.exec(`ALTER TABLE messages ADD COLUMN provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived'))`);
+    migrated.close();
+
+    writeJsonlMessages([
+      { role: 'user', text: 'a message captured mid-session' },
+      { role: 'assistant', text: 'a reply captured mid-session' },
+    ]);
+
+    const { flushConversation: flush } = await import('../../hooks/RecallPreCompact');
+    const outcome = flush(convPath, '/tmp/proj');
+    expect(outcome.imported).toBe(2);
+
+    const reader = new Database(dbPath, { readonly: true });
+    const stamped = reader.prepare('SELECT provenance FROM messages ORDER BY id').all() as Array<{ provenance: string }>;
+    reader.close();
+    expect(stamped.map((row) => row.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+
+  test('keeps working against a pre-provenance DB (column guard)', async () => {
+    // The fixture schema (CORE_SCHEMA) has no provenance column; the flush must still succeed.
+    writeJsonlMessages([{ role: 'user', text: 'legacy database flush message' }]);
+
+    const { flushConversation: flush } = await import('../../hooks/RecallPreCompact');
+    const outcome = flush(convPath, '/tmp/proj');
+    expect(outcome.imported).toBe(1);
+  });
+});
diff --git a/tests/hooks/sqlite-writers.test.ts b/tests/hooks/sqlite-writers.test.ts
index 866f7eb..4c744f4 100644
--- a/tests/hooks/sqlite-writers.test.ts
+++ b/tests/hooks/sqlite-writers.test.ts
@@ -177,3 +177,48 @@ describe('writeExtractionErrors', () => {
expect(rows[0].fix).toBe('chmod +x');
});
});
+
+describe('Record Provenance stamping (ADR-0001, issue #42)', () => {
+  test('every extraction writer stamps provenance = extracted', () => {
+    writeDecisionsBatch(dbPath, [{ decision: 'stamped decision' }]);
+    writeLearningsBatch(dbPath, [{ problem: 'stamped problem', solution: 'fix' }]);
+    writeBreadcrumbsBatch(dbPath, [{ content: 'stamped crumb' }]);
+    writeLoaEntryFromExtraction(dbPath, {
+      title: 'stamped loa',
+      fabricExtract: '## ONE SENTENCE SUMMARY\ntext',
+      sessionId: 's1',
+    });
+
+    // Read every stamp and close before asserting: a failed expectation cannot leak the handle, and the diff names the table.
+    const tables = ['decisions', 'learnings', 'breadcrumbs', 'loa_entries'];
+    const db = openRead();
+    const stamps = tables.map((table) => [table, (db.prepare(`SELECT provenance FROM ${table} LIMIT 1`).get() as any)?.provenance]);
+    db.close();
+    expect(stamps).toEqual(tables.map((table) => [table, 'extracted']));
+  });
+
+  test('still writes into a legacy DB whose tables have no provenance column', () => {
+    // Separate DB file (path derivation assumes dbPath contains 'test.db') whose decisions table has no provenance column.
+    const legacyPath = dbPath.replace('test.db', 'legacy-writers.db');
+    const legacy = new Database(legacyPath);
+    legacy.exec(`
+ CREATE TABLE decisions (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ session_id TEXT,
+ category TEXT,
+ project TEXT,
+ decision TEXT NOT NULL,
+ status TEXT DEFAULT 'active',
+ importance INTEGER DEFAULT 5
+ );
+ `);
+    legacy.close();
+
+    expect(writeDecisionsBatch(legacyPath, [{ decision: 'legacy write' }])).toBe(1);
+
+    const db = new Database(legacyPath, { readonly: true });
+    const row = db.prepare('SELECT decision FROM decisions').get() as any;
+    db.close();
+    expect(row.decision).toBe('legacy write');
+  });
+});
diff --git a/tests/lib/conversation-import.test.ts b/tests/lib/conversation-import.test.ts
index b51c7c4..82f427d 100644
--- a/tests/lib/conversation-import.test.ts
+++ b/tests/lib/conversation-import.test.ts
@@ -289,3 +289,21 @@ describe('conversationSourceAdapters', () => {
}
});
});
+
+describe('Record Provenance (ADR-0001, issue #42)', () => {
+  test('raw imported messages are stamped verbatim', async () => {
+    // Import a two-message Slack-format export with noExtract set.
+    const exportPath = join(tempDir, 'slack-export.json');
+    writeFileSync(exportPath, JSON.stringify([
+      { ts: '1710000000.000100', user: 'U1', text: 'hello from slack history' },
+      { ts: '1710000001.000200', user: 'U2', text: 'a reply worth remembering' },
+    ]));
+
+    expect((await importConversations(exportPath, { format: 'slack', noExtract: true })).messagesImported).toBe(2);
+
+    const reader = readDb();
+    const stamped = reader.prepare('SELECT provenance FROM messages ORDER BY timestamp').all() as any[];
+    reader.close();
+    expect(stamped.map((row) => row.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+});
diff --git a/tests/lib/provenance-write-paths.test.ts b/tests/lib/provenance-write-paths.test.ts
new file mode 100644
index 0000000..f7251ae
--- /dev/null
+++ b/tests/lib/provenance-write-paths.test.ts
@@ -0,0 +1,168 @@
+// Record Provenance write-path stamping (issue #42, ADR-0001).
+//
+// Provenance is automatic write-path metadata. Each capture surface stamps
+// the value its write-path semantics dictate; no public surface accepts a
+// provenance override. These tests pin the stamp per path:
+// - CLI `recall add` → user_authored
+// - structured extraction (Haiku/Fabric output) → extracted
+// - raw message capture (import/dump batch writer) → verbatim
+// - search() structured results carry provenance for every record type
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import { Database } from 'bun:sqlite';
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { runAddBreadcrumb, runAddDecision, runAddLearning } from '../../src/commands/add';
+import { writeStructuredExtraction } from '../../src/lib/structured-extraction';
+import {
+ createSession,
+ addMessage,
+ addMessagesBatch,
+ addDecision,
+ addLearning,
+ addBreadcrumb,
+ createLoaEntry,
+ search,
+} from '../../src/lib/memory';
+
+let dbPath: string;
+const savedConsoleLog = console.log;
+
+beforeEach(() => {
+  dbPath = setupTestDb();
+  console.log = () => undefined; // add commands print confirmations; keep output clean
+});
+
+afterEach(() => {
+  console.log = savedConsoleLog; // restore the logger silenced in beforeEach
+  teardownTestDb();
+});
+
+function readDb(): Database { // read-only handle on dbPath; the caller closes it
+  return new Database(dbPath, { readonly: true });
+}
+
+describe('CLI add commands stamp user_authored', () => {
+  test('breadcrumb, decision, and learning all land as user_authored', () => {
+    runAddBreadcrumb('a crumb worth keeping', { project: 'demo' });
+    runAddDecision('we choose sqlite', { project: 'demo' });
+    runAddLearning('it was broken', 'we fixed it', { project: 'demo' });
+
+    // Read every stamp and close before asserting: a failed expectation cannot leak the handle, and the diff names the table.
+    const tables = ['breadcrumbs', 'decisions', 'learnings'];
+    const db = readDb();
+    const stamps = tables.map((table) => [table, (db.prepare(`SELECT provenance FROM ${table} LIMIT 1`).get() as any)?.provenance]);
+    db.close();
+    expect(stamps).toEqual(tables.map((table) => [table, 'user_authored']));
+  });
+});
+
+describe('structured extraction stamps extracted', () => {
+  test('decisions and LoA entry from an extract are marked extracted', () => {
+    createSession({ session_id: 'ext-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    const extracted = [ // a summary section followed by a single decision bullet
+      '## ONE SENTENCE SUMMARY',
+      'a one sentence summary',
+      '',
+      '## DECISIONS MADE',
+      '- Adopt write-path provenance stamping (confidence: HIGH)',
+    ].join('\n');
+    const written = writeStructuredExtraction({
+      sessionId: 'ext-1',
+      sessionLabel: 'extraction test',
+      project: 'demo',
+      timestamp: '2026-01-01',
+      conversationPath: '/tmp/conv.jsonl',
+      topics: ['testing'],
+      summary: 'a one sentence summary',
+      extracted,
+    });
+    expect(written.decisions).toBe(1);
+    expect(written.loa).toBe(1);
+
+    const reader = readDb();
+    const decisionRow = reader.prepare('SELECT provenance FROM decisions LIMIT 1').get() as any;
+    const loaRow = reader.prepare('SELECT provenance FROM loa_entries LIMIT 1').get() as any;
+    reader.close();
+    expect(decisionRow.provenance).toBe('extracted');
+    expect(loaRow.provenance).toBe('extracted');
+  });
+});
+
+describe('raw message capture', () => {
+  test('batch writer persists verbatim when the import path stamps it', () => {
+    createSession({ session_id: 'imp-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    // Stands in for the import paths (import.ts / conversation-import.ts / dump.ts), which are
+    // described as mapping messages through addMessagesBatch with provenance: 'verbatim'.
+    addMessagesBatch([
+      { session_id: 'imp-1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'raw text', provenance: 'verbatim' },
+      { session_id: 'imp-1', timestamp: '2026-01-01T00:00:02Z', role: 'assistant', content: 'raw reply', provenance: 'verbatim' },
+    ]);
+
+    const reader = readDb();
+    const stamped = reader.prepare('SELECT provenance FROM messages ORDER BY id').all() as any[];
+    reader.close();
+    expect(stamped.map((entry) => entry.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+
+  test('a write without provenance stays NULL — unknown is representable, never defaulted', () => {
+    createSession({ session_id: 'imp-2', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+    addMessage({ session_id: 'imp-2', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'unstamped' });
+
+    const reader = readDb();
+    const unstamped = reader.prepare('SELECT provenance FROM messages LIMIT 1').get() as any;
+    reader.close();
+    expect(unstamped.provenance).toBeNull();
+  });
+});
+
+describe('search() structured results carry provenance', () => {
+  test('every record type returns its provenance; NULL surfaces as null', () => {
+    createSession({ session_id: 'srch-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    addMessage({ session_id: 'srch-1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'xylocarp message', provenance: 'verbatim' });
+    addDecision({ session_id: 'srch-1', decision: 'xylocarp decision', status: 'active', provenance: 'user_authored' });
+    addLearning({ session_id: 'srch-1', problem: 'xylocarp problem', solution: 'fix', provenance: 'extracted' });
+    addBreadcrumb({ session_id: 'srch-1', content: 'xylocarp crumb', importance: 5, provenance: 'user_authored' });
+    createLoaEntry({ title: 'xylocarp loa', fabric_extract: 'xylocarp extract body', session_id: 'srch-1', provenance: 'extracted' });
+    // written without a provenance value, like a legacy row of unknown origin
+    addBreadcrumb({ session_id: 'srch-1', content: 'xylocarp legacy crumb', importance: 5 });
+
+    const hits = search('xylocarp', { limit: 20 });
+    const hitsByKey = new Map(hits.map((hit) => [`${hit.table}:${hit.content}`, hit]));
+    const provenanceOf = (key: string) => hitsByKey.get(key)?.provenance;
+
+    expect(provenanceOf('messages:xylocarp message')).toBe('verbatim');
+    expect(provenanceOf('decisions:xylocarp decision')).toBe('user_authored');
+    expect(provenanceOf('learnings:xylocarp problem')).toBe('extracted');
+    expect(provenanceOf('breadcrumbs:xylocarp crumb')).toBe('user_authored');
+    expect(provenanceOf('breadcrumbs:xylocarp legacy crumb')).toBeNull();
+
+    expect(hits.find((hit) => hit.table === 'loa')?.provenance).toBe('extracted');
+  });
+});
+
+describe('no public provenance override (ADR-0001 contract)', () => {
+  const repoRoot = join(import.meta.dir, '..', '..');
+
+  test('MCP memory_add input schema exposes no provenance parameter', () => {
+    const source = readFileSync(join(repoRoot, 'src', 'mcp-server.ts'), 'utf-8');
+    const toolStart = source.indexOf('"memory_add"');
+    expect(toolStart).toBeGreaterThan(-1);
+    // Schema = text between the tool name and the handler; a -1 handlerStart would make slice() span the rest of the file.
+    const handlerStart = source.indexOf('async (', toolStart);
+    expect(handlerStart).toBeGreaterThan(toolStart);
+    expect(source.slice(toolStart, handlerStart)).not.toContain('provenance');
+    // The handler stamps it instead; when no later server.tool exists, read through to the end of the file.
+    const nextTool = source.indexOf('server.tool', handlerStart);
+    expect(source.slice(handlerStart, nextTool === -1 ? undefined : nextTool)).toContain('provenance: "user_authored"');
+  });
+
+  test('CLI exposes no --provenance flag anywhere', () => {
+    const cliSource = readFileSync(join(repoRoot, 'src', 'index.ts'), 'utf-8');
+    expect(cliSource).not.toContain('--provenance'); // a flag such as --show-provenance is not a match
+  });
+});