From 061fd0d3f0523f91de204b503e43d3ba3b8a0005 Mon Sep 17 00:00:00 2001
From: Ed Heltzel <402910+edheltzel@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:34:06 -0400
Subject: [PATCH 1/3] feat(provenance): add Record Provenance as automatic
 write-path metadata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Migration 8->9 adds a nullable provenance column (CHECK-constrained to
verbatim/user_authored/extracted/derived) to messages, decisions,
learnings, breadcrumbs, and loa_entries. Legacy rows stay NULL (unknown)
per ADR-0001 — never guessed, never laundered.

Write paths stamp provenance automatically:
- CLI add + MCP memory_add -> user_authored (no public override)
- raw conversation capture (import, dump, PreCompact flush) -> verbatim
- extraction writers (hooks, structured extraction, LoA, import-legacy) -> extracted
- derived reserved for future internal paths

recall provenance backfill classifies legacy rows on deterministic
evidence only; it is a dry run by default — pass --execute to apply.

CLI search flags unknown provenance by default; --show-provenance shows
all values. MCP search/hybrid/recall payloads carry provenance for every
record type.

Refs #42, ADR-0001
---
 hooks/RecallPreCompact.ts        |   9 +-
 hooks/lib/sqlite-writers.ts      |  38 +++++---
 src/commands/add.ts              |  10 ++-
 src/commands/dump.ts             |   8 +-
 src/commands/import-legacy.ts    |   5 +-
 src/commands/loa.ts              |   6 +-
 src/commands/provenance.ts       | 148 +++++++++++++++++++++++++++++++
 src/commands/search.ts           |  12 ++-
 src/db/migrations.ts             |  19 ++++
 src/db/schema.ts                 |   5 ++
 src/index.ts                     |  28 +++++-
 src/lib/conversation-import.ts   |   4 +-
 src/lib/import.ts                |   4 +-
 src/lib/memory.ts                |  56 +++++++-----
 src/lib/structured-extraction.ts |   4 +
 src/mcp-server.ts                |  38 ++++++--
 src/types/index.ts               |  15 ++++
 tests/db/migrations.test.ts      |  77 +++++++++++++++-
 18 files changed, 424 insertions(+), 62 deletions(-)
 create mode 100644 src/commands/provenance.ts

diff --git a/hooks/RecallPreCompact.ts b/hooks/RecallPreCompact.ts
index 4460306..a1ae8de 100644
--- a/hooks/RecallPreCompact.ts
+++ b/hooks/RecallPreCompact.ts
@@ -358,10 +358,13 @@ export function flushConversation(convPath: string, cwd: string): FlushResult {
 
       // Insert messages. importance defaults to 5 — these are mid-session
       // captures, not curated, and the Stop hook may later promote a subset
-      // to LoA at importance 8.
+      // to LoA at importance 8. Raw transcript capture is verbatim
+      // (ADR-0001); the column guard keeps pre-provenance DBs working.
+      const hasProvenance = (db.prepare('PRAGMA table_info(messages)').all() as Array<{ name: string }>)
+        .some((c) => c.name === 'provenance');
       const insertMessage = db.prepare(`
-        INSERT INTO messages (session_id, timestamp, role, content, project, importance)
-        VALUES (?, ?, ?, ?, ?, 5)
+        INSERT INTO messages (session_id, timestamp, role, content, project, importance${hasProvenance ? ', provenance' : ''})
+        VALUES (?, ?, ?, ?, ?, 5${hasProvenance ? ", 'verbatim'" : ''})
       `);
 
       const tx = db.transaction((rows: ParsedMessage[]) => {
diff --git a/hooks/lib/sqlite-writers.ts b/hooks/lib/sqlite-writers.ts
index afc5880..3802911 100644
--- a/hooks/lib/sqlite-writers.ts
+++ b/hooks/lib/sqlite-writers.ts
@@ -42,6 +42,16 @@ function columnExists(db: Database, table: string, column: string): boolean {
   }
 }
 
+// ADR-0001: all writers in this file are extraction paths, so every record
+// they insert carries provenance = 'extracted'. The value is inlined as a SQL
+// literal rather than bound, which keeps the legacy-DB guard a plain string
+// switch: databases lacking the provenance column get empty fragments.
+function provenanceFragment(db: Database, table: string): { col: string; val: string } {
+  const hasColumn = columnExists(db, table, 'provenance');
+  if (!hasColumn) return { col: '', val: '' };
+  return { col: ', provenance', val: ", 'extracted'" };
+}
+
 // ---------------------------------------------------------------------------
 // extraction_sessions
 // ---------------------------------------------------------------------------
@@ -105,11 +115,12 @@ export function writeDecisionsBatch(dbPath: string, items: DecisionInput[]): num
   try {
     if (!tableExists(db, 'decisions')) return 0;
     const hasConfidence = columnExists(db, 'decisions', 'confidence');
+    const provenance = provenanceFragment(db, 'decisions');
     const sql = hasConfidence
-      ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance)
-         VALUES (?, ?, ?, ?, 'active', ?, ?)`
-      : `INSERT INTO decisions (session_id, category, project, decision, status, importance)
-         VALUES (?, ?, ?, ?, 'active', ?)`;
+      ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance${provenance.col})
+         VALUES (?, ?, ?, ?, 'active', ?, ?${provenance.val})`
+      : `INSERT INTO decisions (session_id, category, project, decision, status, importance${provenance.col})
+         VALUES (?, ?, ?, ?, 'active', ?${provenance.val})`;
     const stmt = db.prepare(sql);
     const insertMany = db.transaction((batch: DecisionInput[]) => {
       let n = 0;
@@ -165,11 +176,12 @@ export function writeLearningsBatch(dbPath: string, items: LearningInput[]): num
   try {
     if (!tableExists(db, 'learnings')) return 0;
     const hasConfidence = columnExists(db, 'learnings', 'confidence');
+    const provenance = provenanceFragment(db, 'learnings');
     const sql = hasConfidence
-      ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
-      : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
+      ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
+      : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`;
     const stmt = db.prepare(sql);
     const insertMany = db.transaction((batch: LearningInput[]) => {
       let n = 0;
@@ -227,9 +239,10 @@ export function writeBreadcrumbsBatch(dbPath: string, items: BreadcrumbInput[]):
   const db = openDb(dbPath);
   try {
     if (!tableExists(db, 'breadcrumbs')) return 0;
+    const provenance = provenanceFragment(db, 'breadcrumbs');
     const stmt = db.prepare(
-      `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at)
-       VALUES (?, ?, ?, ?, ?, ?)`
+      `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at${provenance.col})
+       VALUES (?, ?, ?, ?, ?, ?${provenance.val})`
     );
     const insertMany = db.transaction((batch: BreadcrumbInput[]) => {
       let n = 0;
@@ -273,11 +286,12 @@ export function writeLoaEntryFromExtraction(dbPath: string, entry: LoaInput): nu
     if (!tableExists(db, 'loa_entries')) return 0;
     // LoA importance is floored at 5 (curated tier guardrail).
     const importance = Math.max(5, clampImportance(entry.importance, 8));
+    const provenance = provenanceFragment(db, 'loa_entries');
     const result = db
       .prepare(
         `INSERT INTO loa_entries
-           (title, description, fabric_extract, session_id, project, tags, message_count, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
+           (title, description, fabric_extract, session_id, project, tags, message_count, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
       )
       .run(
         entry.title,
diff --git a/src/commands/add.ts b/src/commands/add.ts
index c4befb9..62e65c3 100644
--- a/src/commands/add.ts
+++ b/src/commands/add.ts
@@ -21,7 +21,9 @@ export function runAddBreadcrumb(content: string, options: AddBreadcrumbOptions)
     content,
     project,
     category: options.category,
-    importance: options.importance ?? 5
+    importance: options.importance ?? 5,
+    // ADR-0001: provenance is stamped from the write path, never a CLI flag.
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added breadcrumb #${id}${project ? ` [${project}]` : ''}`);
@@ -51,7 +53,8 @@ export function runAddDecision(decision: string, options: AddDecisionOptions): v
     reasoning: options.why,
     alternatives: options.alternatives,
     status: 'active',
-    confidence
+    confidence,
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added decision #${id}${project ? ` [${project}]` : ''} (${confidence})`);
@@ -78,7 +81,8 @@ export function runAddLearning(problem: string, solution: string, options: AddLe
     project,
     category: options.category,
     prevention: options.prevention,
-    tags: options.tags
+    tags: options.tags,
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added learning #${id}${project ? ` [${project}]` : ''}`);
diff --git a/src/commands/dump.ts b/src/commands/dump.ts
index d87caec..dae54bb 100644
--- a/src/commands/dump.ts
+++ b/src/commands/dump.ts
@@ -387,7 +387,8 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
     summary: `Dumped: ${title}`
   });
 
-  const importedCount = addMessagesBatch(session.messages);
+  // Raw conversation capture is verbatim (ADR-0001).
+  const importedCount = addMessagesBatch(session.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
 
   // Get imported message IDs for LoA
   const db = getDb();
@@ -429,7 +430,10 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
     parent_loa_id: options.continues,
     project: options.project || session.project,
     tags: options.tags,
-    message_count: importedMessages.length
+    message_count: importedMessages.length,
+    // Fabric output and the basic-summary fallback are both generated from
+    // the session messages — extracted either way (ADR-0001).
+    provenance: 'extracted'
   });
 
   await autoEmbedLoaEntry(loaId, title, fabricExtract);
diff --git a/src/commands/import-legacy.ts b/src/commands/import-legacy.ts
index 6d8b5ac..fc8cf98 100644
--- a/src/commands/import-legacy.ts
+++ b/src/commands/import-legacy.ts
@@ -164,7 +164,10 @@ export function runImportLegacy(options: ImportLegacyOptions): void {
         message_range_start: undefined,
         message_range_end: undefined,
         message_count: undefined,
-        tags: 'legacy,imported'
+        tags: 'legacy,imported',
+        // DISTILLED.md / HOT_RECALL.md content is prior extraction output —
+        // the record stays honest as extracted (ADR-0001).
+        provenance: 'extracted'
       });
 
       // Update the created_at to match the original date
diff --git a/src/commands/loa.ts b/src/commands/loa.ts
index 94d0f36..932920a 100644
--- a/src/commands/loa.ts
+++ b/src/commands/loa.ts
@@ -115,7 +115,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise<void>
     process.exit(1);
   }
 
-  // Create LoA entry
+  // Create LoA entry — Fabric extract_wisdom output is generated from the
+  // session messages, so the record is extracted (ADR-0001).
   const id = createLoaEntry({
     title,
     description: `Captured ${messages.length} messages`,
@@ -125,7 +126,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise<void>
     parent_loa_id: options.continues,
     project,
     tags: options.tags,
-    message_count: messages.length
+    message_count: messages.length,
+    provenance: 'extracted'
   });
 
   console.log(`\n✓ LoA #${id} captured: "${title}"`);
diff --git a/src/commands/provenance.ts b/src/commands/provenance.ts
new file mode 100644
index 0000000..db0cde1
--- /dev/null
+++ b/src/commands/provenance.ts
@@ -0,0 +1,148 @@
+// recall provenance — conservative backfill for the Record Provenance column.
+//
+// Background (ADR-0001, CONTEXT.md, issue #42):
+// Migration 8→9 added a nullable `provenance` column to messages/decisions/
+// learnings/breadcrumbs/loa_entries. Write paths stamp provenance going
+// forward; legacy rows are NULL ("unknown"). This command classifies legacy
+// rows — and ONLY where the source table or a write-path marker gives
+// deterministic evidence.
+//
+// Binding rules:
+// - NEVER guess. A row with no deterministic evidence stays NULL and is
+//   reported as unknown.
+// - NEVER overwrite. Only rows with provenance IS NULL are touched.
+// - `user_authored` is never assigned by backfill: nothing in the data
+//   distinguishes a CLI/MCP-authored row from an extraction row that was
+//   given a custom category.
+//
+// Evidence table:
+// - messages    → 'verbatim'  — every message writer that has ever existed
+//                 (JSONL import, conversation import, dump, PreCompact flush)
+//                 captures raw transcript text without semantic rewriting.
+// - loa_entries → 'extracted' — every LoA writer stores machine-generated
+//                 content (Fabric/Haiku extracts, basic-summary fallback, or
+//                 prior DISTILLED.md extraction output via import-legacy).
+// - decisions   → 'extracted' iff category = 'auto-extracted' (the marker the
+//                 extraction writers stamp). Other rows: unknown.
+// - learnings   → 'extracted' iff category = 'auto-extracted'. Else unknown.
+// - breadcrumbs → 'extracted' iff category = 'extracted-idea'. Else unknown.
+//
+// Bind-count note (see src/lib/chunk.ts): every statement here binds zero
+// variables — bulk UPDATEs with literal predicates — so no chunking applies.
+
+import { getDb } from '../db/connection.js';
+
+const BACKFILL_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'] as const; // valid --table targets
+type BackfillTable = typeof BACKFILL_TABLES[number];
+
+export interface ProvenanceBackfillOptions {
+  dryRun?: boolean; // omitted → true: report only, write nothing
+  table?: BackfillTable | 'all'; // omitted → 'all'
+}
+
+interface TableRule {
+  table: BackfillTable;
+  value: 'verbatim' | 'extracted'; // the only values backfill may assign
+  // SQL predicate (beyond provenance IS NULL) that constitutes the
+  // deterministic evidence; undefined = the whole table qualifies.
+  evidenceWhere?: string;
+  evidence: string; // human-readable justification printed in the report
+}
+
+const RULES: TableRule[] = [ // one rule per backfillable table
+  {
+    table: 'messages',
+    value: 'verbatim',
+    evidence: 'raw conversation capture is the only historical write path',
+  },
+  {
+    table: 'loa_entries',
+    value: 'extracted',
+    evidence: 'all historical LoA writers store machine-generated extracts',
+  },
+  {
+    table: 'decisions',
+    value: 'extracted',
+    evidenceWhere: "category = 'auto-extracted'",
+    evidence: "category = 'auto-extracted' (extraction-writer marker)",
+  },
+  {
+    table: 'learnings',
+    value: 'extracted',
+    evidenceWhere: "category = 'auto-extracted'",
+    evidence: "category = 'auto-extracted' (extraction-writer marker)",
+  },
+  {
+    table: 'breadcrumbs',
+    value: 'extracted',
+    evidenceWhere: "category = 'extracted-idea'",
+    evidence: "category = 'extracted-idea' (extraction-writer marker)",
+  },
+];
+
+export interface ProvenanceBackfillResult {
+  table: string;
+  value: string; // provenance value the rule assigns
+  unknownBefore: number; // rows with provenance IS NULL before the run
+  classified: number; // NULL rows matching the rule's evidence (set, or would be set in dry-run)
+  remainingUnknown: number; // unknownBefore - classified
+  evidence: string;
+}
+
+/**
+ * Classify legacy rows whose provenance is NULL, using only the evidence in RULES.
+ * Dry-run unless `options.dryRun` is false; always prints a per-table report.
+ * Counts and UPDATEs share one transaction, so a live run is all-or-nothing and
+ * the reported numbers are the rows actually written.
+ * @param options - `dryRun` (default true) and `table` (default 'all').
+ * @returns One result per processed table; [] (exit code 1) for an unknown table.
+ */
+export function runProvenanceBackfill(options: ProvenanceBackfillOptions = {}): ProvenanceBackfillResult[] {
+  const dryRun = options.dryRun ?? true;
+  const target = options.table ?? 'all';
+  if (target !== 'all' && !(BACKFILL_TABLES as readonly string[]).includes(target)) {
+    console.error(`Unknown table: ${target}. Use one of: ${BACKFILL_TABLES.join(', ')}, all`);
+    process.exitCode = 1;
+    return [];
+  }
+
+  const db = getDb();
+  const results: ProvenanceBackfillResult[] = [];
+  const count = (table: string, where: string) =>
+    (db.prepare(`SELECT COUNT(*) as count FROM ${table} WHERE ${where}`).get() as { count: number }).count;
+  // Single transaction: a failure on any table rolls back every earlier UPDATE.
+  db.transaction(() => {
+    for (const rule of RULES) {
+      if (target !== 'all' && target !== rule.table) continue;
+      const unknownBefore = count(rule.table, 'provenance IS NULL');
+      const evidenceClause = rule.evidenceWhere
+        ? `provenance IS NULL AND ${rule.evidenceWhere}`
+        : 'provenance IS NULL';
+      // Live runs report the rows the UPDATE actually changed, not a pre-count.
+      const classified = dryRun
+        ? count(rule.table, evidenceClause)
+        : db.prepare(`UPDATE ${rule.table} SET provenance = '${rule.value}' WHERE ${evidenceClause}`).run().changes;
+      results.push({
+        table: rule.table,
+        value: rule.value,
+        unknownBefore,
+        classified,
+        remainingUnknown: unknownBefore - classified,
+        evidence: rule.evidence,
+      });
+    }
+  })();
+
+  // Report
+  console.log(dryRun ? '[DRY RUN — no changes written]\n' : '[LIVE — changes written]\n');
+  for (const r of results) {
+    const verb = dryRun ? 'would set' : 'set';
+    console.log(`${r.table}: ${r.unknownBefore} unknown — ${verb} ${r.classified} to ${r.value}`);
+    console.log(`  evidence: ${r.evidence}`);
+    if (r.remainingUnknown > 0) {
+      console.log(`  ${r.remainingUnknown} left unknown (no deterministic evidence — staying NULL)`);
+    }
+    console.log('');
+  }
+  if (dryRun) console.log('Re-run with --execute to apply changes.');
+  return results;
+}
diff --git a/src/commands/search.ts b/src/commands/search.ts
index 6dc5092..2c13579 100644
--- a/src/commands/search.ts
+++ b/src/commands/search.ts
@@ -7,6 +7,7 @@ interface SearchOptions {
   table?: string;
   biasType?: string;
   limit?: number;
+  showProvenance?: boolean;
 }
 
 export function runSearch(query: string, options: SearchOptions): void {
@@ -40,7 +41,16 @@ export function runSearch(query: string, options: SearchOptions): void {
     const projectTag = result.project ? ` [${result.project}]` : '';
     const date = result.created_at.split('T')[0];
 
-    console.log(`[${result.table}#${result.id}]${projectTag} ${date}`);
+    // Display contract (issue #42): known provenance stays quiet by default;
+    // unknown (NULL) is always flagged. --show-provenance shows every value.
+    let provenanceTag = '';
+    if (options.showProvenance) {
+      provenanceTag = ` [provenance: ${result.provenance ?? 'unknown'}]`;
+    } else if (!result.provenance) {
+      provenanceTag = ' ⚠ [provenance: unknown]';
+    }
+
+    console.log(`[${result.table}#${result.id}]${projectTag} ${date}${provenanceTag}`);
     console.log(`  ${preview.replace(/\n/g, ' ')}`);
     console.log('');
   }
diff --git a/src/db/migrations.ts b/src/db/migrations.ts
index f567c03..415e1a4 100644
--- a/src/db/migrations.ts
+++ b/src/db/migrations.ts
@@ -179,6 +179,25 @@ export const MIGRATIONS: Migration[] = [
     db.prepare('CREATE INDEX IF NOT EXISTS idx_learnings_importance ON learnings(importance)').run();
     db.prepare('CREATE INDEX IF NOT EXISTS idx_loa_importance ON loa_entries(importance)').run();
   },
+
+  // Migration 8 → 9: Record Provenance (ADR-0001, issue #42).
+  // Additive nullable column on all memory tables. Provenance is automatic
+  // write-path metadata; legacy rows stay NULL ("unknown") until explicitly
+  // backfilled via `recall provenance backfill` — never guessed, no default.
+  // The CHECK passes for NULL (IN() evaluates to NULL), so unknown stays
+  // representable. Only "duplicate column" (fresh install) is ignored below.
+  (db) => {
+    const tables = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'];
+    for (const table of tables) {
+      try {
+        db.prepare(
+          `ALTER TABLE ${table} ADD COLUMN provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived'))`
+        ).run();
+      } catch (err: unknown) {
+        if (!(err instanceof Error && /duplicate column name/i.test(err.message))) throw err;
+      }
+    }
+  },
 ];
 
 // ---------------------------------------------------------------------------
diff --git a/src/db/schema.ts b/src/db/schema.ts
index 3da02e8..7f60912 100644
--- a/src/db/schema.ts
+++ b/src/db/schema.ts
@@ -25,6 +25,7 @@ CREATE TABLE IF NOT EXISTS messages (
   content TEXT NOT NULL,
   project TEXT,
   importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+  provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
   FOREIGN KEY (session_id) REFERENCES sessions(session_id)
 );
 
@@ -40,6 +41,7 @@ CREATE TABLE IF NOT EXISTS decisions (
   alternatives TEXT,
   status TEXT DEFAULT 'active' CHECK (status IN ('active', 'superseded', 'reverted')),
   importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+  provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
   FOREIGN KEY (session_id) REFERENCES sessions(session_id)
 );
 
@@ -55,6 +57,7 @@ CREATE TABLE IF NOT EXISTS learnings (
   prevention TEXT,
   tags TEXT,
   importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+  provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
   FOREIGN KEY (session_id) REFERENCES sessions(session_id)
 );
 
@@ -67,6 +70,7 @@ CREATE TABLE IF NOT EXISTS breadcrumbs (
   category TEXT,
   project TEXT,
   importance INTEGER DEFAULT 5 CHECK (importance BETWEEN 1 AND 10),
+  provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
   expires_at DATETIME,
   FOREIGN KEY (session_id) REFERENCES sessions(session_id)
 );
@@ -92,6 +96,7 @@ CREATE TABLE IF NOT EXISTS loa_entries (
   tags TEXT,
   message_count INTEGER,
   importance INTEGER DEFAULT 8 CHECK (importance BETWEEN 1 AND 10),
+  provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived')),
   FOREIGN KEY (parent_loa_id) REFERENCES loa_entries(id),
   FOREIGN KEY (message_range_start) REFERENCES messages(id),
   FOREIGN KEY (message_range_end) REFERENCES messages(id)
diff --git a/src/index.ts b/src/index.ts
index 2a0ab42..e36e94b 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -24,6 +24,7 @@ import { runCluster } from './commands/cluster.js';
 import { runEmbedBackfill, runSemanticSearch, runEmbedStats, runHybridSearch } from './commands/embed.js';
 import { runDoctor } from './commands/doctor.js';
 import { runImportanceBackfill, runPin, runUnpin } from './commands/importance.js';
+import { runProvenanceBackfill } from './commands/provenance.js';
 import { runBenchmark, listBenchmarks, reportLatestBenchmark } from './commands/benchmark.js';
 import { runOnboard } from './commands/onboard.js';
 import { runMigrate } from './commands/migrate.js';
@@ -177,12 +178,14 @@ program
   .option('-t, --table <table>', 'Hard-filter to one table (messages, loa, decisions, learnings, breadcrumbs)')
   .option('--bias-type <table>', 'Softly boost one table without filtering others (messages, loa, decisions, learnings, breadcrumbs)')
   .option('-l, --limit <n>', 'Max results', '20')
+  .option('--show-provenance', 'Show provenance for every result (default: only unknown provenance is flagged)')
   .action((query, options) => {
     runSearch(query, {
       project: options.project,
       table: options.table,
       biasType: options.biasType,
-      limit: parseInt(options.limit, 10)
+      limit: parseInt(options.limit, 10),
+      showProvenance: options.showProvenance
     });
     closeDb();
   });
@@ -535,6 +538,27 @@ importanceCmd
     closeDb();
   });
 
+// recall provenance — conservative backfill for Record Provenance (ADR-0001).
+// Provenance is automatic write-path metadata: there is intentionally no
+// flag to set it on add commands; this maintenance path only classifies
+// legacy NULL rows where deterministic evidence exists.
+const provenanceCmd = program
+  .command('provenance')
+  .description('Manage Record Provenance metadata on memory records');
+
+provenanceCmd
+  .command('backfill')
+  .description('Classify legacy rows with unknown provenance using deterministic write-path evidence (dry-run by default; never guesses)')
+  .option('--execute', 'Apply changes (default is dry-run)')
+  .option('-t, --table <table>', 'Target table: messages, decisions, learnings, breadcrumbs, loa_entries, all', 'all')
+  .action((options) => {
+    runProvenanceBackfill({
+      dryRun: !options.execute, // dry-run unless --execute was passed
+      table: options.table // raw CLI string; runProvenanceBackfill rejects unknown names
+    });
+    closeDb();
+  });
+
 // recall pin <table> <id> [importance] — force a record to a high importance (default 10)
 program
   .command('pin <table> <id> [importance]')
@@ -620,7 +644,7 @@ program
   .option('-k, --keyword', 'Use keyword search only (FTS5)')
   .option('-v, --vector', 'Use vector search only (semantic)')
   .action(async (query, options) => {
-    if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path'].includes(query)) {
+    if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'provenance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path'].includes(query)) {
       if (options.keyword) {
         // FTS5 only
         runSearch(query, {
diff --git a/src/lib/conversation-import.ts b/src/lib/conversation-import.ts
index 382b790..8e1c70c 100644
--- a/src/lib/conversation-import.ts
+++ b/src/lib/conversation-import.ts
@@ -567,7 +567,9 @@ export async function importConversations(
         source: session.source,
       });
 
-      const count = addMessagesBatch(session.messages);
+      // Raw conversation capture is verbatim (ADR-0001); the structured
+      // extraction below stamps its own records as extracted.
+      const count = addMessagesBatch(session.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
       result.sessionsImported++;
       result.messagesImported += count;
 
diff --git a/src/lib/import.ts b/src/lib/import.ts
index 66d9785..dd7c930 100644
--- a/src/lib/import.ts
+++ b/src/lib/import.ts
@@ -184,8 +184,8 @@ export function importAllSessions(options?: { dryRun?: boolean; verbose?: boolea
         summary: `Imported from ${basename(file)}`
       });
 
-      // Insert messages in batch
-      const count = addMessagesBatch(parsed.messages);
+      // Insert messages in batch — raw conversation capture is verbatim (ADR-0001)
+      const count = addMessagesBatch(parsed.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
 
       result.sessionsImported++;
       result.messagesImported += count;
diff --git a/src/lib/memory.ts b/src/lib/memory.ts
index f3695a5..eb89ce3 100644
--- a/src/lib/memory.ts
+++ b/src/lib/memory.ts
@@ -2,7 +2,7 @@
 
 import { getDb, getDbPath } from '../db/connection.js';
 import { existsSync, statSync } from 'fs';
-import type { Session, Message, Decision, Learning, Breadcrumb, LoaEntry, Stats, SearchResult } from '../types/index.js';
+import type { Session, Message, Decision, Learning, Breadcrumb, LoaEntry, Stats, SearchResult, Provenance } from '../types/index.js';
 
 // ============ Sessions ============
 
@@ -48,8 +48,8 @@ export function endSession(sessionId: string, summary?: string): void {
 export function addMessage(message: Omit<Message, 'id'>): number {
   const db = getDb();
   const stmt = db.prepare(`
-    INSERT INTO messages (session_id, timestamp, role, content, project, importance)
-    VALUES ($session_id, $timestamp, $role, $content, $project, $importance)
+    INSERT INTO messages (session_id, timestamp, role, content, project, importance, provenance)
+    VALUES ($session_id, $timestamp, $role, $content, $project, $importance, $provenance)
   `);
   const result = stmt.run({
     $session_id: message.session_id,
@@ -57,7 +57,8 @@ export function addMessage(message: Omit<Message, 'id'>): number {
     $role: message.role,
     $content: message.content,
     $project: message.project || null,
-    $importance: clampImportance(message.importance, 5)
+    $importance: clampImportance(message.importance, 5),
+    $provenance: message.provenance ?? null
   });
   return result.lastInsertRowid as number;
 }
@@ -65,8 +66,8 @@ export function addMessage(message: Omit<Message, 'id'>): number {
 export function addMessagesBatch(messages: Omit<Message, 'id'>[]): number {
   const db = getDb();
   const stmt = db.prepare(`
-    INSERT INTO messages (session_id, timestamp, role, content, project, importance)
-    VALUES ($session_id, $timestamp, $role, $content, $project, $importance)
+    INSERT INTO messages (session_id, timestamp, role, content, project, importance, provenance)
+    VALUES ($session_id, $timestamp, $role, $content, $project, $importance, $provenance)
   `);
 
   const insertMany = db.transaction((msgs: Omit<Message, 'id'>[]) => {
@@ -78,7 +79,8 @@ export function addMessagesBatch(messages: Omit<Message, 'id'>[]): number {
         $role: msg.role,
         $content: msg.content,
         $project: msg.project || null,
-        $importance: clampImportance(msg.importance, 5)
+        $importance: clampImportance(msg.importance, 5),
+        $provenance: msg.provenance ?? null
       });
       count++;
     }
@@ -113,8 +115,8 @@ export function pinRecord(table: 'decisions' | 'learnings' | 'breadcrumbs' | 'lo
 export function addDecision(decision: Omit<Decision, 'id' | 'created_at'>): number {
   const db = getDb();
   const stmt = db.prepare(`
-    INSERT INTO decisions (session_id, category, project, decision, reasoning, alternatives, status, confidence, importance)
-    VALUES ($session_id, $category, $project, $decision, $reasoning, $alternatives, $status, $confidence, $importance)
+    INSERT INTO decisions (session_id, category, project, decision, reasoning, alternatives, status, confidence, importance, provenance)
+    VALUES ($session_id, $category, $project, $decision, $reasoning, $alternatives, $status, $confidence, $importance, $provenance)
   `);
   const result = stmt.run({
     $session_id: decision.session_id || null,
@@ -125,7 +127,8 @@ export function addDecision(decision: Omit<Decision, 'id' | 'created_at'>): numb
     $alternatives: decision.alternatives || null,
     $status: decision.status || 'active',
     $confidence: decision.confidence || 'medium',
-    $importance: clampImportance(decision.importance, 5)
+    $importance: clampImportance(decision.importance, 5),
+    $provenance: decision.provenance ?? null
   });
   return result.lastInsertRowid as number;
 }
@@ -207,8 +210,8 @@ export function findSimilarDecisions(text: string, limit = 3): Decision[] {
 export function addLearning(learning: Omit<Learning, 'id' | 'created_at'>): number {
   const db = getDb();
   const stmt = db.prepare(`
-    INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance)
-    VALUES ($session_id, $category, $project, $problem, $solution, $prevention, $tags, $confidence, $importance)
+    INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance, provenance)
+    VALUES ($session_id, $category, $project, $problem, $solution, $prevention, $tags, $confidence, $importance, $provenance)
   `);
   const result = stmt.run({
     $session_id: learning.session_id || null,
@@ -219,7 +222,8 @@ export function addLearning(learning: Omit<Learning, 'id' | 'created_at'>): numb
     $prevention: learning.prevention || null,
     $tags: learning.tags || null,
     $confidence: learning.confidence || 'medium',
-    $importance: clampImportance(learning.importance, 5)
+    $importance: clampImportance(learning.importance, 5),
+    $provenance: learning.provenance ?? null
   });
   return result.lastInsertRowid as number;
 }
@@ -234,8 +238,8 @@ export function getLearning(id: number): Learning | undefined {
 export function addBreadcrumb(breadcrumb: Omit<Breadcrumb, 'id' | 'created_at'>): number {
   const db = getDb();
   const stmt = db.prepare(`
-    INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at)
-    VALUES ($session_id, $content, $category, $project, $importance, $expires_at)
+    INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at, provenance)
+    VALUES ($session_id, $content, $category, $project, $importance, $expires_at, $provenance)
   `);
   const result = stmt.run({
     $session_id: breadcrumb.session_id || null,
@@ -243,7 +247,8 @@ export function addBreadcrumb(breadcrumb: Omit<Breadcrumb, 'id' | 'created_at'>)
     $category: breadcrumb.category || null,
     $project: breadcrumb.project || null,
     $importance: breadcrumb.importance ?? 5,
-    $expires_at: breadcrumb.expires_at || null
+    $expires_at: breadcrumb.expires_at || null,
+    $provenance: breadcrumb.provenance ?? null
   });
   return result.lastInsertRowid as number;
 }
@@ -304,7 +309,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
     switch (table) {
       case 'messages':
         sql = `
-          SELECT m.id, m.content, m.project, m.timestamp as created_at, f.rank
+          SELECT m.id, m.content, m.project, m.timestamp as created_at, m.provenance, f.rank
           FROM messages_fts f
           JOIN messages m ON m.id = f.rowid
           WHERE messages_fts MATCH ?
@@ -315,7 +320,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
         break;
       case 'decisions':
         sql = `
-          SELECT d.id, d.decision as content, d.project, d.created_at, f.rank
+          SELECT d.id, d.decision as content, d.project, d.created_at, d.provenance, f.rank
           FROM decisions_fts f
           JOIN decisions d ON d.id = f.rowid
           WHERE decisions_fts MATCH ?
@@ -327,7 +332,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
         break;
       case 'learnings':
         sql = `
-          SELECT l.id, l.problem as content, l.project, l.created_at, f.rank
+          SELECT l.id, l.problem as content, l.project, l.created_at, l.provenance, f.rank
           FROM learnings_fts f
           JOIN learnings l ON l.id = f.rowid
           WHERE learnings_fts MATCH ?
@@ -338,7 +343,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
         break;
       case 'breadcrumbs':
         sql = `
-          SELECT b.id, b.content, b.project, b.created_at, f.rank
+          SELECT b.id, b.content, b.project, b.created_at, b.provenance, f.rank
           FROM breadcrumbs_fts f
           JOIN breadcrumbs b ON b.id = f.rowid
           WHERE breadcrumbs_fts MATCH ?
@@ -349,7 +354,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
         break;
       case 'loa':
         sql = `
-          SELECT l.id, l.title || ': ' || SUBSTR(l.fabric_extract, 1, 200) as content, l.project, l.created_at, f.rank
+          SELECT l.id, l.title || ': ' || SUBSTR(l.fabric_extract, 1, 200) as content, l.project, l.created_at, l.provenance, f.rank
           FROM loa_fts f
           JOIN loa_entries l ON l.id = f.rowid
           WHERE loa_fts MATCH ?
@@ -373,6 +378,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
         content: string;
         project: string | null;
         created_at: string;
+        provenance: Provenance | null;
         rank: number;
       }>;
 
@@ -383,6 +389,7 @@ export function search(query: string, options?: MemorySearchOptions): SearchResu
           content: row.content,
           project: row.project || undefined,
           created_at: row.created_at,
+          provenance: row.provenance ?? null,
           rank: row.rank
         });
       }
@@ -453,8 +460,8 @@ export function createLoaEntry(entry: Omit<LoaEntry, 'id' | 'created_at'>): numb
   // so a careless caller cannot demote curated knowledge below neutral.
   const importance = Math.max(5, clampImportance(entry.importance, 8));
   const stmt = db.prepare(`
-    INSERT INTO loa_entries (title, description, fabric_extract, message_range_start, message_range_end, parent_loa_id, session_id, project, tags, message_count, importance)
-    VALUES ($title, $description, $fabric_extract, $message_range_start, $message_range_end, $parent_loa_id, $session_id, $project, $tags, $message_count, $importance)
+    INSERT INTO loa_entries (title, description, fabric_extract, message_range_start, message_range_end, parent_loa_id, session_id, project, tags, message_count, importance, provenance)
+    VALUES ($title, $description, $fabric_extract, $message_range_start, $message_range_end, $parent_loa_id, $session_id, $project, $tags, $message_count, $importance, $provenance)
   `);
   const result = stmt.run({
     $title: entry.title,
@@ -467,7 +474,8 @@ export function createLoaEntry(entry: Omit<LoaEntry, 'id' | 'created_at'>): numb
     $project: entry.project || null,
     $tags: entry.tags || null,
     $message_count: entry.message_count || null,
-    $importance: importance
+    $importance: importance,
+    $provenance: entry.provenance ?? null
   });
   return result.lastInsertRowid as number;
 }
diff --git a/src/lib/structured-extraction.ts b/src/lib/structured-extraction.ts
index f262870..c314dfe 100644
--- a/src/lib/structured-extraction.ts
+++ b/src/lib/structured-extraction.ts
@@ -164,6 +164,7 @@ function writeLoa(ctx: StructuredExtractionContext): number {
     project: ctx.project,
     tags: ctx.topics.join(','),
     message_count: ctx.messageCount ?? range.count,
+    provenance: 'extracted',
   });
 }
 
@@ -194,6 +195,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
         decision: item.decision,
         status: 'active',
         confidence: item.confidence,
+        provenance: 'extracted',
       });
       result.decisions++;
     }
@@ -211,6 +213,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
         solution: item.solution,
         tags: ctx.sessionLabel,
         confidence: 'medium',
+        provenance: 'extracted',
       });
       result.learnings++;
     }
@@ -226,6 +229,7 @@ export function writeStructuredExtraction(ctx: StructuredExtractionContext): Str
         project: ctx.project,
         content,
         importance: 5,
+        provenance: 'extracted',
       });
       result.breadcrumbs++;
     }
diff --git a/src/mcp-server.ts b/src/mcp-server.ts
index 60d53a2..958d001 100644
--- a/src/mcp-server.ts
+++ b/src/mcp-server.ts
@@ -66,8 +66,15 @@ import {
 	reciprocalRankFusion,
 	checkEmbeddingService,
 } from "./lib/embeddings.js";
+import type { Provenance } from "./types/index.js";
 import { existsSync } from "fs";
 
+// Record Provenance display (ADR-0001): builds the "provenance: <value>" label
+// shown next to each result; NULL/undefined (legacy rows) is reported as "unknown", never guessed.
+function provenanceLabel(provenance: Provenance | null | undefined): string {
+	return `provenance: ${provenance ?? "unknown"}`;
+}
+
 /**
  * Hybrid search combining FTS5 + vector embeddings with RRF fusion
  * Used by context_for_agent and memory_hybrid_search
@@ -82,6 +89,7 @@ async function hybridSearch(
 		content: string;
 		score: number;
 		source: "fts" | "vec" | "both";
+		provenance: Provenance | null;
 	}>;
 	embeddingsAvailable: boolean;
 }> {
@@ -159,6 +167,7 @@ async function hybridSearch(
 				content: string;
 				score: number;
 				source: "fts" | "vec" | "both";
+				provenance: Provenance | null;
 			}
 		>();
 
@@ -171,6 +180,7 @@ async function hybridSearch(
 				content: r.content,
 				score,
 				source: "fts",
+				provenance: r.provenance ?? null,
 			});
 		}
 
@@ -182,25 +192,29 @@ async function hybridSearch(
 			} else {
 				// Need to fetch content
 				let content = "";
+				let provenance: Provenance | null = null;
 				if (r.source_table === "loa_entries") {
 					const loa = db
 						.prepare(
-							"SELECT title, fabric_extract FROM loa_entries WHERE id = ?",
+							"SELECT title, fabric_extract, provenance FROM loa_entries WHERE id = ?",
 						)
 						.get(r.source_id) as any;
 					content = loa
 						? `${loa.title}: ${loa.fabric_extract?.slice(0, 200)}`
 						: "";
+					provenance = loa?.provenance ?? null;
 				} else if (r.source_table === "decisions") {
 					const dec = db
-						.prepare("SELECT decision FROM decisions WHERE id = ?")
+						.prepare("SELECT decision, provenance FROM decisions WHERE id = ?")
 						.get(r.source_id) as any;
 					content = dec?.decision || "";
+					provenance = dec?.provenance ?? null;
 				} else if (r.source_table === "messages") {
 					const msg = db
-						.prepare("SELECT content FROM messages WHERE id = ?")
+						.prepare("SELECT content, provenance FROM messages WHERE id = ?")
 						.get(r.source_id) as any;
 					content = msg?.content?.slice(0, 200) || "";
+					provenance = msg?.provenance ?? null;
 				}
 
 				resultMap.set(key, {
@@ -209,6 +223,7 @@ async function hybridSearch(
 					content,
 					score: fusedScores.get(key) || 0,
 					source: "vec",
+					provenance,
 				});
 			}
 		}
@@ -229,6 +244,7 @@ async function hybridSearch(
 				content: r.content,
 				score: r.rank || 0,
 				source: "fts" as const,
+				provenance: r.provenance ?? null,
 			}))
 			.slice(0, limit),
 		embeddingsAvailable: false,
@@ -288,7 +304,7 @@ server.tool(
 						r.content.length > 200
 							? r.content.slice(0, 200) + "..."
 							: r.content;
-					return `[${r.table}#${r.id}] ${r.project || "no-project"} | ${r.created_at}\n${preview}`;
+					return `[${r.table}#${r.id}] ${r.project || "no-project"} | ${r.created_at} | ${provenanceLabel(r.provenance)}\n${preview}`;
 				})
 				.join("\n\n---\n\n");
 
@@ -356,7 +372,7 @@ server.tool(
 							? r.content.slice(0, 200) + "..."
 							: r.content;
 					const score = (r.score * 100).toFixed(1);
-					return `${score}% ${sourceTag} [${r.table}#${r.id}]\n${preview}`;
+					return `${score}% ${sourceTag} [${r.table}#${r.id}] | ${provenanceLabel(r.provenance)}\n${preview}`;
 				})
 				.join("\n\n---\n\n");
 
@@ -405,7 +421,7 @@ server.tool(
 				output += "### Library of Alexandria (Curated Knowledge)\n";
 				for (const e of loa) {
 					const preview = e.fabric_extract.slice(0, 300).replace(/\n/g, " ");
-					output += `- **LoA #${e.id}** [${e.project || "no-project"}] ${e.created_at?.split("T")[0]}: ${e.title}\n  ${preview}...\n`;
+					output += `- **LoA #${e.id}** [${e.project || "no-project"}] ${e.created_at?.split("T")[0]} (${provenanceLabel(e.provenance)}): ${e.title}\n  ${preview}...\n`;
 				}
 				output += "\n";
 			}
@@ -413,7 +429,7 @@ server.tool(
 			if (decisions.length > 0) {
 				output += "### Recent Decisions\n";
 				for (const d of decisions) {
-					output += `- **#${d.id}** [${d.project || "no-project"}]: ${d.decision}${d.reasoning ? ` (${d.reasoning})` : ""}\n`;
+					output += `- **#${d.id}** [${d.project || "no-project"}] (${provenanceLabel(d.provenance)}): ${d.decision}${d.reasoning ? ` (${d.reasoning})` : ""}\n`;
 				}
 				output += "\n";
 			}
@@ -421,7 +437,7 @@ server.tool(
 			if (breadcrumbs.length > 0) {
 				output += "### Breadcrumbs\n";
 				for (const b of breadcrumbs) {
-					output += `- **#${b.id}** [${b.project || "no-project"}]: ${b.content}\n`;
+					output += `- **#${b.id}** [${b.project || "no-project"}] (${provenanceLabel(b.provenance)}): ${b.content}\n`;
 				}
 				output += "\n";
 			}
@@ -555,6 +571,9 @@ server.tool(
 						}
 					}
 
+					// ADR-0001: provenance is stamped from the write path. memory_add
+					// deliberately exposes no provenance parameter — agents must not
+					// be able to launder extracted content as something else.
 					id = addDecision({
 						decision: content,
 						reasoning: detail,
@@ -562,6 +581,7 @@ server.tool(
 						status: "active",
 						confidence: confidence || "medium",
 						importance,
+						provenance: "user_authored",
 					});
 
 					let resultText = `Added decision #${id}: ${content}`;
@@ -583,6 +603,7 @@ server.tool(
 						tags,
 						confidence: confidence || "medium",
 						importance,
+						provenance: "user_authored",
 					});
 					return {
 						content: [
@@ -595,6 +616,7 @@ server.tool(
 						content,
 						project,
 						importance: importance ?? 5,
+						provenance: "user_authored",
 					});
 					return {
 						content: [
diff --git a/src/types/index.ts b/src/types/index.ts
index 3601818..e7635b3 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -1,5 +1,14 @@
 // Core types for RECALL
 
+// Record Provenance (ADR-0001, CONTEXT.md): the declared origin and
+// transformation level of a memory record. Automatic write-path metadata —
+// never a public MCP parameter or CLI classification input. Survivor-order
+// vocabulary: user_authored > verbatim > extracted > derived. Legacy unknown
+// is NULL/absent, never guessed. `derived` is reserved for future paths that
+// mechanically produce records from existing memory records.
+export const PROVENANCE_VALUES = ['user_authored', 'verbatim', 'extracted', 'derived'] as const;
+export type Provenance = typeof PROVENANCE_VALUES[number];
+
 export interface Session {
   id?: number;
   session_id: string;
@@ -21,6 +30,7 @@ export interface Message {
   content: string;
   project?: string;
   importance?: number;
+  provenance?: Provenance | null;
 }
 
 export interface Decision {
@@ -35,6 +45,7 @@ export interface Decision {
   status: 'active' | 'superseded' | 'reverted';
   confidence?: 'high' | 'medium' | 'low';
   importance?: number;
+  provenance?: Provenance | null;
 }
 
 export interface Learning {
@@ -49,6 +60,7 @@ export interface Learning {
   tags?: string;
   confidence?: 'high' | 'medium' | 'low';
   importance?: number;
+  provenance?: Provenance | null;
 }
 
 export interface Breadcrumb {
@@ -60,6 +72,7 @@ export interface Breadcrumb {
   project?: string;
   importance: number;
   expires_at?: string;
+  provenance?: Provenance | null;
 }
 
 export interface LoaEntry {
@@ -76,6 +89,7 @@ export interface LoaEntry {
   tags?: string;
   message_count?: number;
   importance?: number;
+  provenance?: Provenance | null;
 }
 
 export interface SearchResult {
@@ -85,6 +99,7 @@ export interface SearchResult {
   project?: string;
   created_at: string;
   rank?: number;
+  provenance?: Provenance | null;
 }
 
 export interface Stats {
diff --git a/tests/db/migrations.test.ts b/tests/db/migrations.test.ts
index 43020e0..8a52d55 100644
--- a/tests/db/migrations.test.ts
+++ b/tests/db/migrations.test.ts
@@ -110,10 +110,85 @@ describe('migration failure handling', () => {
   });
 });
 
+describe('provenance migration (8 to 9)', () => {
+  const PROVENANCE_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'];
+
+  test('all memory tables have provenance column after migrations', () => {
+    applyMigrations(db);
+    for (const table of PROVENANCE_TABLES) {
+      const cols = db.prepare(`PRAGMA table_info(${table})`).all() as any[];
+      expect(cols.map((c: any) => c.name)).toContain('provenance');
+    }
+  });
+
+  test('upgrade path: ALTER adds provenance to a legacy table without it', () => {
+    // Simulate a pre-provenance install: legacy table shape, version 8.
+    const legacyDir = mkdtempSync(join(tmpdir(), 'recall-legacy-test-'));
+    const legacyDb = new Database(join(legacyDir, 'legacy.db'));
+    try {
+      legacyDb.exec(`
+        CREATE TABLE messages (
+          id INTEGER PRIMARY KEY AUTOINCREMENT,
+          session_id TEXT NOT NULL,
+          timestamp DATETIME NOT NULL,
+          role TEXT NOT NULL,
+          content TEXT NOT NULL,
+          project TEXT,
+          importance INTEGER DEFAULT 5
+        );
+        CREATE TABLE decisions (id INTEGER PRIMARY KEY AUTOINCREMENT, decision TEXT NOT NULL);
+        CREATE TABLE learnings (id INTEGER PRIMARY KEY AUTOINCREMENT, problem TEXT NOT NULL);
+        CREATE TABLE breadcrumbs (id INTEGER PRIMARY KEY AUTOINCREMENT, content TEXT NOT NULL);
+        CREATE TABLE loa_entries (id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, fabric_extract TEXT NOT NULL);
+      `);
+      legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content) VALUES (?, ?, ?, ?)')
+        .run('s1', '2026-01-01T00:00:00Z', 'user', 'legacy row');
+      legacyDb.prepare('PRAGMA user_version = 8').run();
+
+      const result = applyMigrations(legacyDb);
+      expect(result.from).toBe(8);
+      expect(getMigrationVersion(legacyDb)).toBe(MIGRATIONS.length);
+
+      for (const table of PROVENANCE_TABLES) {
+        const cols = legacyDb.prepare(`PRAGMA table_info(${table})`).all() as any[];
+        expect(cols.map((c: any) => c.name)).toContain('provenance');
+      }
+
+      // Legacy rows stay NULL — unknown is never laundered into a value.
+      const row = legacyDb.prepare('SELECT provenance FROM messages WHERE session_id = ?').get('s1') as any;
+      expect(row.provenance).toBeNull();
+
+      // CHECK on the ALTERed column enforces the vocabulary but allows NULL.
+      expect(() => {
+        legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content, provenance) VALUES (?, ?, ?, ?, ?)')
+          .run('s1', '2026-01-01T00:00:01Z', 'user', 'bad', 'guessed');
+      }).toThrow();
+      legacyDb.prepare('INSERT INTO messages (session_id, timestamp, role, content, provenance) VALUES (?, ?, ?, ?, ?)')
+        .run('s1', '2026-01-01T00:00:02Z', 'user', 'ok', 'verbatim');
+    } finally {
+      legacyDb.close();
+      rmSync(legacyDir, { recursive: true, force: true });
+    }
+  });
+
+  test('CHECK constraint enforces vocabulary on fresh-install DDL', () => {
+    applyMigrations(db);
+    const insert = (provenance: string | null) =>
+      db.prepare('INSERT INTO breadcrumbs (content, provenance) VALUES (?, ?)').run('x', provenance);
+
+    for (const valid of ['verbatim', 'user_authored', 'extracted', 'derived', null]) {
+      expect(() => insert(valid)).not.toThrow();
+    }
+    expect(() => insert('unknown')).toThrow();
+    expect(() => insert('VERBATIM')).toThrow();
+  });
+});
+
 describe('MIGRATIONS array', () => {
   test('has expected number of migrations', () => {
     // 7 → 8: importance column on messages/decisions/learnings/loa_entries (Sprint #4)
-    expect(MIGRATIONS.length).toBe(8);
+    // 8 → 9: provenance column on all five memory tables (issue #42)
+    expect(MIGRATIONS.length).toBe(9);
   });
 
   test('all entries are functions', () => {

From 7a938426355e069f2f42e9ad11179a9197e99270 Mon Sep 17 00:00:00 2001
From: Ed Heltzel <402910+edheltzel@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:41:39 -0400
Subject: [PATCH 2/3] test(provenance): cover write paths, backfill, search
 display, and result payloads

- backfill: dry-run default writes nothing, --execute classifies only
  evidence-backed rows, never overwrites, idempotent, table filter,
  unknown-table rejection
- write paths: CLI add stamps user_authored, structured extraction
  stamps extracted, batch message capture persists verbatim, unstamped
  writes stay NULL
- hooks: sqlite-writers stamp extracted + legacy-DB column guard,
  PreCompact flush stamps verbatim + pre-provenance DB guard
- conversation import: raw messages stamped verbatim
- search(): provenance present for all five record types, NULL as null
- CLI display contract: quiet for known, flags unknown, --show-provenance
- ADR-0001 contract pins: MCP memory_add schema and CLI expose no
  provenance override

Refs #42
---
 tests/commands/provenance.test.ts        | 169 +++++++++++++++++++++++
 tests/commands/search.test.ts            |  53 ++++++-
 tests/hooks/recall-precompact.test.ts    |  32 +++++
 tests/hooks/sqlite-writers.test.ts       |  45 ++++++
 tests/lib/conversation-import.test.ts    |  18 +++
 tests/lib/provenance-write-paths.test.ts | 168 ++++++++++++++++++++++
 6 files changed, 484 insertions(+), 1 deletion(-)
 create mode 100644 tests/commands/provenance.test.ts
 create mode 100644 tests/lib/provenance-write-paths.test.ts

diff --git a/tests/commands/provenance.test.ts b/tests/commands/provenance.test.ts
new file mode 100644
index 0000000..d98bbc5
--- /dev/null
+++ b/tests/commands/provenance.test.ts
@@ -0,0 +1,169 @@
+// recall provenance backfill — conservative legacy classification (issue #42, ADR-0001).
+//
+// Binding rules under test:
+// - dry-run is the default and writes nothing
+// - --execute only sets provenance where deterministic evidence exists
+// - rows without evidence stay NULL (unknown), never guessed
+// - rows that already have provenance are never overwritten
+// - user_authored is never assigned by backfill
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import { Database } from 'bun:sqlite';
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { runProvenanceBackfill } from '../../src/commands/provenance';
+import {
+  createSession,
+  addMessage,
+  addDecision,
+  addLearning,
+  addBreadcrumb,
+  createLoaEntry,
+} from '../../src/lib/memory';
+
+let dbPath: string;
+const originalLog = console.log;
+
+beforeEach(() => {
+  dbPath = setupTestDb();
+  console.log = () => {}; // backfill prints a report; keep test output clean
+});
+
+afterEach(() => {
+  console.log = originalLog;
+  teardownTestDb();
+});
+
+function readDb(): Database {
+  return new Database(dbPath, { readonly: true });
+}
+
+/** Seeds one legacy (NULL-provenance) landscape across all five tables. */
+function seedLegacyRows(): void {
+  createSession({ session_id: 's1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+  // messages: every legacy (NULL-provenance) row is deterministically classified as 'verbatim'
+  addMessage({ session_id: 's1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'legacy message' });
+
+  // decisions: evidence marker is category = 'auto-extracted'
+  addDecision({ session_id: 's1', decision: 'extracted decision', category: 'auto-extracted', status: 'active' });
+  addDecision({ session_id: 's1', decision: 'unmarked decision', category: 'manual', status: 'active' });
+  addDecision({ session_id: 's1', decision: 'already stamped', status: 'active', provenance: 'user_authored' });
+
+  // learnings: evidence marker is category = 'auto-extracted'
+  addLearning({ session_id: 's1', problem: 'extracted problem', solution: 'fix', category: 'auto-extracted' });
+  addLearning({ session_id: 's1', problem: 'unmarked problem', solution: 'fix', category: 'other' });
+
+  // breadcrumbs: evidence marker is category = 'extracted-idea'
+  addBreadcrumb({ session_id: 's1', content: 'extracted idea', category: 'extracted-idea', importance: 5 });
+  addBreadcrumb({ session_id: 's1', content: 'unmarked note', category: 'note', importance: 5 });
+
+  // loa_entries: every legacy (NULL-provenance) row is deterministically classified as 'extracted'
+  createLoaEntry({ title: 'legacy loa', fabric_extract: 'extract body', session_id: 's1' });
+}
+
+describe('runProvenanceBackfill — dry run (default)', () => {
+  test('reports classifications without writing anything', () => {
+    seedLegacyRows();
+
+    const results = runProvenanceBackfill({});
+
+    expect(results.length).toBe(5);
+    const byTable = Object.fromEntries(results.map(r => [r.table, r]));
+    expect(byTable.messages.classified).toBe(1);
+    expect(byTable.messages.value).toBe('verbatim');
+    expect(byTable.loa_entries.classified).toBe(1);
+    expect(byTable.loa_entries.value).toBe('extracted');
+    // only evidence-marked rows are classified; the pre-stamped row is not counted as "unknown"
+    expect(byTable.decisions.unknownBefore).toBe(2);
+    expect(byTable.decisions.classified).toBe(1);
+    expect(byTable.decisions.remainingUnknown).toBe(1);
+    expect(byTable.learnings.classified).toBe(1);
+    expect(byTable.breadcrumbs.classified).toBe(1);
+
+    // Nothing was written
+    const db = readDb();
+    const nullCount = (table: string) =>
+      (db.prepare(`SELECT COUNT(*) AS c FROM ${table} WHERE provenance IS NULL`).get() as { c: number }).c;
+    expect(nullCount('messages')).toBe(1);
+    expect(nullCount('decisions')).toBe(2);
+    expect(nullCount('learnings')).toBe(2);
+    expect(nullCount('breadcrumbs')).toBe(2);
+    expect(nullCount('loa_entries')).toBe(1);
+    db.close();
+  });
+});
+
+describe('runProvenanceBackfill — execute', () => {
+  test('classifies only evidence-backed rows; the rest stay NULL', () => {
+    seedLegacyRows();
+
+    runProvenanceBackfill({ dryRun: false });
+
+    const db = readDb();
+    const provenanceOf = (table: string, where: string) =>
+      (db.prepare(`SELECT provenance FROM ${table} WHERE ${where}`).get() as any)?.provenance;
+
+    expect(provenanceOf('messages', "content = 'legacy message'")).toBe('verbatim');
+    expect(provenanceOf('loa_entries', "title = 'legacy loa'")).toBe('extracted');
+
+    expect(provenanceOf('decisions', "decision = 'extracted decision'")).toBe('extracted');
+    expect(provenanceOf('decisions', "decision = 'unmarked decision'")).toBeNull();
+    // never overwritten, and user_authored is never assigned by backfill
+    expect(provenanceOf('decisions', "decision = 'already stamped'")).toBe('user_authored');
+
+    expect(provenanceOf('learnings', "problem = 'extracted problem'")).toBe('extracted');
+    expect(provenanceOf('learnings', "problem = 'unmarked problem'")).toBeNull();
+
+    expect(provenanceOf('breadcrumbs', "content = 'extracted idea'")).toBe('extracted');
+    expect(provenanceOf('breadcrumbs', "content = 'unmarked note'")).toBeNull();
+    db.close();
+  });
+
+  test('is idempotent: a second execute classifies nothing new', () => {
+    seedLegacyRows();
+    runProvenanceBackfill({ dryRun: false });
+
+    const second = runProvenanceBackfill({ dryRun: false });
+    for (const r of second) {
+      expect(r.classified).toBe(0);
+    }
+  });
+
+  test('table filter limits the run to one table', () => {
+    seedLegacyRows();
+
+    const results = runProvenanceBackfill({ dryRun: false, table: 'decisions' });
+
+    expect(results.length).toBe(1);
+    expect(results[0].table).toBe('decisions');
+
+    const db = readDb();
+    // messages untouched by a decisions-only run
+    const msg = db.prepare("SELECT provenance FROM messages WHERE content = 'legacy message'").get() as any;
+    expect(msg.provenance).toBeNull();
+    db.close();
+  });
+});
+
+describe('runProvenanceBackfill — input validation', () => {
+  const originalExitCode = process.exitCode;
+  const originalError = console.error;
+
+  afterEach(() => {
+    process.exitCode = originalExitCode ?? 0;
+    console.error = originalError;
+  });
+
+  test('rejects an unknown table', () => {
+    let errorOutput = '';
+    console.error = (msg?: unknown) => {
+      errorOutput += String(msg);
+    };
+
+    const results = runProvenanceBackfill({ table: 'sessions' as any });
+
+    expect(results).toEqual([]);
+    expect(errorOutput).toContain('Unknown table: sessions');
+    expect(process.exitCode).toBe(1);
+  });
+});
diff --git a/tests/commands/search.test.ts b/tests/commands/search.test.ts
index 9a02b0e..226fe8c 100644
--- a/tests/commands/search.test.ts
+++ b/tests/commands/search.test.ts
@@ -1,4 +1,4 @@
-import { describe, test, expect, afterEach } from 'bun:test';
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
 import { runSearch } from '../../src/commands/search.js';
 
 describe('runSearch --bias-type guard', () => {
@@ -23,3 +23,54 @@ describe('runSearch --bias-type guard', () => {
     expect(process.exitCode).toBe(1);
   });
 });
+
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { createSession, addDecision, addBreadcrumb } from '../../src/lib/memory';
+
+describe('runSearch provenance display contract (issue #42)', () => {
+  const originalLog = console.log;
+  let output: string;
+
+  beforeEach(() => {
+    setupTestDb();
+    output = '';
+    console.log = (msg?: unknown) => {
+      output += `${String(msg ?? '')}\n`;
+    };
+
+    createSession({ session_id: 'disp-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+    addDecision({ session_id: 'disp-1', decision: 'quizzacious known decision', status: 'active', provenance: 'user_authored' });
+    addBreadcrumb({ session_id: 'disp-1', content: 'quizzacious legacy crumb', importance: 5 }); // provenance NULL
+  });
+
+  afterEach(() => {
+    console.log = originalLog;
+    teardownTestDb();
+  });
+
+  test('default display stays quiet for known provenance and flags unknown', () => {
+    runSearch('quizzacious', {});
+
+    const lines = output.split('\n');
+    const knownLine = lines.find(l => l.includes('decisions#'));
+    const unknownLine = lines.find(l => l.includes('breadcrumbs#'));
+
+    expect(knownLine).toBeDefined();
+    expect(knownLine).not.toContain('provenance');
+
+    expect(unknownLine).toBeDefined();
+    expect(unknownLine).toContain('⚠');
+    expect(unknownLine).toContain('provenance: unknown');
+  });
+
+  test('--show-provenance shows every provenance value', () => {
+    runSearch('quizzacious', { showProvenance: true });
+
+    const lines = output.split('\n');
+    const knownLine = lines.find(l => l.includes('decisions#'));
+    const unknownLine = lines.find(l => l.includes('breadcrumbs#'));
+
+    expect(knownLine).toContain('provenance: user_authored');
+    expect(unknownLine).toContain('provenance: unknown');
+  });
+});
diff --git a/tests/hooks/recall-precompact.test.ts b/tests/hooks/recall-precompact.test.ts
index 13052de..4921d58 100644
--- a/tests/hooks/recall-precompact.test.ts
+++ b/tests/hooks/recall-precompact.test.ts
@@ -274,3 +274,35 @@ describe('RecallPreCompact — flushConversation', () => {
     expect(hookSource).not.toMatch(/extractWithClaude|extractWithOllama|fetch\(|http\.request/);
   });
 });
+
+describe('RecallPreCompact — Record Provenance (ADR-0001, issue #42)', () => {
+  test('stamps flushed messages verbatim when the DB has the provenance column', async () => {
+    // Migrated DB shape: messages carries the provenance column.
+    const db = new Database(dbPath);
+    db.exec(`ALTER TABLE messages ADD COLUMN provenance TEXT CHECK (provenance IN ('verbatim', 'user_authored', 'extracted', 'derived'))`);
+    db.close();
+
+    writeJsonlMessages([
+      { role: 'user', text: 'a message captured mid-session' },
+      { role: 'assistant', text: 'a reply captured mid-session' },
+    ]);
+
+    const { flushConversation } = await import('../../hooks/RecallPreCompact');
+    const result = flushConversation(convPath, '/tmp/proj');
+    expect(result.imported).toBe(2);
+
+    const readDb = new Database(dbPath, { readonly: true });
+    const rows = readDb.prepare('SELECT provenance FROM messages ORDER BY id').all() as Array<{ provenance: string }>;
+    readDb.close();
+    expect(rows.map(r => r.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+
+  test('keeps working against a pre-provenance DB (column guard)', async () => {
+    // CORE_SCHEMA above has no provenance column — the flush must not fail.
+    writeJsonlMessages([{ role: 'user', text: 'legacy database flush message' }]);
+
+    const { flushConversation } = await import('../../hooks/RecallPreCompact');
+    const result = flushConversation(convPath, '/tmp/proj');
+    expect(result.imported).toBe(1);
+  });
+});
diff --git a/tests/hooks/sqlite-writers.test.ts b/tests/hooks/sqlite-writers.test.ts
index 866f7eb..4c744f4 100644
--- a/tests/hooks/sqlite-writers.test.ts
+++ b/tests/hooks/sqlite-writers.test.ts
@@ -177,3 +177,48 @@ describe('writeExtractionErrors', () => {
     expect(rows[0].fix).toBe('chmod +x');
   });
 });
+
+describe('Record Provenance stamping (ADR-0001, issue #42)', () => {
+  test('every extraction writer stamps provenance = extracted', () => {
+    writeDecisionsBatch(dbPath, [{ decision: 'stamped decision' }]);
+    writeLearningsBatch(dbPath, [{ problem: 'stamped problem', solution: 'fix' }]);
+    writeBreadcrumbsBatch(dbPath, [{ content: 'stamped crumb' }]);
+    writeLoaEntryFromExtraction(dbPath, {
+      title: 'stamped loa',
+      fabricExtract: '## ONE SENTENCE SUMMARY\ntext',
+      sessionId: 's1',
+    });
+
+    const db = openRead();
+    const stamps = ['decisions', 'learnings', 'breadcrumbs', 'loa_entries'].map(
+      table => (db.prepare(`SELECT provenance FROM ${table} LIMIT 1`).get() as { provenance?: string } | null)?.provenance,
+    );
+    db.close(); // close before asserting so a failed expectation cannot leak the handle
+    expect(stamps).toEqual(['extracted', 'extracted', 'extracted', 'extracted']);
+  });
+
+  test('still writes into a legacy DB whose tables have no provenance column', () => {
+    const legacyPath = dbPath.replace('test.db', 'legacy-writers.db');
+    const legacy = new Database(legacyPath);
+    legacy.exec(`
+      CREATE TABLE decisions (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        session_id TEXT,
+        category TEXT,
+        project TEXT,
+        decision TEXT NOT NULL,
+        status TEXT DEFAULT 'active',
+        importance INTEGER DEFAULT 5
+      );
+    `);
+    legacy.close();
+
+    const n = writeDecisionsBatch(legacyPath, [{ decision: 'legacy write' }]);
+    expect(n).toBe(1);
+
+    const db = new Database(legacyPath, { readonly: true });
+    const row = db.prepare('SELECT decision FROM decisions').get() as any;
+    db.close();
+    expect(row.decision).toBe('legacy write');
+  });
+});
diff --git a/tests/lib/conversation-import.test.ts b/tests/lib/conversation-import.test.ts
index b51c7c4..82f427d 100644
--- a/tests/lib/conversation-import.test.ts
+++ b/tests/lib/conversation-import.test.ts
@@ -289,3 +289,21 @@ describe('conversationSourceAdapters', () => {
     }
   });
 });
+
+describe('Record Provenance (ADR-0001, issue #42)', () => {
+  test('raw imported messages are stamped verbatim', async () => {
+    const file = join(tempDir, 'slack-export.json');
+    writeFileSync(file, JSON.stringify([
+      { ts: '1710000000.000100', user: 'U1', text: 'hello from slack history' },
+      { ts: '1710000001.000200', user: 'U2', text: 'a reply worth remembering' },
+    ]));
+
+    const result = await importConversations(file, { format: 'slack', noExtract: true });
+    expect(result.messagesImported).toBe(2);
+
+    const db = readDb();
+    const rows = db.prepare('SELECT provenance FROM messages ORDER BY timestamp').all() as any[];
+    db.close();
+    expect(rows.map(r => r.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+});
diff --git a/tests/lib/provenance-write-paths.test.ts b/tests/lib/provenance-write-paths.test.ts
new file mode 100644
index 0000000..f7251ae
--- /dev/null
+++ b/tests/lib/provenance-write-paths.test.ts
@@ -0,0 +1,168 @@
+// Record Provenance write-path stamping (issue #42, ADR-0001).
+//
+// Provenance is automatic write-path metadata. Each capture surface stamps
+// the value its write-path semantics dictate; no public surface accepts a
+// provenance override. These tests pin the stamp per path:
+//   - CLI `recall add` → user_authored
+//   - structured extraction (Haiku/Fabric output) → extracted
+//   - raw message capture (import/dump batch writer) → verbatim
+//   - search() structured results carry provenance for every record type
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import { Database } from 'bun:sqlite';
+import { setupTestDb, teardownTestDb } from '../helpers/setup';
+import { runAddBreadcrumb, runAddDecision, runAddLearning } from '../../src/commands/add';
+import { writeStructuredExtraction } from '../../src/lib/structured-extraction';
+import {
+  createSession,
+  addMessage,
+  addMessagesBatch,
+  addDecision,
+  addLearning,
+  addBreadcrumb,
+  createLoaEntry,
+  search,
+} from '../../src/lib/memory';
+
+let dbPath: string;
+const originalLog = console.log;
+
+beforeEach(() => {
+  dbPath = setupTestDb();
+  console.log = () => {}; // add commands print confirmations; keep output clean
+});
+
+afterEach(() => {
+  console.log = originalLog;
+  teardownTestDb();
+});
+
+function readDb(): Database {
+  return new Database(dbPath, { readonly: true });
+}
+
+describe('CLI add commands stamp user_authored', () => {
+  test('breadcrumb, decision, and learning all land as user_authored', () => {
+    runAddBreadcrumb('a crumb worth keeping', { project: 'demo' });
+    runAddDecision('we choose sqlite', { project: 'demo' });
+    runAddLearning('it was broken', 'we fixed it', { project: 'demo' });
+
+    const db = readDb();
+    const stamps = ['breadcrumbs', 'decisions', 'learnings'].map(
+      table => (db.prepare(`SELECT provenance FROM ${table} LIMIT 1`).get() as { provenance?: string } | null)?.provenance,
+    );
+    db.close(); // close before asserting so a failed expectation cannot leak the handle
+    expect(stamps).toEqual(['user_authored', 'user_authored', 'user_authored']);
+  });
+});
+
+describe('structured extraction stamps extracted', () => {
+  test('decisions and LoA entry from an extract are marked extracted', () => {
+    createSession({ session_id: 'ext-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    const result = writeStructuredExtraction({
+      sessionId: 'ext-1',
+      sessionLabel: 'extraction test',
+      project: 'demo',
+      timestamp: '2026-01-01',
+      conversationPath: '/tmp/conv.jsonl',
+      topics: ['testing'],
+      summary: 'a one sentence summary',
+      extracted: [
+        '## ONE SENTENCE SUMMARY',
+        'a one sentence summary',
+        '',
+        '## DECISIONS MADE',
+        '- Adopt write-path provenance stamping (confidence: HIGH)',
+      ].join('\n'),
+    });
+
+    expect(result.decisions).toBe(1);
+    expect(result.loa).toBe(1);
+
+    const db = readDb();
+    const decision = db.prepare('SELECT provenance FROM decisions LIMIT 1').get() as any;
+    const loa = db.prepare('SELECT provenance FROM loa_entries LIMIT 1').get() as any;
+    db.close();
+    expect(decision.provenance).toBe('extracted');
+    expect(loa.provenance).toBe('extracted');
+  });
+});
+
+describe('raw message capture', () => {
+  test('batch writer persists verbatim when the import path stamps it', () => {
+    createSession({ session_id: 'imp-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    // import.ts / conversation-import.ts / dump.ts all map messages through
+    // addMessagesBatch with provenance: 'verbatim'
+    addMessagesBatch([
+      { session_id: 'imp-1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'raw text', provenance: 'verbatim' },
+      { session_id: 'imp-1', timestamp: '2026-01-01T00:00:02Z', role: 'assistant', content: 'raw reply', provenance: 'verbatim' },
+    ]);
+
+    const db = readDb();
+    const rows = db.prepare('SELECT provenance FROM messages ORDER BY id').all() as any[];
+    db.close();
+    expect(rows.map(r => r.provenance)).toEqual(['verbatim', 'verbatim']);
+  });
+
+  test('a write without provenance stays NULL — unknown is representable, never defaulted', () => {
+    createSession({ session_id: 'imp-2', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+    addMessage({ session_id: 'imp-2', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'unstamped' });
+
+    const db = readDb();
+    const row = db.prepare('SELECT provenance FROM messages LIMIT 1').get() as any;
+    db.close();
+    expect(row.provenance).toBeNull();
+  });
+});
+
+describe('search() structured results carry provenance', () => {
+  test('every record type returns its provenance; NULL surfaces as null', () => {
+    createSession({ session_id: 'srch-1', started_at: '2026-01-01T00:00:00Z', project: 'demo' });
+
+    addMessage({ session_id: 'srch-1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'xylocarp message', provenance: 'verbatim' });
+    addDecision({ session_id: 'srch-1', decision: 'xylocarp decision', status: 'active', provenance: 'user_authored' });
+    addLearning({ session_id: 'srch-1', problem: 'xylocarp problem', solution: 'fix', provenance: 'extracted' });
+    addBreadcrumb({ session_id: 'srch-1', content: 'xylocarp crumb', importance: 5, provenance: 'user_authored' });
+    createLoaEntry({ title: 'xylocarp loa', fabric_extract: 'xylocarp extract body', session_id: 'srch-1', provenance: 'extracted' });
+    // stands in for a legacy row: written without provenance, so it stays NULL (unknown)
+    addBreadcrumb({ session_id: 'srch-1', content: 'xylocarp legacy crumb', importance: 5 });
+
+    const results = search('xylocarp', { limit: 20 });
+    const byKey = new Map(results.map(r => [`${r.table}:${r.content}`, r]));
+
+    expect(byKey.get('messages:xylocarp message')?.provenance).toBe('verbatim');
+    expect(byKey.get('decisions:xylocarp decision')?.provenance).toBe('user_authored');
+    expect(byKey.get('learnings:xylocarp problem')?.provenance).toBe('extracted');
+    expect(byKey.get('breadcrumbs:xylocarp crumb')?.provenance).toBe('user_authored');
+    expect(byKey.get('breadcrumbs:xylocarp legacy crumb')?.provenance).toBeNull();
+
+    const loaResult = results.find(r => r.table === 'loa');
+    expect(loaResult?.provenance).toBe('extracted');
+  });
+});
+
+describe('no public provenance override (ADR-0001 contract)', () => {
+  const repoRoot = join(import.meta.dir, '..', '..');
+
+  test('MCP memory_add input schema exposes no provenance parameter', () => {
+    const source = readFileSync(join(repoRoot, 'src', 'mcp-server.ts'), 'utf-8');
+    const toolStart = source.indexOf('"memory_add"');
+    expect(toolStart).toBeGreaterThan(-1);
+    // The zod input schema sits between the tool name and the handler callback.
+    const handlerStart = source.indexOf('async (', toolStart);
+    expect(handlerStart).toBeGreaterThan(toolStart); // a -1 here would make both slices below meaningless
+    expect(source.slice(toolStart, handlerStart)).not.toContain('provenance');
+    // The handler stamps it instead.
+    const handlerBlock = source.slice(handlerStart, source.indexOf('server.tool', handlerStart));
+    expect(handlerBlock).toContain('provenance: "user_authored"');
+  });
+
+  test('CLI exposes no --provenance flag anywhere', () => {
+    const source = readFileSync(join(repoRoot, 'src', 'index.ts'), 'utf-8');
+    expect(source).not.toContain('--provenance');
+  });
+});

From 49c18f438fbb296310dcc73aa15b8be5679498d7 Mon Sep 17 00:00:00 2001
From: Ed Heltzel <402910+edheltzel@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:44:16 -0400
Subject: [PATCH 3/3] docs(provenance): document --show-provenance and recall
 provenance backfill

- cli-reference: search flag, display contract, Record Provenance section
- mcp-tools: provenance in search/hybrid/recall payloads; memory_add stamps
  user_authored with no provenance parameter (ADR-0001)
- architecture: provenance column + migration 8->9 note
- slash-commands + /Recall:search: --show-provenance flag
- FOR_CLAUDE/FOR_PI/FOR_OPENCODE: CLI examples kept in sync

Refs #42
---
 FOR_CLAUDE.md             |  2 ++
 FOR_OPENCODE.md           |  2 ++
 FOR_PI.md                 |  2 ++
 commands/Recall/search.md |  1 +
 docs/architecture.md      | 10 ++++++++++
 docs/cli-reference.md     | 27 +++++++++++++++++++++++++++
 docs/mcp-tools.md         |  8 +++++---
 docs/slash-commands.md    |  1 +
 8 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/FOR_CLAUDE.md b/FOR_CLAUDE.md
index 891534f..da25e44 100644
--- a/FOR_CLAUDE.md
+++ b/FOR_CLAUDE.md
@@ -141,6 +141,8 @@ You can also use the `recall` CLI directly via Bash:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall dump "Session title"            # Capture current session
diff --git a/FOR_OPENCODE.md b/FOR_OPENCODE.md
index af48a50..02e6164 100644
--- a/FOR_OPENCODE.md
+++ b/FOR_OPENCODE.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via Bash tool:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall onboard                         # Interactive L0 identity setup (run once per user)
diff --git a/FOR_PI.md b/FOR_PI.md
index 58bddaa..2bf23ab 100644
--- a/FOR_PI.md
+++ b/FOR_PI.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via shell commands:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall onboard                         # Interactive L0 identity setup (run once per user)
diff --git a/commands/Recall/search.md b/commands/Recall/search.md
index 8c9c774..5a1882b 100644
--- a/commands/Recall/search.md
+++ b/commands/Recall/search.md
@@ -18,6 +18,7 @@ recall search "$1"
 - `-t <table>` — Hard-filter to one table: messages, loa, decisions, learnings, breadcrumbs
 - `--bias-type <table>` — Softly boost one table without filtering other matches. Same values as `-t`.
 - `-l <n>` — Max results (default: 20)
+- `--show-provenance` — Show Record Provenance for every result (by default only unknown provenance is flagged)
 
 ## Examples
 
diff --git a/docs/architecture.md b/docs/architecture.md
index 93d241b..a7d859e 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -78,6 +78,16 @@ tables (`messages`, `decisions`, `learnings`, `loa_entries`). It controls L1
 tier ranking at session start. Manage manually with `recall pin` / `recall unpin`
 or backfill from confidence signals with `recall importance backfill`.
 
+The `provenance` column was added in schema migration 8→9 on all five memory
+tables (`messages`, `decisions`, `learnings`, `breadcrumbs`, `loa_entries`).
+It declares how each record was created — `verbatim`, `user_authored`,
+`extracted`, or `derived` — and is stamped automatically by every write path;
+no CLI flag or MCP parameter can set it (see
+`docs/adr/0001-record-provenance-automatic-write-path-metadata.md`). Legacy
+rows stay `NULL` (unknown) until classified with
+`recall provenance backfill`, which only acts on deterministic write-path
+evidence and never guesses.
+
 ## Tiered RecallStart (v0.7.0+)
 
 The `RecallStart` hook injects two tiers at the top of every session:
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 2742894..43b47da 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -16,6 +16,7 @@ recall search "query"                   # FTS5 search with options
 recall search "query" -t decisions      # Hard-filter to decisions only
 recall search "query" --bias-type decisions # Prefer decisions, still show other matching tables
 recall search "query" -p myproject      # Filter by project
+recall search "query" --show-provenance # Show provenance for every result
 recall semantic "query"                 # Semantic search (explicit)
 recall hybrid "query"                   # Hybrid search (explicit)
 ```
@@ -43,6 +44,8 @@ FTS5 supports boolean operators and prefix matching:
 - `auth*` — prefix match (authz, authentication, etc.)
 - `"vpn config"` — exact phrase
 
+By default, search output stays quiet about [Record Provenance](#record-provenance) when a record carries a known value, and visibly flags records whose provenance is unknown (legacy rows that predate the provenance column). Pass `--show-provenance` to display the provenance of every result.
+
 ---
 
 ## Capture
@@ -209,6 +212,30 @@ recall unpin decisions 42               # Reset to table default (5, or 8 for Lo
 
 LoA entries have a write-time floor of 5; `recall pin` will not drop them below that.
 
+## Record Provenance
+
+The `provenance` column on `messages`, `decisions`, `learnings`, `breadcrumbs`,
+and `loa_entries` declares how each record was created: `verbatim` (exact source
+text), `user_authored` (directly authored via a user or agent command),
+`extracted` (generated from source material, possibly lossy), or `derived`
+(mechanically produced from existing memory records). Provenance is **automatic
+write-path metadata** — every write path stamps it; there is no flag or MCP
+parameter to set it (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
+Legacy rows that predate the column have no declared provenance (`NULL`,
+reported as `unknown`). The backfill classifies them conservatively — only
+where the source table or a write-path marker gives deterministic evidence;
+everything else stays unknown rather than being guessed:
+
+```bash
+recall provenance backfill                      # Dry-run report (default)
+recall provenance backfill --execute            # Apply the classification
+recall provenance backfill --execute -t loa_entries  # Limit to one table
+```
+
+Allowed `-t/--table` values: `messages`, `decisions`, `learnings`,
+`breadcrumbs`, `loa_entries`, `all` (default).
+
 ## Benchmarks
 
 Phase 2 benchmark harness for measuring context efficiency.
diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md
index f36e1c3..31eab0d 100644
--- a/docs/mcp-tools.md
+++ b/docs/mcp-tools.md
@@ -22,7 +22,7 @@ Use `table` when you need a **hard filter** to one record type. Use `bias_type`
 | bias_type | string | no | — | Softly boost one table type in ranking without filtering other matches. Same allowed values as `table`; prefer `table` when you need only one type. |
 | limit | number | no | 10 | Maximum number of results to return |
 
-**Returns:** Array of matching records with table name, id, content, project, and snippet highlighting.
+**Returns:** Array of matching records with table name, id, content, project, snippet highlighting, and Record Provenance (`verbatim`, `user_authored`, `extracted`, `derived`, or `unknown` for legacy rows that predate provenance).
 
 ```js
 // Only decisions
@@ -48,7 +48,7 @@ Combined keyword + semantic search using Reciprocal Rank Fusion. Best for natura
 | project | string | no | — | Filter results to a specific project name |
 | limit | number | no | 10 | Maximum number of results to return |
 
-**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores.
+**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores, each with its Record Provenance.
 
 ```js
 memory_hybrid_search({ query: "how did we handle rate limiting", project: "my-app" })
@@ -67,7 +67,7 @@ Get recent context — LoA entries, decisions, and breadcrumbs. Good for orienti
 | limit | number | no | 5 | Number of recent entries to return per category |
 | project | string | no | — | Filter results to a specific project name |
 
-**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs.
+**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs — each annotated with its Record Provenance.
 
 ```js
 memory_recall({ limit: 5, project: "my-app" })
@@ -112,6 +112,8 @@ Add structured records during a session. Use this to capture decisions, learning
 
 **Returns:** Confirmation with the new record's id and table.
 
+Records created through `memory_add` are automatically stamped with Record Provenance `user_authored`. There is intentionally no provenance parameter — provenance is write-path metadata, not a caller claim (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
 ```js
 memory_add({ type: "decision", content: "Use PostgreSQL over MySQL", detail: "Better JSON support and JSONB indexing" })
 memory_add({ type: "learning", content: "bun:sqlite uses $param syntax", detail: "Not :param like better-sqlite3", tags: "bun,sqlite" })
diff --git a/docs/slash-commands.md b/docs/slash-commands.md
index 9213e55..50343ab 100644
--- a/docs/slash-commands.md
+++ b/docs/slash-commands.md
@@ -27,6 +27,7 @@ Searches messages, LoA entries, decisions, learnings, and breadcrumbs. The slash
 
 - `/Recall:search database choice -t decisions` — hard-filter to decisions only
 - `/Recall:search database choice --bias-type decisions` — prefer decisions first, while still returning matching learnings/messages/LoA/breadcrumbs
+- `/Recall:search database choice --show-provenance` — show Record Provenance for every result (by default only unknown provenance is flagged)
 
 Rule of thumb: use `-t` when you want only one table; use `--bias-type` when you want one table first without hiding other context.