edheltzel · edheltzel · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/FOR_CLAUDE.md b/FOR_CLAUDE.md
@@ -141,6 +141,8 @@ You can also use the `recall` CLI directly via Bash:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall dump "Session title"            # Capture current session

diff --git a/FOR_OPENCODE.md b/FOR_OPENCODE.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via Bash tool:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall onboard                         # Interactive L0 identity setup (run once per user)

diff --git a/FOR_PI.md b/FOR_PI.md
@@ -86,6 +86,8 @@ You can also use the `recall` CLI directly via shell commands:
 ```bash
 recall search "deployment pipeline"    # Search memory
 recall search "database choice" --bias-type decisions  # Prefer decisions, keep other matches
+recall search "auth" --show-provenance # Show Record Provenance for every result
+recall provenance backfill             # Classify legacy unknown-provenance rows (dry-run; --execute to apply)
 recall stats                           # Database statistics
 recall loa list                        # Browse curated knowledge
 recall onboard                         # Interactive L0 identity setup (run once per user)

diff --git a/commands/Recall/search.md b/commands/Recall/search.md
@@ -18,6 +18,7 @@ recall search "$1"
 - `-t <table>` — Hard-filter to one table: messages, loa, decisions, learnings, breadcrumbs
 - `--bias-type <table>` — Softly boost one table without filtering other matches. Same values as `-t`.
 - `-l <n>` — Max results (default: 20)
+- `--show-provenance` — Show Record Provenance for every result (by default only unknown provenance is flagged)
 
 ## Examples
 

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -78,6 +78,16 @@ tables (`messages`, `decisions`, `learnings`, `loa_entries`). It controls L1
 tier ranking at session start. Manage manually with `recall pin` / `recall unpin`
 or backfill from confidence signals with `recall importance backfill`.
 
+The `provenance` column was added in schema migration 8→9 on all five memory
+tables (`messages`, `decisions`, `learnings`, `breadcrumbs`, `loa_entries`).
+It declares how each record was created — `verbatim`, `user_authored`,
+`extracted`, or `derived` — and is stamped automatically by every write path,
+never accepted from callers (see
+`docs/adr/0001-record-provenance-automatic-write-path-metadata.md`). Legacy
+rows stay `NULL` (unknown) until classified with
+`recall provenance backfill`, which only acts on deterministic write-path
+evidence and never guesses.
+
 ## Tiered RecallStart (v0.7.0+)
 
 The `RecallStart` hook injects two tiers at the top of every session:

diff --git a/docs/cli-reference.md b/docs/cli-reference.md
@@ -16,6 +16,7 @@ recall search "query"                   # FTS5 search with options
 recall search "query" -t decisions      # Hard-filter to decisions only
 recall search "query" --bias-type decisions # Prefer decisions, still show other matching tables
 recall search "query" -p myproject      # Filter by project
+recall search "query" --show-provenance # Show Record Provenance for every result
 recall semantic "query"                 # Semantic search (explicit)
 recall hybrid "query"                   # Hybrid search (explicit)
 ```
@@ -43,6 +44,8 @@ FTS5 supports boolean operators and prefix matching:
 - `auth*` — prefix match (authz, authentication, etc.)
 - `"vpn config"` — exact phrase
 
+By default, search output stays quiet about [Record Provenance](#record-provenance) when a record carries a known value, and visibly flags records whose provenance is unknown (legacy rows that predate the provenance column). Pass `--show-provenance` to display the provenance of every result.
+
 ---
 
 ## Capture
@@ -209,6 +212,30 @@ recall unpin decisions 42               # Reset to table default (5, or 8 for Lo
 
 LoA entries have a write-time floor of 5; `recall pin` will not drop them below that.
 
+## Record Provenance
+
+The `provenance` column on `messages`, `decisions`, `learnings`, `breadcrumbs`,
+and `loa_entries` declares how each record was created: `verbatim` (exact source
+text), `user_authored` (directly authored via a user or agent command),
+`extracted` (generated from source material, possibly lossy), or `derived`
+(mechanically produced from existing memory records). Provenance is **automatic
+write-path metadata** — every write path stamps it; there is no flag or MCP
+parameter to set it (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
+Legacy rows that predate the column have no declared provenance (`NULL`,
+reported as `unknown`). The backfill classifies them conservatively — only
+where the source table or a write-path marker gives deterministic evidence;
+everything else stays unknown rather than being guessed:
+
+```bash
+recall provenance backfill                      # Dry-run report (default)
+recall provenance backfill --execute            # Apply the classification
+recall provenance backfill --execute -t loa_entries  # Limit to one table
+```
+
+Allowed `-t/--table` values: `messages`, `decisions`, `learnings`,
+`breadcrumbs`, `loa_entries`, `all` (default).
+
 ## Benchmarks
 
 Phase 2 benchmark harness for measuring context efficiency.

diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md
@@ -22,7 +22,7 @@ Use `table` when you need a **hard filter** to one record type. Use `bias_type`
 | bias_type | string | no | — | Softly boost one table type in ranking without filtering other matches. Same allowed values as `table`; prefer `table` when you need only one type. |
 | limit | number | no | 10 | Maximum number of results to return |
 
-**Returns:** Array of matching records with table name, id, content, project, and snippet highlighting.
+**Returns:** Array of matching records with table name, id, content, project, snippet highlighting, and Record Provenance (`verbatim`, `user_authored`, `extracted`, `derived`, or `unknown` for legacy rows that predate the provenance column).
 
 ```js
 // Only decisions
@@ -48,7 +48,7 @@ Combined keyword + semantic search using Reciprocal Rank Fusion. Best for natura
 | project | string | no | — | Filter results to a specific project name |
 | limit | number | no | 10 | Maximum number of results to return |
 
-**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores.
+**Returns:** Array of matching records ranked by fused keyword and semantic relevance scores, each with its Record Provenance.
 
 ```js
 memory_hybrid_search({ query: "how did we handle rate limiting", project: "my-app" })
@@ -67,7 +67,7 @@ Get recent context — LoA entries, decisions, and breadcrumbs. Good for orienti
 | limit | number | no | 5 | Number of recent entries to return per category |
 | project | string | no | — | Filter results to a specific project name |
 
-**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs.
+**Returns:** Recent records grouped by category: Library of Alexandria entries, decisions, and breadcrumbs — each annotated with its Record Provenance.
 
 ```js
 memory_recall({ limit: 5, project: "my-app" })
@@ -112,6 +112,8 @@ Add structured records during a session. Use this to capture decisions, learning
 
 **Returns:** Confirmation with the new record's id and table.
 
+Records created through `memory_add` are automatically stamped with Record Provenance `user_authored`. There is intentionally no provenance parameter — provenance is write-path metadata, not a caller claim (see `docs/adr/0001-record-provenance-automatic-write-path-metadata.md`).
+
 ```js
 memory_add({ type: "decision", content: "Use PostgreSQL over MySQL", detail: "Better JSON support and JSONB indexing" })
 memory_add({ type: "learning", content: "bun:sqlite uses $param syntax", detail: "Not :param like better-sqlite3", tags: "bun,sqlite" })

diff --git a/docs/slash-commands.md b/docs/slash-commands.md
@@ -27,6 +27,7 @@ Searches messages, LoA entries, decisions, learnings, and breadcrumbs. The slash
 
 - `/Recall:search database choice -t decisions` — hard-filter to decisions only
 - `/Recall:search database choice --bias-type decisions` — prefer decisions first, while still returning matching learnings/messages/LoA/breadcrumbs
+- `/Recall:search database choice --show-provenance` — show Record Provenance for every result (by default only unknown provenance is flagged)
 
 Rule of thumb: use `-t` when you want only one table; use `--bias-type` when you want one table first without hiding other context.
 

diff --git a/hooks/RecallPreCompact.ts b/hooks/RecallPreCompact.ts
@@ -358,10 +358,13 @@ export function flushConversation(convPath: string, cwd: string): FlushResult {
 
       // Insert messages. importance defaults to 5 — these are mid-session
       // captures, not curated, and the Stop hook may later promote a subset
-      // to LoA at importance 8.
+      // to LoA at importance 8. Raw transcript capture is verbatim
+      // (ADR-0001); the column guard keeps pre-provenance DBs working.
+      const hasProvenance = (db.prepare('PRAGMA table_info(messages)').all() as Array<{ name: string }>)
+        .some((c) => c.name === 'provenance');
       const insertMessage = db.prepare(`
-        INSERT INTO messages (session_id, timestamp, role, content, project, importance)
-        VALUES (?, ?, ?, ?, ?, 5)
+        INSERT INTO messages (session_id, timestamp, role, content, project, importance${hasProvenance ? ', provenance' : ''})
+        VALUES (?, ?, ?, ?, ?, 5${hasProvenance ? ", 'verbatim'" : ''})
       `);
 
       const tx = db.transaction((rows: ParsedMessage[]) => {

diff --git a/hooks/lib/sqlite-writers.ts b/hooks/lib/sqlite-writers.ts
@@ -42,6 +42,16 @@ function columnExists(db: Database, table: string, column: string): boolean {
   }
 }
 
+// ADR-0001: every writer in this file is an extraction path, so rows are
+// stamped 'extracted'. Returns the INSERT column-list suffix (col) and the
+// matching VALUES suffix (val) — a SQL literal, not a bind param — or two
+// empty strings when `table` lacks the provenance column (legacy DBs).
+function provenanceFragment(db: Database, table: string): { col: string; val: string } {
+  return columnExists(db, table, 'provenance')
+    ? { col: ', provenance', val: ", 'extracted'" }
+    : { col: '', val: '' };
+}
+
 // ---------------------------------------------------------------------------
 // extraction_sessions
 // ---------------------------------------------------------------------------
@@ -105,11 +115,12 @@ export function writeDecisionsBatch(dbPath: string, items: DecisionInput[]): num
   try {
     if (!tableExists(db, 'decisions')) return 0;
     const hasConfidence = columnExists(db, 'decisions', 'confidence');
+    const provenance = provenanceFragment(db, 'decisions');
     const sql = hasConfidence
-      ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance)
-         VALUES (?, ?, ?, ?, 'active', ?, ?)`
-      : `INSERT INTO decisions (session_id, category, project, decision, status, importance)
-         VALUES (?, ?, ?, ?, 'active', ?)`;
+      ? `INSERT INTO decisions (session_id, category, project, decision, status, confidence, importance${provenance.col})
+         VALUES (?, ?, ?, ?, 'active', ?, ?${provenance.val})`
+      : `INSERT INTO decisions (session_id, category, project, decision, status, importance${provenance.col})
+         VALUES (?, ?, ?, ?, 'active', ?${provenance.val})`;
     const stmt = db.prepare(sql);
     const insertMany = db.transaction((batch: DecisionInput[]) => {
       let n = 0;
@@ -165,11 +176,12 @@ export function writeLearningsBatch(dbPath: string, items: LearningInput[]): num
   try {
     if (!tableExists(db, 'learnings')) return 0;
     const hasConfidence = columnExists(db, 'learnings', 'confidence');
+    const provenance = provenanceFragment(db, 'learnings');
     const sql = hasConfidence
-      ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
-      : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
+      ? `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, confidence, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
+      : `INSERT INTO learnings (session_id, category, project, problem, solution, prevention, tags, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`;
     const stmt = db.prepare(sql);
     const insertMany = db.transaction((batch: LearningInput[]) => {
       let n = 0;
@@ -227,9 +239,10 @@ export function writeBreadcrumbsBatch(dbPath: string, items: BreadcrumbInput[]):
   const db = openDb(dbPath);
   try {
     if (!tableExists(db, 'breadcrumbs')) return 0;
+    const provenance = provenanceFragment(db, 'breadcrumbs');
     const stmt = db.prepare(
-      `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at)
-       VALUES (?, ?, ?, ?, ?, ?)`
+      `INSERT INTO breadcrumbs (session_id, content, category, project, importance, expires_at${provenance.col})
+       VALUES (?, ?, ?, ?, ?, ?${provenance.val})`
     );
     const insertMany = db.transaction((batch: BreadcrumbInput[]) => {
       let n = 0;
@@ -273,11 +286,12 @@ export function writeLoaEntryFromExtraction(dbPath: string, entry: LoaInput): nu
     if (!tableExists(db, 'loa_entries')) return 0;
     // LoA importance is floored at 5 (curated tier guardrail).
     const importance = Math.max(5, clampImportance(entry.importance, 8));
+    const provenance = provenanceFragment(db, 'loa_entries');
     const result = db
       .prepare(
         `INSERT INTO loa_entries
-           (title, description, fabric_extract, session_id, project, tags, message_count, importance)
-         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
+           (title, description, fabric_extract, session_id, project, tags, message_count, importance${provenance.col})
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?${provenance.val})`
       )
       .run(
         entry.title,

diff --git a/src/commands/add.ts b/src/commands/add.ts
@@ -21,7 +21,9 @@ export function runAddBreadcrumb(content: string, options: AddBreadcrumbOptions)
     content,
     project,
     category: options.category,
-    importance: options.importance ?? 5
+    importance: options.importance ?? 5,
+    // ADR-0001: provenance is stamped from the write path, never a CLI flag.
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added breadcrumb #${id}${project ? ` [${project}]` : ''}`);
@@ -51,7 +53,8 @@ export function runAddDecision(decision: string, options: AddDecisionOptions): v
     reasoning: options.why,
     alternatives: options.alternatives,
     status: 'active',
-    confidence
+    confidence,
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added decision #${id}${project ? ` [${project}]` : ''} (${confidence})`);
@@ -78,7 +81,8 @@ export function runAddLearning(problem: string, solution: string, options: AddLe
     project,
     category: options.category,
     prevention: options.prevention,
-    tags: options.tags
+    tags: options.tags,
+    provenance: 'user_authored'
   });
 
   console.log(`✓ Added learning #${id}${project ? ` [${project}]` : ''}`);

diff --git a/src/commands/dump.ts b/src/commands/dump.ts
@@ -387,7 +387,8 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
     summary: `Dumped: ${title}`
   });
 
-  const importedCount = addMessagesBatch(session.messages);
+  // Raw conversation capture is verbatim (ADR-0001).
+  const importedCount = addMessagesBatch(session.messages.map(m => ({ ...m, provenance: 'verbatim' as const })));
 
   // Get imported message IDs for LoA
   const db = getDb();
@@ -429,7 +430,10 @@ export async function coreDump(title: string, options: DumpOptions & { session?:
     parent_loa_id: options.continues,
     project: options.project || session.project,
     tags: options.tags,
-    message_count: importedMessages.length
+    message_count: importedMessages.length,
+    // Fabric output and the basic-summary fallback are both generated from
+    // the session messages — extracted either way (ADR-0001).
+    provenance: 'extracted'
   });
 
   await autoEmbedLoaEntry(loaId, title, fabricExtract);

diff --git a/src/commands/import-legacy.ts b/src/commands/import-legacy.ts
@@ -164,7 +164,10 @@ export function runImportLegacy(options: ImportLegacyOptions): void {
         message_range_start: undefined,
         message_range_end: undefined,
         message_count: undefined,
-        tags: 'legacy,imported'
+        tags: 'legacy,imported',
+        // DISTILLED.md / HOT_RECALL.md content is prior extraction output,
+        // so the imported record is stamped as extracted (ADR-0001).
+        provenance: 'extracted'
       });
 
       // Update the created_at to match the original date

diff --git a/src/commands/loa.ts b/src/commands/loa.ts
@@ -115,7 +115,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise<void>
     process.exit(1);
   }
 
-  // Create LoA entry
+  // Create LoA entry — Fabric extract_wisdom output is generated from the
+  // session messages, so the record is extracted (ADR-0001).
   const id = createLoaEntry({
     title,
     description: `Captured ${messages.length} messages`,
@@ -125,7 +126,8 @@ export async function runLoa(title: string, options: LoaOptions): Promise<void>
     parent_loa_id: options.continues,
     project,
     tags: options.tags,
-    message_count: messages.length
+    message_count: messages.length,
+    provenance: 'extracted'
   });
 
   console.log(`\n✓ LoA #${id} captured: "${title}"`);