diff --git a/.claude/settings.json b/.claude/settings.json
index 297f516..287c0bb 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -36,28 +36,18 @@
         "hooks": [
           {
             "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook mode-gate",
-            "timeout": 10
-          }
-        ]
-      },
-      {
-        "matcher": "TaskUpdate",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook task-deps",
-            "timeout": 10
+            "command": "\"/home/ubuntu/.local/bin/kata\" hook pre-tool-use",
+            "timeout": 30
           }
         ]
-      },
+      }
+    ],
+    "PostToolUse": [
       {
-        "matcher": "TaskUpdate",
         "hooks": [
           {
             "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook task-evidence",
-            "timeout": 10
+            "command": "\"/home/ubuntu/.local/bin/kata\" hook post-tool-use"
           }
         ]
       }
diff --git a/.gitignore b/.gitignore
index cd57479..9a4bf41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ eval-projects/
 .geminiignore
 .kata/sessions/
 .kata/batteries-backup/
+.kata/verification-evidence/
+.claude/sessions/
+eval-transcripts/
diff --git a/batteries/templates/stop-hook-test.md b/batteries/templates/stop-hook-test.md
index 84d02d4..a3976bc 100644
--- a/batteries/templates/stop-hook-test.md
+++ b/batteries/templates/stop-hook-test.md
@@ -8,6 +8,7 @@ workflow_prefix: "SH"
 phases:
   - id: p0
     name: Write
+    stage: work
     task_config:
       title: "P0: Write a trivial file"
       labels: [phase, phase-0]
@@ -36,6 +37,7 @@ phases:
 
   - id: p1
     name: Commit
+    stage: work
     task_config:
       title: "P1: Commit the file"
       labels: [phase, phase-1]
@@ -59,6 +61,7 @@ phases:
 
   - id: p2
     name: Push
+    stage: work
     task_config:
       title: "P2: Push to remote"
       labels: [phase, phase-2]
@@ -80,6 +83,7 @@ phases:
 
   - id: p3
     name: Cleanup
+    stage: close
     task_config:
       title: "P3: Revert and clean up"
       labels: [phase, phase-3]
diff --git a/planning/research/2026-03-30-dynamic-task-creation-generalization.md b/planning/research/2026-03-30-dynamic-task-creation-generalization.md
new file mode 100644
index 0000000..b3f132e
--- /dev/null
+++ b/planning/research/2026-03-30-dynamic-task-creation-generalization.md
@@ -0,0 +1,189 @@
+---
+date: 2026-03-30
+topic: Generalizing dynamic task creation beyond verify mode
+status: complete
+---
+
+# Research: Dynamic Task Creation Generalization
+
+## Questions Explored
+- How does verify mode's dynamic task creation work?
+- Which other modes would benefit from the same pattern?
+- What changes are needed to generalize it?
+
+## Current State
+
+### Task creation mechanisms today
+
+| Mode | Task Creation | Method |
+|------|--------------|--------|
+| planning | 16 static tasks | Template phases → `buildPhaseTasks()` at enter time |
+| implementation | Static + spec-driven | Template + `buildSpecTasks()` with subphase patterns at enter time |
+| task | 6 static tasks | Template phases → `buildPhaseTasks()` at enter time |
+| verify | Static + **dynamic** | Template + `TaskCreate` at runtime in container phase |
+| research | **None** | Has phases/steps but no tasks created |
+| debug | **None** | Has phases/steps but no tasks created |
+| freeform | **None** | No phases at all |
+
+### Verify mode's pattern (the one that works)
+
+Verify mode uses a discover-then-expand pattern:
+
+```
+P0: Setup (static tasks)
+    → Discovers VP steps from spec, plan file, or git diff
+
+P1: Execute (container: true)
+    → expand-vp-steps calls TaskCreate per discovered VP step
+    → Each VP step becomes a trackable, completable task
+
+P2+: Operate on those dynamic tasks (fix loop, evidence)
+```
+
+Key design elements:
+- P1 is marked `container: true` in the template YAML
+- The template instruction explicitly tells the agent to call `TaskCreate`
+- A special exception overrides the "no TaskCreate" rule for verify mode only
+- All tasks are created at once, before execution begins
+- Each task is independently trackable (pass/fail per VP step)
+
+### The current gate
+
+Verify has a hardcoded exception:
+
+> "Verify mode is the **only mode** that uses `TaskCreate`. This overrides the standard `task_rules`..."
+
+This is the only thing preventing other modes from using the same pattern.
+
+## Key Finding: The Pattern is Template-Driven, Not Mode-Driven
+
+Verify's dynamic task creation isn't special infrastructure — it's just a template instruction that says "call `TaskCreate` here." The `container: true` phase marker already exists in the schema. The only blocker is the policy gate that restricts `TaskCreate` to verify mode.
+
+**Proposed change:** Make the `TaskCreate` exception phase-driven rather than mode-driven. Any phase with `container: true` allows `TaskCreate` within that phase.
+
+## Candidate Modes for Dynamic Task Creation
+
+### Planning Mode — strongest candidate
+
+**Current problem:** P2 (Spec Writing) has 3 static tasks regardless of feature complexity. A simple config change and a complex multi-service feature get the same task structure.
+
+**Dynamic pattern:**
+```
+P0: Research (static — 2 tasks)
+P1: Interview (static — 5 tasks)
+    → Discovers: behaviors, integration points, test scenarios
+
+P2: Spec Writing (container: true)
+    → After P1 requirements approval, expand per behavior:
+      - "Write B1: auth flow"
+      - "Write B2: token refresh"
+      - "Write B3: session management"
+    → Each behavior section independently trackable
+
+P3: Review Gate (static — 3 tasks)
+P4: Finalize (static — 3 tasks)
+```
+
+**Benefits:**
+- Progress tracking per behavior (not just "spec writing in progress")
+- Natural parallelism — behaviors can be written by parallel agents
+- Review can reference specific behavior tasks
+- Scales with feature complexity (2 behaviors = 2 tasks, 10 = 10)
+
+**Trade-off:** Currently a single agent writes the whole spec in one shot, which preserves cross-behavior coherence. Per-behavior tasks would need a "coherence pass" afterward, or a shared context doc that each behavior writer reads.
+
+### Debug Mode — strong candidate
+
+**Current problem:** No tasks at all. Progress is invisible.
+
+**Dynamic pattern:**
+```
+P0: Reproduce (static — 2 tasks)
+    → Discovers: symptoms, affected code paths
+
+P1: Hypotheses (container: true)
+    → After reproduction, expand per hypothesis:
+      - "H1: Race condition in session cleanup"
+      - "H2: Stale cache after config reload"
+      - "H3: Off-by-one in pagination"
+    → Each hypothesis independently testable/dismissable
+
+P2: Fix (static — depends on which hypothesis is confirmed)
+P3: Verify fix (static — 2 tasks)
+```
+
+**Benefits:**
+- Hypotheses are tracked (tested/confirmed/dismissed)
+- Stop conditions can check "at least one hypothesis confirmed"
+- Natural debugging workflow — you don't know the hypotheses upfront
+
+### Research Mode — moderate candidate
+
+**Dynamic pattern:**
+```
+P0: Initial scan (static — 2 tasks)
+    → Discovers: research threads to investigate
+
+P1: Deep dive (container: true)
+    → After initial scan, expand per thread:
+      - "Investigate logging architecture"
+      - "Map auth middleware chain"
+      - "Compare caching strategies"
+    → Each thread independently explorable
+
+P2: Synthesize (static — 2 tasks)
+P3: Document (static — 2 tasks)
+```
+
+**Benefits:**
+- Research coverage tracked per thread
+- Natural parallelism for independent threads
+- Output doc can reference which threads were explored
+
+**Trade-off:** Research is intentionally exploratory. Too much structure might constrain discovery. Could make the container phase optional — only expand if the agent identifies discrete threads.
+
+### Task Mode — poor candidate
+
+Already lightweight (6 tasks). The whole point is "small change, minimal ceremony." Dynamic expansion would fight the mode's purpose.
+
+### Freeform — not a candidate
+
+Intentionally unstructured. No phases at all.
+
+## Implementation Path
+
+### Step 1: Make `TaskCreate` gate phase-driven
+
+Change the `TaskCreate` restriction from "mode === verify" to "current phase has `container: true`". This is likely in the mode-gate hook or task rules documentation.
+
+**Files to check:**
+- `src/commands/hook.ts` — mode-gate hook logic
+- Template task_rules section — documentation that agents read
+- Any PreToolUse hook that blocks `TaskCreate`
+
+### Step 2: Update templates that want dynamic creation
+
+Add `container: true` to the relevant phase and write the expand instruction. No TypeScript changes are needed for this step — it is purely template content.
+
+### Step 3: Wire stop conditions
+
+Modes using dynamic tasks should add `tasks_complete` to their `stop_conditions` under `modes:` in `kata.yaml` so the stop hook enforces completion.
+
+### Incremental rollout
+
+1. **First:** Just lift the verify-only restriction (step 1). No template changes yet.
+2. **Then:** Update debug template to use container phase for hypotheses — simplest template to modify, low risk.
+3. **Then:** Planning P2 — higher impact but needs the coherence-pass design decision.
+4. **Last:** Research — only if the pattern proves valuable in debug/planning.
+
+## Open Questions
+
+- **Planning coherence:** If behaviors are written as separate tasks, how do you ensure cross-behavior consistency? Options: shared context doc, coherence review pass, or keep single-agent-writes-all but track per-behavior review tasks instead.
+- **Task naming convention:** Verify uses `VP{N}: {title}`. Should other modes follow a similar pattern? (`H{N}:` for hypotheses, `B{N}:` for behaviors, `R{N}:` for research threads?)
+- **Multiple container phases:** Can a mode have more than one container phase? (e.g., planning could have container phases in both P2 and P3 for per-behavior writing AND per-behavior review)
+
+## Next Steps
+
+- Create GitHub issue for this feature
+- Start with step 1 (lift verify-only gate) as a small task
+- Design debug template update as proof of concept
diff --git a/src/commands/can-exit.test.ts b/src/commands/can-exit.test.ts
index 690c24e..150bf2e 100644
--- a/src/commands/can-exit.test.ts
+++ b/src/commands/can-exit.test.ts
@@ -20,6 +20,7 @@ async function captureCanExit(args: string[]): Promise<string> {
   const { canExit } = await import('./can-exit.js')
   let captured = ''
   const origLog = console.log
+  const origExitCode = process.exitCode
   console.log = (...logArgs: unknown[]) => {
     captured += logArgs.map(String).join(' ')
   }
@@ -27,6 +28,7 @@ async function captureCanExit(args: string[]): Promise<string> {
     await canExit(args)
   } finally {
     console.log = origLog
+    process.exitCode = origExitCode
   }
   return captured
 }
@@ -38,13 +40,12 @@ describe('canExit', () => {
 
   beforeEach(() => {
     tmpDir = makeTmpDir()
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
-    mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
     // Write baseline kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml)
     // Include implementation + freeform modes with the stop_conditions used by test scenarios.
     // Individual tests that need specific review config overwrite this file before calling canExit.
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       [
         'spec_path: planning/specs',
         'research_path: planning/research',
@@ -74,12 +75,12 @@ describe('canExit', () => {
     } else {
       delete process.env.CLAUDE_SESSION_ID
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   function createSessionState(state: Record<string, unknown>): void {
     const sessionId = process.env.CLAUDE_SESSION_ID!
-    const sessionDir = join(tmpDir, '.claude', 'sessions', sessionId)
+    const sessionDir = join(tmpDir, '.kata', 'sessions', sessionId)
     mkdirSync(sessionDir, { recursive: true })
     writeFileSync(
       join(sessionDir, 'state.json'),
@@ -124,7 +125,7 @@ describe('canExit', () => {
     // Regression: "on base branch / no diff" used to short-circuit ALL checks including
     // tasks_complete, allowing exit at session start before any work was done.
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           research: { template: 'research.md', stop_conditions: ['tasks_complete', 'committed'] },
@@ -157,7 +158,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: blocks when no phase evidence files exist', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -180,7 +181,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: passes when phase evidence file exists with overallPassed true', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -194,7 +195,7 @@ describe('canExit', () => {
       issueNumber: 333,
     })
 
-    const evidenceDir = join(tmpDir, '.claude', 'verification-evidence')
+    const evidenceDir = join(tmpDir, '.kata', 'verification-evidence')
     mkdirSync(evidenceDir, { recursive: true })
     writeFileSync(
       join(evidenceDir, 'phase-p1-333.json'),
@@ -215,7 +216,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: blocks when phase evidence overallPassed is false', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -229,7 +230,7 @@ describe('canExit', () => {
       issueNumber: 222,
     })
 
-    const evidenceDir = join(tmpDir, '.claude', 'verification-evidence')
+    const evidenceDir = join(tmpDir, '.kata', 'verification-evidence')
     mkdirSync(evidenceDir, { recursive: true })
     writeFileSync(
       join(evidenceDir, 'phase-p1-222.json'),
diff --git a/src/commands/can-exit.ts b/src/commands/can-exit.ts
index 871b7b5..6adbdfb 100644
--- a/src/commands/can-exit.ts
+++ b/src/commands/can-exit.ts
@@ -2,7 +2,7 @@
 import { execSync } from 'node:child_process'
 import { existsSync, readdirSync, readFileSync } from 'node:fs'
 import { join } from 'node:path'
-import { getCurrentSessionId, findProjectDir, getStateFilePath, getVerificationDir } from '../session/lookup.js'
+import { getCurrentSessionId, findProjectDir, getStateFilePath, getVerificationDir, getSessionsDir } from '../session/lookup.js'
 import { readState } from '../state/reader.js'
 import {
   type StopGuidance,
@@ -19,6 +19,7 @@ import {
 } from './enter/task-factory.js'
 import { loadKataConfig } from '../config/kata-config.js'
 import { findSpecFile, validateSpec } from './validate-spec.js'
+import { readEditsSet, parseGitStatusPaths } from '../tracking/edits-log.js'
 
 /**
  * Parse command line arguments for can-exit command
@@ -43,28 +44,53 @@ function parseArgs(args: string[]): {
 /**
  * Check git conditions (committed, pushed) based on which checks are active
  */
-function checkGlobalConditions(checks: Set<string>): { passed: boolean; reasons: string[] } {
+function checkGlobalConditions(checks: Set<string>, sessionDir?: string): { passed: boolean; reasons: string[]; advisories: string[] } {
   const reasons: string[] = []
+  const advisories: string[] = []
 
   try {
     if (checks.has('committed')) {
+      // Strip trailing newlines only — `.trim()` would eat the leading space
+      // of the first line's porcelain status (e.g. " M README.md"), corrupting
+      // parseGitStatusPaths which expects status at positions 0-1 and path at position 3+.
       const gitStatus = execSync('git status --porcelain 2>/dev/null || true', {
         encoding: 'utf-8',
         stdio: ['pipe', 'pipe', 'pipe'],
-      }).trim()
+      }).replace(/\n+$/, '')
 
       if (gitStatus) {
+        const sessionEdits = sessionDir ? readEditsSet(sessionDir) : null
+        const outOfScopeFiles: string[] = []
+
         const changedFiles = gitStatus.split('\n').filter((line) => {
           if (line.startsWith('??')) return false
+          const paths = parseGitStatusPaths(line)
+          const file = paths[0] // primary path
           // Exclude kata session logs — the stop hook writes these on every invocation,
           // creating a recursive loop if we count them as uncommitted changes
-          const file = line.slice(3)
           if (file.startsWith('.kata/sessions/')) return false
+
+          if (sessionEdits) {
+            // Session-scoped: only count files this session touched
+            if (sessionEdits.has(file)) return true
+            // Track out-of-scope files for advisory
+            outOfScopeFiles.push(file)
+            return false
+          }
+          // No session tracking (no edits.jsonl) — fall back to global behavior
           return true
         })
+
         if (changedFiles.length > 0) {
           reasons.push('Uncommitted changes in tracked files')
         }
+
+        // Advisory for out-of-scope dirty files
+        if (outOfScopeFiles.length > 0) {
+          const shown = outOfScopeFiles.slice(0, 5)
+          const suffix = outOfScopeFiles.length > 5 ? `, ... and ${outOfScopeFiles.length - 5} more` : ''
+          advisories.push(`Note: ${outOfScopeFiles.length} file(s) outside this session's scope have uncommitted changes: ${shown.join(', ')}${suffix}`)
+        }
       }
     }
 
@@ -85,6 +111,7 @@ function checkGlobalConditions(checks: Set<string>): { passed: boolean; reasons:
   return {
     passed: reasons.length === 0,
     reasons,
+    advisories,
   }
 }
 
@@ -178,7 +205,7 @@ function checkTestsPass(issueNumber: number, nonCodePaths: string[]): { passed:
  * Check that at least one new test function was added in this session vs diff_base.
  * Reads project.diff_base and project.test_file_pattern from wm.yaml.
  */
-function checkFeatureTestsAdded(): { passed: boolean; newTestCount?: number } {
+function checkFeatureTestsAdded(sessionDir?: string): { passed: boolean; newTestCount?: number } {
   try {
     const cfg = loadKataConfig()
     const diffBase = cfg.project?.diff_base ?? 'origin/main'
@@ -194,13 +221,27 @@ function checkFeatureTestsAdded(): { passed: boolean; newTestCount?: number } {
       .split('\n')
       .filter((f) => f && patterns.some((ext) => f.endsWith(ext)))
 
-    if (changedFiles.length === 0) {
+    // Filter to session-owned files if tracking is available.
+    // If filtering produces an empty set (tracking may not cover the full session),
+    // fall back to the unfiltered list — better to over-check than miss real tests.
+    let filteredFiles = changedFiles
+    if (sessionDir) {
+      const sessionEdits = readEditsSet(sessionDir)
+      if (sessionEdits.size > 0) {
+        const scoped = changedFiles.filter(f => sessionEdits.has(f))
+        if (scoped.length > 0) {
+          filteredFiles = scoped
+        }
+      }
+    }
+
+    if (filteredFiles.length === 0) {
       return { passed: false, newTestCount: 0 }
     }
 
     // Count new test function declarations added
     const diffOutput = execSync(
-      `git diff "${diffBase}" -- ${changedFiles.map((f) => `"${f}"`).join(' ')} 2>/dev/null || true`,
+      `git diff "${diffBase}" -- ${filteredFiles.map((f) => `"${f}"`).join(' ')} 2>/dev/null || true`,
       { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] },
     )
 
@@ -324,14 +365,25 @@ function validateCanExit(
 ): {
   canExit: boolean
   reasons: string[]
+  advisories: string[]
   hasOpenTasks: boolean
   usingTasks: boolean
 } {
   const reasons: string[] = []
+  let allAdvisories: string[] = []
+
+  const sessionDir = (() => {
+    try {
+      const projectDir = findProjectDir()
+      return join(getSessionsDir(projectDir), sessionId)
+    } catch {
+      return undefined
+    }
+  })()
 
   // No stop conditions = can always exit
   if (stopConditions.length === 0) {
-    return { canExit: true, reasons: [], hasOpenTasks: false, usingTasks: false }
+    return { canExit: true, reasons: [], advisories: [], hasOpenTasks: false, usingTasks: false }
   }
 
   // Build effective checks set (filter stage-scoped conditions whose stage isn't complete)
@@ -399,7 +451,7 @@ function validateCanExit(
 
     // ── feature_tests_added ──
     if (checks.has('feature_tests_added')) {
-      const featureTestsCheck = checkFeatureTestsAdded()
+      const featureTestsCheck = checkFeatureTestsAdded(sessionDir)
       if (!featureTestsCheck.passed) {
         reasons.push(
           'At least one new test function required (it/test/describe). See: arXiv 2402.13521',
@@ -426,8 +478,9 @@ function validateCanExit(
     // ── committed + pushed (check after task/verification checks) ──
     if (reasons.length === 0) {
       if (checks.has('committed') || checks.has('pushed')) {
-        const globalCheck = checkGlobalConditions(checks)
+        const globalCheck = checkGlobalConditions(checks, sessionDir)
         reasons.push(...globalCheck.reasons)
+        allAdvisories = globalCheck.advisories
       }
     }
   }
@@ -435,6 +488,7 @@ function validateCanExit(
   return {
     canExit: reasons.length === 0,
     reasons,
+    advisories: allAdvisories,
     hasOpenTasks,
     usingTasks,
   }
@@ -548,6 +602,7 @@ export async function canExit(args: string[]): Promise<void> {
   const {
     canExit: canExitNow,
     reasons,
+    advisories,
     hasOpenTasks,
     usingTasks,
   } = validateCanExit(workflowId, sessionId, stopConditions, issueNumber, phasesByStage, deliverablePath)
@@ -569,6 +624,7 @@ export async function canExit(args: string[]): Promise<void> {
         {
           canExit: canExitNow,
           reasons,
+          advisories,
           guidance,
           workflowId,
           sessionType,
@@ -601,6 +657,10 @@ export async function canExit(args: string[]): Promise<void> {
         )
       }
     }
+    for (const advisory of advisories) {
+      // biome-ignore lint/suspicious/noConsole: intentional CLI output
+      console.log(`  ℹ️  ${advisory}`)
+    }
   }
 
   // Exit code 0 if can exit, 1 if not
diff --git a/src/commands/enter.test.ts b/src/commands/enter.test.ts
index 05b9f31..d1802b6 100644
--- a/src/commands/enter.test.ts
+++ b/src/commands/enter.test.ts
@@ -1,5 +1,8 @@
-import { describe, it, expect, beforeEach, afterEach } from 'bun:test'
-import { mkdirSync, rmSync, writeFileSync } from 'node:fs'
+import { describe, it, expect, beforeEach, afterEach, afterAll } from 'bun:test'
+
+afterAll(() => { process.exitCode = 0 })
+import { mkdirSync, rmSync, writeFileSync, readFileSync, existsSync } from 'node:fs'
+import { execSync } from 'node:child_process'
 import { join } from 'node:path'
 import * as os from 'node:os'
 
@@ -15,13 +18,15 @@ function makeTmpDir(): string {
 /**
  * Helper: capture console.log output from enter(), also suppressing stderr
  */
-async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: string }> {
+async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: string; exitCode: number | undefined }> {
   const { enter } = await import('./enter.js')
   let stdout = ''
   let stderr = ''
   const origLog = console.log
   const origError = console.error
   const origStderrWrite = process.stderr.write
+  const origExitCode = process.exitCode
+  process.exitCode = 0
   console.log = (...logArgs: unknown[]) => {
     stdout += logArgs.map(String).join(' ')
   }
@@ -39,7 +44,9 @@ async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: s
     console.error = origError
     process.stderr.write = origStderrWrite
   }
-  return { stdout, stderr }
+  const exitCode = process.exitCode
+  process.exitCode = 0
+  return { stdout, stderr, exitCode }
 }
 
 describe('enter', () => {
@@ -49,13 +56,16 @@ describe('enter', () => {
 
   beforeEach(() => {
     tmpDir = makeTmpDir()
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
-    mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
     // Write kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml)
     // Include modes needed by tests (freeform, research, flow-deprecated)
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       [
+        'project:',
+        '  build_command: "echo build"',
+        '  test_command: "echo test"',
+        '  typecheck_command: "echo typecheck"',
         'spec_path: planning/specs',
         'research_path: planning/research',
         'modes:',
@@ -69,6 +79,9 @@ describe('enter', () => {
         '  implementation:',
         '    template: implementation.md',
         '    stop_conditions: [tasks_complete, committed]',
+        '  task:',
+        '    template: task.md',
+        '    stop_conditions: [tasks_complete, committed]',
         '  flow:',
         '    deprecated: true',
         '    redirect_to: freeform',
@@ -91,12 +104,12 @@ describe('enter', () => {
     } else {
       delete process.env.CLAUDE_SESSION_ID
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   it('prints usage when no mode is provided', async () => {
-    const { stderr } = await captureEnter([])
-    expect(process.exitCode).toBe(1)
+    const { stderr, exitCode } = await captureEnter([])
+    expect(exitCode).toBe(1)
     expect(stderr).toContain('Usage:')
   })
 
@@ -142,25 +155,25 @@ describe('enter', () => {
   })
 
   it('rejects unknown mode', async () => {
-    const { stderr } = await captureEnter([
+    const { stderr, exitCode } = await captureEnter([
       'totally-nonexistent-mode',
       '--skip-cleanup',
       `--session=${process.env.CLAUDE_SESSION_ID}`,
     ])
 
-    expect(process.exitCode).toBe(1)
+    expect(exitCode).toBe(1)
     expect(stderr).toContain('Unknown mode')
   })
 
   it('rejects deprecated mode', async () => {
     // 'flow' is deprecated with redirect_to: freeform
-    const { stderr } = await captureEnter([
+    const { stderr, exitCode } = await captureEnter([
       'flow',
       '--skip-cleanup',
       `--session=${process.env.CLAUDE_SESSION_ID}`,
     ])
 
-    expect(process.exitCode).toBe(1)
+    expect(exitCode).toBe(1)
     expect(stderr).toContain('deprecated')
   })
 
@@ -195,10 +208,12 @@ name: "Custom Template"
 phases:
   - id: p0
     name: "Step 1"
+    stage: setup
     task_config:
       title: "Do step 1"
   - id: p1
     name: "Step 2"
+    stage: work
     task_config:
       title: "Do step 2"
 ---
@@ -217,13 +232,13 @@ Instructions here.
 
     const result = JSON.parse(stdout) as {
       success: boolean
-      customTemplate: string
+      template: string
       phases: string[]
       dryRun: boolean
     }
 
     expect(result.success).toBe(true)
-    expect(result.customTemplate).toBe(templatePath)
+    expect(result.template).toBe(templatePath)
     expect(result.phases).toEqual(['p0', 'p1'])
     expect(result.dryRun).toBe(true)
   })
@@ -231,7 +246,7 @@ Instructions here.
   it('spec_path from kata.yaml is respected', async () => {
     // Write kata.yaml with custom spec_path, including the freeform mode needed by the test
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       [
         'spec_path: custom/specs',
         'research_path: planning/research',
@@ -320,4 +335,44 @@ Instructions here.
     // The dry-run stderr preview includes native task subjects with skill invocations
     expect(stderr).toContain('kata-setup')
   })
+
+  // Regression for porcelain-leading-space bug: a worktree-only modification
+  // emits " M path" (leading space = empty index status). captureBaseline used
+  // to `.trim()` the full status, eating that leading space and causing
+  // parseGitStatusPaths to return "ath" instead of "path".
+  it('baseline.json records correct path for worktree-only modifications', async () => {
+    // Build a real git repo inside tmpDir so kata enter's captureBaseline
+    // sees genuine porcelain output (with a leading-space status line).
+    // Note: captureBaseline runs execSync without an explicit cwd, so we
+    // chdir into tmpDir for the duration of this test.
+    const exec = (cmd: string) => execSync(cmd, { cwd: tmpDir, stdio: 'pipe', encoding: 'utf-8' })
+    exec('git init -q')
+    exec('git config user.email test@test')
+    exec('git config user.name test')
+    writeFileSync(join(tmpDir, 'README.md'), 'original\n')
+    exec('git add README.md')
+    exec('git -c commit.gpgsign=false commit -q -m init')
+    // Worktree-only modification — emits " M README.md" in porcelain.
+    writeFileSync(join(tmpDir, 'README.md'), 'modified\n')
+
+    const sessionId = process.env.CLAUDE_SESSION_ID!
+    const origCwd = process.cwd()
+    process.chdir(tmpDir)
+    try {
+      await captureEnter([
+        'task',
+        '--skip-cleanup',
+        `--session=${sessionId}`,
+      ])
+    } finally {
+      process.chdir(origCwd)
+    }
+
+    const baselinePath = join(tmpDir, '.kata', 'sessions', sessionId, 'baseline.json')
+    expect(existsSync(baselinePath)).toBe(true)
+    const baseline = JSON.parse(readFileSync(baselinePath, 'utf-8')) as { files: string[] }
+    // The key assertion: path is "README.md", NOT "EADME.md".
+    expect(baseline.files).toContain('README.md')
+    expect(baseline.files).not.toContain('EADME.md')
+  })
 })
diff --git a/src/commands/enter.ts b/src/commands/enter.ts
index 4b1ffe2..3e0df4e 100644
--- a/src/commands/enter.ts
+++ b/src/commands/enter.ts
@@ -1,4 +1,5 @@
 // kata enter - Enter a mode
+import { execSync } from 'node:child_process'
 import { existsSync, mkdirSync, readFileSync } from 'node:fs'
 import { resolve, dirname, join } from 'node:path'
 import jsYaml from 'js-yaml'
@@ -7,6 +8,7 @@ import {
   getStateFilePath,
   findProjectDir,
   getPackageRoot,
+  getSessionsDir,
 } from '../session/lookup.js'
 import { readState, stateExists } from '../state/reader.js'
 import { writeState } from '../state/writer.js'
@@ -123,6 +125,32 @@ import {
 } from './enter/task-factory.js'
 import { parseArgs, createDefaultState } from './enter/cli.js'
 import { createFdNotesFile, createDoctrineNotesFile } from './enter/notes.js'
+import { writeBaseline, parseGitStatusPaths } from '../tracking/edits-log.js'
+
+/**
+ * Capture baseline snapshot — record tracked files that are already dirty when
+ * the session starts (per `git status --porcelain`, run in the process cwd) so
+ * stop conditions can distinguish them from files the session actually modified.
+ */
+function captureBaseline(sessionId: string): void {
+  try {
+    const sessionDir = join(getSessionsDir(findProjectDir()), sessionId)
+    // Strip trailing newlines only — `.trim()` would eat the leading space
+    // of the first line's porcelain status (e.g. " M README.md"), corrupting
+    // path parsing which expects status at positions 0-1 and path at position 3+.
+    const status = execSync('git status --porcelain 2>/dev/null || true', {
+      encoding: 'utf-8',
+      stdio: ['pipe', 'pipe', 'pipe'],
+    }).replace(/\n+$/, '')
+    const baselineFiles = status
+      .split('\n')
+      .filter(l => l && !l.startsWith('??'))
+      .flatMap(parseGitStatusPaths)
+    writeBaseline(sessionDir, baselineFiles)
+  } catch {
+    // Best-effort: any failure here is swallowed so it cannot block mode entry
+  }
+}
 
 /**
  * Enter with a custom template (one-off session)
@@ -243,6 +271,8 @@ async function enterWithCustomTemplate(
   if (!parsed.dryRun) {
     await writeState(stateFile, finalState)
 
+    captureBaseline(finalState.sessionId!)
+
     // Create fd-notes.md for feature-documentation mode (interview context persistence)
     if (modeName === 'feature-documentation' || templatePath.includes('feature-documentation')) {
       const featureDocPath = (finalState as Record<string, unknown>).featureDocPath as
@@ -676,6 +706,8 @@ export async function enter(args: string[]): Promise<void> {
   // Skip state write in dry-run mode
   if (!parsed.dryRun) {
     await writeState(stateFile, finalState)
+
+    captureBaseline(finalState.sessionId!)
   }
 
   // Determine action taken (native tasks always recreate, so always 'started')
diff --git a/src/commands/hook.test.ts b/src/commands/hook.test.ts
index c52ccd3..fc01bf1 100644
--- a/src/commands/hook.test.ts
+++ b/src/commands/hook.test.ts
@@ -1,4 +1,6 @@
-import { describe, it, expect, beforeEach, afterEach } from 'bun:test'
+import { describe, it, expect, beforeEach, afterEach, afterAll } from 'bun:test'
+
+afterAll(() => { process.exitCode = 0 })
 import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
 import { join } from 'node:path'
 import * as os from 'node:os'
@@ -100,13 +102,14 @@ describe('hook dispatch', () => {
     } else {
       delete process.env.CLAUDE_PROJECT_DIR
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   it('unknown hook name sets exit code 1', async () => {
     const { hook } = await import('./hook.js')
     const stderr = await captureStderr(() => hook(['nonexistent-hook']))
     expect(process.exitCode).toBe(1)
+    process.exitCode = 0
     expect(stderr).toContain('Unknown hook')
   })
 
@@ -114,6 +117,7 @@ describe('hook dispatch', () => {
     const { hook } = await import('./hook.js')
     const stderr = await captureStderr(() => hook([]))
     expect(process.exitCode).toBe(1)
+    process.exitCode = 0
     expect(stderr).toContain('Usage: kata hook <name>')
   })
 })
diff --git a/src/commands/hook.ts b/src/commands/hook.ts
index 37f067f..61767f9 100644
--- a/src/commands/hook.ts
+++ b/src/commands/hook.ts
@@ -2,7 +2,7 @@
 // Core of hooks-as-commands architecture: each hook event has a handler function
 // that reads stdin JSON, performs the check, and outputs Claude Code hook JSON.
 import { execSync } from 'node:child_process'
-import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync } from 'node:fs'
+import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs'
 import { homedir } from 'node:os'
 import { join } from 'node:path'
 import { getStateFilePath, findProjectDir, getSessionsDir, resolveTemplatePath } from '../session/lookup.js'
@@ -13,6 +13,7 @@ import { isNativeTasksEnabled } from '../utils/tasks-check.js'
 import { resolvePlaceholders, type PlaceholderContext } from './enter/placeholder.js'
 import { parseTemplateYaml } from './enter/template.js'
 import type { Gate } from '../validation/schemas.js'
+import { toGitRelative, appendEdit, parseGitStatusPaths, readEditsSet } from '../tracking/edits-log.js'
 
 /**
  * Claude Code hook output format
@@ -405,11 +406,13 @@ export async function handleTaskEvidence(_input: Record<string, unknown>): Promi
     } catch {
       // No .claude/ found — fall back to hook runner's cwd
     }
+    // Strip trailing newlines only — consistent with other porcelain call sites
+    // so that the leading space of " M path" status lines is preserved.
     const gitStatus = execSync('git status --porcelain 2>/dev/null || true', {
       encoding: 'utf-8',
       stdio: ['pipe', 'pipe', 'pipe'],
       ...(cwd ? { cwd } : {}),
-    }).trim()
+    }).replace(/\n+$/, '')
 
     if (gitStatus) {
       // There are uncommitted changes — remind agent to commit before marking done
@@ -828,6 +831,34 @@ export async function handlePreToolUse(input: Record<string, unknown>): Promise<
     }
   }
 
+  // Bash pre-snapshot: capture git status before suspicious commands
+  if (toolName === 'Bash' && sessionId) {
+    const command = (toolInput.command as string) ?? ''
+    // Safe-list checked first — skip snapshot entirely
+    const safeList = /^(git\s|bun\s+test|ls\b|cat\b|echo\b[^>]*$|cd\b|pwd\b|which\b|head\b|tail\b|wc\b|diff\b|grep\b|find\b)/
+    if (!safeList.test(command)) {
+      // Suspicious regex checked second
+      const suspicious = /sed\s.*-i|>\s|>>\s|\btee\b|\bcp\b|\bmv\b|\brm\b|\bchmod\b|\bchown\b|\bpatch\b|\bcurl\b.*-o/
+      if (suspicious.test(command)) {
+        try {
+          const projectDir = findProjectDir()
+          const sessionDir = join(getSessionsDir(projectDir), sessionId)
+          // Strip trailing newlines only — `.trim()` would eat the leading space
+          // of the first porcelain line, corrupting diff parsing in PostToolUse.
+          const snapshot = execSync('git status --porcelain 2>/dev/null || true', {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+            cwd: projectDir,
+          }).replace(/\n+$/, '')
+          mkdirSync(sessionDir, { recursive: true })
+          writeFileSync(join(sessionDir, 'bash-pre-snapshot.txt'), snapshot)
+        } catch {
+          // Pre-snapshot failure must not block tool execution
+        }
+      }
+    }
+  }
+
   // 3. TaskUpdate(status: "completed") — run deps, gates, evidence in sequence
   if (toolName === 'TaskUpdate') {
     const taskId = (toolInput.taskId as string) ?? ''
@@ -907,20 +938,32 @@ export async function handlePreToolUse(input: Record<string, unknown>): Promise<
       // 3c. Check git evidence (advisory warning, always allow)
       let additionalContext = ''
       try {
-        let cwd: string | undefined
+        let projectDir: string | undefined
         try {
-          cwd = findProjectDir()
+          projectDir = findProjectDir()
         } catch {
           // No .kata/ found
         }
+        // Strip trailing newlines only — `.trim()` would eat the leading space
+        // of the first porcelain line (e.g. " M file.ts"), corrupting parseGitStatusPaths.
         const gitStatus = execSync('git status --porcelain 2>/dev/null || true', {
           encoding: 'utf-8',
           stdio: ['pipe', 'pipe', 'pipe'],
-          ...(cwd ? { cwd } : {}),
-        }).trim()
+          ...(projectDir ? { cwd: projectDir } : {}),
+        }).replace(/\n+$/, '')
 
         if (gitStatus) {
-          const changedFiles = gitStatus.split('\n').filter((l) => !l.startsWith('??'))
+          const evidenceSessionDir = sessionId ? join(getSessionsDir(projectDir ?? process.cwd()), sessionId) : undefined
+          const sessionEdits = evidenceSessionDir ? readEditsSet(evidenceSessionDir) : null
+
+          const changedFiles = gitStatus.split('\n').filter((l) => {
+            if (l.startsWith('??')) return false
+            if (sessionEdits) {
+              const paths = parseGitStatusPaths(l)
+              return paths.some(p => sessionEdits.has(p))
+            }
+            return true
+          })
           if (changedFiles.length > 0) {
             additionalContext =
               `⚠️ You have ${changedFiles.length} uncommitted change(s). ` +
@@ -953,12 +996,73 @@ export async function handlePreToolUse(input: Record<string, unknown>): Promise<
   })
 }
 
+// ── Handler: post-tool-use ──
+// Tracks files modified by Edit, Write, NotebookEdit (target sent as `notebook_path`), and Bash tools
+export async function handlePostToolUse(input: Record<string, unknown>): Promise<void> {
+  const sessionId = input.session_id as string | undefined
+  if (!sessionId) return
+
+  try {
+    const projectDir = findProjectDir()
+    const sessionDir = join(getSessionsDir(projectDir), sessionId)
+
+    // Guard: only track if session exists
+    if (!existsSync(join(sessionDir, 'state.json'))) return
+
+    const toolName = (input.tool_name as string) ?? ''
+    const toolInput = (input.tool_input as Record<string, unknown>) ?? {}
+
+    if (toolName === 'Edit' || toolName === 'Write' || toolName === 'NotebookEdit') {
+      const filePath = (toolInput.file_path ?? toolInput.notebook_path) as string | undefined
+      if (filePath) {
+        const gitRelative = toGitRelative(filePath)
+        appendEdit(sessionDir, { file: gitRelative, tool: toolName, ts: new Date().toISOString() })
+      }
+    } else if (toolName === 'Bash') {
+      // Compare post-execution git status against pre-snapshot
+      const snapshotPath = join(sessionDir, 'bash-pre-snapshot.txt')
+      if (existsSync(snapshotPath)) {
+        try {
+          // Strip trailing newlines only — `.trim()` would eat the leading space
+          // of the first porcelain line, corrupting parseGitStatusPaths.
+          const preSnapshot = readFileSync(snapshotPath, 'utf-8').replace(/\n+$/, '')
+          const postSnapshot = execSync('git status --porcelain 2>/dev/null || true', {
+            encoding: 'utf-8',
+            stdio: ['pipe', 'pipe', 'pipe'],
+            cwd: projectDir,
+          }).replace(/\n+$/, '')
+
+          // Find new dirty files
+          const preFiles = new Set(preSnapshot.split('\n').filter(Boolean).flatMap(parseGitStatusPaths))
+          const postLines = postSnapshot.split('\n').filter(Boolean)
+          for (const line of postLines) {
+            const paths = parseGitStatusPaths(line)
+            for (const p of paths) {
+              if (!preFiles.has(p)) {
+                appendEdit(sessionDir, { file: p, tool: 'Bash', ts: new Date().toISOString() })
+              }
+            }
+          }
+        } catch {
+          // Diff failure — silently ignore
+        } finally {
+          // Always remove the snapshot, even on failure, so a stale one is never diffed against a later command
+          try { unlinkSync(snapshotPath) } catch { /* ignore */ }
+        }
+      }
+    }
+  } catch {
+    // PostToolUse must never fail — silent no-op
+  }
+}
+
 // ── Hook name -> handler map ──
 const hookHandlers: Record<string, (input: Record<string, unknown>) => Promise<void>> = {
   'session-start': handleSessionStart,
   'user-prompt': handleUserPrompt,
   'pre-tool-use': handlePreToolUse,
   'stop-conditions': handleStopConditions,
+  'post-tool-use': handlePostToolUse,
   // Backwards-compat aliases for transition period
   'mode-gate': handlePreToolUse,
   'task-deps': handlePreToolUse,
diff --git a/src/commands/setup.ts b/src/commands/setup.ts
index a9327b6..2029c41 100644
--- a/src/commands/setup.ts
+++ b/src/commands/setup.ts
@@ -149,6 +149,17 @@ export function buildHookEntries(wmBin: string): Record<string, HookEntry[]> {
         ],
       },
     ],
+    // PostToolUse: track file mutations for session-scoped stop conditions
+    PostToolUse: [
+      {
+        hooks: [
+          {
+            type: 'command',
+            command: `${bin} hook post-tool-use`,
+          },
+        ],
+      },
+    ],
   }
 
   return hooks
@@ -203,7 +214,7 @@ export function mergeHooksIntoSettings(
     // Tolerates both bare `kata hook …` and quoted `"/path/kata" hook …` forms while
     // avoiding false positives from unrelated tools like lefthook or husky.
     const wmHookPattern =
-      /\bhook (session-start|user-prompt|stop-conditions|mode-gate|task-deps|task-evidence|pre-tool-use)\b/
+      /\bhook (session-start|user-prompt|stop-conditions|mode-gate|task-deps|task-evidence|pre-tool-use|post-tool-use)\b/
     const nonWmEntries = existing.filter((entry) => {
       return !entry.hooks?.some(
         (h) => typeof h.command === 'string' && wmHookPattern.test(h.command),
diff --git a/src/commands/suggest.test.ts b/src/commands/suggest.test.ts
index 668094f..aec94b1 100644
--- a/src/commands/suggest.test.ts
+++ b/src/commands/suggest.test.ts
@@ -36,12 +36,11 @@ describe('suggest', () => {
 
   beforeEach(() => {
     tmpDir = makeTmpDir()
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
-    mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
     // Write kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml)
     // Include modes with intent_keywords so mode detection tests work
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       [
         'spec_path: planning/specs',
         'research_path: planning/research',
@@ -123,7 +122,7 @@ describe('suggest', () => {
 
   it('uses research_path from KataConfig for search commands', async () => {
     // Write custom kata.yaml with custom research_path
-    const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml')
+    const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml')
     writeFileSync(kataYamlPath, 'research_path: custom/research\nspec_path: custom/specs\n')
 
     const output = await captureSuggest(['find', 'research', 'about', 'api'])
diff --git a/src/commands/teardown.test.ts b/src/commands/teardown.test.ts
index d922721..e1058b8 100644
--- a/src/commands/teardown.test.ts
+++ b/src/commands/teardown.test.ts
@@ -47,18 +47,18 @@ describe('teardown', () => {
     } else {
       delete process.env.CLAUDE_PROJECT_DIR
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   /**
    * Create a fully configured kata project at tmpDir
    */
   function createWmProject(): void {
-    mkdirSync(join(tmpDir, '.claude', 'sessions', 'some-session'), { recursive: true })
-    mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions', 'some-session'), { recursive: true })
+    mkdirSync(join(tmpDir, '.claude'), { recursive: true })
 
     // Write kata.yaml (teardown deletes kata.yaml, not wm.yaml)
-    writeFileSync(join(tmpDir, '.claude', 'workflows', 'kata.yaml'), 'spec_path: planning/specs\n')
+    writeFileSync(join(tmpDir, '.kata', 'kata.yaml'), 'spec_path: planning/specs\n')
 
     // Write settings.json with kata hooks and a non-kata hook
     writeFileSync(
@@ -158,7 +158,7 @@ describe('teardown', () => {
 
   it('deletes kata.yaml', async () => {
     createWmProject()
-    const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml')
+    const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml')
     expect(existsSync(kataYamlPath)).toBe(true)
 
     await captureTeardown(['--yes'], tmpDir)
@@ -168,7 +168,7 @@ describe('teardown', () => {
 
   it('preserves sessions/', async () => {
     createWmProject()
-    const sessionsDir = join(tmpDir, '.claude', 'sessions')
+    const sessionsDir = join(tmpDir, '.kata', 'sessions')
     expect(existsSync(sessionsDir)).toBe(true)
 
     await captureTeardown(['--yes'], tmpDir)
@@ -199,7 +199,7 @@ describe('teardown', () => {
 
   it('dry-run shows planned actions without making changes', async () => {
     createWmProject()
-    const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml')
+    const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml')
 
     const output = await captureTeardown(['--yes', '--dry-run'], tmpDir)
     expect(output).toContain('[DRY RUN]')
@@ -215,9 +215,10 @@ describe('teardown', () => {
     const output = await captureTeardown([], tmpDir)
     expect(output).toContain('--yes to confirm')
     expect(process.exitCode).toBe(1)
+    process.exitCode = 0
 
     // Files should still exist
-    const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml')
+    const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml')
     expect(existsSync(kataYamlPath)).toBe(true)
   })
 })
diff --git a/src/session/lookup.test.ts b/src/session/lookup.test.ts
index 2fc20eb..ac1ffa8 100644
--- a/src/session/lookup.test.ts
+++ b/src/session/lookup.test.ts
@@ -30,18 +30,18 @@ describe('resolveTemplatePath', () => {
   it('resolves project-level template first', () => {
     const tmpDir = makeTmpDir('proj-tmpl')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'workflows', 'templates'), { recursive: true })
-    writeFileSync(join(tmpDir, '.claude', 'workflows', 'templates', 'task.md'), '# project task')
+    mkdirSync(join(tmpDir, '.kata', 'templates'), { recursive: true })
+    writeFileSync(join(tmpDir, '.kata', 'templates', 'task.md'), '# project task')
     process.env.CLAUDE_PROJECT_DIR = tmpDir
 
     const result = resolveTemplatePath('task.md')
-    expect(result).toBe(join(tmpDir, '.claude', 'workflows', 'templates', 'task.md'))
+    expect(result).toBe(join(tmpDir, '.kata', 'templates', 'task.md'))
   })
 
   it('falls back to package batteries template', () => {
     const tmpDir = makeTmpDir('pkg-fallback')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata'), { recursive: true })
     process.env.CLAUDE_PROJECT_DIR = tmpDir
 
     // task.md exists in batteries/templates/ (package level)
@@ -52,7 +52,7 @@ describe('resolveTemplatePath', () => {
   it('throws when template not found at any tier', () => {
     const tmpDir = makeTmpDir('not-found')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata'), { recursive: true })
     process.env.CLAUDE_PROJECT_DIR = tmpDir
 
     expect(() => resolveTemplatePath('does-not-exist.md')).toThrow('Template not found')
@@ -86,7 +86,7 @@ describe('resolveSpecTemplatePath', () => {
   it('resolves project-level spec template first', () => {
     const tmpDir = makeTmpDir('proj-spec')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata'), { recursive: true })
     mkdirSync(join(tmpDir, 'planning', 'spec-templates'), { recursive: true })
     writeFileSync(join(tmpDir, 'planning', 'spec-templates', 'feature.md'), '# project feature')
     process.env.CLAUDE_PROJECT_DIR = tmpDir
@@ -95,21 +95,20 @@ describe('resolveSpecTemplatePath', () => {
     expect(result).toBe(join(tmpDir, 'planning', 'spec-templates', 'feature.md'))
   })
 
-  it('falls back to package batteries spec template', () => {
+  it('throws when spec template not found in project (no batteries fallback)', () => {
     const tmpDir = makeTmpDir('pkg-spec')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata'), { recursive: true })
     process.env.CLAUDE_PROJECT_DIR = tmpDir
 
-    // feature.md exists in batteries/spec-templates/
-    const result = resolveSpecTemplatePath('feature.md')
-    expect(result).toMatch(/batteries\/spec-templates\/feature\.md$/)
+    // resolveSpecTemplatePath only checks project planning/spec-templates/ — no batteries fallback
+    expect(() => resolveSpecTemplatePath('feature.md')).toThrow('Spec template not found')
   })
 
   it('throws when spec template not found at any tier', () => {
     const tmpDir = makeTmpDir('spec-not-found')
     tmpDirs.push(tmpDir)
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata'), { recursive: true })
     process.env.CLAUDE_PROJECT_DIR = tmpDir
 
     expect(() => resolveSpecTemplatePath('nonexistent.md')).toThrow('Spec template not found')
@@ -213,7 +212,7 @@ describe('getStateFilePath — layout-shift resilience', () => {
 
   beforeEach(() => {
     tmpDir = makeTmpDir('state-path')
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
     process.env.CLAUDE_PROJECT_DIR = tmpDir
   })
 
@@ -226,18 +225,11 @@ describe('getStateFilePath — layout-shift resilience', () => {
     }
   })
 
-  it('returns .claude/ path when state.json exists only there despite .kata/ existing', async () => {
+  it('returns .kata/ path for session state', async () => {
     const sessionId = '12345678-1234-4234-8234-123456789abc'
-    mkdirSync(join(tmpDir, '.claude', 'sessions', sessionId), { recursive: true })
-    writeFileSync(
-      join(tmpDir, '.claude', 'sessions', sessionId, 'state.json'),
-      JSON.stringify({ updatedAt: new Date().toISOString() }),
-    )
-    // .kata/ exists but has no sessions
-    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
 
     const result = await getStateFilePath(sessionId)
-    expect(result).toBe(join(tmpDir, '.claude', 'sessions', sessionId, 'state.json'))
+    expect(result).toBe(join(tmpDir, '.kata', 'sessions', sessionId, 'state.json'))
   })
 })
 
diff --git a/src/testing/integration.test.ts b/src/testing/integration.test.ts
index 48b36e7..9f5c5c8 100644
--- a/src/testing/integration.test.ts
+++ b/src/testing/integration.test.ts
@@ -94,7 +94,7 @@ describe('integration: full hook dispatch simulation', () => {
     } else {
       delete process.env.CLAUDE_SESSION_ID
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   it('session-start -> user-prompt -> stop-conditions lifecycle', async () => {
diff --git a/src/tracking/edits-log.test.ts b/src/tracking/edits-log.test.ts
new file mode 100644
index 0000000..261b432
--- /dev/null
+++ b/src/tracking/edits-log.test.ts
@@ -0,0 +1,142 @@
+import { describe, it, expect, beforeEach, afterEach } from 'bun:test'
+import { mkdirSync, rmSync, readFileSync, writeFileSync, existsSync, chmodSync } from 'node:fs'
+import { join } from 'node:path'
+import * as os from 'node:os'
+
+import {
+  appendEdit,
+  readEditsSet,
+  writeBaseline,
+  readBaseline,
+  parseGitStatusPaths,
+  toGitRelative,
+} from './edits-log.js'
+
+let tmpDir: string
+
+beforeEach(() => {
+  tmpDir = join(os.tmpdir(), `edits-log-test-${Date.now()}-${Math.random().toString(36).slice(2)}`)
+  mkdirSync(tmpDir, { recursive: true })
+})
+
+afterEach(() => {
+  rmSync(tmpDir, { recursive: true, force: true })
+})
+
+describe('parseGitStatusPaths', () => {
+  it('parses modified file', () => {
+    expect(parseGitStatusPaths('M  foo.ts')).toEqual(['foo.ts'])
+  })
+
+  it('parses added file', () => {
+    expect(parseGitStatusPaths('A  bar.ts')).toEqual(['bar.ts'])
+  })
+
+  it('skips untracked files', () => {
+    expect(parseGitStatusPaths('?? untracked.ts')).toEqual([])
+  })
+
+  it('parses rename producing both paths', () => {
+    expect(parseGitStatusPaths('R  old.ts -> new.ts')).toEqual(['old.ts', 'new.ts'])
+  })
+
+  // Regression: worktree-only modifications emit " M path" (leading space = empty index status).
+  // Callers that stripped the git output with .trim() used to corrupt the first character
+  // of the first dirty file. parseGitStatusPaths itself handles the line correctly;
+  // this test guards the callers' expected input shape.
+  it('parses worktree-only modification (leading space)', () => {
+    expect(parseGitStatusPaths(' M README.md')).toEqual(['README.md'])
+  })
+
+  it('parses worktree-only deletion (leading space)', () => {
+    expect(parseGitStatusPaths(' D gone.ts')).toEqual(['gone.ts'])
+  })
+})
+
+describe('appendEdit + readEditsSet', () => {
+  it('appends one edit and reads it back', () => {
+    const ts = new Date().toISOString()
+    appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Edit', ts })
+    const result = readEditsSet(tmpDir)
+    expect(result.has('src/index.ts')).toBe(true)
+    expect(result.size).toBe(1)
+  })
+
+  it('deduplicates the same file appended twice', () => {
+    const ts = new Date().toISOString()
+    appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Edit', ts })
+    appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Write', ts })
+    const result = readEditsSet(tmpDir)
+    expect(result.has('src/index.ts')).toBe(true)
+    expect(result.size).toBe(1)
+  })
+
+  it('returns empty Set for non-existent dir', () => {
+    const result = readEditsSet(join(tmpDir, 'nonexistent'))
+    expect(result.size).toBe(0)
+  })
+
+  it('persists multiple rapid sequential appends', () => {
+    const ts = new Date().toISOString()
+    for (let i = 0; i < 5; i++) {
+      appendEdit(tmpDir, { file: `file-${i}.ts`, tool: 'Edit', ts })
+    }
+    const result = readEditsSet(tmpDir)
+    expect(result.size).toBe(5)
+    for (let i = 0; i < 5; i++) {
+      expect(result.has(`file-${i}.ts`)).toBe(true)
+    }
+  })
+})
+
+describe('readEditsSet corrupt line resilience', () => {
+  it('skips corrupt lines and returns valid entries', () => {
+    const editsPath = join(tmpDir, 'edits.jsonl')
+    const lines = [
+      JSON.stringify({ file: 'a.ts', tool: 'Edit', ts: '2026-01-01T00:00:00Z' }),
+      'this is not valid json {{{',
+      JSON.stringify({ file: 'b.ts', tool: 'Write', ts: '2026-01-01T00:00:01Z' }),
+    ]
+    writeFileSync(editsPath, lines.join('\n') + '\n')
+    const result = readEditsSet(tmpDir)
+    expect(result.size).toBe(2)
+    expect(result.has('a.ts')).toBe(true)
+    expect(result.has('b.ts')).toBe(true)
+  })
+})
+
+describe('writeBaseline + readBaseline', () => {
+  it('writes and reads back baseline files as Set', () => {
+    const files = ['src/a.ts', 'src/b.ts', 'src/c.ts']
+    writeBaseline(tmpDir, files)
+    const result = readBaseline(tmpDir)
+    expect(result.size).toBe(3)
+    for (const f of files) {
+      expect(result.has(f)).toBe(true)
+    }
+  })
+
+  it('returns empty Set for non-existent dir', () => {
+    const result = readBaseline(join(tmpDir, 'nonexistent'))
+    expect(result.size).toBe(0)
+  })
+})
+
+describe('appendEdit silent failure', () => {
+  it('does not throw when writing to an invalid path', () => {
+    // /dev/null/impossible is not a valid directory
+    expect(() => {
+      appendEdit('/dev/null/impossible/path', { file: 'x.ts', tool: 'Edit', ts: new Date().toISOString() })
+    }).not.toThrow()
+  })
+})
+
+describe('toGitRelative', () => {
+  it('converts absolute path under git root to relative', () => {
+    // Derive the project root from this test file's location (src/tracking/) rather than a machine-specific path
+    const projectRoot = join(import.meta.dir, '..', '..')
+    const abs = join(projectRoot, 'src', 'tracking', 'edits-log.ts')
+    const rel = toGitRelative(abs)
+    expect(rel).toBe('src/tracking/edits-log.ts')
+  })
+})
diff --git a/src/tracking/edits-log.ts b/src/tracking/edits-log.ts
new file mode 100644
index 0000000..e042984
--- /dev/null
+++ b/src/tracking/edits-log.ts
@@ -0,0 +1,98 @@
+import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'
+import { execSync } from 'node:child_process'
+import { join, relative, resolve } from 'node:path'
+
+let cachedGitRoot: string | undefined
+
+/**
+ * Convert a path to one relative to the git top-level (`git rev-parse --show-toplevel`, run in the process cwd).
+ * The top-level is resolved on first call and cached in `cachedGitRoot`; if that git call fails, execSync throws.
+ */
+export function toGitRelative(absolutePath: string): string {
+  if (!cachedGitRoot) {
+    cachedGitRoot = execSync('git rev-parse --show-toplevel', { encoding: 'utf-8' }).trim()
+  }
+  return relative(cachedGitRoot, resolve(absolutePath))
+}
+
+/**
+ * Parse a `git status --porcelain` line (`XY PATH`, status in columns 0-1, path from column 3).
+ * Returns 1 path normally, or 2 paths for renames and copies (status R or C, split on ` -> `).
+ * Skips untracked lines (??).
+ */
+export function parseGitStatusPaths(line: string): string[] {
+  const status = line.slice(0, 2)
+  if (status === '??') return []
+  const pathPart = line.slice(3)
+  if (status.includes('R') || status.includes('C')) {
+    return pathPart.split(' -> ')
+  }
+  return [pathPart]
+}
+
+/**
+ * Append a JSON line to {sessionDir}/edits.jsonl.
+ * Tracking failure must NEVER throw.
+ */
+export function appendEdit(sessionDir: string, entry: { file: string; tool: string; ts: string }): void {
+  try {
+    mkdirSync(sessionDir, { recursive: true })
+    appendFileSync(join(sessionDir, 'edits.jsonl'), JSON.stringify(entry) + '\n')
+  } catch {
+    // Silently ignore — tracking failure must never throw
+  }
+}
+
+/**
+ * Read {sessionDir}/edits.jsonl, parse each line as JSON,
+ * extract .file, return a Set<string> of unique file paths.
+ * Handles missing file and corrupt lines gracefully.
+ */
+export function readEditsSet(sessionDir: string): Set<string> {
+  const result = new Set<string>()
+  try {
+    const filePath = join(sessionDir, 'edits.jsonl')
+    if (!existsSync(filePath)) return result
+    const content = readFileSync(filePath, 'utf-8')
+    for (const line of content.split('\n')) {
+      if (!line.trim()) continue
+      try {
+        const parsed = JSON.parse(line)
+        if (parsed.file) result.add(parsed.file)
+      } catch {
+        // Skip corrupt line
+      }
+    }
+  } catch {
+    // Return whatever we have so far
+  }
+  return result
+}
+
+/**
+ * Write {sessionDir}/baseline.json as {"files": [...], "ts": "ISO"}.
+ */
+export function writeBaseline(sessionDir: string, files: string[]): void {
+  try {
+    mkdirSync(sessionDir, { recursive: true })
+    writeFileSync(join(sessionDir, 'baseline.json'), JSON.stringify({ files, ts: new Date().toISOString() }))
+  } catch {
+    // Silently ignore
+  }
+}
+
+/**
+ * Read {sessionDir}/baseline.json, parse JSON, return Set<string> from the files array.
+ * Handles missing/corrupt file by returning empty Set.
+ */
+export function readBaseline(sessionDir: string): Set<string> {
+  try {
+    const filePath = join(sessionDir, 'baseline.json')
+    if (!existsSync(filePath)) return new Set()
+    const content = readFileSync(filePath, 'utf-8')
+    const parsed = JSON.parse(content)
+    return new Set<string>(parsed.files ?? [])
+  } catch {
+    return new Set()
+  }
+}
diff --git a/src/validation/schemas.test.ts b/src/validation/schemas.test.ts
index 487bb0b..782421b 100644
--- a/src/validation/schemas.test.ts
+++ b/src/validation/schemas.test.ts
@@ -255,11 +255,16 @@ describe('stage field on phaseSchema', () => {
     expect(result.success).toBe(false)
   })
 
-  it('rejects expansion on non-work phase', () => {
-    const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'agent' })
+  it('rejects spec expansion on non-work phase', () => {
+    const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'spec', subphase_pattern: [] })
     expect(result.success).toBe(false)
   })
 
+  it('allows agent expansion on setup phase', () => {
+    const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'agent' })
+    expect(result.success).toBe(true)
+  })
+
   it('accepts expansion on work phase', () => {
     const result = phaseSchema.safeParse({ id: 'p1', name: 'Work', stage: 'work', expansion: 'spec', subphase_pattern: [] })
     expect(result.success).toBe(true)
diff --git a/src/validation/schemas.ts b/src/validation/schemas.ts
index a4a2c56..990d7e5 100644
--- a/src/validation/schemas.ts
+++ b/src/validation/schemas.ts
@@ -127,6 +127,7 @@ export const subphasePatternSchema = z.object({
   agent: agentStepConfigSchema.optional(),
   gate: gateSchema.optional(),
   hints: z.array(hintSchema).optional(),
+  skill: z.string().optional(),
 })
 
 // ── Agent protocol schema (for expansion: 'agent' phases) ──
@@ -151,8 +152,8 @@ export const phaseSchema = z.object({
   steps: z.array(phaseStepSchema).optional(), // Individual trackable units within phase (e.g., interview rounds)
   subphase_pattern: z.array(subphasePatternSchema).optional(), // Inline array only (string references removed)
 }).refine(
-  (p) => !p.expansion || p.stage === 'work',
-  { message: 'expansion is only allowed on work-stage phases' }
+  (p) => p.expansion !== 'spec' || p.stage === 'work',
+  { message: 'expansion: spec is only allowed on work-stage phases' }
 )
 
 /**