codevibesmatter · codevibesmatter · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -36,28 +36,18 @@
         "hooks": [
           {
             "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook mode-gate",
-            "timeout": 10
-          }
-        ]
-      },
-      {
-        "matcher": "TaskUpdate",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook task-deps",
-            "timeout": 10
+            "command": "\"/home/ubuntu/.local/bin/kata\" hook pre-tool-use",
+            "timeout": 30
           }
         ]
-      },
+      }
+    ],
+    "PostToolUse": [
       {
-        "matcher": "TaskUpdate",
         "hooks": [
           {
             "type": "command",
-            "command": "\"/home/ubuntu/.local/bin/kata\" hook task-evidence",
-            "timeout": 10
+            "command": "\"/home/ubuntu/.local/bin/kata\" hook post-tool-use"
           }
         ]
       }

diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@ eval-projects/
 .geminiignore
 .kata/sessions/
 .kata/batteries-backup/
+.kata/verification-evidence/
+.claude/sessions/
+eval-transcripts/
diff --git a/batteries/templates/stop-hook-test.md b/batteries/templates/stop-hook-test.md
@@ -8,6 +8,7 @@ workflow_prefix: "SH"
 phases:
   - id: p0
     name: Write
+    stage: work
     task_config:
       title: "P0: Write a trivial file"
       labels: [phase, phase-0]
@@ -36,6 +37,7 @@ phases:
 
   - id: p1
     name: Commit
+    stage: work
     task_config:
       title: "P1: Commit the file"
       labels: [phase, phase-1]
@@ -59,6 +61,7 @@ phases:
 
   - id: p2
     name: Push
+    stage: work
     task_config:
       title: "P2: Push to remote"
       labels: [phase, phase-2]
@@ -80,6 +83,7 @@ phases:
 
   - id: p3
     name: Cleanup
+    stage: close
     task_config:
       title: "P3: Revert and clean up"
       labels: [phase, phase-3]

diff --git a/planning/research/2026-03-30-dynamic-task-creation-generalization.md b/planning/research/2026-03-30-dynamic-task-creation-generalization.md
@@ -0,0 +1,189 @@
+---
+date: 2026-03-30
+topic: Generalizing dynamic task creation beyond verify mode
+status: complete
+---
+
+# Research: Dynamic Task Creation Generalization
+
+## Questions Explored
+- How does verify mode's dynamic task creation work?
+- Which other modes would benefit from the same pattern?
+- What changes are needed to generalize it?
+
+## Current State
+
+### Task creation mechanisms today
+
+| Mode | Task Creation | Method |
+|------|--------------|--------|
+| planning | 16 static tasks | Template phases → `buildPhaseTasks()` at enter time |
+| implementation | Static + spec-driven | Template + `buildSpecTasks()` with subphase patterns at enter time |
+| task | 6 static tasks | Template phases → `buildPhaseTasks()` at enter time |
+| verify | Static + **dynamic** | Template + `TaskCreate` at runtime in container phase |
+| research | **None** | Has phases/steps but no tasks created |
+| debug | **None** | Has phases/steps but no tasks created |
+| freeform | **None** | No phases at all |
+
+### Verify mode's pattern (the one that works)
+
+Verify mode uses a discover-then-expand pattern:
+
+```
+P0: Setup (static tasks)
+    → Discovers VP steps from spec, plan file, or git diff
+
+P1: Execute (container: true)
+    → expand-vp-steps calls TaskCreate per discovered VP step
+    → Each VP step becomes a trackable, completable task
+
+P2+: Operate on those dynamic tasks (fix loop, evidence)
+```
+
+Key design elements:
+- P1 is marked `container: true` in the template YAML
+- The template instruction explicitly tells the agent to call `TaskCreate`
+- A special exception overrides the "no TaskCreate" rule for verify mode only
+- All tasks are created at once, before execution begins
+- Each task is independently trackable (pass/fail per VP step)
+
+### The current gate
+
+Verify has a hardcoded exception:
+
+> "Verify mode is the **only mode** that uses `TaskCreate`. This overrides the standard `task_rules`..."
+
+This is the only thing preventing other modes from using the same pattern.
+
+## Key Finding: The Pattern is Template-Driven, Not Mode-Driven
+
+Verify's dynamic task creation isn't special infrastructure — it's just a template instruction that says "call `TaskCreate` here." The `container: true` phase marker already exists in the schema. The only blocker is the policy gate that restricts `TaskCreate` to verify mode.
+
+**Proposed change:** Make the `TaskCreate` exception phase-driven rather than mode-driven. Any phase with `container: true` allows `TaskCreate` within that phase.
+
+## Candidate Modes for Dynamic Task Creation
+
+### Planning Mode — strongest candidate
+
+**Current problem:** P2 (Spec Writing) has 3 static tasks regardless of feature complexity. A simple config change and a complex multi-service feature get the same task structure.
+
+**Dynamic pattern:**
+```
+P0: Research (static — 2 tasks)
+P1: Interview (static — 5 tasks)
+    → Discovers: behaviors, integration points, test scenarios
+
+P2: Spec Writing (container: true)
+    → After P1 requirements approval, expand per behavior:
+      - "Write B1: auth flow"
+      - "Write B2: token refresh"
+      - "Write B3: session management"
+    → Each behavior section independently trackable
+
+P3: Review Gate (static — 3 tasks)
+P4: Finalize (static — 3 tasks)
+```
+
+**Benefits:**
+- Progress tracking per behavior (not just "spec writing in progress")
+- Natural parallelism — behaviors can be written by parallel agents
+- Review can reference specific behavior tasks
+- Scales with feature complexity (2 behaviors = 2 tasks, 10 = 10)
+
+**Trade-off:** Currently a single agent writes the whole spec in one shot, which preserves cross-behavior coherence. Per-behavior tasks would need a "coherence pass" afterward, or a shared context doc that each behavior writer reads.
+
+### Debug Mode — strong candidate
+
+**Current problem:** No tasks at all. Progress is invisible.
+
+**Dynamic pattern:**
+```
+P0: Reproduce (static — 2 tasks)
+    → Discovers: symptoms, affected code paths
+
+P1: Hypotheses (container: true)
+    → After reproduction, expand per hypothesis:
+      - "H1: Race condition in session cleanup"
+      - "H2: Stale cache after config reload"
+      - "H3: Off-by-one in pagination"
+    → Each hypothesis independently testable/dismissable
+
+P2: Fix (static — depends on which hypothesis is confirmed)
+P3: Verify fix (static — 2 tasks)
+```
+
+**Benefits:**
+- Hypotheses are tracked (tested/confirmed/dismissed)
+- Stop conditions can check "at least one hypothesis confirmed"
+- Natural debugging workflow — you don't know the hypotheses upfront
+
+### Research Mode — moderate candidate
+
+**Dynamic pattern:**
+```
+P0: Initial scan (static — 2 tasks)
+    → Discovers: research threads to investigate
+
+P1: Deep dive (container: true)
+    → After initial scan, expand per thread:
+      - "Investigate logging architecture"
+      - "Map auth middleware chain"
+      - "Compare caching strategies"
+    → Each thread independently explorable
+
+P2: Synthesize (static — 2 tasks)
+P3: Document (static — 2 tasks)
+```
+
+**Benefits:**
+- Research coverage tracked per thread
+- Natural parallelism for independent threads
+- Output doc can reference which threads were explored
+
+**Trade-off:** Research is intentionally exploratory. Too much structure might constrain discovery. Could make the container phase optional — only expand if the agent identifies discrete threads.
+
+### Task Mode — poor candidate
+
+Already lightweight (6 tasks). The whole point is "small change, minimal ceremony." Dynamic expansion would fight the mode's purpose.
+
+### Freeform — not a candidate
+
+Intentionally unstructured. No phases at all.
+
+## Implementation Path
+
+### Step 1: Make `TaskCreate` gate phase-driven
+
+Change the `TaskCreate` restriction from "mode === verify" to "current phase has `container: true`". The restriction most likely lives in the mode-gate hook or in the task-rules documentation; check the files below to confirm.
+
+**Files to check:**
+- `src/commands/hook.ts` — mode-gate hook logic
+- Template task_rules section — documentation that agents read
+- Any PreToolUse hook that blocks `TaskCreate`
+
+### Step 2: Update templates that want dynamic creation
+
+Add `container: true` to the relevant phase and write the expand instruction. No TypeScript changes are needed for this step — it is purely template content.
+
+### Step 3: Wire stop conditions
+
+Modes using dynamic tasks should add `tasks_complete` to their `stop_conditions` in the mode configuration (`kata.yaml`) so the stop hook enforces completion.
+
+### Incremental rollout
+
+1. **First:** Just lift the verify-only restriction (step 1). No template changes yet.
+2. **Then:** Update debug template to use container phase for hypotheses — simplest template to modify, low risk.
+3. **Then:** Planning P2 — higher impact but needs the coherence-pass design decision.
+4. **Last:** Research — only if the pattern proves valuable in debug/planning.
+
+## Open Questions
+
+- **Planning coherence:** If behaviors are written as separate tasks, how do you ensure cross-behavior consistency? Options: shared context doc, coherence review pass, or keep single-agent-writes-all but track per-behavior review tasks instead.
+- **Task naming convention:** Verify uses `VP{N}: {title}`. Should other modes follow a similar pattern? (`H{N}:` for hypotheses, `B{N}:` for behaviors, `R{N}:` for research threads?)
+- **Multiple container phases:** Can a mode have more than one container phase? (e.g., planning could have container phases in both P2 and P3 for per-behavior writing AND per-behavior review)
+
+## Next Steps
+
+- Create GitHub issue for this feature
+- Start with step 1 (lift verify-only gate) as a small task
+- Design debug template update as proof of concept
diff --git a/src/commands/can-exit.test.ts b/src/commands/can-exit.test.ts
@@ -20,13 +20,15 @@ async function captureCanExit(args: string[]): Promise<string> {
   const { canExit } = await import('./can-exit.js')
   let captured = ''
   const origLog = console.log
+  const origExitCode = process.exitCode
   console.log = (...logArgs: unknown[]) => {
     captured += logArgs.map(String).join(' ')
   }
   try {
     await canExit(args)
   } finally {
     console.log = origLog
+    process.exitCode = origExitCode
   }
   return captured
 }
@@ -38,13 +40,12 @@ describe('canExit', () => {
 
   beforeEach(() => {
     tmpDir = makeTmpDir()
-    mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true })
-    mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true })
+    mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true })
     // Write baseline kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml)
     // Include implementation + freeform modes with the stop_conditions used by test scenarios.
     // Individual tests that need specific review config overwrite this file before calling canExit.
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       [
         'spec_path: planning/specs',
         'research_path: planning/research',
@@ -74,12 +75,12 @@ describe('canExit', () => {
     } else {
       delete process.env.CLAUDE_SESSION_ID
     }
-    process.exitCode = undefined
+    process.exitCode = 0
   })
 
   function createSessionState(state: Record<string, unknown>): void {
     const sessionId = process.env.CLAUDE_SESSION_ID!
-    const sessionDir = join(tmpDir, '.claude', 'sessions', sessionId)
+    const sessionDir = join(tmpDir, '.kata', 'sessions', sessionId)
     mkdirSync(sessionDir, { recursive: true })
     writeFileSync(
       join(sessionDir, 'state.json'),
@@ -124,7 +125,7 @@ describe('canExit', () => {
     // Regression: "on base branch / no diff" used to short-circuit ALL checks including
     // tasks_complete, allowing exit at session start before any work was done.
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           research: { template: 'research.md', stop_conditions: ['tasks_complete', 'committed'] },
@@ -157,7 +158,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: blocks when no phase evidence files exist', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -180,7 +181,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: passes when phase evidence file exists with overallPassed true', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -194,7 +195,7 @@ describe('canExit', () => {
       issueNumber: 333,
     })
 
-    const evidenceDir = join(tmpDir, '.claude', 'verification-evidence')
+    const evidenceDir = join(tmpDir, '.kata', 'verification-evidence')
     mkdirSync(evidenceDir, { recursive: true })
     writeFileSync(
       join(evidenceDir, 'phase-p1-333.json'),
@@ -215,7 +216,7 @@ describe('canExit', () => {
 
   it('checkTestsPass: blocks when phase evidence overallPassed is false', async () => {
     writeFileSync(
-      join(tmpDir, '.claude', 'workflows', 'kata.yaml'),
+      join(tmpDir, '.kata', 'kata.yaml'),
       jsYaml.dump({
         modes: {
           implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] },
@@ -229,7 +230,7 @@ describe('canExit', () => {
       issueNumber: 222,
     })
 
-    const evidenceDir = join(tmpDir, '.claude', 'verification-evidence')
+    const evidenceDir = join(tmpDir, '.kata', 'verification-evidence')
     mkdirSync(evidenceDir, { recursive: true })
     writeFileSync(
       join(evidenceDir, 'phase-p1-222.json'),