diff --git a/.claude/settings.json b/.claude/settings.json index 297f516..287c0bb 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -36,28 +36,18 @@ "hooks": [ { "type": "command", - "command": "\"/home/ubuntu/.local/bin/kata\" hook mode-gate", - "timeout": 10 - } - ] - }, - { - "matcher": "TaskUpdate", - "hooks": [ - { - "type": "command", - "command": "\"/home/ubuntu/.local/bin/kata\" hook task-deps", - "timeout": 10 + "command": "\"/home/ubuntu/.local/bin/kata\" hook pre-tool-use", + "timeout": 30 } ] - }, + } + ], + "PostToolUse": [ { - "matcher": "TaskUpdate", "hooks": [ { "type": "command", - "command": "\"/home/ubuntu/.local/bin/kata\" hook task-evidence", - "timeout": 10 + "command": "\"/home/ubuntu/.local/bin/kata\" hook post-tool-use" } ] } diff --git a/.gitignore b/.gitignore index cd57479..9a4bf41 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ eval-projects/ .geminiignore .kata/sessions/ .kata/batteries-backup/ +.kata/verification-evidence/ +.claude/sessions/ +eval-transcripts/ diff --git a/batteries/templates/stop-hook-test.md b/batteries/templates/stop-hook-test.md index 84d02d4..a3976bc 100644 --- a/batteries/templates/stop-hook-test.md +++ b/batteries/templates/stop-hook-test.md @@ -8,6 +8,7 @@ workflow_prefix: "SH" phases: - id: p0 name: Write + stage: work task_config: title: "P0: Write a trivial file" labels: [phase, phase-0] @@ -36,6 +37,7 @@ phases: - id: p1 name: Commit + stage: work task_config: title: "P1: Commit the file" labels: [phase, phase-1] @@ -59,6 +61,7 @@ phases: - id: p2 name: Push + stage: work task_config: title: "P2: Push to remote" labels: [phase, phase-2] @@ -80,6 +83,7 @@ phases: - id: p3 name: Cleanup + stage: close task_config: title: "P3: Revert and clean up" labels: [phase, phase-3] diff --git a/planning/research/2026-03-30-dynamic-task-creation-generalization.md b/planning/research/2026-03-30-dynamic-task-creation-generalization.md new file mode 100644 index 0000000..b3f132e --- /dev/null +++ b/planning/research/2026-03-30-dynamic-task-creation-generalization.md @@ -0,0 +1,189 @@ +--- +date: 2026-03-30 +topic: Generalizing dynamic task creation beyond verify mode +status: complete +--- + +# Research: Dynamic Task Creation Generalization + +## Questions Explored +- How does verify mode's dynamic task creation work? +- Which other modes would benefit from the same pattern? +- What changes are needed to generalize it? + +## Current State + +### Task creation mechanisms today + +| Mode | Task Creation | Method | +|------|--------------|--------| +| planning | 16 static tasks | Template phases → `buildPhaseTasks()` at enter time | +| implementation | Static + spec-driven | Template + `buildSpecTasks()` with subphase patterns at enter time | +| task | 6 static tasks | Template phases → `buildPhaseTasks()` at enter time | +| verify | Static + **dynamic** | Template + `TaskCreate` at runtime in container phase | +| research | **None** | Has phases/steps but no tasks created | +| debug | **None** | Has phases/steps but no tasks created | +| freeform | **None** | No phases at all | + +### Verify mode's pattern (the one that works) + +Verify mode uses a discover-then-expand pattern: + +``` +P0: Setup (static tasks) + → Discovers VP steps from spec, plan file, or git diff + +P1: Execute (container: true) + → expand-vp-steps calls TaskCreate per discovered VP step + → Each VP step becomes a trackable, completable task + +P2+: Operate on those dynamic tasks (fix loop, evidence) +``` + +Key design elements: +- P1 is marked `container: true` in the template YAML +- The template instruction explicitly tells the agent to call `TaskCreate` +- A special exception overrides the "no TaskCreate" rule for verify mode only +- Tasks are created ALL at once before execution begins +- Each task is independently trackable (pass/fail per VP step) + +### The current gate + +Verify has a hardcoded exception: + +> "Verify mode is the **only mode** that uses `TaskCreate`. This overrides the standard `task_rules`..." + +This is the only thing preventing other modes from using the same pattern. + +## Key Finding: The Pattern is Template-Driven, Not Mode-Driven + +Verify's dynamic task creation isn't special infrastructure — it's just a template instruction that says "call `TaskCreate` here." The `container: true` phase marker already exists in the schema. The only blocker is the policy gate that restricts `TaskCreate` to verify mode. + +**Proposed change:** Make the `TaskCreate` exception phase-driven rather than mode-driven. Any phase with `container: true` allows `TaskCreate` within that phase. + +## Candidate Modes for Dynamic Task Creation + +### Planning Mode — strongest candidate + +**Current problem:** P2 (Spec Writing) has 3 static tasks regardless of feature complexity. A simple config change and a complex multi-service feature get the same task structure. + +**Dynamic pattern:** +``` +P0: Research (static — 2 tasks) +P1: Interview (static — 5 tasks) + → Discovers: behaviors, integration points, test scenarios + +P2: Spec Writing (container: true) + → After P1 requirements approval, expand per behavior: + - "Write B1: auth flow" + - "Write B2: token refresh" + - "Write B3: session management" + → Each behavior section independently trackable + +P3: Review Gate (static — 3 tasks) +P4: Finalize (static — 3 tasks) +``` + +**Benefits:** +- Progress tracking per behavior (not just "spec writing in progress") +- Natural parallelism — behaviors can be written by parallel agents +- Review can reference specific behavior tasks +- Scales with feature complexity (2 behaviors = 2 tasks, 10 = 10) + +**Trade-off:** Currently a single agent writes the whole spec in one shot, which preserves cross-behavior coherence. Per-behavior tasks would need a "coherence pass" afterward, or a shared context doc that each behavior writer reads. + +### Debug Mode — strong candidate + +**Current problem:** No tasks at all. Progress is invisible. + +**Dynamic pattern:** +``` +P0: Reproduce (static — 2 tasks) + → Discovers: symptoms, affected code paths + +P1: Hypotheses (container: true) + → After reproduction, expand per hypothesis: + - "H1: Race condition in session cleanup" + - "H2: Stale cache after config reload" + - "H3: Off-by-one in pagination" + → Each hypothesis independently testable/dismissable + +P2: Fix (static — depends on which hypothesis confirmed) +P3: Verify fix (static — 2 tasks) +``` + +**Benefits:** +- Hypotheses are tracked (tested/confirmed/dismissed) +- Stop conditions can check "at least one hypothesis confirmed" +- Natural debugging workflow — you don't know the hypotheses upfront + +### Research Mode — moderate candidate + +**Dynamic pattern:** +``` +P0: Initial scan (static — 2 tasks) + → Discovers: research threads to investigate + +P1: Deep dive (container: true) + → After initial scan, expand per thread: + - "Investigate logging architecture" + - "Map auth middleware chain" + - "Compare caching strategies" + → Each thread independently explorable + +P2: Synthesize (static — 2 tasks) +P3: Document (static — 2 tasks) +``` + +**Benefits:** +- Research coverage tracked per thread +- Natural parallelism for independent threads +- Output doc can reference which threads were explored + +**Trade-off:** Research is intentionally exploratory. Too much structure might constrain discovery. Could make the container phase optional — only expand if the agent identifies discrete threads. + +### Task Mode — poor candidate + +Already lightweight (6 tasks). The whole point is "small change, minimal ceremony." Dynamic expansion would fight the mode's purpose. + +### Freeform — not a candidate + +Intentionally unstructured. No phases at all. + +## Implementation Path + +### Step 1: Make `TaskCreate` gate phase-driven + +Change the `TaskCreate` restriction from "mode === verify" to "current phase has `container: true`". This is likely in the mode-gate hook or task rules documentation. + +**Files to check:** +- `src/commands/hook.ts` — mode-gate hook logic +- Template task_rules section — documentation that agents read +- Any PreToolUse hook that blocks `TaskCreate` + +### Step 2: Update templates that want dynamic creation + +Add `container: true` to the relevant phase and write the expand instruction. No TypeScript changes needed — this is purely template content. + +### Step 3: Wire stop conditions + +Modes using dynamic tasks should add `tasks_complete` to their `stop_conditions` in `modes.yaml` so the stop hook enforces completion. + +### Incremental rollout + +1. **First:** Just lift the verify-only restriction (step 1). No template changes yet. +2. **Then:** Update debug template to use container phase for hypotheses — simplest template to modify, low risk. +3. **Then:** Planning P2 — higher impact but needs the coherence-pass design decision. +4. **Last:** Research — only if the pattern proves valuable in debug/planning. + +## Open Questions + +- **Planning coherence:** If behaviors are written as separate tasks, how do you ensure cross-behavior consistency? Options: shared context doc, coherence review pass, or keep single-agent-writes-all but track per-behavior review tasks instead. +- **Task naming convention:** Verify uses `VP{N}: {title}`. Should other modes follow a similar pattern? (`H{N}:` for hypotheses, `B{N}:` for behaviors, `R{N}:` for research threads?) +- **Container phase nesting:** Can a mode have multiple container phases? (e.g., planning could have container phases in both P2 and P3 for per-behavior writing AND per-behavior review) + +## Next Steps + +- Create GitHub issue for this feature +- Start with step 1 (lift verify-only gate) as a small task +- Design debug template update as proof of concept diff --git a/src/commands/can-exit.test.ts b/src/commands/can-exit.test.ts index 690c24e..150bf2e 100644 --- a/src/commands/can-exit.test.ts +++ b/src/commands/can-exit.test.ts @@ -20,6 +20,7 @@ async function captureCanExit(args: string[]): Promise { const { canExit } = await import('./can-exit.js') let captured = '' const origLog = console.log + const origExitCode = process.exitCode console.log = (...logArgs: unknown[]) => { captured += logArgs.map(String).join(' ') } @@ -27,6 +28,7 @@ async function captureCanExit(args: string[]): Promise { await canExit(args) } finally { console.log = origLog + process.exitCode = origExitCode } return captured } @@ -38,13 +40,12 @@ describe('canExit', () => { beforeEach(() => { tmpDir = makeTmpDir() - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) - mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true }) // Write baseline kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml) // Include implementation + freeform modes with the stop_conditions used by test scenarios. // Individual tests that need specific review config overwrite this file before calling canExit. writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), [ 'spec_path: planning/specs', 'research_path: planning/research', @@ -74,12 +75,12 @@ describe('canExit', () => { } else { delete process.env.CLAUDE_SESSION_ID } - process.exitCode = undefined + process.exitCode = 0 }) function createSessionState(state: Record): void { const sessionId = process.env.CLAUDE_SESSION_ID! - const sessionDir = join(tmpDir, '.claude', 'sessions', sessionId) + const sessionDir = join(tmpDir, '.kata', 'sessions', sessionId) mkdirSync(sessionDir, { recursive: true }) writeFileSync( join(sessionDir, 'state.json'), @@ -124,7 +125,7 @@ describe('canExit', () => { // Regression: "on base branch / no diff" used to short-circuit ALL checks including // tasks_complete, allowing exit at session start before any work was done. writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), jsYaml.dump({ modes: { research: { template: 'research.md', stop_conditions: ['tasks_complete', 'committed'] }, @@ -157,7 +158,7 @@ describe('canExit', () => { it('checkTestsPass: blocks when no phase evidence files exist', async () => { writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), jsYaml.dump({ modes: { implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] }, @@ -180,7 +181,7 @@ describe('canExit', () => { it('checkTestsPass: passes when phase evidence file exists with overallPassed true', async () => { writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), jsYaml.dump({ modes: { implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] }, @@ -194,7 +195,7 @@ describe('canExit', () => { issueNumber: 333, }) - const evidenceDir = join(tmpDir, '.claude', 'verification-evidence') + const evidenceDir = join(tmpDir, '.kata', 'verification-evidence') mkdirSync(evidenceDir, { recursive: true }) writeFileSync( join(evidenceDir, 'phase-p1-333.json'), @@ -215,7 +216,7 @@ describe('canExit', () => { it('checkTestsPass: blocks when phase evidence overallPassed is false', async () => { writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), jsYaml.dump({ modes: { implementation: { template: 'implementation.md', stop_conditions: ['tasks_complete', 'committed', 'pushed', 'tests_pass', 'feature_tests_added'] }, @@ -229,7 +230,7 @@ describe('canExit', () => { issueNumber: 222, }) - const evidenceDir = join(tmpDir, '.claude', 'verification-evidence') + const evidenceDir = join(tmpDir, '.kata', 'verification-evidence') mkdirSync(evidenceDir, { recursive: true }) writeFileSync( join(evidenceDir, 'phase-p1-222.json'), diff --git a/src/commands/can-exit.ts b/src/commands/can-exit.ts index 871b7b5..6adbdfb 100644 --- a/src/commands/can-exit.ts +++ b/src/commands/can-exit.ts @@ -2,7 +2,7 @@ import { execSync } from 'node:child_process' import { existsSync, readdirSync, readFileSync } from 'node:fs' import { join } from 'node:path' -import { getCurrentSessionId, findProjectDir, getStateFilePath, getVerificationDir } from '../session/lookup.js' +import { getCurrentSessionId, findProjectDir, getStateFilePath, getVerificationDir, getSessionsDir } from '../session/lookup.js' import { readState } from '../state/reader.js' import { type StopGuidance, @@ -19,6 +19,7 @@ import { } from './enter/task-factory.js' import { loadKataConfig } from '../config/kata-config.js' import { findSpecFile, validateSpec } from './validate-spec.js' +import { readEditsSet, parseGitStatusPaths } from '../tracking/edits-log.js' /** * Parse command line arguments for can-exit command @@ -43,28 +44,53 @@ function parseArgs(args: string[]): { /** * Check git conditions (committed, pushed) based on which checks are active */ -function checkGlobalConditions(checks: Set): { passed: boolean; reasons: string[] } { +function checkGlobalConditions(checks: Set, sessionDir?: string): { passed: boolean; reasons: string[]; advisories: string[] } { const reasons: string[] = [] + const advisories: string[] = [] try { if (checks.has('committed')) { + // Strip trailing newlines only — `.trim()` would eat the leading space + // of the first line's porcelain status (e.g. " M README.md"), corrupting + // parseGitStatusPaths which expects status at positions 0-1 and path at position 3+. const gitStatus = execSync('git status --porcelain 2>/dev/null || true', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], - }).trim() + }).replace(/\n+$/, '') if (gitStatus) { + const sessionEdits = sessionDir ? readEditsSet(sessionDir) : null + const outOfScopeFiles: string[] = [] + const changedFiles = gitStatus.split('\n').filter((line) => { if (line.startsWith('??')) return false + const paths = parseGitStatusPaths(line) + const file = paths[0] // primary path // Exclude kata session logs — the stop hook writes these on every invocation, // creating a recursive loop if we count them as uncommitted changes - const file = line.slice(3) if (file.startsWith('.kata/sessions/')) return false + + if (sessionEdits) { + // Session-scoped: only count files this session touched + if (sessionEdits.has(file)) return true + // Track out-of-scope files for advisory + outOfScopeFiles.push(file) + return false + } + // No session tracking (no edits.jsonl) — fall back to global behavior return true }) + if (changedFiles.length > 0) { reasons.push('Uncommitted changes in tracked files') } + + // Advisory for out-of-scope dirty files + if (outOfScopeFiles.length > 0) { + const shown = outOfScopeFiles.slice(0, 5) + const suffix = outOfScopeFiles.length > 5 ? `, ... and ${outOfScopeFiles.length - 5} more` : '' + advisories.push(`Note: ${outOfScopeFiles.length} file(s) outside this session's scope have uncommitted changes: ${shown.join(', ')}${suffix}`) + } } } @@ -85,6 +111,7 @@ function checkGlobalConditions(checks: Set): { passed: boolean; reasons: return { passed: reasons.length === 0, reasons, + advisories, } } @@ -178,7 +205,7 @@ function checkTestsPass(issueNumber: number, nonCodePaths: string[]): { passed: * Check that at least one new test function was added in this session vs diff_base. * Reads project.diff_base and project.test_file_pattern from wm.yaml. */ -function checkFeatureTestsAdded(): { passed: boolean; newTestCount?: number } { +function checkFeatureTestsAdded(sessionDir?: string): { passed: boolean; newTestCount?: number } { try { const cfg = loadKataConfig() const diffBase = cfg.project?.diff_base ?? 'origin/main' @@ -194,13 +221,27 @@ function checkFeatureTestsAdded(): { passed: boolean; newTestCount?: number } { .split('\n') .filter((f) => f && patterns.some((ext) => f.endsWith(ext))) - if (changedFiles.length === 0) { + // Filter to session-owned files if tracking is available. + // If filtering produces an empty set (tracking may not cover the full session), + // fall back to the unfiltered list — better to over-check than miss real tests. + let filteredFiles = changedFiles + if (sessionDir) { + const sessionEdits = readEditsSet(sessionDir) + if (sessionEdits.size > 0) { + const scoped = changedFiles.filter(f => sessionEdits.has(f)) + if (scoped.length > 0) { + filteredFiles = scoped + } + } + } + + if (filteredFiles.length === 0) { return { passed: false, newTestCount: 0 } } // Count new test function declarations added const diffOutput = execSync( - `git diff "${diffBase}" -- ${changedFiles.map((f) => `"${f}"`).join(' ')} 2>/dev/null || true`, + `git diff "${diffBase}" -- ${filteredFiles.map((f) => `"${f}"`).join(' ')} 2>/dev/null || true`, { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }, ) @@ -324,14 +365,25 @@ function validateCanExit( ): { canExit: boolean reasons: string[] + advisories: string[] hasOpenTasks: boolean usingTasks: boolean } { const reasons: string[] = [] + let allAdvisories: string[] = [] + + const sessionDir = (() => { + try { + const projectDir = findProjectDir() + return join(getSessionsDir(projectDir), sessionId) + } catch { + return undefined + } + })() // No stop conditions = can always exit if (stopConditions.length === 0) { - return { canExit: true, reasons: [], hasOpenTasks: false, usingTasks: false } + return { canExit: true, reasons: [], advisories: [], hasOpenTasks: false, usingTasks: false } } // Build effective checks set (filter stage-scoped conditions whose stage isn't complete) @@ -399,7 +451,7 @@ function validateCanExit( // ── feature_tests_added ── if (checks.has('feature_tests_added')) { - const featureTestsCheck = checkFeatureTestsAdded() + const featureTestsCheck = checkFeatureTestsAdded(sessionDir) if (!featureTestsCheck.passed) { reasons.push( 'At least one new test function required (it/test/describe). See: arXiv 2402.13521', @@ -426,8 +478,9 @@ function validateCanExit( // ── committed + pushed (check after task/verification checks) ── if (reasons.length === 0) { if (checks.has('committed') || checks.has('pushed')) { - const globalCheck = checkGlobalConditions(checks) + const globalCheck = checkGlobalConditions(checks, sessionDir) reasons.push(...globalCheck.reasons) + allAdvisories = globalCheck.advisories } } } @@ -435,6 +488,7 @@ function validateCanExit( return { canExit: reasons.length === 0, reasons, + advisories: allAdvisories, hasOpenTasks, usingTasks, } @@ -548,6 +602,7 @@ export async function canExit(args: string[]): Promise { const { canExit: canExitNow, reasons, + advisories, hasOpenTasks, usingTasks, } = validateCanExit(workflowId, sessionId, stopConditions, issueNumber, phasesByStage, deliverablePath) @@ -569,6 +624,7 @@ export async function canExit(args: string[]): Promise { { canExit: canExitNow, reasons, + advisories, guidance, workflowId, sessionType, @@ -601,6 +657,10 @@ export async function canExit(args: string[]): Promise { ) } } + for (const advisory of advisories) { + // biome-ignore lint/suspicious/noConsole: intentional CLI output + console.log(` ℹ️ ${advisory}`) + } } // Exit code 0 if can exit, 1 if not diff --git a/src/commands/enter.test.ts b/src/commands/enter.test.ts index 05b9f31..d1802b6 100644 --- a/src/commands/enter.test.ts +++ b/src/commands/enter.test.ts @@ -1,5 +1,8 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test' -import { mkdirSync, rmSync, writeFileSync } from 'node:fs' +import { describe, it, expect, beforeEach, afterEach, afterAll } from 'bun:test' + +afterAll(() => { process.exitCode = 0 }) +import { mkdirSync, rmSync, writeFileSync, readFileSync, existsSync } from 'node:fs' +import { execSync } from 'node:child_process' import { join } from 'node:path' import * as os from 'node:os' @@ -15,13 +18,15 @@ function makeTmpDir(): string { /** * Helper: capture console.log output from enter(), also suppressing stderr */ -async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: string }> { +async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: string; exitCode: number | undefined }> { const { enter } = await import('./enter.js') let stdout = '' let stderr = '' const origLog = console.log const origError = console.error const origStderrWrite = process.stderr.write + const origExitCode = process.exitCode + process.exitCode = 0 console.log = (...logArgs: unknown[]) => { stdout += logArgs.map(String).join(' ') } @@ -39,7 +44,9 @@ async function captureEnter(args: string[]): Promise<{ stdout: string; stderr: s console.error = origError process.stderr.write = origStderrWrite } - return { stdout, stderr } + const exitCode = process.exitCode + process.exitCode = 0 + return { stdout, stderr, exitCode } } describe('enter', () => { @@ -49,13 +56,16 @@ describe('enter', () => { beforeEach(() => { tmpDir = makeTmpDir() - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) - mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true }) // Write kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml) // Include modes needed by tests (freeform, research, flow-deprecated) writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), [ + 'project:', + ' build_command: "echo build"', + ' test_command: "echo test"', + ' typecheck_command: "echo typecheck"', 'spec_path: planning/specs', 'research_path: planning/research', 'modes:', @@ -69,6 +79,9 @@ describe('enter', () => { ' implementation:', ' template: implementation.md', ' stop_conditions: [tasks_complete, committed]', + ' task:', + ' template: task.md', + ' stop_conditions: [tasks_complete, committed]', ' flow:', ' deprecated: true', ' redirect_to: freeform', @@ -91,12 +104,12 @@ describe('enter', () => { } else { delete process.env.CLAUDE_SESSION_ID } - process.exitCode = undefined + process.exitCode = 0 }) it('prints usage when no mode is provided', async () => { - const { stderr } = await captureEnter([]) - expect(process.exitCode).toBe(1) + const { stderr, exitCode } = await captureEnter([]) + expect(exitCode).toBe(1) expect(stderr).toContain('Usage:') }) @@ -142,25 +155,25 @@ describe('enter', () => { }) it('rejects unknown mode', async () => { - const { stderr } = await captureEnter([ + const { stderr, exitCode } = await captureEnter([ 'totally-nonexistent-mode', '--skip-cleanup', `--session=${process.env.CLAUDE_SESSION_ID}`, ]) - expect(process.exitCode).toBe(1) + expect(exitCode).toBe(1) expect(stderr).toContain('Unknown mode') }) it('rejects deprecated mode', async () => { // 'flow' is deprecated with redirect_to: freeform - const { stderr } = await captureEnter([ + const { stderr, exitCode } = await captureEnter([ 'flow', '--skip-cleanup', `--session=${process.env.CLAUDE_SESSION_ID}`, ]) - expect(process.exitCode).toBe(1) + expect(exitCode).toBe(1) expect(stderr).toContain('deprecated') }) @@ -195,10 +208,12 @@ name: "Custom Template" phases: - id: p0 name: "Step 1" + stage: setup task_config: title: "Do step 1" - id: p1 name: "Step 2" + stage: work task_config: title: "Do step 2" --- @@ -217,13 +232,13 @@ Instructions here. const result = JSON.parse(stdout) as { success: boolean - customTemplate: string + template: string phases: string[] dryRun: boolean } expect(result.success).toBe(true) - expect(result.customTemplate).toBe(templatePath) + expect(result.template).toBe(templatePath) expect(result.phases).toEqual(['p0', 'p1']) expect(result.dryRun).toBe(true) }) @@ -231,7 +246,7 @@ Instructions here. it('spec_path from kata.yaml is respected', async () => { // Write kata.yaml with custom spec_path, including the freeform mode needed by the test writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), [ 'spec_path: custom/specs', 'research_path: planning/research', @@ -320,4 +335,44 @@ Instructions here. // The dry-run stderr preview includes native task subjects with skill invocations expect(stderr).toContain('kata-setup') }) + + // Regression for porcelain-leading-space bug: a worktree-only modification + // emits " M path" (leading space = empty index status). captureBaseline used + // to `.trim()` the full status, eating that leading space and causing + // parseGitStatusPaths to return "ath" instead of "path". + it('baseline.json records correct path for worktree-only modifications', async () => { + // Build a real git repo inside tmpDir so kata enter's captureBaseline + // sees genuine porcelain output (with a leading-space status line). + // Note: captureBaseline runs execSync without an explicit cwd, so we + // chdir into tmpDir for the duration of this test. + const exec = (cmd: string) => execSync(cmd, { cwd: tmpDir, stdio: 'pipe', encoding: 'utf-8' }) + exec('git init -q') + exec('git config user.email test@test') + exec('git config user.name test') + writeFileSync(join(tmpDir, 'README.md'), 'original\n') + exec('git add README.md') + exec('git -c commit.gpgsign=false commit -q -m init') + // Worktree-only modification — emits " M README.md" in porcelain. + writeFileSync(join(tmpDir, 'README.md'), 'modified\n') + + const sessionId = process.env.CLAUDE_SESSION_ID! + const origCwd = process.cwd() + process.chdir(tmpDir) + try { + await captureEnter([ + 'task', + '--skip-cleanup', + `--session=${sessionId}`, + ]) + } finally { + process.chdir(origCwd) + } + + const baselinePath = join(tmpDir, '.kata', 'sessions', sessionId, 'baseline.json') + expect(existsSync(baselinePath)).toBe(true) + const baseline = JSON.parse(readFileSync(baselinePath, 'utf-8')) as { files: string[] } + // The key assertion: path is "README.md", NOT "EADME.md". + expect(baseline.files).toContain('README.md') + expect(baseline.files).not.toContain('EADME.md') + }) }) diff --git a/src/commands/enter.ts b/src/commands/enter.ts index 4b1ffe2..3e0df4e 100644 --- a/src/commands/enter.ts +++ b/src/commands/enter.ts @@ -1,4 +1,5 @@ // kata enter - Enter a mode +import { execSync } from 'node:child_process' import { existsSync, mkdirSync, readFileSync } from 'node:fs' import { resolve, dirname, join } from 'node:path' import jsYaml from 'js-yaml' @@ -7,6 +8,7 @@ import { getStateFilePath, findProjectDir, getPackageRoot, + getSessionsDir, } from '../session/lookup.js' import { readState, stateExists } from '../state/reader.js' import { writeState } from '../state/writer.js' @@ -123,6 +125,32 @@ import { } from './enter/task-factory.js' import { parseArgs, createDefaultState } from './enter/cli.js' import { createFdNotesFile, createDoctrineNotesFile } from './enter/notes.js' +import { writeBaseline, parseGitStatusPaths } from '../tracking/edits-log.js' + +/** + * Capture baseline snapshot — record pre-existing dirty files so stop conditions + * can distinguish between files that were dirty before the session started and + * files the session actually modified. + */ +function captureBaseline(sessionId: string): void { + try { + const sessionDir = join(getSessionsDir(findProjectDir()), sessionId) + // Strip trailing newlines only — `.trim()` would eat the leading space + // of the first line's porcelain status (e.g. " M README.md"), corrupting + // path parsing which expects status at positions 0-1 and path at position 3+. + const status = execSync('git status --porcelain 2>/dev/null || true', { + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + }).replace(/\n+$/, '') + const baselineFiles = status + .split('\n') + .filter(l => l && !l.startsWith('??')) + .flatMap(parseGitStatusPaths) + writeBaseline(sessionDir, baselineFiles) + } catch { + // Baseline failure must not block mode entry + } +} /** * Enter with a custom template (one-off session) @@ -243,6 +271,8 @@ async function enterWithCustomTemplate( if (!parsed.dryRun) { await writeState(stateFile, finalState) + captureBaseline(finalState.sessionId!) + // Create fd-notes.md for feature-documentation mode (interview context persistence) if (modeName === 'feature-documentation' || templatePath.includes('feature-documentation')) { const featureDocPath = (finalState as Record).featureDocPath as @@ -676,6 +706,8 @@ export async function enter(args: string[]): Promise { // Skip state write in dry-run mode if (!parsed.dryRun) { await writeState(stateFile, finalState) + + captureBaseline(finalState.sessionId!) } // Determine action taken (native tasks always recreate, so always 'started') diff --git a/src/commands/hook.test.ts b/src/commands/hook.test.ts index c52ccd3..fc01bf1 100644 --- a/src/commands/hook.test.ts +++ b/src/commands/hook.test.ts @@ -1,4 +1,6 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test' +import { describe, it, expect, beforeEach, afterEach, afterAll } from 'bun:test' + +afterAll(() => { process.exitCode = 0 }) import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs' import { join } from 'node:path' import * as os from 'node:os' @@ -100,13 +102,14 @@ describe('hook dispatch', () => { } else { delete process.env.CLAUDE_PROJECT_DIR } - process.exitCode = undefined + process.exitCode = 0 }) it('unknown hook name sets exit code 1', async () => { const { hook } = await import('./hook.js') const stderr = await captureStderr(() => hook(['nonexistent-hook'])) expect(process.exitCode).toBe(1) + process.exitCode = 0 expect(stderr).toContain('Unknown hook') }) @@ -114,6 +117,7 @@ describe('hook dispatch', () => { const { hook } = await import('./hook.js') const stderr = await captureStderr(() => hook([])) expect(process.exitCode).toBe(1) + process.exitCode = 0 expect(stderr).toContain('Usage: kata hook ') }) }) diff --git a/src/commands/hook.ts b/src/commands/hook.ts index 37f067f..61767f9 100644 --- a/src/commands/hook.ts +++ b/src/commands/hook.ts @@ -2,7 +2,7 @@ // Core of hooks-as-commands architecture: each hook event has a handler function // that reads stdin JSON, performs the check, and outputs Claude Code hook JSON. import { execSync } from 'node:child_process' -import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync } from 'node:fs' +import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs' import { homedir } from 'node:os' import { join } from 'node:path' import { getStateFilePath, findProjectDir, getSessionsDir, resolveTemplatePath } from '../session/lookup.js' @@ -13,6 +13,7 @@ import { isNativeTasksEnabled } from '../utils/tasks-check.js' import { resolvePlaceholders, type PlaceholderContext } from './enter/placeholder.js' import { parseTemplateYaml } from './enter/template.js' import type { Gate } from '../validation/schemas.js' +import { toGitRelative, appendEdit, parseGitStatusPaths, readEditsSet } from '../tracking/edits-log.js' /** * Claude Code hook output format @@ -405,11 +406,13 @@ export async function handleTaskEvidence(_input: Record): Promi } catch { // No .claude/ found — fall back to hook runner's cwd } + // Strip trailing newlines only — consistent with other porcelain call sites + // so that the leading space of " M path" status lines is preserved. const gitStatus = execSync('git status --porcelain 2>/dev/null || true', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], ...(cwd ? { cwd } : {}), - }).trim() + }).replace(/\n+$/, '') if (gitStatus) { // There are uncommitted changes — remind agent to commit before marking done @@ -828,6 +831,34 @@ export async function handlePreToolUse(input: Record): Promise< } } + // Bash pre-snapshot: capture git status before suspicious commands + if (toolName === 'Bash' && sessionId) { + const command = (toolInput.command as string) ?? '' + // Safe-list checked first — skip snapshot entirely + const safeList = /^(git\s|bun\s+test|ls\b|cat\b|echo\b[^>]*$|cd\b|pwd\b|which\b|head\b|tail\b|wc\b|diff\b|grep\b|find\b)/ + if (!safeList.test(command)) { + // Suspicious regex checked second + const suspicious = /sed\s.*-i|>\s|>>\s|\btee\b|\bcp\b|\bmv\b|\brm\b|\bchmod\b|\bchown\b|\bpatch\b|\bcurl\b.*-o/ + if (suspicious.test(command)) { + try { + const projectDir = findProjectDir() + const sessionDir = join(getSessionsDir(projectDir), sessionId) + // Strip trailing newlines only — `.trim()` would eat the leading space + // of the first porcelain line, corrupting diff parsing in PostToolUse. + const snapshot = execSync('git status --porcelain 2>/dev/null || true', { + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + cwd: projectDir, + }).replace(/\n+$/, '') + mkdirSync(sessionDir, { recursive: true }) + writeFileSync(join(sessionDir, 'bash-pre-snapshot.txt'), snapshot) + } catch { + // Pre-snapshot failure must not block tool execution + } + } + } + } + // 3. TaskUpdate(status: "completed") — run deps, gates, evidence in sequence if (toolName === 'TaskUpdate') { const taskId = (toolInput.taskId as string) ?? '' @@ -907,20 +938,32 @@ export async function handlePreToolUse(input: Record): Promise< // 3c. Check git evidence (advisory warning, always allow) let additionalContext = '' try { - let cwd: string | undefined + let projectDir: string | undefined try { - cwd = findProjectDir() + projectDir = findProjectDir() } catch { // No .kata/ found } + // Strip trailing newlines only — `.trim()` would eat the leading space + // of the first porcelain line (e.g. " M file.ts"), corrupting parseGitStatusPaths. const gitStatus = execSync('git status --porcelain 2>/dev/null || true', { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], - ...(cwd ? { cwd } : {}), - }).trim() + ...(projectDir ? { cwd: projectDir } : {}), + }).replace(/\n+$/, '') if (gitStatus) { - const changedFiles = gitStatus.split('\n').filter((l) => !l.startsWith('??')) + const evidenceSessionDir = sessionId ? join(getSessionsDir(projectDir ?? process.cwd()), sessionId) : undefined + const sessionEdits = evidenceSessionDir ? readEditsSet(evidenceSessionDir) : null + + const changedFiles = gitStatus.split('\n').filter((l) => { + if (l.startsWith('??')) return false + if (sessionEdits) { + const paths = parseGitStatusPaths(l) + return paths.some(p => sessionEdits.has(p)) + } + return true + }) if (changedFiles.length > 0) { additionalContext = `⚠️ You have ${changedFiles.length} uncommitted change(s). ` + @@ -953,12 +996,73 @@ export async function handlePreToolUse(input: Record): Promise< }) } +// ── Handler: post-tool-use ── +// Tracks files modified by Edit, Write, NotebookEdit, and Bash tools +export async function handlePostToolUse(input: Record): Promise { + const sessionId = input.session_id as string | undefined + if (!sessionId) return + + try { + const projectDir = findProjectDir() + const sessionDir = join(getSessionsDir(projectDir), sessionId) + + // Guard: only track if session exists + if (!existsSync(join(sessionDir, 'state.json'))) return + + const toolName = (input.tool_name as string) ?? '' + const toolInput = (input.tool_input as Record) ?? {} + + if (toolName === 'Edit' || toolName === 'Write' || toolName === 'NotebookEdit') { + const filePath = toolInput.file_path as string | undefined + if (filePath) { + const gitRelative = toGitRelative(filePath) + appendEdit(sessionDir, { file: gitRelative, tool: toolName, ts: new Date().toISOString() }) + } + } else if (toolName === 'Bash') { + // Compare post-execution git status against pre-snapshot + const snapshotPath = join(sessionDir, 'bash-pre-snapshot.txt') + if (existsSync(snapshotPath)) { + try { + // Strip trailing newlines only — `.trim()` would eat the leading space + // of the first porcelain line, corrupting parseGitStatusPaths. + const preSnapshot = readFileSync(snapshotPath, 'utf-8').replace(/\n+$/, '') + const postSnapshot = execSync('git status --porcelain 2>/dev/null || true', { + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + cwd: projectDir, + }).replace(/\n+$/, '') + + // Find new dirty files + const preFiles = new Set(preSnapshot.split('\n').filter(Boolean).flatMap(parseGitStatusPaths)) + const postLines = postSnapshot.split('\n').filter(Boolean) + for (const line of postLines) { + const paths = parseGitStatusPaths(line) + for (const p of paths) { + if (!preFiles.has(p)) { + appendEdit(sessionDir, { file: p, tool: 'Bash', ts: new Date().toISOString() }) + } + } + } + + // Clean up snapshot file + try { unlinkSync(snapshotPath) } catch { /* ignore */ } + } catch { + // Diff failure — silently ignore + } + } + } + } catch { + // PostToolUse must never fail — silent no-op + } +} + // ── Hook name -> handler map ── const hookHandlers: Record) => Promise> = { 'session-start': handleSessionStart, 'user-prompt': handleUserPrompt, 'pre-tool-use': handlePreToolUse, 'stop-conditions': handleStopConditions, + 'post-tool-use': handlePostToolUse, // Backwards-compat aliases for transition period 'mode-gate': handlePreToolUse, 'task-deps': handlePreToolUse, diff --git a/src/commands/setup.ts b/src/commands/setup.ts index a9327b6..2029c41 100644 --- a/src/commands/setup.ts +++ b/src/commands/setup.ts @@ -149,6 +149,17 @@ export function buildHookEntries(wmBin: string): Record { ], }, ], + // PostToolUse: track file mutations for session-scoped stop conditions + PostToolUse: [ + { + hooks: [ + { + type: 'command', + command: `${bin} hook post-tool-use`, + }, + ], + }, + ], } return hooks @@ -203,7 +214,7 @@ export function mergeHooksIntoSettings( // Tolerates both bare `kata hook …` and quoted `"/path/kata" hook …` forms while // avoiding false positives from unrelated tools like lefthook or husky. const wmHookPattern = - /\bhook (session-start|user-prompt|stop-conditions|mode-gate|task-deps|task-evidence|pre-tool-use)\b/ + /\bhook (session-start|user-prompt|stop-conditions|mode-gate|task-deps|task-evidence|pre-tool-use|post-tool-use)\b/ const nonWmEntries = existing.filter((entry) => { return !entry.hooks?.some( (h) => typeof h.command === 'string' && wmHookPattern.test(h.command), diff --git a/src/commands/suggest.test.ts b/src/commands/suggest.test.ts index 668094f..aec94b1 100644 --- a/src/commands/suggest.test.ts +++ b/src/commands/suggest.test.ts @@ -36,12 +36,11 @@ describe('suggest', () => { beforeEach(() => { tmpDir = makeTmpDir() - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) - mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true }) // Write kata.yaml so loadKataConfig() finds it (no longer reads wm.yaml/modes.yaml) // Include modes with intent_keywords so mode detection tests work writeFileSync( - join(tmpDir, '.claude', 'workflows', 'kata.yaml'), + join(tmpDir, '.kata', 'kata.yaml'), [ 'spec_path: planning/specs', 'research_path: planning/research', @@ -123,7 +122,7 @@ describe('suggest', () => { it('uses research_path from KataConfig for search commands', async () => { // Write custom kata.yaml with custom research_path - const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml') + const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml') writeFileSync(kataYamlPath, 'research_path: custom/research\nspec_path: custom/specs\n') const output = await captureSuggest(['find', 'research', 'about', 'api']) diff --git a/src/commands/teardown.test.ts b/src/commands/teardown.test.ts index d922721..e1058b8 100644 --- a/src/commands/teardown.test.ts +++ b/src/commands/teardown.test.ts @@ -47,18 +47,18 @@ describe('teardown', () => { } else { delete process.env.CLAUDE_PROJECT_DIR } - process.exitCode = undefined + process.exitCode = 0 }) /** * Create a fully configured kata project at tmpDir */ function createWmProject(): void { - mkdirSync(join(tmpDir, '.claude', 'sessions', 'some-session'), { recursive: true }) - mkdirSync(join(tmpDir, '.claude', 'workflows'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata', 'sessions', 'some-session'), { recursive: true }) + mkdirSync(join(tmpDir, '.claude'), { recursive: true }) // Write kata.yaml (teardown deletes kata.yaml, not wm.yaml) - writeFileSync(join(tmpDir, '.claude', 'workflows', 'kata.yaml'), 'spec_path: planning/specs\n') + writeFileSync(join(tmpDir, '.kata', 'kata.yaml'), 'spec_path: planning/specs\n') // Write settings.json with kata hooks and a non-kata hook writeFileSync( @@ -158,7 +158,7 @@ describe('teardown', () => { it('deletes kata.yaml', async () => { createWmProject() - const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml') + const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml') expect(existsSync(kataYamlPath)).toBe(true) await captureTeardown(['--yes'], tmpDir) @@ -168,7 +168,7 @@ describe('teardown', () => { it('preserves sessions/', async () => { createWmProject() - const sessionsDir = join(tmpDir, '.claude', 'sessions') + const sessionsDir = join(tmpDir, '.kata', 'sessions') expect(existsSync(sessionsDir)).toBe(true) await captureTeardown(['--yes'], tmpDir) @@ -199,7 +199,7 @@ describe('teardown', () => { it('dry-run shows planned actions without making changes', async () => { createWmProject() - const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml') + const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml') const output = await captureTeardown(['--yes', '--dry-run'], tmpDir) expect(output).toContain('[DRY RUN]') @@ -215,9 +215,10 @@ describe('teardown', () => { const output = await captureTeardown([], tmpDir) expect(output).toContain('--yes to confirm') expect(process.exitCode).toBe(1) + process.exitCode = 0 // Files should still exist - const kataYamlPath = join(tmpDir, '.claude', 'workflows', 'kata.yaml') + const kataYamlPath = join(tmpDir, '.kata', 'kata.yaml') expect(existsSync(kataYamlPath)).toBe(true) }) }) diff --git a/src/session/lookup.test.ts b/src/session/lookup.test.ts index 2fc20eb..ac1ffa8 100644 --- a/src/session/lookup.test.ts +++ b/src/session/lookup.test.ts @@ -30,18 +30,18 @@ describe('resolveTemplatePath', () => { it('resolves project-level template first', () => { const tmpDir = makeTmpDir('proj-tmpl') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'workflows', 'templates'), { recursive: true }) - writeFileSync(join(tmpDir, '.claude', 'workflows', 'templates', 'task.md'), '# project task') + mkdirSync(join(tmpDir, '.kata', 'templates'), { recursive: true }) + writeFileSync(join(tmpDir, '.kata', 'templates', 'task.md'), '# project task') process.env.CLAUDE_PROJECT_DIR = tmpDir const result = resolveTemplatePath('task.md') - expect(result).toBe(join(tmpDir, '.claude', 'workflows', 'templates', 'task.md')) + expect(result).toBe(join(tmpDir, '.kata', 'templates', 'task.md')) }) it('falls back to package batteries template', () => { const tmpDir = makeTmpDir('pkg-fallback') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata'), { recursive: true }) process.env.CLAUDE_PROJECT_DIR = tmpDir // task.md exists in batteries/templates/ (package level) @@ -52,7 +52,7 @@ describe('resolveTemplatePath', () => { it('throws when template not found at any tier', () => { const tmpDir = makeTmpDir('not-found') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata'), { recursive: true }) process.env.CLAUDE_PROJECT_DIR = tmpDir expect(() => resolveTemplatePath('does-not-exist.md')).toThrow('Template not found') @@ -86,7 +86,7 @@ describe('resolveSpecTemplatePath', () => { it('resolves project-level spec template first', () => { const tmpDir = makeTmpDir('proj-spec') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata'), { recursive: true }) mkdirSync(join(tmpDir, 'planning', 'spec-templates'), { recursive: true }) writeFileSync(join(tmpDir, 'planning', 'spec-templates', 'feature.md'), '# project feature') process.env.CLAUDE_PROJECT_DIR = tmpDir @@ -95,21 +95,20 @@ describe('resolveSpecTemplatePath', () => { expect(result).toBe(join(tmpDir, 'planning', 'spec-templates', 'feature.md')) }) - it('falls back to package batteries spec template', () => { + it('throws when spec template not found in project (no batteries fallback)', () => { const tmpDir = makeTmpDir('pkg-spec') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata'), { recursive: true }) process.env.CLAUDE_PROJECT_DIR = tmpDir - // feature.md exists in batteries/spec-templates/ - const result = resolveSpecTemplatePath('feature.md') - expect(result).toMatch(/batteries\/spec-templates\/feature\.md$/) + // resolveSpecTemplatePath only checks project planning/spec-templates/ — no batteries fallback + expect(() => resolveSpecTemplatePath('feature.md')).toThrow('Spec template not found') }) it('throws when spec template not found at any tier', () => { const tmpDir = makeTmpDir('spec-not-found') tmpDirs.push(tmpDir) - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata'), { recursive: true }) process.env.CLAUDE_PROJECT_DIR = tmpDir expect(() => resolveSpecTemplatePath('nonexistent.md')).toThrow('Spec template not found') @@ -213,7 +212,7 @@ describe('getStateFilePath — layout-shift resilience', () => { beforeEach(() => { tmpDir = makeTmpDir('state-path') - mkdirSync(join(tmpDir, '.claude', 'sessions'), { recursive: true }) + mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true }) process.env.CLAUDE_PROJECT_DIR = tmpDir }) @@ -226,18 +225,11 @@ describe('getStateFilePath — layout-shift resilience', () => { } }) - it('returns .claude/ path when state.json exists only there despite .kata/ existing', async () => { + it('returns .kata/ path for session state', async () => { const sessionId = '12345678-1234-4234-8234-123456789abc' - mkdirSync(join(tmpDir, '.claude', 'sessions', sessionId), { recursive: true }) - writeFileSync( - join(tmpDir, '.claude', 'sessions', sessionId, 'state.json'), - JSON.stringify({ updatedAt: new Date().toISOString() }), - ) - // .kata/ exists but has no sessions - mkdirSync(join(tmpDir, '.kata', 'sessions'), { recursive: true }) const result = await getStateFilePath(sessionId) - expect(result).toBe(join(tmpDir, '.claude', 'sessions', sessionId, 'state.json')) + expect(result).toBe(join(tmpDir, '.kata', 'sessions', sessionId, 'state.json')) }) }) diff --git a/src/testing/integration.test.ts b/src/testing/integration.test.ts index 48b36e7..9f5c5c8 100644 --- a/src/testing/integration.test.ts +++ b/src/testing/integration.test.ts @@ -94,7 +94,7 @@ describe('integration: full hook dispatch simulation', () => { } else { delete process.env.CLAUDE_SESSION_ID } - process.exitCode = undefined + process.exitCode = 0 }) it('session-start -> user-prompt -> stop-conditions lifecycle', async () => { diff --git a/src/tracking/edits-log.test.ts b/src/tracking/edits-log.test.ts new file mode 100644 index 0000000..261b432 --- /dev/null +++ b/src/tracking/edits-log.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test' +import { mkdirSync, rmSync, readFileSync, writeFileSync, existsSync, chmodSync } from 'node:fs' +import { join } from 'node:path' +import * as os from 'node:os' + +import { + appendEdit, + readEditsSet, + writeBaseline, + readBaseline, + parseGitStatusPaths, + toGitRelative, +} from './edits-log.js' + +let tmpDir: string + +beforeEach(() => { + tmpDir = join(os.tmpdir(), `edits-log-test-${Date.now()}-${Math.random().toString(36).slice(2)}`) + mkdirSync(tmpDir, { recursive: true }) +}) + +afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }) +}) + +describe('parseGitStatusPaths', () => { + it('parses modified file', () => { + expect(parseGitStatusPaths('M foo.ts')).toEqual(['foo.ts']) + }) + + it('parses added file', () => { + expect(parseGitStatusPaths('A bar.ts')).toEqual(['bar.ts']) + }) + + it('skips untracked files', () => { + expect(parseGitStatusPaths('?? untracked.ts')).toEqual([]) + }) + + it('parses rename producing both paths', () => { + expect(parseGitStatusPaths('R old.ts -> new.ts')).toEqual(['old.ts', 'new.ts']) + }) + + // Regression: worktree-only modifications emit " M path" (leading space = empty index status). + // Callers that stripped the git output with .trim() used to corrupt the first character + // of the first dirty file. parseGitStatusPaths itself handles the line correctly; + // this test guards the callers' expected input shape. + it('parses worktree-only modification (leading space)', () => { + expect(parseGitStatusPaths(' M README.md')).toEqual(['README.md']) + }) + + it('parses worktree-only deletion (leading space)', () => { + expect(parseGitStatusPaths(' D gone.ts')).toEqual(['gone.ts']) + }) +}) + +describe('appendEdit + readEditsSet', () => { + it('appends one edit and reads it back', () => { + const ts = new Date().toISOString() + appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Edit', ts }) + const result = readEditsSet(tmpDir) + expect(result.has('src/index.ts')).toBe(true) + expect(result.size).toBe(1) + }) + + it('deduplicates the same file appended twice', () => { + const ts = new Date().toISOString() + appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Edit', ts }) + appendEdit(tmpDir, { file: 'src/index.ts', tool: 'Write', ts }) + const result = readEditsSet(tmpDir) + expect(result.has('src/index.ts')).toBe(true) + expect(result.size).toBe(1) + }) + + it('returns empty Set for non-existent dir', () => { + const result = readEditsSet(join(tmpDir, 'nonexistent')) + expect(result.size).toBe(0) + }) + + it('persists multiple rapid sequential appends', () => { + const ts = new Date().toISOString() + for (let i = 0; i < 5; i++) { + appendEdit(tmpDir, { file: `file-${i}.ts`, tool: 'Edit', ts }) + } + const result = readEditsSet(tmpDir) + expect(result.size).toBe(5) + for (let i = 0; i < 5; i++) { + expect(result.has(`file-${i}.ts`)).toBe(true) + } + }) +}) + +describe('readEditsSet corrupt line resilience', () => { + it('skips corrupt lines and returns valid entries', () => { + const editsPath = join(tmpDir, 'edits.jsonl') + const lines = [ + JSON.stringify({ file: 'a.ts', tool: 'Edit', ts: '2026-01-01T00:00:00Z' }), + 'this is not valid json {{{', + JSON.stringify({ file: 'b.ts', tool: 'Write', ts: '2026-01-01T00:00:01Z' }), + ] + writeFileSync(editsPath, lines.join('\n') + '\n') + const result = readEditsSet(tmpDir) + expect(result.size).toBe(2) + expect(result.has('a.ts')).toBe(true) + expect(result.has('b.ts')).toBe(true) + }) +}) + +describe('writeBaseline + readBaseline', () => { + it('writes and reads back baseline files as Set', () => { + const files = ['src/a.ts', 'src/b.ts', 'src/c.ts'] + writeBaseline(tmpDir, files) + const result = readBaseline(tmpDir) + expect(result.size).toBe(3) + for (const f of files) { + expect(result.has(f)).toBe(true) + } + }) + + it('returns empty Set for non-existent dir', () => { + const result = readBaseline(join(tmpDir, 'nonexistent')) + expect(result.size).toBe(0) + }) +}) + +describe('appendEdit silent failure', () => { + it('does not throw when writing to an invalid path', () => { + // /dev/null/impossible is not a valid directory + expect(() => { + appendEdit('/dev/null/impossible/path', { file: 'x.ts', tool: 'Edit', ts: new Date().toISOString() }) + }).not.toThrow() + }) +}) + +describe('toGitRelative', () => { + it('converts absolute path under git root to relative', () => { + // Use the actual project root for this test + const projectRoot = '/data/projects/kata-wm' + const abs = join(projectRoot, 'src', 'tracking', 'edits-log.ts') + const rel = toGitRelative(abs) + expect(rel).toBe('src/tracking/edits-log.ts') + }) +}) diff --git a/src/tracking/edits-log.ts b/src/tracking/edits-log.ts new file mode 100644 index 0000000..e042984 --- /dev/null +++ b/src/tracking/edits-log.ts @@ -0,0 +1,98 @@ +import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs' +import { execSync } from 'node:child_process' +import { join, relative, resolve } from 'node:path' + +let cachedGitRoot: string | undefined + +/** + * Normalize an absolute path to git-root-relative. + * Caches the git root to avoid repeated shell-outs. + */ +export function toGitRelative(absolutePath: string): string { + if (!cachedGitRoot) { + cachedGitRoot = execSync('git rev-parse --show-toplevel', { encoding: 'utf-8' }).trim() + } + return relative(cachedGitRoot, resolve(absolutePath)) +} + +/** + * Parse a `git status --porcelain` line and extract file path(s). + * Returns 1 path normally, or 2 paths for renames (status R, split on ` -> `). + * Skips untracked lines (??). + */ +export function parseGitStatusPaths(line: string): string[] { + const status = line.slice(0, 2) + if (status === '??') return [] + const pathPart = line.slice(3) + if (status.includes('R')) { + return pathPart.split(' -> ') + } + return [pathPart] +} + +/** + * Append a JSON line to {sessionDir}/edits.jsonl. + * Tracking failure must NEVER throw. + */ +export function appendEdit(sessionDir: string, entry: { file: string; tool: string; ts: string }): void { + try { + mkdirSync(sessionDir, { recursive: true }) + appendFileSync(join(sessionDir, 'edits.jsonl'), JSON.stringify(entry) + '\n') + } catch { + // Silently ignore — tracking failure must never throw + } +} + +/** + * Read {sessionDir}/edits.jsonl, parse each line as JSON, + * extract .file, return a Set of unique file paths. + * Handles missing file and corrupt lines gracefully. + */ +export function readEditsSet(sessionDir: string): Set { + const result = new Set() + try { + const filePath = join(sessionDir, 'edits.jsonl') + if (!existsSync(filePath)) return result + const content = readFileSync(filePath, 'utf-8') + for (const line of content.split('\n')) { + if (!line.trim()) continue + try { + const parsed = JSON.parse(line) + if (parsed.file) result.add(parsed.file) + } catch { + // Skip corrupt line + } + } + } catch { + // Return whatever we have so far + } + return result +} + +/** + * Write {sessionDir}/baseline.json as {"files": [...], "ts": "ISO"}. + */ +export function writeBaseline(sessionDir: string, files: string[]): void { + try { + mkdirSync(sessionDir, { recursive: true }) + writeFileSync(join(sessionDir, 'baseline.json'), JSON.stringify({ files, ts: new Date().toISOString() })) + } catch { + // Silently ignore + } +} + +/** + * Read {sessionDir}/baseline.json, parse JSON, return Set from the files array. + * Handles missing/corrupt file by returning empty Set. + */ +export function readBaseline(sessionDir: string): Set { + try { + const filePath = join(sessionDir, 'baseline.json') + if (!existsSync(filePath)) return new Set() + const content = readFileSync(filePath, 'utf-8') + const parsed = JSON.parse(content) + return new Set(parsed.files ?? []) + } catch { + return new Set() + } +} diff --git a/src/validation/schemas.test.ts b/src/validation/schemas.test.ts index 487bb0b..782421b 100644 --- a/src/validation/schemas.test.ts +++ b/src/validation/schemas.test.ts @@ -255,11 +255,16 @@ describe('stage field on phaseSchema', () => { expect(result.success).toBe(false) }) - it('rejects expansion on non-work phase', () => { - const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'agent' }) + it('rejects spec expansion on non-work phase', () => { + const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'spec', subphase_pattern: [] }) expect(result.success).toBe(false) }) + it('allows agent expansion on setup phase', () => { + const result = phaseSchema.safeParse({ id: 'p0', name: 'Test', stage: 'setup', expansion: 'agent' }) + expect(result.success).toBe(true) + }) + it('accepts expansion on work phase', () => { const result = phaseSchema.safeParse({ id: 'p1', name: 'Work', stage: 'work', expansion: 'spec', subphase_pattern: [] }) expect(result.success).toBe(true) diff --git a/src/validation/schemas.ts b/src/validation/schemas.ts index a4a2c56..990d7e5 100644 --- a/src/validation/schemas.ts +++ b/src/validation/schemas.ts @@ -127,6 +127,7 @@ export const subphasePatternSchema = z.object({ agent: agentStepConfigSchema.optional(), gate: gateSchema.optional(), hints: z.array(hintSchema).optional(), + skill: z.string().optional(), }) // ── Agent protocol schema (for expansion: 'agent' phases) ── @@ -151,8 +152,8 @@ export const phaseSchema = z.object({ steps: z.array(phaseStepSchema).optional(), // Individual trackable units within phase (e.g., interview rounds) subphase_pattern: z.array(subphasePatternSchema).optional(), // Inline array only (string references removed) }).refine( - (p) => !p.expansion || p.stage === 'work', - { message: 'expansion is only allowed on work-stage phases' } + (p) => p.expansion !== 'spec' || p.stage === 'work', + { message: 'expansion: spec is only allowed on work-stage phases' } ) /**