diff --git a/src/infrastructure/persistence/yaml_output_repository.ts b/src/infrastructure/persistence/yaml_output_repository.ts index f00758eb..21054685 100644 --- a/src/infrastructure/persistence/yaml_output_repository.ts +++ b/src/infrastructure/persistence/yaml_output_repository.ts @@ -71,8 +71,15 @@ export class YamlOutputRepository implements OutputRepository { const dir = this.getMethodDir(type, method); try { for await (const entry of Deno.readDir(dir)) { - if (entry.isFile && entry.name.endsWith(".yaml")) { - const path = join(dir, entry.name); + if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; + const path = join(dir, entry.name); + + // Per-file try/catch closes the TOCTOU window: a concurrent + // delete can remove a non-target file between readDir and + // readTextFile. NotFound on a single file means "skip it" — + // never "abort the search and return null when the target + // exists later in the directory." + try { const content = await Deno.readTextFile(path); const data = parseYaml(content) as ModelOutputData; if (data.id === id) { @@ -82,6 +89,9 @@ export class YamlOutputRepository implements OutputRepository { } return ModelOutput.fromData(data); } + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } } } catch (error) { @@ -124,32 +134,44 @@ export class YamlOutputRepository implements OutputRepository { for await (const methodEntry of Deno.readDir(typeDir)) { if (!methodEntry.isDirectory) continue; const methodDir = join(typeDir, methodEntry.name); - // Iterate over output files in method directory - for await (const entry of Deno.readDir(methodDir)) { - if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; - const path = join(methodDir, entry.name); - - // Per-file try/catch closes the TOCTOU window: a concurrent - // delete (e.g. GC, output cleanup) can remove the file - // between readDir and readTextFile. NotFound on a single - // file means "skip it" — never "abandon the rest of the - // current method directory." - try { - const content = await Deno.readTextFile(path); - const data = parseYaml(content) as ModelOutputData; - if (data.logFile) { - data.logFile = toAbsolutePath(this.repoDir, data.logFile); + + // Per-method-directory try/catch closes the directory-level + // TOCTOU window: a concurrent bulk delete can remove the + // method directory between readDir(typeDir) and + // readDir(methodDir). NotFound on a single method directory + // means "skip it" — never "abandon outputs already collected + // from earlier method directories of this type." + try { + // Iterate over output files in method directory + for await (const entry of Deno.readDir(methodDir)) { + if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; + const path = join(methodDir, entry.name); + + // Per-file try/catch closes the TOCTOU window: a concurrent + // delete (e.g. GC, output cleanup) can remove the file + // between readDir and readTextFile. NotFound on a single + // file means "skip it" — never "abandon the rest of the + // current method directory." + try { + const content = await Deno.readTextFile(path); + const data = parseYaml(content) as ModelOutputData; + if (data.logFile) { + data.logFile = toAbsolutePath(this.repoDir, data.logFile); + } + outputs.push(ModelOutput.fromData(data)); + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } - outputs.push(ModelOutput.fromData(data)); - } catch (error) { - if (error instanceof Deno.errors.NotFound) continue; - throw error; } + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } } } catch (error) { - // Outer catch handles "type/method directory itself doesn't - // exist." Per-file NotFound is handled above. + // Outer catch handles "type directory itself doesn't exist." + // Per-method-dir and per-file NotFound are handled above. if (error instanceof Deno.errors.NotFound) { return []; } @@ -203,44 +225,56 @@ export class YamlOutputRepository implements OutputRepository { for await (const methodEntry of Deno.readDir(typeDir)) { if (!methodEntry.isDirectory) continue; const methodDir = join(typeDir, methodEntry.name); - for await (const entry of Deno.readDir(methodDir)) { - if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; - const path = join(methodDir, entry.name); - // Per-file try/catch closes the TOCTOU window: a concurrent - // delete can remove the file between readDir and stat or - // between stat and readTextFile. NotFound on a single file - // means "skip it" — never "abandon the rest of the current - // method directory or model type." - try { - // Stage A: mtime pre-filter - const stat = await Deno.stat(path); - const mtimeMs = stat.mtime?.getTime(); - if (mtimeMs !== undefined && mtimeMs < cutoffMs) continue; + // Per-method-directory try/catch closes the directory-level + // TOCTOU window: a concurrent bulk delete can remove the + // method directory between readDir(typeDir) and + // readDir(methodDir). NotFound on a single method directory + // means "skip it" — never "abandon results already collected + // from earlier method directories of this type." + try { + for await (const entry of Deno.readDir(methodDir)) { + if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; + const path = join(methodDir, entry.name); - // Stage B: parse and verify - const content = await Deno.readTextFile(path); - const data = parseYaml(content) as ModelOutputData; - if (data.logFile) { - data.logFile = toAbsolutePath(this.repoDir, data.logFile); + // Per-file try/catch closes the TOCTOU window: a concurrent + // delete can remove the file between readDir and stat or + // between stat and readTextFile. NotFound on a single file + // means "skip it" — never "abandon the rest of the current + // method directory or model type." + try { + // Stage A: mtime pre-filter + const stat = await Deno.stat(path); + const mtimeMs = stat.mtime?.getTime(); + if (mtimeMs !== undefined && mtimeMs < cutoffMs) continue; + + // Stage B: parse and verify + const content = await Deno.readTextFile(path); + const data = parseYaml(content) as ModelOutputData; + if (data.logFile) { + data.logFile = toAbsolutePath(this.repoDir, data.logFile); + } + const output = ModelOutput.fromData(data); + if (output.startedAt.getTime() < cutoffMs) continue; + + results.push({ + output, + type: modelType, + method: output.methodName, + }); + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } - const output = ModelOutput.fromData(data); - if (output.startedAt.getTime() < cutoffMs) continue; - - results.push({ - output, - type: modelType, - method: output.methodName, - }); - } catch (error) { - if (error instanceof Deno.errors.NotFound) continue; - throw error; } + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } } } catch (error) { - // Outer catch handles "type/method directory itself doesn't - // exist." Per-file NotFound is handled above. + // Outer catch handles "type directory itself doesn't exist." + // Per-method-dir and per-file NotFound are handled above. if (error instanceof Deno.errors.NotFound) continue; throw error; } @@ -277,28 +311,46 @@ export class YamlOutputRepository implements OutputRepository { id: ModelOutputId, ): Promise { // We need to find the file first since filename includes timestamp. - // Notify per-path inside the loop once the match is found, before - // the Deno.remove — same crash-safety as the unconditional pre-write - // notify. When no match is found, nothing is removed and no signal - // is needed. + // Notify per-path once the match is found, before the Deno.remove — + // same crash-safety as the unconditional pre-write notify. When no + // match is found, nothing is removed and no signal is needed. + // + // Find-then-act structure: the per-file try/catch wraps only the + // search (readTextFile + parseYaml + match check) so a concurrent + // delete of a non-target file doesn't abort the search before we + // reach the target. The destructive ops (notifyDirty + Deno.remove + // + cleanupEmptyParentDirs) run after the loop, still inside the + // outer try/catch — preserving the existing semantic that a + // NotFound from Deno.remove (race: target deleted concurrently) is + // absorbed and returns successfully (delete is idempotent). const dir = this.getMethodDir(type, method); try { + let matchPath: string | null = null; for await (const entry of Deno.readDir(dir)) { - if (entry.isFile && entry.name.endsWith(".yaml")) { - const path = join(dir, entry.name); + if (!entry.isFile || !entry.name.endsWith(".yaml")) continue; + const path = join(dir, entry.name); + + try { const content = await Deno.readTextFile(path); const data = parseYaml(content) as ModelOutputData; if (data.id === id) { - await this.notifyDirty(path); - await Deno.remove(path); - - // Clean up empty parent directories - const outputsDir = this.baseDir; - await cleanupEmptyParentDirs(path, outputsDir); - return; + matchPath = path; + break; } + } catch (error) { + if (error instanceof Deno.errors.NotFound) continue; + throw error; } } + + if (matchPath) { + await this.notifyDirty(matchPath); + await Deno.remove(matchPath); + + // Clean up empty parent directories + const outputsDir = this.baseDir; + await cleanupEmptyParentDirs(matchPath, outputsDir); + } } catch (error) { if (!(error instanceof Deno.errors.NotFound)) { throw error; diff --git a/src/infrastructure/persistence/yaml_output_repository_test.ts b/src/infrastructure/persistence/yaml_output_repository_test.ts index 4eb31b5d..5fd206ec 100644 --- a/src/infrastructure/persistence/yaml_output_repository_test.ts +++ b/src/infrastructure/persistence/yaml_output_repository_test.ts @@ -493,3 +493,204 @@ Deno.test( }); }, ); + +// Wraps `Deno.readDir` so that a single chosen path yields a phantom +// entry before its real entries. The phantom points to nothing on disk, +// so when production code follows up with `Deno.readTextFile` or +// `Deno.readDir` on it, the syscall throws `NotFound` naturally — +// deterministically exercising the TOCTOU window. Returns a restore +// function that must be called in `finally`. We monkey-patch the global +// because the `find*`/`delete` walkers call `Deno.readDir` directly with +// no injection point; the alternative ("remove file before call") only +// triggers the race on platforms where readDir surfaces stale entries, +// which isn't portable. +function withPhantomReadDirEntry( + targetDir: string, + phantom: Deno.DirEntry, + fn: () => Promise, +): Promise { + const original = Deno.readDir; + const wrapped = (path: string | URL): AsyncIterable => { + const realIter = original(path); + const matchPath = typeof path === "string" ? path : path.pathname; + if (matchPath === targetDir) { + return (async function* () { + yield phantom; + yield* realIter; + })(); + } + return realIter; + }; + (Deno as { readDir: typeof Deno.readDir }).readDir = wrapped; + return fn().finally(() => { + (Deno as { readDir: typeof Deno.readDir }).readDir = original; + }); +} + +Deno.test( + "findAll: method directory deleted mid-iteration is skipped, not fatal", + async () => { + await withTempDir(async (dir) => { + const repo = new YamlOutputRepository(dir); + + // Seed one real output under method "run". We then inject a + // phantom method-dir entry into readDir(typeDir); production + // code calls readDir(phantomMethodDir) and gets NotFound. + // Pre-fix: NotFound escapes the inner for-await to the outer + // catch, returns [], discarding `keep`. + const keep = await makeOutput(repo, new Date()); + + const typeDir = join( + dir, + ".swamp", + "outputs", + registeredType.normalized, + ); + const phantom: Deno.DirEntry = { + name: "phantom-method", + isFile: false, + isDirectory: true, + isSymlink: false, + }; + + const found = await withPhantomReadDirEntry( + typeDir, + phantom, + () => repo.findAll(registeredType), + ); + + assertEquals(found.length, 1); + assertEquals(found[0].id, keep.id); + }); + }, +); + +Deno.test( + "findAllGlobalSince: method directory deleted mid-iteration is skipped, not fatal", + async () => { + await withTempDir(async (dir) => { + const repo = new YamlOutputRepository(dir); + + // Same shape as above, against the per-model-type walker. Pre-fix: + // NotFound from readDir(phantomMethodDir) escapes to the per-type + // catch which `continue`s past the entire current model type, + // discarding results already pushed for it. + const keep = await makeOutput(repo, new Date()); + + const typeDir = join( + dir, + ".swamp", + "outputs", + registeredType.normalized, + ); + const phantom: Deno.DirEntry = { + name: "phantom-method", + isFile: false, + isDirectory: true, + isSymlink: false, + }; + + const cutoff = new Date(Date.now() - 60 * 60 * 1000); + const found = await withPhantomReadDirEntry( + typeDir, + phantom, + () => repo.findAllGlobalSince(cutoff), + ); + + assertEquals(found.length, 1); + assertEquals(found[0].output.id, keep.id); + }); + }, +); + +Deno.test( + "findById: non-target file deleted mid-iteration still returns target", + async () => { + await withTempDir(async (dir) => { + const repo = new YamlOutputRepository(dir); + + // Seed the target. Inject a phantom .yaml entry into the method + // dir's readDir so production code reads it before reaching the + // real target file. Pre-fix: NotFound from readTextFile on the + // phantom escapes the outer catch and returns null even though + // the target exists. + const target = await makeOutput(repo, new Date()); + + const methodDir = join( + dir, + ".swamp", + "outputs", + registeredType.normalized, + "run", + ); + const phantom: Deno.DirEntry = { + name: "phantom-non-target.yaml", + isFile: true, + isDirectory: false, + isSymlink: false, + }; + + const found = await withPhantomReadDirEntry( + methodDir, + phantom, + () => repo.findById(registeredType, "run", target.id), + ); + + assertEquals(found?.id, target.id); + }); + }, +); + +Deno.test( + "delete: non-target file deleted mid-iteration still deletes target", + async () => { + await withTempDir(async (dir) => { + // Capture markDirty so we can assert notifyDirty fires for the + // target's path. Pre-fix, a NotFound on a non-target file aborts + // the search before the match is found — notifyDirty never fires + // and the target survives. + const dirtyPaths: string[] = []; + const markDirty = (path?: string): Promise => { + if (path) dirtyPaths.push(path); + return Promise.resolve(); + }; + const repo = new YamlOutputRepository(dir, undefined, markDirty); + + const target = await makeOutput(repo, new Date()); + const targetPath = repo.getPath(registeredType, "run", target); + + // Reset dirty log: makeOutput's save called markDirty already. + dirtyPaths.length = 0; + + const methodDir = join( + dir, + ".swamp", + "outputs", + registeredType.normalized, + "run", + ); + const phantom: Deno.DirEntry = { + name: "phantom-non-target.yaml", + isFile: true, + isDirectory: false, + isSymlink: false, + }; + + await withPhantomReadDirEntry( + methodDir, + phantom, + () => repo.delete(registeredType, "run", target.id), + ); + + // Target file must be gone. + assertEquals( + await repo.findById(registeredType, "run", target.id), + null, + ); + + // notifyDirty must have fired with the target's path. + assertEquals(dirtyPaths.length, 1); + assertEquals(dirtyPaths[0], targetPath); + }); + }, +); diff --git a/src/infrastructure/persistence/yaml_workflow_run_repository.ts b/src/infrastructure/persistence/yaml_workflow_run_repository.ts index ce2f0419..d9b59c2a 100644 --- a/src/infrastructure/persistence/yaml_workflow_run_repository.ts +++ b/src/infrastructure/persistence/yaml_workflow_run_repository.ts @@ -74,6 +74,16 @@ export class YamlWorkflowRunRepository implements WorkflowRunRepository { ): Promise { const dir = this.getRunsDir(workflowId); + // No per-file try/catch around readTextFile here — unlike the + // analogous YamlOutputRepository.findById, the entry.name filter + // restricts readTextFile to the target file only (filenames are + // `workflow-run-${runId}.yaml`; runIds are 36-char UUIDs that + // never substring-match each other, and atomicWriteTextFile's + // `.{freshUuid}.tmp` files don't contain the runId). The only + // NotFound readTextFile can raise is for the target's own + // concurrent deletion, for which returning null is correct. If + // this filter is ever relaxed, add the per-file try/catch + + // continue pattern used in YamlOutputRepository.findById. try { for await (const entry of Deno.readDir(dir)) { if (