diff --git a/src/lib/agent-skills.ts b/src/lib/agent-skills.ts index 8bea02239..cd81d506e 100644 --- a/src/lib/agent-skills.ts +++ b/src/lib/agent-skills.ts @@ -10,8 +10,8 @@ */ import { accessSync, constants, existsSync, mkdirSync } from "node:fs"; -import { writeFile } from "node:fs/promises"; -import { dirname, join } from "node:path"; +import { rename, rm, writeFile } from "node:fs/promises"; +import { basename, dirname, join } from "node:path"; import { captureException } from "@sentry/node-core/light"; import { SKILL_FILES } from "../generated/skill-content.js"; @@ -49,6 +49,53 @@ export function getSkillInstallPath( return join(homeDir, rootDir, "skills", "sentry-cli", "SKILL.md"); } +/** + * Atomically write `content` to `destPath`. + * + * Writes to a uniquely-named temp file in the same directory, then `rename()`s + * it over the destination. Because the temp file shares the destination's + * directory (and therefore filesystem), the rename is atomic on POSIX, so a + * concurrent reader never observes a truncated or partially-written file. + * + * This matters because AI coding agents (e.g. Claude Code, OpenCode) discover + * skills by globbing the skills tree for `SKILL.md` files and parsing their + * frontmatter. A plain in-place `writeFile` truncates the file before + * rewriting it; an agent scanning during that window reads empty frontmatter + * and silently drops the skill. On POSIX the rename eliminates that race. + * (On Windows the replace still works, but can fail with a sharing violation + * if a reader holds the destination open — a transient, best-effort outcome.) + * + * The temp file is named `....tmp` (e.g. + * `.SKILL.md.1234.ab12.tmp`) so it never matches the `SKILL.md`/`*.md` globs + * agents look for, even while it exists. + * + * Precondition: `dirname(destPath)` must already exist; callers create the + * directory before invoking this helper. On failure the temp file is removed + * (best-effort) and the original error is re-thrown for the caller to handle. + */ +async function atomicWriteFile( + destPath: string, + content: string +): Promise { + const dir = dirname(destPath); + const tempPath = join( + dir, + `.${basename(destPath)}.${process.pid}.${Math.random().toString(36).slice(2)}.tmp` + ); + try { + await writeFile(tempPath, content, "utf-8"); + await rename(tempPath, destPath); + } catch (error) { + await rm(tempPath, { force: true }).catch((cleanupError) => { + captureException(cleanupError, { + level: "debug", + tags: { "setup.step": "agent-skills-cleanup" }, + }); + }); + throw error; + } +} + /** * Write embedded skill files beneath an already-detected agent root. * @@ -69,7 +116,7 @@ async function writeSkillFiles( if (!existsSync(dir)) { mkdirSync(dir, { recursive: true, mode: 0o755 }); } - await writeFile(fullPath, content, "utf-8"); + await atomicWriteFile(fullPath, content); if (relativePath.startsWith("references/")) { referenceCount += 1; } diff --git a/test/lib/agent-skills.test.ts b/test/lib/agent-skills.test.ts index 04e636131..80f621c8b 100644 --- a/test/lib/agent-skills.test.ts +++ b/test/lib/agent-skills.test.ts @@ -5,16 +5,32 @@ * embedded skill installation across detected agent roots. */ -import { chmodSync, existsSync, mkdirSync, rmSync } from "node:fs"; -import { readFile } from "node:fs/promises"; +import { chmodSync, existsSync, mkdirSync, readdirSync, rmSync } from "node:fs"; +import { readFile, rename, rm, writeFile } from "node:fs/promises"; import { join } from "node:path"; -import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { detectClaudeCode, getSkillInstallPath, installAgentSkills, } from "../../src/lib/agent-skills.js"; +// Wrap node:fs/promises so individual functions can be observed (to prove the +// atomic temp-file + rename mechanism is actually used) and selectively made to +// fail (to exercise the cleanup/rollback path). Every function delegates to the +// real implementation by default, so the rest of the suite hits the real FS. +// `mockReset: false` (vitest.config.ts) keeps these delegating implementations +// across tests; only the `*Once` overrides below are transient. +vi.mock("node:fs/promises", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + writeFile: vi.fn(actual.writeFile), + rename: vi.fn(actual.rename), + rm: vi.fn(actual.rm), + }; +}); + describe("agent-skills", () => { describe("detectClaudeCode", () => { let testDir: string; @@ -65,6 +81,9 @@ describe("agent-skills", () => { }); afterEach(() => { + // Clear recorded calls (and any leftover `*Once` overrides) between tests + // without dropping the delegating implementations set in the factory. + vi.clearAllMocks(); for (const dir of [ testDir, join(testDir, ".agents"), @@ -117,6 +136,87 @@ describe("agent-skills", () => { ).toBe(false); }); + test("publishes every file via rename from a temp path (atomic write guard)", async () => { + mkdirSync(join(testDir, ".agents"), { recursive: true }); + + const result = await installAgentSkills(testDir); + expect(result).not.toBeNull(); + + // The whole point of the change: content is staged to a hidden `.tmp` + // file and then atomically renamed into place. If this regresses to a + // direct in-place `writeFile`, both assertions below fail — which is what + // makes this a real guard rather than a tautology. + const writeTargets = vi + .mocked(writeFile) + .mock.calls.map((call) => String(call[0])); + expect(writeTargets.length).toBeGreaterThan(0); + expect(writeTargets.every((target) => target.endsWith(".tmp"))).toBe( + true + ); + + const renameDestinations = vi + .mocked(rename) + .mock.calls.map((call) => String(call[1])); + expect(renameDestinations).toContain(result!.path); + }); + + test("leaves no temp files behind on success", async () => { + mkdirSync(join(testDir, ".agents"), { recursive: true }); + + const result = await installAgentSkills(testDir); + expect(result).not.toBeNull(); + + // A leftover `.tmp` would mean the rename/cleanup regressed. + const skillDir = join(testDir, ".agents", "skills", "sentry-cli"); + const leftovers = [ + ...readdirSync(skillDir), + ...readdirSync(join(skillDir, "references")), + ].filter((name) => name.endsWith(".tmp")); + expect(leftovers).toEqual([]); + }); + + test("rolls back cleanly when rename fails (no partial dest, no temp orphan)", async () => { + mkdirSync(join(testDir, ".agents"), { recursive: true }); + + // Force the first publish to fail at the rename step. The temp file has + // already been written at this point, so this exercises the catch/cleanup + // branch in atomicWriteFile (rm of the temp) and the null-returning + // failure handler in writeSkillFiles. + vi.mocked(rename).mockRejectedValueOnce( + new Error("simulated rename failure") + ); + + const result = await installAgentSkills(testDir); + + // A non-atomic in-place writer never calls rename, so forcing rename to + // fail would have no effect and the install would still succeed — this + // assertion only holds because the rename is on the critical path. + expect(result).toBeNull(); + + const skillDir = join(testDir, ".agents", "skills", "sentry-cli"); + const leftovers = readdirSync(skillDir).filter((name) => + name.endsWith(".tmp") + ); + expect(leftovers).toEqual([]); + expect(existsSync(join(skillDir, "SKILL.md"))).toBe(false); + }); + + test("still fails to null when temp cleanup also fails after a rename error", async () => { + mkdirSync(join(testDir, ".agents"), { recursive: true }); + + // Both the rename and the subsequent best-effort `rm` of the temp file + // fail. The cleanup error must be swallowed (captured, not thrown) so the + // original rename error is what propagates — exercising the inner + // catch of atomicWriteFile. + vi.mocked(rename).mockRejectedValueOnce(new Error("rename failed")); + vi.mocked(rm).mockRejectedValueOnce(new Error("cleanup failed")); + + const result = await installAgentSkills(testDir); + + expect(result).toBeNull(); + expect(vi.mocked(rm)).toHaveBeenCalled(); + }); + test("installs to both ~/.agents/ and ~/.claude/ when both roots exist", async () => { mkdirSync(join(testDir, ".agents"), { recursive: true }); mkdirSync(join(testDir, ".claude"), { recursive: true });