diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index cfed68de..56b6dce5 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -752,9 +752,8 @@ export async function runEvaluation( const hasSharedWorkspace = !!( useStaticWorkspace || - workspaceTemplate || - suiteWorkspace?.hooks || - (suiteWorkspace?.repos?.length && !isPerTestIsolation) + (!isPerTestIsolation && + (workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) ); // Pool support is mode-based: pooled enables, temp/static disable. @@ -834,7 +833,7 @@ export async function runEvaluation( setupLog(`reusing existing static workspace: ${configuredStaticPath}`); } sharedWorkspacePath = configuredStaticPath; - } else if (usePool && suiteWorkspace?.repos) { + } else if (!isPerTestIsolation && usePool && suiteWorkspace?.repos) { const slotsNeeded = workers; setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`); poolManager = new WorkspacePoolManager(getWorkspacePoolRoot()); @@ -862,7 +861,7 @@ export async function runEvaluation( // Multi-slot: tests will grab slots dynamically availablePoolSlots.push(...poolSlots); } - } else if (workspaceTemplate) { + } else if (!isPerTestIsolation && workspaceTemplate) { setupLog(`creating shared workspace from template: ${workspaceTemplate}`); try { sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, 'shared'); @@ -871,7 +870,7 @@ export async function runEvaluation( const message = error instanceof Error ? error.message : String(error); throw new Error(`Failed to create shared workspace: ${message}`); } - } else if (suiteWorkspace?.hooks || (suiteWorkspace?.repos?.length && !isPerTestIsolation)) { + } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) { // No template but hooks or repos are configured: create empty workspace sharedWorkspacePath = getWorkspacePath(evalRunId, 'shared'); await mkdir(sharedWorkspacePath, { recursive: true }); diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts index 7f492272..2f25f927 100644 --- a/packages/core/test/evaluation/orchestrator.test.ts +++ b/packages/core/test/evaluation/orchestrator.test.ts @@ -1754,7 +1754,7 @@ rl.on('close', () => { provider, target: baseTarget, evaluators: evaluatorRegistry, - evalRunId: 'test-run-reset-only-hook', + evalRunId: `test-run-reset-only-hook-${Date.now()}`, cleanupWorkspaces: true, }); @@ -2380,7 +2380,7 @@ describe('workspace.template .code-workspace resolution', () => { }, }; - const evalRunId = 'test-ws-resolve'; + const evalRunId = `test-ws-resolve-${Date.now()}`; const result = await runEvalCase({ evalCase, provider, @@ -2902,6 +2902,84 @@ describe('--workspace flag', () => { expect(results[0].beforeEachOutput).toBeDefined(); }); + it('creates per-test workspaces for hook-only suites when isolation is per_test', async () => { + const { mkdtemp, mkdir, writeFile, access: fsAccess } = await import('node:fs/promises'); + testDir = await mkdtemp(path.join(tmpdir(), 'agentv-per-test-hooks-')); + + const beforeAllScript = path.join(testDir, 'before-all.js'); + writeFileSync( + beforeAllScript, + `const fs = require('node:fs'); +const path = require('node:path'); +const payload = JSON.parse(fs.readFileSync(0, 'utf8')); +fs.mkdirSync(payload.workspace_path, { recursive: true }); +fs.writeFileSync(path.join(payload.workspace_path, 'hook.txt'), payload.test_id || 'unknown'); +`, + 'utf8', + ); + + const workspacesSeen: string[] = []; + + const provider: Provider = { + id: 'mock:per-test-hooks', + kind: 'mock', + targetName: 'mock', + async invoke(request: ProviderRequest): Promise { + if (!request.cwd) { + throw new Error('cwd was not provided'); + } + workspacesSeen.push(request.cwd); + writeFileSync(path.join(request.cwd, 'agent.txt'), 'answer\n'); + return { + output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }], + }; + }, + }; + + const workspaceConfig = { + isolation: 'per_test' as const, + hooks: { + before_all: { + command: [process.execPath, beforeAllScript], + }, + }, + }; + + const evalCases: EvalTest[] = [ + { + ...baseTestCase, + id: 'case-a', + workspace: workspaceConfig, + }, + { + ...baseTestCase, + id: 'case-b', + workspace: workspaceConfig, + }, + ]; + + const results = await runEvaluation({ + testFilePath: 'in-memory.yaml', + repoRoot: 'in-memory', + target: baseTarget, + providerFactory: () => provider, + evaluators: evaluatorRegistry, + evalCases, + keepWorkspaces: true, + cleanupWorkspaces: false, + retainOnSuccess: 'keep', + }); + + expect(results).toHaveLength(2); + expect(workspacesSeen).toHaveLength(2); + expect(workspacesSeen[0]).not.toContain(`${path.sep}shared`); + expect(workspacesSeen[1]).not.toContain(`${path.sep}shared`); + expect(workspacesSeen[0]).not.toBe(workspacesSeen[1]); + + await fsAccess(path.join(workspacesSeen[0], 'hook.txt')); + await fsAccess(path.join(workspacesSeen[1], 'hook.txt')); + }); + it('skips template copy and repo materialization when workspace provided', async () => { const { mkdtemp } = await import('node:fs/promises'); testDir = await mkdtemp(path.join(tmpdir(), 'agentv-ws-flag-'));