Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions packages/core/src/evaluation/orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -752,9 +752,8 @@ export async function runEvaluation(

const hasSharedWorkspace = !!(
useStaticWorkspace ||
workspaceTemplate ||
suiteWorkspace?.hooks ||
(suiteWorkspace?.repos?.length && !isPerTestIsolation)
(!isPerTestIsolation &&
(workspaceTemplate || suiteWorkspace?.hooks || suiteWorkspace?.repos?.length))
);

// Pool support is mode-based: pooled enables, temp/static disable.
Expand Down Expand Up @@ -834,7 +833,7 @@ export async function runEvaluation(
setupLog(`reusing existing static workspace: ${configuredStaticPath}`);
}
sharedWorkspacePath = configuredStaticPath;
} else if (usePool && suiteWorkspace?.repos) {
} else if (!isPerTestIsolation && usePool && suiteWorkspace?.repos) {
const slotsNeeded = workers;
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
Expand Down Expand Up @@ -862,7 +861,7 @@ export async function runEvaluation(
// Multi-slot: tests will grab slots dynamically
availablePoolSlots.push(...poolSlots);
}
} else if (workspaceTemplate) {
} else if (!isPerTestIsolation && workspaceTemplate) {
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
try {
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, 'shared');
Expand All @@ -871,7 +870,7 @@ export async function runEvaluation(
const message = error instanceof Error ? error.message : String(error);
throw new Error(`Failed to create shared workspace: ${message}`);
}
} else if (suiteWorkspace?.hooks || (suiteWorkspace?.repos?.length && !isPerTestIsolation)) {
} else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
// No template but hooks or repos are configured: create empty workspace
sharedWorkspacePath = getWorkspacePath(evalRunId, 'shared');
await mkdir(sharedWorkspacePath, { recursive: true });
Expand Down
82 changes: 80 additions & 2 deletions packages/core/test/evaluation/orchestrator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1754,7 +1754,7 @@ rl.on('close', () => {
provider,
target: baseTarget,
evaluators: evaluatorRegistry,
evalRunId: 'test-run-reset-only-hook',
evalRunId: `test-run-reset-only-hook-${Date.now()}`,
cleanupWorkspaces: true,
});

Expand Down Expand Up @@ -2380,7 +2380,7 @@ describe('workspace.template .code-workspace resolution', () => {
},
};

const evalRunId = 'test-ws-resolve';
const evalRunId = `test-ws-resolve-${Date.now()}`;
const result = await runEvalCase({
evalCase,
provider,
Expand Down Expand Up @@ -2902,6 +2902,84 @@ describe('--workspace flag', () => {
expect(results[0].beforeEachOutput).toBeDefined();
});

/**
 * Regression test: a suite that configures only workspace hooks (no template,
 * no repos) together with `isolation: 'per_test'` must hand every test case
 * its own workspace directory instead of a single shared one.
 *
 * The test verifies three things:
 *  - the provider is invoked once per case with a `cwd`,
 *  - neither `cwd` contains a `shared` path segment and the two differ,
 *  - the `before_all` hook wrote `hook.txt` into each of those workspaces.
 */
it('creates per-test workspaces for hook-only suites when isolation is per_test', async () => {
  // Only `mkdtemp` and `access` are needed here; the hook script itself is
  // written with the synchronous `writeFileSync`.
  const { mkdtemp, access: fsAccess } = await import('node:fs/promises');
  // `testDir` is declared outside this block — presumably removed by the
  // suite's teardown; confirm against the enclosing describe.
  testDir = await mkdtemp(path.join(tmpdir(), 'agentv-per-test-hooks-'));

  // Hook script: reads a JSON payload from stdin (fd 0), ensures the
  // `workspace_path` it names exists, and drops a `hook.txt` marker holding
  // the payload's `test_id` (or 'unknown' when absent).
  const beforeAllScript = path.join(testDir, 'before-all.js');
  writeFileSync(
    beforeAllScript,
    `const fs = require('node:fs');
const path = require('node:path');
const payload = JSON.parse(fs.readFileSync(0, 'utf8'));
fs.mkdirSync(payload.workspace_path, { recursive: true });
fs.writeFileSync(path.join(payload.workspace_path, 'hook.txt'), payload.test_id || 'unknown');
`,
    'utf8',
  );

  // Every working directory the provider is invoked with, in call order.
  const workspacesSeen: string[] = [];

  // Mock provider: records the cwd it receives, leaves an `agent.txt` file
  // there, and returns a fixed assistant answer.
  const provider: Provider = {
    id: 'mock:per-test-hooks',
    kind: 'mock',
    targetName: 'mock',
    async invoke(request: ProviderRequest): Promise<ProviderResponse> {
      if (!request.cwd) {
        throw new Error('cwd was not provided');
      }
      workspacesSeen.push(request.cwd);
      writeFileSync(path.join(request.cwd, 'agent.txt'), 'answer\n');
      return {
        output: [{ role: 'assistant', content: [{ type: 'text', text: 'answer' }] }],
      };
    },
  };

  // Hook-only workspace config: no template and no repos, per-test isolation.
  const workspaceConfig = {
    isolation: 'per_test' as const,
    hooks: {
      before_all: {
        command: [process.execPath, beforeAllScript],
      },
    },
  };

  // Two cases sharing the same workspace config, so any accidental sharing
  // of a workspace directory between them would be observable.
  const evalCases: EvalTest[] = [
    {
      ...baseTestCase,
      id: 'case-a',
      workspace: workspaceConfig,
    },
    {
      ...baseTestCase,
      id: 'case-b',
      workspace: workspaceConfig,
    },
  ];

  // Workspaces are kept after the run so the marker files can be inspected.
  const results = await runEvaluation({
    testFilePath: 'in-memory.yaml',
    repoRoot: 'in-memory',
    target: baseTarget,
    providerFactory: () => provider,
    evaluators: evaluatorRegistry,
    evalCases,
    keepWorkspaces: true,
    cleanupWorkspaces: false,
    retainOnSuccess: 'keep',
  });

  expect(results).toHaveLength(2);
  expect(workspacesSeen).toHaveLength(2);
  // Neither case may have run inside a directory under a `shared` segment.
  expect(workspacesSeen[0]).not.toContain(`${path.sep}shared`);
  expect(workspacesSeen[1]).not.toContain(`${path.sep}shared`);
  expect(workspacesSeen[0]).not.toBe(workspacesSeen[1]);

  // `access` rejects when the file is missing, failing the test if the hook
  // did not write its marker into either workspace.
  await fsAccess(path.join(workspacesSeen[0], 'hook.txt'));
  await fsAccess(path.join(workspacesSeen[1], 'hook.txt'));
});

it('skips template copy and repo materialization when workspace provided', async () => {
const { mkdtemp } = await import('node:fs/promises');
testDir = await mkdtemp(path.join(tmpdir(), 'agentv-ws-flag-'));
Expand Down
Loading