diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index a7244ff87..667cc1bac 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -962,6 +962,7 @@ export async function runEvaluation( testId: '__before_all__', evalRunId, evalDir, + workspaceFileDir: suiteWorkspace?.workspaceFileDir, }; try { beforeAllOutput = await executeWorkspaceScript( @@ -988,6 +989,7 @@ export async function runEvaluation( testId: '__before_all__', evalRunId, evalDir, + workspaceFileDir: suiteWorkspace?.workspaceFileDir, }; try { const output = await executeWorkspaceScript( @@ -1408,6 +1410,7 @@ export async function runEvaluation( testId: '__after_all__', evalRunId, evalDir, + workspaceFileDir: suiteWorkspace?.workspaceFileDir, }; try { const afterAllOutput = await executeWorkspaceScript( @@ -1859,6 +1862,7 @@ export async function runEvalCase(options: RunEvalCaseOptions): Promise; - /** Directory containing the eval YAML file. Used as default cwd. */ + /** Directory containing the eval YAML file. Used as fallback cwd. */ readonly evalDir?: string; + /** Directory containing the workspace file (when workspace is a file reference). + * Takes priority over evalDir as default cwd so that file-referenced templates + * resolve relative paths from their own directory. */ + readonly workspaceFileDir?: string; } export type ScriptFailureMode = 'fatal' | 'warn'; @@ -57,7 +61,7 @@ export async function executeWorkspaceScript( }); const timeoutMs = config.timeout_ms ?? (failureMode === 'fatal' ? 60000 : 30000); - const cwd = config.cwd ?? context.evalDir; + const cwd = config.cwd ?? context.workspaceFileDir ?? context.evalDir; // Support both command (canonical) and script (deprecated alias) if (config.script !== undefined && config.command === undefined) { diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 0a9332c3d..f98112aef 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -751,7 +751,7 @@ async function resolveWorkspaceConfig( const workspaceFileDir = path.dirname(workspaceFilePath); const resolvedWorkspace = parseWorkspaceConfig(parsed, workspaceFileDir); if (resolvedWorkspace) { - return resolvedWorkspace; + return { ...resolvedWorkspace, workspaceFileDir }; } const parsedObject = parsed as Record; @@ -882,6 +882,7 @@ function mergeWorkspaceConfigs( mode: caseLevel.mode ?? suiteLevel.mode, path: caseLevel.path ?? suiteLevel.path, docker: caseLevel.docker ?? suiteLevel.docker, + workspaceFileDir: caseLevel.workspaceFileDir ?? suiteLevel.workspaceFileDir, }; } diff --git a/packages/core/test/evaluation/workspace-config-parsing.test.ts b/packages/core/test/evaluation/workspace-config-parsing.test.ts index 5498e0077..24f4d3b54 100644 --- a/packages/core/test/evaluation/workspace-config-parsing.test.ts +++ b/packages/core/test/evaluation/workspace-config-parsing.test.ts @@ -557,6 +557,62 @@ tests: expect(cases[0].workspace?.template).toBe(path.join(wsDir, 'my-template')); // cwd resolved relative to workspace file dir expect(cases[0].workspace?.hooks?.before_all?.cwd).toBe(path.join(wsDir, 'scripts')); + // workspaceFileDir is set to the workspace file's directory + expect(cases[0].workspace?.workspaceFileDir).toBe(wsDir); + }); + + it('should set workspaceFileDir when workspace is a file reference', async () => { + const wsDir = path.join(testDir, 'wsfiledir-test'); + await mkdir(wsDir, { recursive: true }); + + const workspaceFile = path.join(wsDir, 'workspace.yaml'); + await writeFile( + workspaceFile, + ` +hooks: + before_all: + command: ["echo", "hello"] +`, + ); + + const evalFile = path.join(testDir, 'wsfiledir-eval.yaml'); + await writeFile( + evalFile, + ` +workspace: ./wsfiledir-test/workspace.yaml + +tests: + - id: wsfiledir-test-1 + input: "Do something" + criteria: "Should work" +`, + ); + + const cases = await loadTests(evalFile, testDir); + expect(cases).toHaveLength(1); + expect(cases[0].workspace?.workspaceFileDir).toBe(wsDir); + }); + + it('should not set workspaceFileDir for inline workspace config', async () => { + const evalFile = path.join(testDir, 'inline-workspace.yaml'); + await writeFile( + evalFile, + ` +workspace: + hooks: + before_all: + command: ["echo", "hello"] + +tests: + - id: inline-test-1 + input: "Do something" + criteria: "Should work" +`, + ); + + const cases = await loadTests(evalFile, testDir); + expect(cases).toHaveLength(1); + expect(cases[0].workspace?.workspaceFileDir).toBeUndefined(); }); it('should throw a clear error when workspace file is not found', async () => { diff --git a/packages/core/test/evaluation/workspace/script-executor.test.ts b/packages/core/test/evaluation/workspace/script-executor.test.ts index d9e5e0b24..e0d149c52 100644 --- a/packages/core/test/evaluation/workspace/script-executor.test.ts +++ b/packages/core/test/evaluation/workspace/script-executor.test.ts @@ -344,4 +344,69 @@ process.stdout.write(JSON.stringify(args)); await rm(explicitDir, { recursive: true, force: true }); } }); + + it('defaults cwd to workspaceFileDir over evalDir when workspace is a file reference', async () => { + const evalDir = path.join(tmpdir(), `agentv-evaldir-${randomUUID()}`); + const workspaceFileDir = path.join(tmpdir(), `agentv-wsfiledir-${randomUUID()}`); + await mkdir(evalDir, { recursive: true }); + await mkdir(workspaceFileDir, { recursive: true }); + + try { + const cwdScript = path.join(testDir, 'print-cwd3.js'); + await writeFile(cwdScript, 'process.stdout.write(process.cwd());'); + + const config: WorkspaceScriptConfig = { + command: ['node', cwdScript], + // No cwd — should default to workspaceFileDir, not evalDir + }; + + const context: ScriptExecutionContext = { + workspacePath: '/tmp/workspace', + testId: 'wsfiledir-default-test', + evalRunId: 'run-cwd-3', + evalDir, + workspaceFileDir, + }; + + const output = await executeWorkspaceScript(config, context, 'fatal'); + expect(output).toBe(workspaceFileDir); + } finally { + await rm(evalDir, { recursive: true, force: true }); + await rm(workspaceFileDir, { recursive: true, force: true }); + } + }); + + it('uses explicit cwd over workspaceFileDir', async () => { + const evalDir = path.join(tmpdir(), `agentv-evaldir-${randomUUID()}`); + const workspaceFileDir = path.join(tmpdir(), `agentv-wsfiledir-${randomUUID()}`); + const explicitDir = path.join(tmpdir(), `agentv-explicit-${randomUUID()}`); + await mkdir(evalDir, { recursive: true }); + await mkdir(workspaceFileDir, { recursive: true }); + await mkdir(explicitDir, { recursive: true }); + + try { + const cwdScript = path.join(testDir, 'print-cwd4.js'); + await writeFile(cwdScript, 'process.stdout.write(process.cwd());'); + + const config: WorkspaceScriptConfig = { + command: ['node', cwdScript], + cwd: explicitDir, // Explicit cwd should override both workspaceFileDir and evalDir + }; + + const context: ScriptExecutionContext = { + workspacePath: '/tmp/workspace', + testId: 'explicit-over-wsfiledir-test', + evalRunId: 'run-cwd-4', + evalDir, + workspaceFileDir, + }; + + const output = await executeWorkspaceScript(config, context, 'fatal'); + expect(output).toBe(explicitDir); + } finally { + await rm(evalDir, { recursive: true, force: true }); + await rm(workspaceFileDir, { recursive: true, force: true }); + await rm(explicitDir, { recursive: true, force: true }); + } + }); });