diff --git a/apps/cli/src/commands/validate/validate-files.ts b/apps/cli/src/commands/validate/validate-files.ts index b00c27b47..f3840c76b 100644 --- a/apps/cli/src/commands/validate/validate-files.ts +++ b/apps/cli/src/commands/validate/validate-files.ts @@ -9,6 +9,7 @@ import { validateEvalFile, validateFileReferences, validateTargetsFile, + validateWorkspacePaths, } from '@agentv/core/evaluation/validation'; import fg from 'fast-glob'; @@ -47,14 +48,18 @@ async function validateSingleFile(filePath: string): Promise { if (fileType === 'eval') { result = await validateEvalFile(absolutePath); - // Also validate file references for eval files + // Also validate file references and workspace paths for eval files if (result.valid || result.errors.filter((e) => e.severity === 'error').length === 0) { - const fileRefErrors = await validateFileReferences(absolutePath); - if (fileRefErrors.length > 0) { + const [fileRefErrors, workspaceErrors] = await Promise.all([ + validateFileReferences(absolutePath), + validateWorkspacePaths(absolutePath), + ]); + const extraErrors = [...fileRefErrors, ...workspaceErrors]; + if (extraErrors.length > 0) { result = { ...result, - errors: [...result.errors, ...fileRefErrors], - valid: result.valid && fileRefErrors.filter((e) => e.severity === 'error').length === 0, + errors: [...result.errors, ...extraErrors], + valid: result.valid && extraErrors.filter((e) => e.severity === 'error').length === 0, }; } } diff --git a/packages/core/src/evaluation/validation/index.ts b/packages/core/src/evaluation/validation/index.ts index 9347974f3..82286775b 100644 --- a/packages/core/src/evaluation/validation/index.ts +++ b/packages/core/src/evaluation/validation/index.ts @@ -7,6 +7,7 @@ export { validateEvalFile } from './eval-validator.js'; export { validateTargetsFile } from './targets-validator.js'; export { validateConfigFile } from './config-validator.js'; export { validateFileReferences } from './file-reference-validator.js'; +export { validateWorkspacePaths } from './workspace-path-validator.js'; export type { FileType, ValidationSeverity, diff --git a/packages/core/src/evaluation/validation/workspace-path-validator.ts b/packages/core/src/evaluation/validation/workspace-path-validator.ts new file mode 100644 index 000000000..a58795682 --- /dev/null +++ b/packages/core/src/evaluation/validation/workspace-path-validator.ts @@ -0,0 +1,166 @@ +import { access, readFile } from 'node:fs/promises'; +import path from 'node:path'; +import { parse } from 'yaml'; + +import type { ValidationError } from './types.js'; + +type JsonValue = string | number | boolean | null | JsonObject | JsonArray; +type JsonObject = { readonly [key: string]: JsonValue }; +type JsonArray = readonly JsonValue[]; + +function isObject(value: unknown): value is JsonObject { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +/** + * Validate that workspace template and hook script paths in eval files exist. + * + * Catches two classes of errors that surface as `setup_error` at runtime but are + * detectable statically: + * + * 1. `workspace.template` — the template path must exist. + * 2. `workspace.hooks.*.command` — script arguments that look like relative file + * paths (start with `./`/`../` or carry a script extension) must resolve to + * existing files using the same cwd precedence the runtime uses: + * `hook.cwd ?? workspaceFileDir ?? evalDir` + * + * When `workspace` is a string path to an external file, that file is also read + * and its template/hook paths are checked relative to the workspace file's directory. + * (The workspace file's existence itself is already checked by eval-validator.) + */ +export async function validateWorkspacePaths( + evalFilePath: string, +): Promise { + const errors: ValidationError[] = []; + const absolutePath = path.resolve(evalFilePath); + const evalDir = path.dirname(absolutePath); + + let parsed: unknown; + try { + const content = await readFile(absolutePath, 'utf8'); + parsed = parse(content); + } catch { + // Parse errors are already caught by eval-validator + return errors; + } + + if (!isObject(parsed)) return errors; + + const workspaceRaw = parsed.workspace; + if (workspaceRaw === undefined || workspaceRaw === null) return errors; + + if (typeof workspaceRaw === 'string') { + // External workspace file — existence is already checked by eval-validator. + // Read and check template/hook paths inside the external file. + const workspaceFilePath = path.resolve(evalDir, workspaceRaw); + try { + const wsContent = await readFile(workspaceFilePath, 'utf8'); + const wsParsed = parse(wsContent); + if (isObject(wsParsed)) { + const wsDir = path.dirname(workspaceFilePath); + await validateWorkspaceObject(wsParsed, wsDir, absolutePath, 'workspace', errors); + } + } catch { + // File missing or invalid YAML — eval-validator already reports this + } + } else if (isObject(workspaceRaw)) { + await validateWorkspaceObject(workspaceRaw, evalDir, absolutePath, 'workspace', errors); + } + + return errors; +} + +async function validateWorkspaceObject( + obj: JsonObject, + baseDir: string, + evalFilePath: string, + location: string, + errors: ValidationError[], +): Promise { + // Check template path + const template = obj.template; + if (typeof template === 'string') { + const templatePath = path.isAbsolute(template) ? template : path.resolve(baseDir, template); + if (!(await fileExists(templatePath))) { + errors.push({ + severity: 'error', + filePath: evalFilePath, + location: `${location}.template`, + message: `Template path not found: ${template} (resolved to ${templatePath})`, + }); + } + } + + // Check hook script paths + const hooks = obj.hooks; + if (!isObject(hooks)) return; + + for (const hookName of ['before_all', 'before_each', 'after_each', 'after_all'] as const) { + const hook = hooks[hookName]; + if (!isObject(hook)) continue; + + // Resolve hook cwd the same way yaml-parser does at parse time: + // config.cwd (resolved against baseDir) ?? baseDir + // baseDir = workspaceFileDir for external workspace files, evalDir for inline. + const hookCwdRaw = typeof hook.cwd === 'string' ? hook.cwd : undefined; + const hookCwd = hookCwdRaw + ? path.isAbsolute(hookCwdRaw) + ? hookCwdRaw + : path.resolve(baseDir, hookCwdRaw) + : baseDir; + + // Support both `command` (canonical) and `script` (deprecated alias) + const command = hook.command ?? hook.script; + if (!Array.isArray(command)) continue; + + for (let i = 0; i < command.length; i++) { + const arg = command[i]; + if (typeof arg !== 'string') continue; + if (!looksLikeFilePath(arg)) continue; + + const resolved = path.isAbsolute(arg) ? arg : path.resolve(hookCwd, arg); + if (!(await fileExists(resolved))) { + errors.push({ + severity: 'error', + filePath: evalFilePath, + location: `${location}.hooks.${hookName}.command[${i}]`, + message: `Hook script not found: ${arg} (resolved to ${resolved})`, + }); + } + } + } +} + +/** + * Heuristic: does this command argument look like a file path rather than a + * system binary name? + * + * Detects: + * - Explicit relative paths: `./foo`, `../bar/baz` + * - Script-extension arguments: `setup.mjs`, `scripts/init.sh` + */ +function looksLikeFilePath(arg: string): boolean { + if (arg.startsWith('./') || arg.startsWith('../')) return true; + const scriptExtensions = [ + '.mjs', + '.cjs', + '.js', + '.ts', + '.sh', + '.bash', + '.zsh', + '.py', + '.rb', + '.pl', + ]; + return scriptExtensions.some((ext) => arg.endsWith(ext)); +} + +async function fileExists(filePath: string): Promise { + try { + await access(filePath); + return true; + } catch { + return false; + } +} diff --git a/packages/core/test/evaluation/validation/workspace-path-validator.test.ts b/packages/core/test/evaluation/validation/workspace-path-validator.test.ts new file mode 100644 index 000000000..7735b528e --- /dev/null +++ b/packages/core/test/evaluation/validation/workspace-path-validator.test.ts @@ -0,0 +1,234 @@ +import { afterAll, beforeAll, describe, expect, it } from 'bun:test'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +import { validateWorkspacePaths } from '../../../src/evaluation/validation/workspace-path-validator.js'; + +const minimalEvalPrefix = `tests: + - id: t1 + criteria: Goal + input: hello +`; + +describe('validateWorkspacePaths', () => { + let tempDir: string; + + beforeAll(async () => { + tempDir = path.join(os.tmpdir(), `agentv-test-workspace-${Date.now()}`); + await mkdir(tempDir, { recursive: true }); + }); + + afterAll(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('returns no errors when workspace field is absent', async () => { + const filePath = path.join(tempDir, 'no-workspace.yaml'); + await writeFile(filePath, minimalEvalPrefix); + const errors = await validateWorkspacePaths(filePath); + expect(errors).toHaveLength(0); + }); + + it('returns no errors when workspace file reference exists (with no internal paths)', async () => { + const wsFilePath = path.join(tempDir, 'workspace.yaml'); + await writeFile(wsFilePath, 'template: ~\n'); + + const evalFilePath = path.join(tempDir, 'eval-ws-ref-ok.yaml'); + await writeFile(evalFilePath, `${minimalEvalPrefix}workspace: workspace.yaml\n`); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('returns no errors when workspace file is missing (eval-validator owns that check)', async () => { + const evalFilePath = path.join(tempDir, 'eval-ws-ref-missing.yaml'); + await writeFile(evalFilePath, `${minimalEvalPrefix}workspace: missing-workspace.yaml\n`); + + // eval-validator already catches the missing file error; this validator silently skips + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('errors when workspace.template does not exist (inline workspace)', async () => { + const evalFilePath = path.join(tempDir, 'eval-inline-template.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace:\n template: nonexistent-template\n`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(1); + expect(errors[0]?.severity).toBe('error'); + expect(errors[0]?.location).toBe('workspace.template'); + expect(errors[0]?.message).toContain('Template path not found'); + }); + + it('returns no errors when workspace.template exists', async () => { + const templateDir = path.join(tempDir, 'my-template'); + await mkdir(templateDir, { recursive: true }); + + const evalFilePath = path.join(tempDir, 'eval-template-ok.yaml'); + await writeFile(evalFilePath, `${minimalEvalPrefix}workspace:\n template: my-template\n`); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('errors when hook before_all command has a missing relative script', async () => { + const evalFilePath = path.join(tempDir, 'eval-hook-missing.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + before_all: + command: + - node + - ../../scripts/setup.mjs +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(1); + expect(errors[0]?.severity).toBe('error'); + expect(errors[0]?.location).toBe('workspace.hooks.before_all.command[1]'); + expect(errors[0]?.message).toContain('setup.mjs'); + }); + + it('returns no errors when hook script exists at resolved path', async () => { + const scriptsDir = path.join(tempDir, 'scripts'); + await mkdir(scriptsDir, { recursive: true }); + const setupScript = path.join(scriptsDir, 'setup.mjs'); + await writeFile(setupScript, 'console.log("setup");'); + + const evalFilePath = path.join(tempDir, 'eval-hook-ok.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + before_all: + command: + - node + - ./scripts/setup.mjs +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('does not flag system binaries (no extension, no relative prefix)', async () => { + const evalFilePath = path.join(tempDir, 'eval-system-binary.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + before_all: + command: + - bash + - -c + - echo hello +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('checks hooks inside external workspace file', async () => { + const wsFilePath = path.join(tempDir, 'ws-with-hooks.yaml'); + await writeFile( + wsFilePath, + `hooks: + before_all: + command: + - node + - ./missing-setup.mjs +`, + ); + + const evalFilePath = path.join(tempDir, 'eval-ws-hooks.yaml'); + await writeFile(evalFilePath, `${minimalEvalPrefix}workspace: ws-with-hooks.yaml\n`); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(1); + expect(errors[0]?.message).toContain('missing-setup.mjs'); + }); + + it('checks template inside external workspace file', async () => { + const wsFilePath = path.join(tempDir, 'ws-with-template.yaml'); + await writeFile(wsFilePath, 'template: ./missing-template\n'); + + const evalFilePath = path.join(tempDir, 'eval-ws-template.yaml'); + await writeFile(evalFilePath, `${minimalEvalPrefix}workspace: ws-with-template.yaml\n`); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(1); + expect(errors[0]?.location).toBe('workspace.template'); + expect(errors[0]?.message).toContain('missing-template'); + }); + + it('respects hook cwd for script path resolution', async () => { + const subDir = path.join(tempDir, 'sub'); + await mkdir(subDir, { recursive: true }); + const scriptPath = path.join(subDir, 'run.sh'); + await writeFile(scriptPath, '#!/bin/bash\necho run'); + + const evalFilePath = path.join(tempDir, 'eval-hook-cwd.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + before_all: + cwd: ./sub + command: + - bash + - ./run.sh +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(0); + }); + + it('checks after_each and after_all hooks too', async () => { + const evalFilePath = path.join(tempDir, 'eval-other-hooks.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + after_each: + command: + - node + - ./missing-after-each.mjs + after_all: + command: + - node + - ./missing-after-all.mjs +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(2); + expect(errors.some((e) => e.message.includes('missing-after-each.mjs'))).toBe(true); + expect(errors.some((e) => e.message.includes('missing-after-all.mjs'))).toBe(true); + }); + + it('supports deprecated script alias', async () => { + const evalFilePath = path.join(tempDir, 'eval-script-alias.yaml'); + await writeFile( + evalFilePath, + `${minimalEvalPrefix}workspace: + hooks: + before_all: + script: + - node + - ./missing-via-alias.mjs +`, + ); + + const errors = await validateWorkspacePaths(evalFilePath); + expect(errors).toHaveLength(1); + expect(errors[0]?.message).toContain('missing-via-alias.mjs'); + }); +});