From cad060ddd5b25fc7f05247b04e605956abb54623 Mon Sep 17 00:00:00 2001 From: Christopher Date: Tue, 7 Apr 2026 22:43:16 +0000 Subject: [PATCH 1/5] feat(core): add content preprocessors for llm graders --- .../src/evaluation/content-preprocessor.ts | 208 ++ .../src/evaluation/evaluators/llm-grader.ts | 43 +- .../evaluation/loaders/evaluator-parser.ts | 116 +- packages/core/src/evaluation/types.ts | 13 + .../evaluation/validation/eval-file.schema.ts | 9 + packages/core/src/evaluation/yaml-parser.ts | 9 + .../evaluation/content-preprocessor.test.ts | 95 + .../evaluation/llm-grader-multimodal.test.ts | 63 +- .../evaluation/preprocessors-yaml.test.ts | 55 + .../references/eval-schema.json | 3049 ++++++++++++++--- 10 files changed, 3193 insertions(+), 467 deletions(-) create mode 100644 packages/core/src/evaluation/content-preprocessor.ts create mode 100644 packages/core/test/evaluation/content-preprocessor.test.ts create mode 100644 packages/core/test/evaluation/preprocessors-yaml.test.ts diff --git a/packages/core/src/evaluation/content-preprocessor.ts b/packages/core/src/evaluation/content-preprocessor.ts new file mode 100644 index 000000000..a718a85b7 --- /dev/null +++ b/packages/core/src/evaluation/content-preprocessor.ts @@ -0,0 +1,208 @@ +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { execFileWithStdin } from '../runtime/exec.js'; +import type { Content, ContentFile } from './content.js'; +import type { ContentPreprocessorConfig } from './types.js'; + +const MIME_TYPE_ALIASES: Record = { + csv: 'text/csv', + docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + htm: 'text/html', + html: 'text/html', + json: 'application/json', + markdown: 'text/markdown', + md: 'text/markdown', + pdf: 'application/pdf', + sql: 'application/sql', + txt: 'text/plain', + xhtml: 'application/xhtml+xml', + xls: 'application/vnd.ms-excel', + xlsx: 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + xml: 'application/xml', + yaml: 'application/yaml', + yml: 'application/yaml', +}; + +const REPLACEMENT_CHAR = '\ufffd'; + +export interface FilePreprocessingWarning { + readonly file: string; + readonly mediaType: string; + readonly reason: string; +} + +export interface ExtractedContentText { + readonly text: string; + readonly warnings: readonly FilePreprocessingWarning[]; +} + +export async function extractTextWithPreprocessors( + content: string | readonly Content[] | undefined, + preprocessors: readonly ContentPreprocessorConfig[] | undefined, +): Promise { + if (typeof content === 'string') { + return { text: content, warnings: [] }; + } + if (!content || content.length === 0) { + return { text: '', warnings: [] }; + } + + const parts: string[] = []; + const warnings: FilePreprocessingWarning[] = []; + + for (const block of content) { + if (block.type === 'text') { + parts.push(block.text); + continue; + } + if (block.type !== 'file') { + continue; + } + + const result = await preprocessContentFile(block, preprocessors); + if (result.text) { + parts.push(result.text); + } + warnings.push(...result.warnings); + } + + return { text: parts.join('\n'), warnings }; +} + +async function preprocessContentFile( + block: ContentFile, + preprocessors: readonly ContentPreprocessorConfig[] | undefined, +): Promise { + const mediaType = normalizePreprocessorType(block.media_type); + const resolvedPath = resolveLocalFilePath(block.path); + + if (!resolvedPath) { + return { + text: '', + warnings: [ + { + file: block.path, + mediaType: block.media_type, + reason: 'remote file paths are not supported for preprocessing', + }, + ], + }; + } + + const preprocessor = preprocessors?.find( + (entry) => normalizePreprocessorType(entry.type) === mediaType, + ); + if (preprocessor) { + return runContentPreprocessor(block, resolvedPath, preprocessor); + } + + try { + const buffer = await readFile(resolvedPath); + 
const text = buffer.toString('utf8').replace(/\r\n/g, '\n'); + if (buffer.includes(0) || text.includes(REPLACEMENT_CHAR)) { + return { + text: '', + warnings: [ + { + file: block.path, + mediaType: block.media_type, + reason: 'default UTF-8 read produced binary or invalid text; configure a preprocessor', + }, + ], + }; + } + return { text: formatFileText(block.path, text), warnings: [] }; + } catch (error) { + return { + text: '', + warnings: [ + { + file: block.path, + mediaType: block.media_type, + reason: error instanceof Error ? error.message : String(error), + }, + ], + }; + } +} + +async function runContentPreprocessor( + block: ContentFile, + resolvedPath: string, + preprocessor: ContentPreprocessorConfig, +): Promise { + try { + const argv = preprocessor.resolvedCommand ?? preprocessor.command; + const { stdout, stderr, exitCode } = await execFileWithStdin( + argv, + JSON.stringify({ + path: resolvedPath, + original_path: block.path, + media_type: block.media_type, + }), + ); + + if (exitCode !== 0) { + return { + text: '', + warnings: [ + { + file: block.path, + mediaType: block.media_type, + reason: stderr.trim() || `preprocessor exited with code ${exitCode}`, + }, + ], + }; + } + + return { text: formatFileText(block.path, stdout.trim()), warnings: [] }; + } catch (error) { + return { + text: '', + warnings: [ + { + file: block.path, + mediaType: block.media_type, + reason: error instanceof Error ? 
error.message : String(error), + }, + ], + }; + } +} + +export function appendPreprocessingWarnings( + text: string, + warnings: readonly FilePreprocessingWarning[], +): string { + if (warnings.length === 0) { + return text; + } + + const notes = warnings.map( + (warning) => + `[file preprocessing warning] ${warning.file} (${warning.mediaType}): ${warning.reason}`, + ); + + return [text, ...notes].filter((part) => part.length > 0).join('\n'); +} + +export function normalizePreprocessorType(value: string): string { + const normalized = value.trim().toLowerCase(); + return MIME_TYPE_ALIASES[normalized] ?? normalized; +} + +function resolveLocalFilePath(value: string): string | undefined { + if (value.startsWith('file://')) { + return fileURLToPath(value); + } + if (/^[a-z]+:\/\//i.test(value)) { + return undefined; + } + return path.resolve(value); +} + +function formatFileText(filePath: string, text: string): string { + return `[[ file: ${filePath} ]]\n${text}`; +} diff --git a/packages/core/src/evaluation/evaluators/llm-grader.ts b/packages/core/src/evaluation/evaluators/llm-grader.ts index d53599358..b64440779 100644 --- a/packages/core/src/evaluation/evaluators/llm-grader.ts +++ b/packages/core/src/evaluation/evaluators/llm-grader.ts @@ -4,6 +4,10 @@ import path from 'node:path'; import { generateText, stepCountIs, tool } from 'ai'; import { z } from 'zod'; +import { + appendPreprocessingWarnings, + extractTextWithPreprocessors, +} from '../content-preprocessor.js'; import type { ContentImage } from '../content.js'; import { isContentArray } from '../content.js'; import type { Message, Provider, ProviderResponse } from '../providers/types.js'; @@ -172,33 +176,58 @@ export class LlmGraderEvaluator implements Evaluator { } async evaluate(context: EvaluationContext): Promise { + const preparedContext = await this.prepareContext(context); + // Delegate mode: grader target provider is an agent provider — send prompt via invoke() if (this.graderTargetProvider) { - 
return this.evaluateWithGraderTarget(context); + return this.evaluateWithGraderTarget(preparedContext); } - const graderProvider = await this.resolveGraderProvider(context); + const graderProvider = await this.resolveGraderProvider(preparedContext); if (!graderProvider) { throw new Error('No grader provider available for LLM grading'); } // Built-in agent mode: agentv provider → AI SDK generateText with filesystem tools if (graderProvider.kind === 'agentv') { - return this.evaluateBuiltIn(context, graderProvider); + return this.evaluateBuiltIn(preparedContext, graderProvider); } // Delegate mode: resolved provider is an agent provider → send prompt via invoke() if (isAgentProvider(graderProvider)) { - return this.evaluateWithDelegatedAgent(context, graderProvider); + return this.evaluateWithDelegatedAgent(preparedContext, graderProvider); } // LLM mode: structured JSON evaluation - const config = context.evaluator; + const config = preparedContext.evaluator; if (config?.type === 'llm-grader' && config.rubrics && config.rubrics.length > 0) { - return this.evaluateWithRubrics(context, graderProvider, config.rubrics); + return this.evaluateWithRubrics(preparedContext, graderProvider, config.rubrics); + } + + return this.evaluateFreeform(preparedContext, graderProvider); + } + + private async prepareContext(context: EvaluationContext): Promise { + const config = context.evaluator; + if (config?.type !== 'llm-grader' || !context.output) { + return context; + } + + const lastAssistant = [...context.output] + .reverse() + .find((message) => message.role === 'assistant' && message.content !== undefined); + if (!lastAssistant || typeof lastAssistant.content === 'string') { + return context; } - return this.evaluateFreeform(context, graderProvider); + const extracted = await extractTextWithPreprocessors( + lastAssistant.content, + config.preprocessors, + ); + return { + ...context, + candidate: appendPreprocessingWarnings(extracted.text, extracted.warnings), + }; } // 
--------------------------------------------------------------------------- diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts index 76cfae0e7..a98a7b760 100644 --- a/packages/core/src/evaluation/loaders/evaluator-parser.ts +++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts @@ -1,7 +1,13 @@ import path from 'node:path'; import type { ToolTrajectoryEvaluatorConfig, ToolTrajectoryExpectedItem } from '../trace.js'; -import type { EvaluatorConfig, EvaluatorKind, JsonObject, JsonValue } from '../types.js'; +import type { + ContentPreprocessorConfig, + EvaluatorConfig, + EvaluatorKind, + JsonObject, + JsonValue, +} from '../types.js'; import { isEvaluatorKind } from '../types.js'; import { validateCustomPromptContent } from '../validation/prompt-validator.js'; import { resolveFileReference } from './file-resolver.js'; @@ -48,6 +54,7 @@ export async function parseEvaluators( globalExecution: JsonObject | undefined, searchRoots: readonly string[], evalId: string, + defaultPreprocessors?: readonly ContentPreprocessorConfig[], ): Promise { const execution = rawEvalCase.execution; const executionObject = isJsonObject(execution) ? execution : undefined; @@ -66,9 +73,19 @@ export async function parseEvaluators( : (globalExecution?.assertions ?? globalExecution?.assert ?? 
globalExecution?.evaluators); // deprecated: use assertions // Parse case-level evaluators - const parsedCase = await parseEvaluatorList(caseEvaluators, searchRoots, evalId); + const parsedCase = await parseEvaluatorList( + caseEvaluators, + searchRoots, + evalId, + defaultPreprocessors, + ); // Parse root-level evaluators (appended after case-level) - const parsedRoot = await parseEvaluatorList(rootEvaluators, searchRoots, evalId); + const parsedRoot = await parseEvaluatorList( + rootEvaluators, + searchRoots, + evalId, + defaultPreprocessors, + ); if (!parsedCase && !parsedRoot) { return undefined; @@ -87,6 +104,7 @@ async function parseEvaluatorList( candidateEvaluators: JsonValue | undefined, searchRoots: readonly string[], evalId: string, + defaultPreprocessors?: readonly ContentPreprocessorConfig[], ): Promise { if (candidateEvaluators === undefined) { return undefined; @@ -175,6 +193,13 @@ async function parseEvaluatorList( } const negate = rawEvaluator.negate === true ? true : undefined; + const mergedPreprocessors = await parseMergedPreprocessors( + rawEvaluator.preprocessors as JsonValue | undefined, + defaultPreprocessors, + searchRoots, + name, + evalId, + ); // Custom assertion types — store with their type name for registry dispatch if (isCustomType) { @@ -297,6 +322,7 @@ async function parseEvaluatorList( 'cwd', 'weight', 'target', + 'preprocessors', 'required', 'negate', ]); @@ -318,6 +344,7 @@ async function parseEvaluatorList( ...(min_score !== undefined ? { min_score } : {}), ...(negate !== undefined ? { negate } : {}), ...(Object.keys(config).length > 0 ? { config } : {}), + ...(mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}), ...(targetConfig !== undefined ? { target: targetConfig } : {}), }); continue; @@ -1236,6 +1263,7 @@ async function parseEvaluatorList( ...(required !== undefined ? { required } : {}), ...(min_score !== undefined ? { min_score } : {}), ...(negate !== undefined ? 
{ negate } : {}), + ...(mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}), }); continue; } @@ -1346,6 +1374,7 @@ async function parseEvaluatorList( ...(required !== undefined ? { required } : {}), ...(min_score !== undefined ? { min_score } : {}), ...(negate !== undefined ? { negate } : {}), + ...(mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}), }); continue; } @@ -1375,6 +1404,7 @@ async function parseEvaluatorList( 'max_steps', 'maxSteps', 'temperature', + 'preprocessors', ]); const config: Record = {}; for (const [key, value] of Object.entries(rawEvaluator)) { @@ -1422,12 +1452,92 @@ async function parseEvaluatorList( ...(finalConfig ? { config: finalConfig } : {}), ...(llmMaxSteps !== undefined ? { max_steps: llmMaxSteps } : {}), ...(llmTemperature !== undefined ? { temperature: llmTemperature } : {}), + ...(mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}), }); } return evaluators.length > 0 ? evaluators : undefined; } +async function parseMergedPreprocessors( + rawValue: JsonValue | undefined, + defaultPreprocessors: readonly ContentPreprocessorConfig[] | undefined, + searchRoots: readonly string[], + evaluatorName: string, + evalId: string, +): Promise { + const parsedDefaults = defaultPreprocessors ?? []; + const parsedOverrides = await parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId); + + if (parsedDefaults.length === 0 && (!parsedOverrides || parsedOverrides.length === 0)) { + return undefined; + } + + const merged = new Map(); + for (const entry of parsedDefaults) { + merged.set(entry.type.toLowerCase(), entry); + } + for (const entry of parsedOverrides ?? 
[]) { + merged.set(entry.type.toLowerCase(), entry); + } + + return [...merged.values()]; +} + +export async function parsePreprocessors( + rawValue: JsonValue | undefined, + searchRoots: readonly string[], + evaluatorName: string, + evalId: string, +): Promise { + if (rawValue === undefined) { + return undefined; + } + if (!Array.isArray(rawValue)) { + throw new Error(`Evaluator '${evaluatorName}' in '${evalId}': preprocessors must be an array`); + } + + const preprocessors: ContentPreprocessorConfig[] = []; + for (const rawEntry of rawValue) { + if (!isJsonObject(rawEntry)) { + throw new Error( + `Evaluator '${evaluatorName}' in '${evalId}': each preprocessor must be an object`, + ); + } + + const type = asString(rawEntry.type)?.trim(); + if (!type) { + throw new Error(`Evaluator '${evaluatorName}' in '${evalId}': preprocessor.type is required`); + } + + const command = asStringArray( + rawEntry.command, + `preprocessor command for evaluator '${evaluatorName}' in '${evalId}'`, + ); + if (!command || command.length === 0) { + throw new Error( + `Evaluator '${evaluatorName}' in '${evalId}': preprocessor '${type}' requires command`, + ); + } + + const commandPath = command[command.length - 1]; + const resolved = await resolveFileReference(commandPath, searchRoots); + if (!resolved.resolvedPath) { + throw new Error( + `Evaluator '${evaluatorName}' in '${evalId}': preprocessor command file not found: ${resolved.displayPath}`, + ); + } + + preprocessors.push({ + type, + command, + resolvedCommand: [...command.slice(0, -1), path.resolve(resolved.resolvedPath)], + }); + } + + return preprocessors; +} + /** Assertion evaluator types that support auto-generated names. 
*/ const ASSERTION_TYPES = new Set([ 'skill-trigger', diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts index fb4a83ba7..d5f3bcfa7 100644 --- a/packages/core/src/evaluation/types.ts +++ b/packages/core/src/evaluation/types.ts @@ -311,6 +311,8 @@ export type CodeEvaluatorConfig = { readonly config?: JsonObject; /** When present, enables target access via local proxy */ readonly target?: TargetAccessConfig; + /** Optional content preprocessors inherited from suite/evaluator config */ + readonly preprocessors?: readonly ContentPreprocessorConfig[]; }; /** @@ -326,6 +328,15 @@ export type PromptScriptConfig = { readonly config?: Record; }; +export type ContentPreprocessorConfig = { + /** MIME type or short alias such as "xlsx" or "html" */ + readonly type: string; + /** Command array to execute (stdin JSON payload -> stdout text) */ + readonly command: readonly string[]; + /** Resolved absolute path for the command script (last argv element) */ + readonly resolvedCommand?: readonly string[]; +}; + export type LlmGraderEvaluatorConfig = { readonly name: string; readonly type: 'llm-grader'; @@ -351,6 +362,8 @@ export type LlmGraderEvaluatorConfig = { readonly max_steps?: number; /** Temperature override for grader calls */ readonly temperature?: number; + /** Optional content preprocessors for ContentFile blocks in assistant output */ + readonly preprocessors?: readonly ContentPreprocessorConfig[]; }; /** @deprecated Use `LlmGraderEvaluatorConfig` instead */ diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index 0e1fef17d..e0af2feee 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -55,6 +55,11 @@ const PromptSchema = z.union([ }), ]); +const PreprocessorSchema = z.object({ + type: z.string().min(1), + command: z.union([z.string(), z.array(z.string())]), 
+}); + /** Score range for analytic rubrics */ const ScoreRangeSchema = z.object({ score_range: z.tuple([z.number().int().min(0).max(10), z.number().int().min(0).max(10)]), @@ -81,6 +86,7 @@ const CodeGraderSchema = EvaluatorCommonSchema.extend({ cwd: z.string().optional(), target: z.union([z.boolean(), z.object({ max_calls: z.number().optional() })]).optional(), config: z.record(z.unknown()).optional(), + preprocessors: z.array(PreprocessorSchema).optional(), }); const LlmGraderSchema = EvaluatorCommonSchema.extend({ @@ -92,6 +98,7 @@ const LlmGraderSchema = EvaluatorCommonSchema.extend({ config: z.record(z.unknown()).optional(), max_steps: z.number().int().min(1).max(50).optional(), temperature: z.number().min(0).max(2).optional(), + preprocessors: z.array(PreprocessorSchema).optional(), }); /** Aggregator configs for composite evaluator */ @@ -383,6 +390,8 @@ export const EvalFileSchema = z.object({ execution: ExecutionSchema.optional(), // Suite-level assertions assertions: z.array(EvaluatorSchema).optional(), + // Suite-level content preprocessors shared by evaluators + preprocessors: z.array(PreprocessorSchema).optional(), // Workspace (inline object or path to external workspace YAML file) workspace: z.union([WorkspaceSchema, z.string()]).optional(), }); diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index dc552c2b5..5b0933f7d 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -23,6 +23,7 @@ import { coerceEvaluator, parseEvaluators, parseInlineRubrics, + parsePreprocessors, warnUnconsumedCriteria, } from './loaders/evaluator-parser.js'; import { buildSearchRoots, resolveToAbsolutePath } from './loaders/file-resolver.js'; @@ -95,6 +96,7 @@ type RawTestSuite = JsonObject & { readonly execution?: JsonValue; readonly workspace?: JsonValue; readonly assertions?: JsonValue; + readonly preprocessors?: JsonValue; /** @deprecated Use `assertions` 
instead */ readonly assert?: JsonValue; readonly input?: JsonValue; @@ -283,6 +285,12 @@ async function loadTestsFromYaml( const rawTestCases = resolveTests(suite); const globalEvaluator = coerceEvaluator(suite.evaluator, 'global') ?? 'llm-grader'; + const suitePreprocessors = await parsePreprocessors( + suite.preprocessors, + searchRoots, + '', + absoluteTestPath, + ); // Parse suite-level workspace config (default for all cases) const evalFileDir = path.dirname(absoluteTestPath); @@ -456,6 +464,7 @@ async function loadTestsFromYaml( globalExecution, searchRoots, id ?? 'unknown', + suitePreprocessors, ); } catch (error) { // Skip entire test if evaluator validation fails diff --git a/packages/core/test/evaluation/content-preprocessor.test.ts b/packages/core/test/evaluation/content-preprocessor.test.ts new file mode 100644 index 000000000..5d3e40bb9 --- /dev/null +++ b/packages/core/test/evaluation/content-preprocessor.test.ts @@ -0,0 +1,95 @@ +import { afterEach, describe, expect, it } from 'bun:test'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { + appendPreprocessingWarnings, + extractTextWithPreprocessors, + normalizePreprocessorType, +} from '../../src/evaluation/content-preprocessor.js'; + +describe('content preprocessors', () => { + const tempDirs: string[] = []; + + afterEach(async () => { + await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true }))); + tempDirs.length = 0; + }); + + it('reads text files as UTF-8 by default', async () => { + const dir = await mkdtemp(join(tmpdir(), 'agentv-preprocessor-')); + tempDirs.push(dir); + const filePath = join(dir, 'report.txt'); + await writeFile(filePath, 'alpha\nbeta\n', 'utf8'); + + const result = await extractTextWithPreprocessors( + [{ type: 'file', media_type: 'text/plain', path: filePath }], + undefined, + ); + + expect(result.warnings).toEqual([]); + expect(result.text).toContain('[[ 
file:'); + expect(result.text).toContain('alpha\nbeta'); + }); + + it('uses configured preprocessors for matching file types', async () => { + const dir = await mkdtemp(join(tmpdir(), 'agentv-preprocessor-')); + tempDirs.push(dir); + const filePath = join(dir, 'report.xlsx'); + const scriptPath = join(dir, 'xlsx-to-text.js'); + await writeFile(filePath, 'unused', 'utf8'); + await writeFile( + scriptPath, + `const fs = require('node:fs'); +const payload = JSON.parse(fs.readFileSync(0, 'utf8')); +console.log('sheet:' + payload.original_path.split('/').pop());`, + 'utf8', + ); + + const result = await extractTextWithPreprocessors( + [ + { + type: 'file', + media_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + path: filePath, + }, + ], + [{ type: 'xlsx', command: [process.execPath, scriptPath] }], + ); + + expect(result.warnings).toEqual([]); + expect(result.text).toContain('sheet:report.xlsx'); + }); + + it('records a warning when default UTF-8 extraction looks binary', async () => { + const dir = await mkdtemp(join(tmpdir(), 'agentv-preprocessor-')); + tempDirs.push(dir); + const filePath = join(dir, 'report.pdf'); + await writeFile(filePath, Buffer.from([0, 159, 146, 150])); + + const result = await extractTextWithPreprocessors( + [{ type: 'file', media_type: 'application/pdf', path: filePath }], + undefined, + ); + + expect(result.text).toBe(''); + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]?.reason).toContain('configure a preprocessor'); + }); + + it('appends warnings to extracted text for grader prompts', () => { + const text = appendPreprocessingWarnings('body', [ + { file: '/tmp/report.pdf', mediaType: 'application/pdf', reason: 'failed to extract' }, + ]); + expect(text).toContain('body'); + expect(text).toContain('[file preprocessing warning]'); + }); + + it('normalizes short aliases to MIME types', () => { + expect(normalizePreprocessorType('xlsx')).toBe( + 
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + ); + expect(normalizePreprocessorType('text/html')).toBe('text/html'); + }); +}); diff --git a/packages/core/test/evaluation/llm-grader-multimodal.test.ts b/packages/core/test/evaluation/llm-grader-multimodal.test.ts index 1ff035a02..bbf6e8c04 100644 --- a/packages/core/test/evaluation/llm-grader-multimodal.test.ts +++ b/packages/core/test/evaluation/llm-grader-multimodal.test.ts @@ -9,7 +9,10 @@ * - Images in non-assistant messages are ignored */ -import { beforeEach, describe, expect, it, mock } from 'bun:test'; +import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; import type { ResolvedTarget } from '../../src/evaluation/providers/targets.js'; import type { Message } from '../../src/evaluation/providers/types.js'; @@ -194,10 +197,19 @@ describe('extractImageBlocks', () => { // --------------------------------------------------------------------------- describe('LlmGraderEvaluator multimodal', () => { + let tempDir: string | undefined; + beforeEach(() => { capturedGenerateTextArgs = undefined; }); + afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } + }); + it('sends plain text prompt when output has no images', async () => { const provider = createLmProvider(); @@ -353,4 +365,53 @@ describe('LlmGraderEvaluator multimodal', () => { expect(capturedGenerateTextArgs?.prompt).toBeTypeOf('string'); expect(capturedGenerateTextArgs?.messages).toBeUndefined(); }); + + it('injects preprocessed file text into the plain prompt', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'agentv-llm-file-')); + const filePath = join(tempDir, 'report.xlsx'); + const scriptPath = join(tempDir, 'xlsx-to-text.js'); + await writeFile(filePath, 'unused', 'utf8'); + await writeFile( + 
scriptPath, + `const fs = require('node:fs'); +const payload = JSON.parse(fs.readFileSync(0, 'utf8')); +console.log('spreadsheet:' + payload.original_path.split('/').pop());`, + 'utf8', + ); + + const provider = createLmProvider(); + const evaluator = new LlmGraderEvaluator({ + resolveGraderProvider: async () => provider, + }); + + await evaluator.evaluate({ + evalCase: baseTestCase, + candidate: '', + target: baseTarget, + provider, + attempt: 0, + promptInputs: { question: 'Describe the image' }, + now: new Date(), + evaluator: { + name: 'grade', + type: 'llm-grader', + preprocessors: [{ type: 'xlsx', command: [process.execPath, scriptPath] }], + }, + output: [ + { + role: 'assistant', + content: [ + { + type: 'file', + media_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + path: filePath, + }, + ], + }, + ], + }); + + expect(capturedGenerateTextArgs?.prompt).toBeTypeOf('string'); + expect(String(capturedGenerateTextArgs?.prompt)).toContain('spreadsheet:report.xlsx'); + }); }); diff --git a/packages/core/test/evaluation/preprocessors-yaml.test.ts b/packages/core/test/evaluation/preprocessors-yaml.test.ts new file mode 100644 index 000000000..c05416b8f --- /dev/null +++ b/packages/core/test/evaluation/preprocessors-yaml.test.ts @@ -0,0 +1,55 @@ +import { afterEach, describe, expect, it } from 'bun:test'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { loadTests } from '../../src/evaluation/yaml-parser.js'; + +describe('eval YAML preprocessors', () => { + const tempDirs: string[] = []; + + afterEach(async () => { + await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true }))); + tempDirs.length = 0; + }); + + it('merges suite-level preprocessors into llm-graders and resolves command paths', async () => { + const dir = await mkdtemp(path.join(tmpdir(), 'agentv-yaml-preprocessors-')); + tempDirs.push(dir); + + await 
writeFile(path.join(dir, 'xlsx-default.js'), 'console.log("default")', 'utf8'); + await writeFile(path.join(dir, 'xlsx-override.js'), 'console.log("override")', 'utf8'); + await writeFile( + path.join(dir, 'suite.eval.yaml'), + `preprocessors: + - type: xlsx + command: ["node", "xlsx-default.js"] +tests: + - id: report + input: "grade this" + criteria: "works" + assertions: + - name: grade + type: llm-grader + prompt: "Evaluate {{ output }}" + preprocessors: + - type: xlsx + command: ["node", "xlsx-override.js"] +`, + 'utf8', + ); + + const tests = await loadTests(path.join(dir, 'suite.eval.yaml'), dir); + const evaluator = tests[0]?.assertions?.[0]; + + expect(evaluator?.type).toBe('llm-grader'); + if (!evaluator || evaluator.type !== 'llm-grader') { + throw new Error('expected llm-grader evaluator'); + } + + expect(evaluator.preprocessors).toHaveLength(1); + expect(evaluator.preprocessors?.[0]?.resolvedCommand?.[1]).toBe( + path.join(dir, 'xlsx-override.js'), + ); + }); +}); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 7d2f54e93..31c44b3e1 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -53,7 +53,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -67,20 +72,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -118,7 +133,12 @@ "properties": { 
"role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -132,20 +152,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -173,7 +203,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -187,20 +222,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -244,7 +289,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -294,9 +342,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -333,7 +414,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", 
"llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -428,7 +512,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -455,9 +542,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -520,7 +639,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -536,7 +657,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -553,7 +677,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -570,13 +697,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -613,11 +745,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -658,7 +799,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { 
"type": "array", @@ -672,7 +818,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -683,7 +834,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -691,7 +844,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -705,7 +863,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -716,7 +879,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -753,7 +919,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -765,7 +934,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -787,17 +960,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -841,7 +1023,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -885,7 +1070,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -922,7 +1110,10 @@ }, "type": { "type": "string", - 
"enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -937,7 +1128,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -974,7 +1167,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -1006,7 +1202,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1049,7 +1247,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1092,7 +1293,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1129,10 +1333,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1175,7 +1384,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1264,7 +1476,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1274,7 +1489,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -1318,7 +1536,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -1368,9 +1589,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, 
+ "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -1407,7 +1661,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -1502,7 +1759,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1529,9 +1789,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1594,7 +1886,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1610,7 +1904,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1627,7 +1924,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -1644,13 +1944,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -1687,11 +1992,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + 
"tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -1732,7 +2046,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1746,7 +2065,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1757,7 +2081,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -1765,7 +2091,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1779,7 +2110,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1790,7 +2126,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -1827,7 +2166,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -1839,7 +2181,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -1861,17 +2207,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", 
"fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -1915,7 +2270,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1959,7 +2317,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -1996,7 +2357,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -2011,7 +2375,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2048,7 +2414,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -2080,7 +2449,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2123,7 +2494,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2166,7 +2540,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2203,10 +2580,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2249,7 +2631,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2338,7 +2723,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2348,7 +2736,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + 
"type", + "criteria" + ], "additionalProperties": false } ] @@ -2409,7 +2800,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -2459,9 +2853,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -2498,7 +2925,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -2593,7 +3023,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2620,9 +3053,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2685,7 +3150,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2701,7 +3168,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -2718,7 +3188,10 @@ "type": "string" } }, - "required": ["type", 
"path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -2735,13 +3208,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -2778,11 +3256,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -2823,7 +3310,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2837,7 +3329,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2848,7 +3345,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -2856,7 +3355,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2870,7 +3374,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -2881,7 +3390,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -2918,7 +3430,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -2930,7 +3445,11 @@ }, "match": { "type": "string", - "enum": 
["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -2952,17 +3471,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -3006,7 +3534,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3050,7 +3581,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -3087,7 +3621,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -3102,7 +3639,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3139,7 +3678,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -3171,7 +3713,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3214,7 +3758,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3257,7 +3804,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3294,10 +3844,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], 
"additionalProperties": false }, { @@ -3340,7 +3895,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3429,7 +3987,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3439,7 +4000,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -3483,7 +4047,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -3533,9 +4100,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -3572,7 +4172,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -3667,7 +4270,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3694,9 +4300,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": 
false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3759,7 +4397,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3775,7 +4415,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3792,7 +4435,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -3809,13 +4455,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -3852,11 +4503,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -3897,7 +4557,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3911,7 +4576,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3922,7 +4592,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -3930,7 +4602,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3944,7 +4621,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", 
+ "subset", + "superset" + ] }, { "type": "array", @@ -3955,7 +4637,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -3992,7 +4677,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -4004,7 +4692,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -4026,17 +4718,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -4080,7 +4781,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4124,7 +4828,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -4161,7 +4868,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -4176,7 +4886,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4213,7 +4925,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -4245,7 +4960,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4288,7 +5005,10 @@ "type": "string" } }, - 
"required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4331,7 +5051,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4368,10 +5091,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4414,7 +5142,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4503,7 +5234,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4513,7 +5247,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -4534,7 +5271,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -4545,7 +5286,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -4578,7 +5321,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -4602,7 +5348,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -4616,7 +5365,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -4629,7 +5381,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -4658,7 +5413,10 @@ 
"additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -4694,7 +5452,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -4725,7 +5487,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -4756,7 +5522,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -4787,7 +5557,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -4797,7 +5571,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -4819,7 +5597,9 @@ "type": "string" } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -4854,7 +5634,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -4868,20 +5653,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -4909,7 +5704,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -4923,20 +5723,30 @@ 
"properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -4980,7 +5790,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -5030,9 +5843,42 @@ "config": { "type": "object", "additionalProperties": {} - } + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } + } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -5069,7 +5915,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -5164,7 +6013,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -5191,9 +6043,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5256,7 
+6140,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5272,7 +6158,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5289,7 +6178,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -5306,13 +6198,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -5349,11 +6246,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -5394,7 +6300,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5408,7 +6319,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5419,7 +6335,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -5427,7 +6345,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5441,7 +6364,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5452,7 +6380,10 @@ ] } }, - "required": ["type", "mode"], + 
"required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -5489,7 +6420,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -5501,7 +6435,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -5523,17 +6461,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -5577,7 +6524,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5621,7 +6571,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -5658,7 +6611,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -5673,7 +6629,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5710,7 +6668,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -5742,7 +6703,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5785,7 +6748,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -5828,7 
+6794,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -5865,10 +6834,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5911,7 +6885,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6000,7 +6977,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6010,7 +6990,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -6054,7 +7037,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -6104,9 +7090,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -6143,7 +7162,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -6238,7 +7260,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6265,9 +7290,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + 
"preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6330,7 +7387,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6346,7 +7405,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6363,7 +7425,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -6380,13 +7445,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -6423,11 +7493,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -6468,7 +7547,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6482,7 +7566,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6493,7 +7582,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": 
false } }, @@ -6501,7 +7592,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6515,7 +7611,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -6526,7 +7627,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -6563,7 +7667,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -6575,7 +7682,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -6597,17 +7708,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -6651,7 +7771,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6695,7 +7818,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -6732,7 +7858,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -6747,7 +7876,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6784,7 +7915,10 @@ }, "type": { "type": 
"string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -6816,7 +7950,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6859,7 +7995,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6902,7 +8041,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6939,10 +8081,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6985,7 +8132,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7074,7 +8224,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7084,7 +8237,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -7145,7 +8301,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -7195,9 +8354,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], 
"additionalProperties": false }, { @@ -7234,7 +8426,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -7329,7 +8524,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7356,9 +8554,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7421,7 +8651,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7437,7 +8669,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7454,7 +8689,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -7471,13 +8709,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -7514,11 +8757,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -7559,7 +8811,12 @@ "anyOf": [ { "type": 
"string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7573,7 +8830,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7584,7 +8846,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -7592,7 +8856,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7606,7 +8875,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7617,7 +8891,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -7654,7 +8931,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -7666,7 +8946,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -7688,17 +8972,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -7742,7 +9035,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7786,7 +9082,10 @@ "minimum": 0 } }, - "required": ["type", 
"budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -7823,7 +9122,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -7838,7 +9140,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7875,7 +9179,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -7907,7 +9214,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7950,7 +9259,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7993,7 +9305,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8030,10 +9345,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8076,7 +9396,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8165,7 +9488,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8175,7 +9501,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -8219,7 +9548,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -8269,9 +9601,42 @@ "config": { "type": "object", "additionalProperties": {} + 
}, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -8308,7 +9673,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -8403,7 +9771,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8430,9 +9801,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8495,7 +9898,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8511,7 +9916,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8528,7 +9936,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -8545,13 +9956,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], 
"additionalProperties": false }, { @@ -8588,11 +10004,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -8633,7 +10058,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8647,7 +10077,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8658,7 +10093,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -8666,7 +10103,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8680,7 +10122,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8691,7 +10138,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -8728,7 +10178,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -8740,7 +10193,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -8762,17 +10219,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": 
{ "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -8816,7 +10282,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8860,7 +10329,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -8897,7 +10369,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -8912,7 +10387,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8949,7 +10426,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -8981,7 +10461,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9024,7 +10506,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9067,7 +10552,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9104,10 +10592,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9150,7 +10643,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9239,7 +10735,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + 
"score_range", + "outcome" + ], "additionalProperties": false } } @@ -9249,7 +10748,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -9270,7 +10772,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -9281,7 +10787,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -9314,7 +10822,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -9338,7 +10849,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -9352,7 +10866,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -9365,7 +10882,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -9394,7 +10914,10 @@ "additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -9430,7 +10953,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9461,7 +10988,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9492,7 +11023,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9523,7 +11058,11 @@ }, "reset": { "type": "string", - "enum": ["none", 
"fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -9533,7 +11072,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -9555,7 +11098,9 @@ "type": "string" } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -9622,7 +11167,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -9672,9 +11220,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -9711,7 +11292,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -9806,7 +11390,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9833,9 +11420,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ 
-9898,7 +11517,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9914,7 +11535,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9931,7 +11555,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -9948,13 +11575,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -9991,11 +11623,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -10036,7 +11677,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10050,7 +11696,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10061,7 +11712,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -10069,7 +11722,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10083,7 +11741,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10094,7 +11757,10 @@ ] } }, - 
"required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -10131,7 +11797,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -10143,7 +11812,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -10165,17 +11838,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -10219,7 +11901,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10263,7 +11948,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -10300,7 +11988,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -10315,7 +12006,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10352,7 +12045,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -10384,7 +12080,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10427,7 +12125,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], 
"additionalProperties": false }, { @@ -10470,7 +12171,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10507,10 +12211,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10553,7 +12262,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10642,7 +12354,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10652,7 +12367,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -10696,7 +12414,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -10746,9 +12467,42 @@ "config": { "type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -10785,7 +12539,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -10880,7 +12637,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10907,9 +12667,41 @@ 
"type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10972,7 +12764,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10988,7 +12782,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -11005,7 +12802,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -11022,13 +12822,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -11065,11 +12870,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -11110,7 +12924,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11124,7 +12943,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11135,7 +12959,9 @@ ] } }, - 
"required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -11143,7 +12969,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11157,7 +12988,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11168,7 +13004,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -11205,7 +13044,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -11217,7 +13059,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -11239,17 +13085,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -11293,7 +13148,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -11337,7 +13195,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -11374,7 +13235,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -11389,7 +13253,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + 
"type" + ], "additionalProperties": false }, { @@ -11426,7 +13292,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -11458,7 +13327,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11501,7 +13372,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11544,7 +13418,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11581,10 +13458,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11627,7 +13509,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11716,7 +13601,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11726,7 +13614,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -11747,7 +13638,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -11758,7 +13653,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -11821,7 +13718,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -11871,9 +13771,42 @@ "config": { 
"type": "object", "additionalProperties": {} + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -11910,7 +13843,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -12005,7 +13941,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12032,9 +13971,41 @@ "type": "number", "minimum": 0, "maximum": 2 + }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12097,7 +14068,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12113,7 +14086,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12130,7 +14106,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -12147,13 +14126,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", 
"aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -12190,11 +14174,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -12235,7 +14228,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12249,7 +14247,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12260,7 +14263,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -12268,7 +14273,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12282,7 +14292,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12293,7 +14308,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -12330,7 +14348,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -12342,7 +14363,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -12364,17 +14389,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" 
+ ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -12418,7 +14452,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12462,7 +14499,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -12499,7 +14539,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -12514,7 +14557,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12551,7 +14596,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -12583,7 +14631,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12626,7 +14676,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12669,7 +14722,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12706,10 +14762,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12752,7 +14813,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12841,7 +14905,10 @@ 
"minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12851,12 +14918,45 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] } }, + "preprocessors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "command": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "type", + "command" + ], + "additionalProperties": false + } + }, "workspace": { "anyOf": [ { @@ -12867,7 +14967,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -12891,7 +14994,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -12905,7 +15011,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -12918,7 +15027,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -12947,7 +15059,10 @@ "additionalProperties": false } }, - "required": ["path", "source"], + "required": [ + "path", + "source" + ], "additionalProperties": false } }, @@ -12983,7 +15098,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13014,7 +15133,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13045,7 +15168,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } 
}, "additionalProperties": false @@ -13076,7 +15203,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -13086,7 +15217,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -13100,7 +15235,9 @@ ] } }, - "required": ["tests"], + "required": [ + "tests" + ], "additionalProperties": false } } From ec4c2d6ca146fafbc4c7b8021bcc856a25a6d748 Mon Sep 17 00:00:00 2001 From: Christopher Date: Tue, 7 Apr 2026 22:44:07 +0000 Subject: [PATCH 2/5] chore(core): format generated eval schema --- .../references/eval-schema.json | 2472 ++++------------- 1 file changed, 478 insertions(+), 1994 deletions(-) diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 31c44b3e1..bb340e350 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -53,12 +53,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -72,30 +67,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -133,12 +118,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -152,30 +132,20 @@ 
"properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -203,12 +173,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -222,30 +187,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -289,10 +244,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -366,18 +318,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -414,10 +360,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -512,10 +455,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -566,17 +506,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ 
-639,9 +574,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -657,10 +590,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -677,10 +607,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -697,18 +624,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -745,20 +667,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -799,12 +712,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -818,12 +726,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -834,9 +737,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -844,12 +745,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -863,12 +759,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -879,10 +770,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + 
"required": ["type", "mode"], "additionalProperties": false }, { @@ -919,10 +807,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -934,11 +819,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -960,26 +841,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -1023,10 +895,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1070,10 +939,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -1110,10 +976,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -1128,9 +991,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1167,10 +1028,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -1202,9 +1060,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1247,10 +1103,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1293,10 +1146,7 @@ "type": 
"string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1333,15 +1183,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1384,10 +1229,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1476,10 +1318,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1489,10 +1328,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -1536,10 +1372,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -1613,18 +1446,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -1661,10 +1488,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -1759,10 +1583,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1813,17 +1634,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1886,9 +1702,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1904,10 +1718,7 @@ "maximum": 1 } }, - "required": [ 
- "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1924,10 +1735,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -1944,18 +1752,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -1992,20 +1795,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -2046,12 +1840,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2065,12 +1854,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2081,9 +1865,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -2091,12 +1873,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2110,12 +1887,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2126,10 +1898,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -2166,10 +1935,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - 
"field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -2181,11 +1947,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -2207,26 +1969,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -2270,10 +2023,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -2317,10 +2067,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -2357,10 +2104,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -2375,9 +2119,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2414,10 +2156,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -2449,9 +2188,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2494,10 +2231,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2540,10 +2274,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2580,15 +2311,10 @@ 
}, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2631,10 +2357,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2723,10 +2446,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2736,10 +2456,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -2800,10 +2517,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -2877,18 +2591,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -2925,10 +2633,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -3023,10 +2728,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -3077,17 +2779,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3150,9 +2847,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3168,10 +2863,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3188,10 +2880,7 @@ "type": "string" } }, 
- "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -3208,18 +2897,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -3256,20 +2940,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -3310,12 +2985,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3329,12 +2999,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3345,9 +3010,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -3355,12 +3018,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3374,12 +3032,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3390,10 +3043,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -3430,10 +3080,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -3445,11 +3092,7 @@ }, "match": { "type": 
"string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -3471,26 +3114,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -3534,10 +3168,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3581,10 +3212,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -3621,10 +3249,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -3639,9 +3264,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3678,10 +3301,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -3713,9 +3333,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3758,10 +3376,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3804,10 +3419,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3844,15 +3456,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + 
"required": ["type"], "additionalProperties": false }, { @@ -3895,10 +3502,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3987,10 +3591,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4000,10 +3601,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -4047,10 +3645,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -4124,18 +3719,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -4172,10 +3761,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -4270,10 +3856,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4324,17 +3907,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4397,9 +3975,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4415,10 +3991,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4435,10 +4008,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -4455,18 +4025,13 @@ "type": 
"string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -4503,20 +4068,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -4557,12 +4113,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4576,12 +4127,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4592,9 +4138,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -4602,12 +4146,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4621,12 +4160,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4637,10 +4171,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -4677,10 +4208,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -4692,11 +4220,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": 
"boolean" @@ -4718,26 +4242,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -4781,10 +4296,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4828,10 +4340,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -4868,10 +4377,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -4886,9 +4392,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4925,10 +4429,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -4960,9 +4461,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5005,10 +4504,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5051,10 +4547,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5091,15 +4584,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5142,10 +4630,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + 
"required": ["type", "value"], "additionalProperties": false }, { @@ -5234,10 +4719,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5247,10 +4729,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -5271,11 +4750,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -5286,9 +4761,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -5321,10 +4794,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -5348,10 +4818,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -5365,10 +4832,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -5381,10 +4845,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -5413,10 +4874,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -5452,11 +4910,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5487,11 +4941,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5522,11 +4972,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - 
"strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5557,11 +5003,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5571,11 +5013,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -5597,9 +5035,7 @@ "type": "string" } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -5634,12 +5070,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -5653,30 +5084,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -5704,12 +5125,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -5723,30 +5139,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -5790,10 +5196,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -5867,18 +5270,12 @@ ] } }, 
- "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -5915,10 +5312,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -6013,10 +5407,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6067,17 +5458,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6140,9 +5526,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6158,10 +5542,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6178,10 +5559,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -6198,18 +5576,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -6246,20 +5619,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -6300,12 +5664,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", 
"ignore", "subset", "superset"] }, { "type": "array", @@ -6319,12 +5678,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6335,9 +5689,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -6345,12 +5697,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6364,12 +5711,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6380,10 +5722,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -6420,10 +5759,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -6435,11 +5771,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -6461,26 +5793,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -6524,10 +5847,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6571,10 +5891,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ 
-6611,10 +5928,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -6629,9 +5943,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6668,10 +5980,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -6703,9 +6012,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6748,10 +6055,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6794,10 +6098,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6834,15 +6135,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6885,10 +6181,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6977,10 +6270,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6990,10 +6280,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -7037,10 +6324,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -7114,18 +6398,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - 
"command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -7162,10 +6440,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -7260,10 +6535,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7314,17 +6586,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7387,9 +6654,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7405,10 +6670,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -7425,10 +6687,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -7445,18 +6704,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -7493,20 +6747,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -7547,12 +6792,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7566,12 +6806,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - 
"subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7582,9 +6817,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -7592,12 +6825,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7611,12 +6839,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7627,10 +6850,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -7667,10 +6887,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -7682,11 +6899,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -7708,26 +6921,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -7771,10 +6975,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -7818,10 +7019,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -7858,10 +7056,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, 
"max_total": { "type": "number", @@ -7876,9 +7071,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7915,10 +7108,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -7950,9 +7140,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7995,10 +7183,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8041,10 +7226,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8081,15 +7263,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8132,10 +7309,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8224,10 +7398,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8237,10 +7408,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -8301,10 +7469,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -8378,18 +7543,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -8426,10 +7585,7 @@ }, "type": { "type": "string", - 
"enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -8524,10 +7680,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8578,17 +7731,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8651,9 +7799,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8669,10 +7815,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8689,10 +7832,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -8709,18 +7849,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -8757,20 +7892,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -8811,12 +7937,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8830,12 +7951,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8846,9 +7962,7 @@ ] } }, - 
"required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -8856,12 +7970,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8875,12 +7984,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -8891,10 +7995,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -8931,10 +8032,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -8946,11 +8044,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -8972,26 +8066,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -9035,10 +8120,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9082,10 +8164,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -9122,10 +8201,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -9140,9 +8216,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], 
"additionalProperties": false }, { @@ -9179,10 +8253,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -9214,9 +8285,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9259,10 +8328,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9305,10 +8371,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9345,15 +8408,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9396,10 +8454,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9488,10 +8543,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9501,10 +8553,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -9548,10 +8597,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -9625,18 +8671,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -9673,10 +8713,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -9771,10 +8808,7 
@@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9825,17 +8859,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9898,9 +8927,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9916,10 +8943,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9936,10 +8960,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -9956,18 +8977,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -10004,20 +9020,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -10058,12 +9065,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10077,12 +9079,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10093,9 +9090,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -10103,12 +9098,7 @@ "anyOf": [ { "type": 
"string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10122,12 +9112,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10138,10 +9123,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -10178,10 +9160,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -10193,11 +9172,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -10219,26 +9194,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -10282,10 +9248,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -10329,10 +9292,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -10369,10 +9329,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -10387,9 +9344,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10426,10 +9381,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - 
"execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -10461,9 +9413,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10506,10 +9456,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10552,10 +9499,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10592,15 +9536,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10643,10 +9582,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10735,10 +9671,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10748,10 +9681,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -10772,11 +9702,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -10787,9 +9713,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -10822,10 +9746,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -10849,10 +9770,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -10866,10 +9784,7 @@ "type": "string" } }, - 
"required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -10882,10 +9797,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -10914,10 +9826,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -10953,11 +9862,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -10988,11 +9893,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -11023,11 +9924,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -11058,11 +9955,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -11072,11 +9965,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -11098,9 +9987,7 @@ "type": "string" } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -11167,10 +10054,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -11244,18 +10128,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -11292,10 +10170,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": 
["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -11390,10 +10265,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11444,17 +10316,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11517,9 +10384,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11535,10 +10400,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11555,10 +10417,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -11575,18 +10434,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -11623,20 +10477,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -11677,12 +10522,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11696,12 +10536,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11712,9 +10547,7 @@ ] } }, - "required": [ - "tool" - ], + "required": 
["tool"], "additionalProperties": false } }, @@ -11722,12 +10555,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11741,12 +10569,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11757,10 +10580,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -11797,10 +10617,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -11812,11 +10629,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -11838,26 +10651,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -11901,10 +10705,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11948,10 +10749,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -11988,10 +10786,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -12006,9 +10801,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false 
}, { @@ -12045,10 +10838,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -12080,9 +10870,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12125,10 +10913,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12171,10 +10956,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12211,15 +10993,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12262,10 +11039,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12354,10 +11128,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12367,10 +11138,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -12414,10 +11182,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -12491,18 +11256,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -12539,10 +11298,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -12637,10 +11393,7 @@ 
"minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12691,17 +11444,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12764,9 +11512,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12782,10 +11528,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -12802,10 +11545,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -12822,18 +11562,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -12870,20 +11605,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -12924,12 +11650,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12943,12 +11664,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12959,9 +11675,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -12969,12 +11683,7 @@ "anyOf": [ { 
"type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -12988,12 +11697,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13004,10 +11708,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -13044,10 +11745,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -13059,11 +11757,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -13085,26 +11779,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -13148,10 +11833,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13195,10 +11877,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -13235,10 +11914,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -13253,9 +11929,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13292,10 +11966,7 @@ }, "type": { "type": "string", - "enum": [ - 
"execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -13327,9 +11998,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13372,10 +12041,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13418,10 +12084,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13458,15 +12121,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13509,10 +12167,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13601,10 +12256,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13614,10 +12266,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -13638,11 +12287,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -13653,9 +12298,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -13718,10 +12361,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -13795,18 +12435,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - 
"required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -13843,10 +12477,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -13941,10 +12572,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13995,17 +12623,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14068,9 +12691,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14086,10 +12707,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14106,10 +12724,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -14126,18 +12741,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -14174,20 +12784,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -14228,12 +12829,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14247,12 +12843,7 @@ "anyOf": [ { "type": 
"string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14263,9 +12854,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -14273,12 +12862,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14292,12 +12876,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14308,10 +12887,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -14348,10 +12924,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -14363,11 +12936,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -14389,26 +12958,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -14452,10 +13012,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14499,10 +13056,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -14539,10 +13093,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - 
"token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -14557,9 +13108,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14596,10 +13145,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -14631,9 +13177,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14676,10 +13220,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14722,10 +13263,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14762,15 +13300,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14813,10 +13346,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -14905,10 +13435,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -14918,10 +13445,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -14950,10 +13474,7 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } }, @@ -14967,10 +13488,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -14994,10 +13512,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": 
["type", "url"], "additionalProperties": false }, { @@ -15011,10 +13526,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -15027,10 +13539,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -15059,10 +13568,7 @@ "additionalProperties": false } }, - "required": [ - "path", - "source" - ], + "required": ["path", "source"], "additionalProperties": false } }, @@ -15098,11 +13604,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15133,11 +13635,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15168,11 +13666,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15203,11 +13697,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -15217,11 +13707,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -15235,9 +13721,7 @@ ] } }, - "required": [ - "tests" - ], + "required": ["tests"], "additionalProperties": false } } From 62b4beca1e789828bbca234d09ea7056a1994123 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 8 Apr 2026 03:57:29 +0000 Subject: [PATCH 3/5] fix(core): handle implicit and relative file preprocessors --- .../src/evaluation/content-preprocessor.ts | 10 +-- .../src/evaluation/evaluators/llm-grader.ts | 21 ++++++ .../evaluation/loaders/evaluator-parser.ts | 5 +- packages/core/src/evaluation/orchestrator.ts | 18 +++++ packages/core/src/evaluation/types.ts 
| 2 + packages/core/src/evaluation/yaml-parser.ts | 1 + .../evaluation/content-preprocessor.test.ts | 19 ++++- .../core/test/evaluation/orchestrator.test.ts | 72 +++++++++++++++++++ .../evaluation/preprocessors-yaml.test.ts | 38 ++++++++++ 9 files changed, 179 insertions(+), 7 deletions(-) diff --git a/packages/core/src/evaluation/content-preprocessor.ts b/packages/core/src/evaluation/content-preprocessor.ts index a718a85b7..ab5fb295c 100644 --- a/packages/core/src/evaluation/content-preprocessor.ts +++ b/packages/core/src/evaluation/content-preprocessor.ts @@ -41,6 +41,7 @@ export interface ExtractedContentText { export async function extractTextWithPreprocessors( content: string | readonly Content[] | undefined, preprocessors: readonly ContentPreprocessorConfig[] | undefined, + options: { readonly basePath?: string } = {}, ): Promise { if (typeof content === 'string') { return { text: content, warnings: [] }; @@ -61,7 +62,7 @@ export async function extractTextWithPreprocessors( continue; } - const result = await preprocessContentFile(block, preprocessors); + const result = await preprocessContentFile(block, preprocessors, options.basePath); if (result.text) { parts.push(result.text); } @@ -74,9 +75,10 @@ export async function extractTextWithPreprocessors( async function preprocessContentFile( block: ContentFile, preprocessors: readonly ContentPreprocessorConfig[] | undefined, + basePath?: string, ): Promise { const mediaType = normalizePreprocessorType(block.media_type); - const resolvedPath = resolveLocalFilePath(block.path); + const resolvedPath = resolveLocalFilePath(block.path, basePath); if (!resolvedPath) { return { @@ -193,14 +195,14 @@ export function normalizePreprocessorType(value: string): string { return MIME_TYPE_ALIASES[normalized] ?? 
normalized; } -function resolveLocalFilePath(value: string): string | undefined { +function resolveLocalFilePath(value: string, basePath?: string): string | undefined { if (value.startsWith('file://')) { return fileURLToPath(value); } if (/^[a-z]+:\/\//i.test(value)) { return undefined; } - return path.resolve(value); + return basePath ? path.resolve(basePath, value) : path.resolve(value); } function formatFileText(filePath: string, text: string): string { diff --git a/packages/core/src/evaluation/evaluators/llm-grader.ts b/packages/core/src/evaluation/evaluators/llm-grader.ts index b64440779..cc6e482c7 100644 --- a/packages/core/src/evaluation/evaluators/llm-grader.ts +++ b/packages/core/src/evaluation/evaluators/llm-grader.ts @@ -155,6 +155,24 @@ interface StructuredGenerationResult { readonly tokenUsage?: TokenUsage; } +function resolveContentBasePath(context: EvaluationContext): string | undefined { + if (context.workspacePath) { + return context.workspacePath; + } + + if ( + 'config' in context.target && + context.target.config && + typeof context.target.config === 'object' && + 'cwd' in context.target.config && + typeof context.target.config.cwd === 'string' + ) { + return context.target.config.cwd; + } + + return undefined; +} + export class LlmGraderEvaluator implements Evaluator { readonly kind = 'llm-grader'; @@ -223,6 +241,9 @@ export class LlmGraderEvaluator implements Evaluator { const extracted = await extractTextWithPreprocessors( lastAssistant.content, config.preprocessors, + { + basePath: resolveContentBasePath(context), + }, ); return { ...context, diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts index a98a7b760..79385fe81 100644 --- a/packages/core/src/evaluation/loaders/evaluator-parser.ts +++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts @@ -1,5 +1,6 @@ import path from 'node:path'; +import { normalizePreprocessorType } from 
'../content-preprocessor.js'; import type { ToolTrajectoryEvaluatorConfig, ToolTrajectoryExpectedItem } from '../trace.js'; import type { ContentPreprocessorConfig, @@ -1475,10 +1476,10 @@ async function parseMergedPreprocessors( const merged = new Map(); for (const entry of parsedDefaults) { - merged.set(entry.type.toLowerCase(), entry); + merged.set(normalizePreprocessorType(entry.type), entry); } for (const entry of parsedOverrides ?? []) { - merged.set(entry.type.toLowerCase(), entry); + merged.set(normalizePreprocessorType(entry.type), entry); } return [...merged.values()]; diff --git a/packages/core/src/evaluation/orchestrator.ts b/packages/core/src/evaluation/orchestrator.ts index a10a73025..416fa1ba2 100644 --- a/packages/core/src/evaluation/orchestrator.ts +++ b/packages/core/src/evaluation/orchestrator.ts @@ -58,6 +58,7 @@ import type { FailureStage, JsonObject, JsonValue, + LlmGraderEvaluatorConfig, TrialResult, TrialsConfig, WorkspaceHookConfig, @@ -2287,6 +2288,10 @@ async function runEvaluatorsForCase(options: { if (!activeEvaluator) { throw new Error(`No evaluator registered for kind '${evaluatorKind}'`); } + const implicitEvaluator = + evaluatorKind === 'llm-grader' && !evalCase.assertions + ? buildImplicitLlmGraderConfig(evalCase) + : undefined; const score = await activeEvaluator.evaluate({ evalCase, @@ -2308,11 +2313,24 @@ async function runEvaluatorsForCase(options: { availableTargets, fileChanges, workspacePath, + ...(implicitEvaluator ? 
{ evaluator: implicitEvaluator } : {}), }); return { score }; } +function buildImplicitLlmGraderConfig(evalCase: EvalTest): LlmGraderEvaluatorConfig | undefined { + if (!evalCase.preprocessors || evalCase.preprocessors.length === 0) { + return undefined; + } + + return { + name: 'llm-grader', + type: 'llm-grader', + preprocessors: evalCase.preprocessors, + }; +} + async function runEvaluatorList(options: { readonly evalCase: EvalTest; readonly evaluators: readonly EvaluatorConfig[]; diff --git a/packages/core/src/evaluation/types.ts b/packages/core/src/evaluation/types.ts index d5f3bcfa7..ae21d986b 100644 --- a/packages/core/src/evaluation/types.ts +++ b/packages/core/src/evaluation/types.ts @@ -843,6 +843,8 @@ export interface EvalTest { readonly criteria: string; readonly evaluator?: EvaluatorKind; readonly assertions?: readonly EvaluatorConfig[]; + /** Suite-level preprocessors used by the implicit default llm-grader. */ + readonly preprocessors?: readonly ContentPreprocessorConfig[]; /** Workspace configuration (merged from suite-level and case-level) */ readonly workspace?: WorkspaceConfig; /** Arbitrary metadata passed to workspace scripts via stdin */ diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 5b0933f7d..72ef09b2e 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -512,6 +512,7 @@ async function loadTestsFromYaml( criteria: outcome ?? '', evaluator: testCaseEvaluatorKind, assertions: evaluators, + ...(suitePreprocessors ? 
{ preprocessors: suitePreprocessors } : {}), workspace: mergedWorkspace, metadata, targets: caseTargets, diff --git a/packages/core/test/evaluation/content-preprocessor.test.ts b/packages/core/test/evaluation/content-preprocessor.test.ts index 5d3e40bb9..22c562feb 100644 --- a/packages/core/test/evaluation/content-preprocessor.test.ts +++ b/packages/core/test/evaluation/content-preprocessor.test.ts @@ -1,5 +1,5 @@ import { afterEach, describe, expect, it } from 'bun:test'; -import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; @@ -62,6 +62,23 @@ console.log('sheet:' + payload.original_path.split('/').pop());`, expect(result.text).toContain('sheet:report.xlsx'); }); + it('resolves relative file paths against the provided base path', async () => { + const dir = await mkdtemp(join(tmpdir(), 'agentv-preprocessor-')); + tempDirs.push(dir); + const nestedDir = join(dir, 'workspace'); + await mkdir(nestedDir, { recursive: true }); + await writeFile(join(nestedDir, 'report.txt'), 'from workspace', 'utf8'); + + const result = await extractTextWithPreprocessors( + [{ type: 'file', media_type: 'text/plain', path: 'report.txt' }], + undefined, + { basePath: nestedDir }, + ); + + expect(result.warnings).toEqual([]); + expect(result.text).toContain('from workspace'); + }); + it('records a warning when default UTF-8 extraction looks binary', async () => { const dir = await mkdtemp(join(tmpdir(), 'agentv-preprocessor-')); tempDirs.push(dir); diff --git a/packages/core/test/evaluation/orchestrator.test.ts b/packages/core/test/evaluation/orchestrator.test.ts index 183ad2258..14ff78e5b 100644 --- a/packages/core/test/evaluation/orchestrator.test.ts +++ b/packages/core/test/evaluation/orchestrator.test.ts @@ -179,6 +179,78 @@ describe('runTestCase', () => { expect(result.failureReasonCode).toBeUndefined(); }); + it('applies suite-level 
preprocessors to the implicit default llm-grader', async () => { + const tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-orchestrator-preprocessor-')); + const reportPath = path.join(tempDir, 'report.xlsx'); + const scriptPath = path.join(tempDir, 'xlsx-to-text.js'); + writeFileSync(reportPath, Buffer.from([0, 159, 146, 150])); + writeFileSync( + scriptPath, + `const fs = require('node:fs'); +const payload = JSON.parse(fs.readFileSync(0, 'utf8')); +if (!payload.path) throw new Error('missing path'); +console.log('spreadsheet: revenue,total\\nQ1,42');`, + 'utf8', + ); + + const answerProvider = new SequenceProvider('file-output', { + responses: [ + { + output: [ + { + role: 'assistant', + content: [ + { + type: 'file', + media_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + path: reportPath, + }, + ], + }, + ], + }, + ], + }); + const graderProvider = new CapturingGraderProvider('grader', { + output: [ + { + role: 'assistant', + content: JSON.stringify({ + score: 1, + assertions: [{ text: 'ok', passed: true }], + }), + }, + ], + }); + + const evalCase: EvalTest = { + ...baseTestCase, + id: 'implicit-preprocessors', + assertions: undefined, + preprocessors: [{ type: 'xlsx', command: [process.execPath, scriptPath] }], + }; + + const results = await runEvaluation({ + testFilePath: 'in-memory.yaml', + repoRoot: tempDir, + target: { ...baseTarget, name: 'file-output', graderTarget: 'grader' }, + targets: [ + { name: 'grader', provider: 'mock' }, + { name: 'file-output', provider: 'mock', grader_target: 'grader' }, + ], + providerFactory: (target) => { + if (target.name === 'grader') return graderProvider; + return answerProvider; + }, + evaluators: undefined, + evalCases: [evalCase], + }); + + expect(results[0]?.score).toBe(1); + expect(graderProvider.lastRequest?.question).toContain('spreadsheet: revenue,total'); + expect(graderProvider.lastRequest?.question).toContain('Q1,42'); + }); + it('reuses cached provider response when available', 
async () => { const provider = new SequenceProvider('mock', { responses: [ diff --git a/packages/core/test/evaluation/preprocessors-yaml.test.ts b/packages/core/test/evaluation/preprocessors-yaml.test.ts index c05416b8f..92e4be24e 100644 --- a/packages/core/test/evaluation/preprocessors-yaml.test.ts +++ b/packages/core/test/evaluation/preprocessors-yaml.test.ts @@ -52,4 +52,42 @@ tests: path.join(dir, 'xlsx-override.js'), ); }); + + it('lets alias-based evaluator overrides replace MIME-typed suite defaults', async () => { + const dir = await mkdtemp(path.join(tmpdir(), 'agentv-yaml-preprocessors-')); + tempDirs.push(dir); + + await writeFile(path.join(dir, 'xlsx-default.js'), 'console.log("default")', 'utf8'); + await writeFile(path.join(dir, 'xlsx-override.js'), 'console.log("override")', 'utf8'); + await writeFile( + path.join(dir, 'suite.eval.yaml'), + `preprocessors: + - type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + command: ["node", "xlsx-default.js"] +tests: + - id: report + input: "grade this" + criteria: "works" + assertions: + - name: grade + type: llm-grader + prompt: "Evaluate {{ output }}" + preprocessors: + - type: xlsx + command: ["node", "xlsx-override.js"] +`, + 'utf8', + ); + + const tests = await loadTests(path.join(dir, 'suite.eval.yaml'), dir); + const evaluator = tests[0]?.assertions?.[0]; + if (!evaluator || evaluator.type !== 'llm-grader') { + throw new Error('expected llm-grader evaluator'); + } + + expect(evaluator.preprocessors).toHaveLength(1); + expect(evaluator.preprocessors?.[0]?.resolvedCommand?.[1]).toBe( + path.join(dir, 'xlsx-override.js'), + ); + }); }); From 294d87fdd9fc0d4ce26a9aa15c480fdb422fbcae Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 8 Apr 2026 04:03:14 +0000 Subject: [PATCH 4/5] docs(examples): add content preprocessor example --- .../docs/docs/evaluation/eval-cases.mdx | 33 ++++++++++++++++ .../content/docs/docs/evaluation/examples.mdx | 29 ++++++++++++++ 
.../docs/docs/evaluators/llm-graders.mdx | 36 ++++++++++++++++++ examples/features/README.md | 2 + .../.agentv/providers/file-output.ts | 30 +++++++++++++++ .../.agentv/providers/grader-check.ts | 30 +++++++++++++++ .../preprocessors/.agentv/targets.yaml | 12 ++++++ examples/features/preprocessors/README.md | 27 +++++++++++++ .../preprocessors/evals/dataset.eval.yaml | 17 +++++++++ .../preprocessors/generated/report.xlsx | Bin 0 -> 4 bytes .../scripts/preprocessors/xlsx-to-csv.ts | 12 ++++++ 11 files changed, 228 insertions(+) create mode 100644 examples/features/preprocessors/.agentv/providers/file-output.ts create mode 100644 examples/features/preprocessors/.agentv/providers/grader-check.ts create mode 100644 examples/features/preprocessors/.agentv/targets.yaml create mode 100644 examples/features/preprocessors/README.md create mode 100644 examples/features/preprocessors/evals/dataset.eval.yaml create mode 100644 examples/features/preprocessors/generated/report.xlsx create mode 100644 examples/features/preprocessors/scripts/preprocessors/xlsx-to-csv.ts diff --git a/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx b/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx index afe5cae07..2f69246c2 100644 --- a/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx +++ b/apps/web/src/content/docs/docs/evaluation/eval-cases.mdx @@ -328,6 +328,19 @@ tests: # No assertions → default llm-grader evaluates against criteria ``` +Suite-level `preprocessors` also apply to this implicit grader. That matters when the agent output is a `ContentFile` block rather than plain text: + +```yaml +preprocessors: + - type: xlsx + command: ["bun", "run", "scripts/preprocessors/xlsx-to-csv.ts"] + +tests: + - id: spreadsheet-eval + criteria: Output includes the revenue rows + input: Generate the spreadsheet report +``` + ### `assertions` present — explicit evaluators only When `assertions` is defined, only the declared evaluators run. No implicit grader is added. 
Graders that are declared (such as `llm-grader`, `code-grader`, or `rubrics`) receive `criteria` as input automatically. @@ -353,6 +366,26 @@ tests: value: "fix" ``` +When you need a custom file conversion for only one grader, add `preprocessors` directly to that evaluator: + +```yaml +preprocessors: + - type: xlsx + command: ["bun", "run", "scripts/preprocessors/xlsx-to-csv.ts"] + +tests: + - id: mixed-eval + criteria: Response is helpful and mentions the fix + input: "Debug this function..." + assertions: + - type: llm-grader + preprocessors: + - type: xlsx + command: ["bun", "run", "scripts/preprocessors/xlsx-to-json.ts"] + - type: contains + value: "fix" +``` + ## Metadata Pass additional context to evaluators via the `metadata` field: diff --git a/apps/web/src/content/docs/docs/evaluation/examples.mdx b/apps/web/src/content/docs/docs/evaluation/examples.mdx index d7c33e7e6..d6346512f 100644 --- a/apps/web/src/content/docs/docs/evaluation/examples.mdx +++ b/apps/web/src/content/docs/docs/evaluation/examples.mdx @@ -103,6 +103,35 @@ tests: } ``` +## File Output Preprocessing + +Convert a binary file output into text before the `llm-grader` sees it: + +```yaml +description: Grade spreadsheet output via a preprocessor + +preprocessors: + - type: xlsx + command: ["bun", "run", "../scripts/preprocessors/xlsx-to-csv.ts"] + +execution: + target: file_output + +tests: + - id: spreadsheet-output + input: Generate the spreadsheet report + criteria: The extracted spreadsheet content includes the revenue rows + assertions: + - name: file-check + type: llm-grader + prompt: | + Check whether the transformed spreadsheet text contains the revenue rows: + + {{ output }} +``` + +See [`examples/features/preprocessors/`](../../../../examples/features/preprocessors/) for a runnable end-to-end example with a file-producing target and custom grader target. 
+ ## Tool Trajectory Validate that an agent uses specific tools during execution: diff --git a/apps/web/src/content/docs/docs/evaluators/llm-graders.mdx b/apps/web/src/content/docs/docs/evaluators/llm-graders.mdx index 9e17027c8..51d0db372 100644 --- a/apps/web/src/content/docs/docs/evaluators/llm-graders.mdx +++ b/apps/web/src/content/docs/docs/evaluators/llm-graders.mdx @@ -143,6 +143,42 @@ assertions: The `config` object is available as `ctx.config` inside the template function. +## Preprocessing File Outputs + +If an agent returns a `ContentFile` block instead of plain text, you can preprocess that file into text before `llm-grader` builds the candidate prompt. + +When no preprocessor matches a file's type, AgentV falls back to a default UTF-8 text read. That is enough for text-based formats such as CSV, JSON, SQL, Markdown, YAML, HTML, XML, and plain text. For binary formats such as `.xlsx`, `.pdf`, or `.docx`, add a preprocessor command: + +```yaml +preprocessors: + - type: xlsx + command: ["bun", "run", "scripts/preprocessors/xlsx-to-csv.ts"] + +tests: + - id: spreadsheet-output + criteria: Output includes the revenue rows + input: Generate the spreadsheet report + assertions: + - name: spreadsheet-check + type: llm-grader + prompt: | + Check whether the transformed spreadsheet text contains the revenue rows: + + {{ output }} +``` + +`type` accepts either a short alias such as `xlsx` or a full MIME type such as `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`. + +Resolution order: + +- per-evaluator `preprocessors` override suite-level entries +- if no preprocessor matches, AgentV falls back to a UTF-8 text read +- if the fallback read looks binary or invalid, the grader receives a warning note instead of failing the test run + +The implicit default `llm-grader` also inherits suite-level `preprocessors`, so you can omit `assertions` and still preprocess file outputs before grading.
+ +See [`examples/features/preprocessors/`](../../../../examples/features/preprocessors/) for a runnable example with a file-producing target and a custom preprocessor script. + ## Available Context Fields TypeScript templates receive a context object with these fields: diff --git a/examples/features/README.md b/examples/features/README.md index f3eb92fa5..72d50e39d 100644 --- a/examples/features/README.md +++ b/examples/features/README.md @@ -21,6 +21,7 @@ Focused examples for specific AgentV capabilities. Find your use case below, the | [composite](composite/) | Safety gate and weighted aggregation patterns | | [threshold-evaluator](threshold-evaluator/) | Pass a test if a configurable percentage of sub-evaluators pass | | [multi-turn-conversation](multi-turn-conversation/) | Grade a multi-turn conversation with per-turn score breakdowns | +| [preprocessors](preprocessors/) | Convert `ContentFile` outputs into grader-readable text before `llm-grader` runs | --- @@ -159,6 +160,7 @@ Focused examples for specific AgentV capabilities. 
Find your use case below, the | [matrix-evaluation](matrix-evaluation/) | Benchmarking | | [multi-turn-conversation](multi-turn-conversation/) | LLM grading | | [nlp-metrics](nlp-metrics/) | Deterministic assertions | +| [preprocessors](preprocessors/) | LLM grading | | [prompt-template-sdk](prompt-template-sdk/) | TypeScript SDK | | [repo-lifecycle](repo-lifecycle/) | Workspace & targets | | [rubric](rubric/) | LLM grading | diff --git a/examples/features/preprocessors/.agentv/providers/file-output.ts b/examples/features/preprocessors/.agentv/providers/file-output.ts new file mode 100644 index 000000000..a85b4ed61 --- /dev/null +++ b/examples/features/preprocessors/.agentv/providers/file-output.ts @@ -0,0 +1,30 @@ +import { mkdirSync, writeFileSync } from 'node:fs'; +import path from 'node:path'; + +const outputFile = process.argv[2]; +if (!outputFile) { + throw new Error('missing output file path'); +} + +const generatedDir = path.join(process.cwd(), 'generated'); +mkdirSync(generatedDir, { recursive: true }); +writeFileSync(path.join(generatedDir, 'report.xlsx'), Buffer.from([0, 159, 146, 150])); + +writeFileSync( + outputFile, + JSON.stringify({ + output: [ + { + role: 'assistant', + content: [ + { + type: 'file', + media_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + path: 'generated/report.xlsx', + }, + ], + }, + ], + }), + 'utf8', +); diff --git a/examples/features/preprocessors/.agentv/providers/grader-check.ts b/examples/features/preprocessors/.agentv/providers/grader-check.ts new file mode 100644 index 000000000..130991ffc --- /dev/null +++ b/examples/features/preprocessors/.agentv/providers/grader-check.ts @@ -0,0 +1,30 @@ +import { readFileSync, writeFileSync } from 'node:fs'; + +const promptFile = process.argv[2]; +const outputFile = process.argv[3]; + +if (!promptFile || !outputFile) { + throw new Error('missing args'); +} + +const prompt = readFileSync(promptFile, 'utf8'); +const passed = prompt.includes('spreadsheet: 
revenue,total') && prompt.includes('Q1,42'); + +writeFileSync( + outputFile, + JSON.stringify({ + text: JSON.stringify({ + score: passed ? 1 : 0, + assertions: [ + { + text: 'preprocessed file content reached the llm grader', + passed, + evidence: passed + ? 'found transformed spreadsheet text in prompt' + : 'transformed spreadsheet text missing from prompt', + }, + ], + }), + }), + 'utf8', +); diff --git a/examples/features/preprocessors/.agentv/targets.yaml b/examples/features/preprocessors/.agentv/targets.yaml new file mode 100644 index 000000000..4afc8a559 --- /dev/null +++ b/examples/features/preprocessors/.agentv/targets.yaml @@ -0,0 +1,12 @@ +$schema: agentv-targets-v2.2 +targets: + - name: file_output + provider: file-output + command: bun run .agentv/providers/file-output.ts {OUTPUT_FILE} + cwd: .. + grader_target: grader_check + + - name: grader_check + provider: grader-check + command: bun run .agentv/providers/grader-check.ts {PROMPT_FILE} {OUTPUT_FILE} + cwd: .. diff --git a/examples/features/preprocessors/README.md b/examples/features/preprocessors/README.md new file mode 100644 index 000000000..ad095b2cb --- /dev/null +++ b/examples/features/preprocessors/README.md @@ -0,0 +1,27 @@ +# Content Preprocessors + +Demonstrates how `llm-grader` preprocessors turn `ContentFile` outputs into text before grading. + +## What This Shows + +- top-level `preprocessors:` shared by all graders in an eval +- an agent target returning a `ContentFile` block instead of plain text +- an `llm-grader` receiving transformed spreadsheet text +- relative `ContentFile.path` resolution against the target workspace + +## Running + +```bash +# From repository root +bun apps/cli/src/cli.ts eval examples/features/preprocessors/evals/dataset.eval.yaml --target file_output +``` + +Expected result: the eval passes because the grader sees the transformed spreadsheet text from `generated/report.xlsx`. 
+ +## Key Files + +- `evals/dataset.eval.yaml` - eval with top-level `preprocessors` +- `.agentv/targets.yaml` - custom file-producing target and custom grader target +- `.agentv/providers/file-output.ts` - emits a relative `ContentFile` path +- `.agentv/providers/grader-check.ts` - passes only when transformed text reaches the grader prompt +- `scripts/preprocessors/xlsx-to-csv.ts` - example spreadsheet preprocessor script diff --git a/examples/features/preprocessors/evals/dataset.eval.yaml b/examples/features/preprocessors/evals/dataset.eval.yaml new file mode 100644 index 000000000..af4f0efb3 --- /dev/null +++ b/examples/features/preprocessors/evals/dataset.eval.yaml @@ -0,0 +1,17 @@ +description: Convert file outputs to grader-readable text before llm grading + +preprocessors: + - type: xlsx + command: ["bun", "run", "../scripts/preprocessors/xlsx-to-csv.ts"] + +tests: + - id: spreadsheet-output + input: Generate the spreadsheet report + criteria: The extracted spreadsheet content includes the revenue rows + assertions: + - name: file-check + type: llm-grader + prompt: | + Check whether the answer contains the transformed spreadsheet text: + + {{ output }} diff --git a/examples/features/preprocessors/generated/report.xlsx b/examples/features/preprocessors/generated/report.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e1bcee3ce8fad5f15f99dd2d30d5aabde3f1a546 GIT binary patch literal 4 LcmZRWKWQ2O1DgTJ literal 0 HcmV?d00001 diff --git a/examples/features/preprocessors/scripts/preprocessors/xlsx-to-csv.ts b/examples/features/preprocessors/scripts/preprocessors/xlsx-to-csv.ts new file mode 100644 index 000000000..c1dd4860f --- /dev/null +++ b/examples/features/preprocessors/scripts/preprocessors/xlsx-to-csv.ts @@ -0,0 +1,12 @@ +import { readFileSync } from 'node:fs'; + +const payload = JSON.parse(readFileSync(0, 'utf8')) as { path?: string }; + +if (!payload.path) { + throw new Error('missing file path'); +} + +// Example-only 
placeholder transformation. Copy this script into your project +// and replace it with real spreadsheet extraction logic. +console.log('spreadsheet: revenue,total'); +console.log('Q1,42'); From d7becdc87aef99d95b4e09cf405ae390e3c9d493 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 8 Apr 2026 04:03:23 +0000 Subject: [PATCH 5/5] chore(examples): remove generated preprocessor artifact --- .../features/preprocessors/generated/report.xlsx | Bin 4 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 examples/features/preprocessors/generated/report.xlsx diff --git a/examples/features/preprocessors/generated/report.xlsx b/examples/features/preprocessors/generated/report.xlsx deleted file mode 100644 index e1bcee3ce8fad5f15f99dd2d30d5aabde3f1a546..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4 LcmZRWKWQ2O1DgTJ