From 0bec98a116644517251b0e37634b22e5634da1ef Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 13 Apr 2026 22:20:39 +0000 Subject: [PATCH 1/3] =?UTF-8?q?fix(validate):=20smarter=20validation=20?= =?UTF-8?q?=E2=80=94=20cases=20support,=20.eval.yaml=20enforcement,=20sche?= =?UTF-8?q?ma=20sync,=20custom=20assertion=20discovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - FileType: add 'cases' type; directory scanner only picks up *.eval.yaml - file-type.ts: YAML array root → 'cases'; require .eval.yaml suffix for eval inference - cases-validator.ts: new validateCasesFile() validates YAML array test case files - eval-validator.ts: sync KNOWN_*_FIELDS with yaml-parser.ts (add preprocessors, category, rubrics); add DEPRECATED maps with migration hints for evaluator/assert/eval_cases/expected_outcome; discover custom assertion types from .agentv/assertions/ to suppress false-positive warnings; remove redundant 'note' field - eval-file.schema.ts: remove 'note' field from Zod schema - examples: rename tool-eval-demo.yaml → .eval.yaml; fix trace-file-demo (description → YAML comments); fix basic-jsonl (remove deprecated evaluator); fix prompt-template-sdk (reference_answer → expected_output) - agentv validate examples/: 68/68 valid, 0 warnings Co-Authored-By: Claude Sonnet 4.6 --- .../src/commands/validate/validate-files.ts | 28 +- .../basic-jsonl/evals/dataset.eval.yaml | 2 - .../evals/dataset.eval.yaml | 10 +- .../evals/trace-file-demo.eval.yaml | 36 +- .../tool-evaluation-plugins/README.md | 2 +- ...val-demo.yaml => tool-eval-demo.eval.yaml} | 2 +- .../evaluation/validation/cases-validator.ts | 98 + .../evaluation/validation/eval-file.schema.ts | 1 - .../evaluation/validation/eval-validator.ts | 91 +- .../src/evaluation/validation/file-type.ts | 15 +- .../core/src/evaluation/validation/index.ts | 1 + .../core/src/evaluation/validation/types.ts | 2 +- .../validation/eval-validator.test.ts | 37 +- .../references/eval-schema.json | 2995 +++++++++++++---- 14 files changed, 2672 insertions(+), 648 deletions(-) rename examples/showcase/tool-evaluation-plugins/{tool-eval-demo.yaml => tool-eval-demo.eval.yaml} (98%) create mode 100644 packages/core/src/evaluation/validation/cases-validator.ts diff --git a/apps/cli/src/commands/validate/validate-files.ts b/apps/cli/src/commands/validate/validate-files.ts index b00c27b47..97543bac4 100644 --- a/apps/cli/src/commands/validate/validate-files.ts +++ b/apps/cli/src/commands/validate/validate-files.ts @@ -5,6 +5,7 @@ import { type ValidationResult, type ValidationSummary, detectFileType, + validateCasesFile, validateConfigFile, validateEvalFile, validateFileReferences, @@ -58,10 +59,27 @@ async function validateSingleFile(filePath: string): Promise { }; } } + } else if (fileType === 'cases') { + result = await validateCasesFile(absolutePath); } else if (fileType === 'targets') { result = await validateTargetsFile(absolutePath); - } else { + } else if (fileType === 'config') { result = await validateConfigFile(absolutePath); + } else { + // Unknown file type — skip validation, report as skipped + result = { + valid: true, + filePath: absolutePath, + fileType: 'unknown', + errors: [ + { + severity: 'warning', + filePath: absolutePath, + message: + 'File type not recognized. Eval files must end in .eval.yaml. Skipping validation.', + }, + ], + }; } return result; @@ -130,7 +148,7 @@ async function findYamlFiles(dirPath: string): Promise { } const subFiles = await findYamlFiles(fullPath); results.push(...subFiles); - } else if (entry.isFile() && isYamlFile(entry.name)) { + } else if (entry.isFile() && isEvalYamlFile(entry.name)) { results.push(fullPath); } } @@ -145,3 +163,9 @@ function isYamlFile(filePath: string): boolean { const ext = path.extname(filePath).toLowerCase(); return ext === '.yaml' || ext === '.yml'; } + +/** Returns true only for *.eval.yaml / *.eval.yml files (used for directory scanning). */ +function isEvalYamlFile(filePath: string): boolean { + const lower = path.basename(filePath).toLowerCase(); + return lower.endsWith('.eval.yaml') || lower.endsWith('.eval.yml'); +} diff --git a/examples/features/basic-jsonl/evals/dataset.eval.yaml b/examples/features/basic-jsonl/evals/dataset.eval.yaml index c226536db..508c684c0 100644 --- a/examples/features/basic-jsonl/evals/dataset.eval.yaml +++ b/examples/features/basic-jsonl/evals/dataset.eval.yaml @@ -7,6 +7,4 @@ name: basic-jsonl execution: target: llm -evaluator: llm_grader - tests: ./dataset.jsonl diff --git a/examples/features/prompt-template-sdk/evals/dataset.eval.yaml b/examples/features/prompt-template-sdk/evals/dataset.eval.yaml index 6917de1bd..81e221837 100644 --- a/examples/features/prompt-template-sdk/evals/dataset.eval.yaml +++ b/examples/features/prompt-template-sdk/evals/dataset.eval.yaml @@ -18,8 +18,9 @@ tests: - type: text value: What are the main benefits of TypeScript over JavaScript? - reference_answer: |- - TypeScript provides static type checking, better IDE support, and improved maintainability. + expected_output: + - role: assistant + content: TypeScript provides static type checking, better IDE support, and improved maintainability. assertions: - name: custom-prompt-eval @@ -37,8 +38,9 @@ tests: - type: text value: Explain async/await in JavaScript. - reference_answer: |- - Async/await is syntactic sugar over Promises that makes asynchronous code look synchronous. + expected_output: + - role: assistant + content: Async/await is syntactic sugar over Promises that makes asynchronous code look synchronous. assertions: - name: strict-eval diff --git a/examples/features/tool-trajectory-advanced/evals/trace-file-demo.eval.yaml b/examples/features/tool-trajectory-advanced/evals/trace-file-demo.eval.yaml index 07609020a..a978cee75 100644 --- a/examples/features/tool-trajectory-advanced/evals/trace-file-demo.eval.yaml +++ b/examples/features/tool-trajectory-advanced/evals/trace-file-demo.eval.yaml @@ -23,10 +23,8 @@ tests: # Expected score: 1.0 (webSearch before fetchPage is satisfied) # ============================================================================= - id: in-order-validation - description: |- - Validates that the agent performs web search before fetching page details. - Mode 'in_order' allows other tool calls between expected tools. - + # Validates that the agent performs web search before fetching page details. + # Mode 'in_order' allows other tool calls between expected tools. criteria: |- Agent searches for product information, then fetches detailed specs. @@ -49,10 +47,8 @@ tests: # Expected score: 1.0 (matches full trace exactly) # ============================================================================= - id: exact-sequence-validation - description: |- - Validates the exact sequence of all tool calls in the trace. - Mode 'exact' requires the trace to match precisely. - + # Validates the exact sequence of all tool calls in the trace. + # Mode 'exact' requires the trace to match precisely. criteria: |- Agent follows the exact research workflow: search, fetch, search reviews, summarize. @@ -76,10 +72,8 @@ tests: # Expected score: 1.0 (meets all minimums) # ============================================================================= - id: any-order-with-minimums - description: |- - Validates that the agent performs adequate research by checking minimum - tool call counts. Mode 'any_order' with minimums is flexible on sequence. - + # Validates that the agent performs adequate research by checking minimum + # tool call counts. Mode 'any_order' with minimums is flexible on sequence. criteria: |- Agent performs at least 2 web searches and 1 page fetch for thorough research. @@ -101,10 +95,8 @@ tests: # Expected score: 1.0 (inputs match expected patterns) # ============================================================================= - id: tool-input-validation - description: |- - Validates that tool calls include appropriate input parameters. - Useful for ensuring the agent provides correct context to tools. - + # Validates that tool calls include appropriate input parameters. + # Useful for ensuring the agent provides correct context to tools. criteria: |- Agent searches with relevant product keywords and fetches from authoritative source. @@ -130,10 +122,8 @@ tests: # Expected score: 1.0 (outputs contain expected fields) # ============================================================================= - id: tool-output-validation - description: |- - Validates that tool outputs contain expected data. - Useful for regression testing when tool behavior changes. - + # Validates that tool outputs contain expected data. + # Useful for regression testing when tool behavior changes. criteria: |- Web search returns results with links and snippets; fetch returns product specs. @@ -161,10 +151,8 @@ tests: # This mirrors patterns used in complex agent pipelines # ============================================================================= - id: combined-validation - description: |- - Production-style evaluation combining sequence validation with input/output - checks. Demonstrates a realistic multi-turn agent workflow. - + # Production-style evaluation combining sequence validation with input/output + # checks. Demonstrates a realistic multi-turn agent workflow. criteria: |- Agent performs comprehensive product research: 1. Initial web search for product specs diff --git a/examples/showcase/tool-evaluation-plugins/README.md b/examples/showcase/tool-evaluation-plugins/README.md index fe779397a..cbf91eaca 100644 --- a/examples/showcase/tool-evaluation-plugins/README.md +++ b/examples/showcase/tool-evaluation-plugins/README.md @@ -56,7 +56,7 @@ evaluators: export TOOL_EVAL_PLUGINS_DIR=$(pwd)/examples/showcase/tool-evaluation-plugins # Run the demo -npx agentv eval examples/showcase/tool-evaluation-plugins/tool-eval-demo.yaml +npx agentv eval examples/showcase/tool-evaluation-plugins/tool-eval-demo.eval.yaml ``` ## Input Contract diff --git a/examples/showcase/tool-evaluation-plugins/tool-eval-demo.yaml b/examples/showcase/tool-evaluation-plugins/tool-eval-demo.eval.yaml similarity index 98% rename from examples/showcase/tool-evaluation-plugins/tool-eval-demo.yaml rename to examples/showcase/tool-evaluation-plugins/tool-eval-demo.eval.yaml index 0dab62823..5ba64e4d2 100644 --- a/examples/showcase/tool-evaluation-plugins/tool-eval-demo.yaml +++ b/examples/showcase/tool-evaluation-plugins/tool-eval-demo.eval.yaml @@ -5,7 +5,7 @@ # semantic evaluation capabilities that require domain-specific logic. # # Run: cd examples/showcase/tool-evaluation-plugins -# npx agentv eval tool-eval-demo.yaml --target mock_agent +# npx agentv eval tool-eval-demo.eval.yaml --target mock_agent description: Showcase of tool evaluation plugin patterns diff --git a/packages/core/src/evaluation/validation/cases-validator.ts b/packages/core/src/evaluation/validation/cases-validator.ts new file mode 100644 index 000000000..d88a7a232 --- /dev/null +++ b/packages/core/src/evaluation/validation/cases-validator.ts @@ -0,0 +1,98 @@ +import { readFile } from 'node:fs/promises'; +import path from 'node:path'; +import { parse } from 'yaml'; + +import type { ValidationError, ValidationResult } from './types.js'; + +type JsonValue = string | number | boolean | null | JsonObject | JsonArray; +type JsonObject = { readonly [key: string]: JsonValue }; +type JsonArray = readonly JsonValue[]; + +function isObject(value: unknown): value is JsonObject { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +/** + * Validate a cases file — a YAML file whose root is an array of test case objects. + * + * Cases files are referenced from eval files via `tests: path/to/cases.yaml` or + * `file://cases/accuracy.yaml` entries in the tests array. Each item must have + * at least an `id` (non-empty string) and an `input` (string or array). + */ +export async function validateCasesFile(filePath: string): Promise { + const errors: ValidationError[] = []; + const absolutePath = path.resolve(filePath); + + let parsed: unknown; + try { + const content = await readFile(absolutePath, 'utf8'); + parsed = parse(content); + } catch (error) { + errors.push({ + severity: 'error', + filePath: absolutePath, + message: `Failed to parse YAML: ${(error as Error).message}`, + }); + return { valid: false, filePath: absolutePath, fileType: 'cases', errors }; + } + + if (!Array.isArray(parsed)) { + errors.push({ + severity: 'error', + filePath: absolutePath, + message: 'Cases file must contain a YAML array of test case objects', + }); + return { valid: false, filePath: absolutePath, fileType: 'cases', errors }; + } + + for (let i = 0; i < parsed.length; i++) { + const item = parsed[i]; + const location = `[${i}]`; + + if (!isObject(item)) { + errors.push({ + severity: 'error', + filePath: absolutePath, + location, + message: 'Each test case must be an object', + }); + continue; + } + + // Required: id + const id = item.id; + if (typeof id !== 'string' || id.trim().length === 0) { + errors.push({ + severity: 'error', + filePath: absolutePath, + location: `${location}.id`, + message: "Missing or invalid 'id' field (must be a non-empty string)", + }); + } + + // Required: input + const input = item.input; + if (input === undefined) { + errors.push({ + severity: 'error', + filePath: absolutePath, + location: `${location}.input`, + message: "Missing 'input' field (must be a string or array of messages)", + }); + } else if (typeof input !== 'string' && !Array.isArray(input)) { + errors.push({ + severity: 'error', + filePath: absolutePath, + location: `${location}.input`, + message: "Invalid 'input' field (must be a string or array of messages)", + }); + } + } + + return { + valid: errors.filter((e) => e.severity === 'error').length === 0, + filePath: absolutePath, + fileType: 'cases', + errors, + }; +} diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index e35f46287..7c6bbc392 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -382,7 +382,6 @@ const EvalTestSchema = z.object({ metadata: z.record(z.unknown()).optional(), conversation_id: z.string().optional(), suite: z.string().optional(), - note: z.string().optional(), depends_on: z.array(z.string()).optional(), on_dependency_failure: z.enum(['skip', 'fail', 'run']).optional(), mode: z.enum(['conversation']).optional(), diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts index 576dc9f78..0c8348ca0 100644 --- a/packages/core/src/evaluation/validation/eval-validator.ts +++ b/packages/core/src/evaluation/validation/eval-validator.ts @@ -1,4 +1,4 @@ -import { readFile } from 'node:fs/promises'; +import { readFile, readdir } from 'node:fs/promises'; import path from 'node:path'; import { parse } from 'yaml'; @@ -36,6 +36,7 @@ const KNOWN_TOP_LEVEL_FIELDS = new Set([ '$schema', 'name', 'description', + 'category', 'version', 'author', 'tags', @@ -44,14 +45,25 @@ const KNOWN_TOP_LEVEL_FIELDS = new Set([ 'input', 'input_files', 'tests', - 'eval_cases', 'target', 'execution', 'assertions', 'evaluators', + 'preprocessors', 'workspace', ]); +/** + * Deprecated top-level fields with migration hints. + * These are still processed by yaml-parser but authors should migrate. + */ +const DEPRECATED_TOP_LEVEL_FIELDS = new Map([ + ['eval_cases', "'eval_cases' is deprecated. Use 'tests' instead."], + ['evalcases', "'evalcases' is deprecated. Use 'tests' instead."], + ['evaluator', "'evaluator' is deprecated. Use 'assertions' instead."], + ['assert', "'assert' is deprecated. Use 'assertions' instead."], +]); + /** Known fields at the test level. */ const KNOWN_TEST_FIELDS = new Set([ 'id', @@ -61,12 +73,12 @@ const KNOWN_TEST_FIELDS = new Set([ 'expected_output', 'assertions', 'evaluators', + 'rubrics', 'execution', 'workspace', 'metadata', 'conversation_id', 'suite', - 'note', 'depends_on', 'on_dependency_failure', 'mode', @@ -76,9 +88,52 @@ const KNOWN_TEST_FIELDS = new Set([ 'window_size', ]); +/** + * Deprecated test-level fields with migration hints. + * These are still processed by yaml-parser but authors should migrate. + */ +const DEPRECATED_TEST_FIELDS = new Map([ + ['evaluator', "'evaluator' is deprecated. Use 'assertions' instead."], + ['assert', "'assert' is deprecated. Use 'assertions' instead."], + ['expected_outcome', "'expected_outcome' is deprecated. Use 'criteria' instead."], +]); + /** Name field pattern: lowercase alphanumeric with hyphens. */ const NAME_PATTERN = /^[a-z0-9-]+$/; +/** Script file extensions recognised as custom assertion plugins. */ +const ASSERTION_SCRIPT_EXTENSIONS = new Set(['.ts', '.js', '.mts', '.mjs', '.cts', '.cjs']); + +/** + * Walk up the directory tree from `baseDir` collecting type names from + * `.agentv/assertions/` directories — mirrors the runtime discovery in + * `assertion-discovery.ts`. + */ +async function discoverCustomAssertionTypes(baseDir: string): Promise> { + const types = new Set(); + let dir = path.resolve(baseDir); + const root = path.parse(dir).root; + + while (dir !== root) { + const assertionsDir = path.join(dir, '.agentv', 'assertions'); + try { + const entries = await readdir(assertionsDir, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isFile()) continue; + const ext = path.extname(entry.name).toLowerCase(); + if (!ASSERTION_SCRIPT_EXTENSIONS.has(ext)) continue; + const typeName = entry.name.slice(0, -ext.length); + types.add(typeName); + } + } catch { + // Directory doesn't exist — skip + } + dir = path.dirname(dir); + } + + return types; +} + function isObject(value: unknown): value is JsonObject { return typeof value === 'object' && value !== null && !Array.isArray(value); } @@ -89,6 +144,7 @@ function isObject(value: unknown): value is JsonObject { export async function validateEvalFile(filePath: string): Promise { const errors: ValidationError[] = []; const absolutePath = path.resolve(filePath); + const customAssertionTypes = await discoverCustomAssertionTypes(path.dirname(absolutePath)); let parsed: unknown; try { @@ -125,9 +181,17 @@ export async function validateEvalFile(filePath: string): Promise = new Set(), ): void { if (!Array.isArray(assertField)) { errors.push({ @@ -669,7 +742,7 @@ function validateAssertArray( // Normalize snake_case to kebab-case for backward compatibility const typeValue = rawTypeValue.replace(/_/g, '-'); - if (!isEvaluatorKind(typeValue)) { + if (!isEvaluatorKind(typeValue) && !customAssertionTypes.has(typeValue)) { errors.push({ severity: 'warning', filePath, diff --git a/packages/core/src/evaluation/validation/file-type.ts b/packages/core/src/evaluation/validation/file-type.ts index 46e87bef5..464610c61 100644 --- a/packages/core/src/evaluation/validation/file-type.ts +++ b/packages/core/src/evaluation/validation/file-type.ts @@ -20,6 +20,11 @@ export async function detectFileType(filePath: string): Promise { const content = await readFile(filePath, 'utf8'); const parsed = parse(content) as unknown; + // YAML array root → cases file (array of test case objects) + if (Array.isArray(parsed)) { + return 'cases'; + } + if (typeof parsed !== 'object' || parsed === null) { return inferFileTypeFromPath(filePath); } @@ -65,8 +70,14 @@ function inferFileTypeFromPath(filePath: string): FileType { } } - // Default to eval file - return 'eval'; + // Require .eval.yaml / .eval.yml suffix for eval files + const lower = basename.toLowerCase(); + if (lower.endsWith('.eval.yaml') || lower.endsWith('.eval.yml')) { + return 'eval'; + } + + // Unrecognized — do not assume eval type + return 'unknown'; } /** diff --git a/packages/core/src/evaluation/validation/index.ts b/packages/core/src/evaluation/validation/index.ts index 9347974f3..f5874ee9d 100644 --- a/packages/core/src/evaluation/validation/index.ts +++ b/packages/core/src/evaluation/validation/index.ts @@ -4,6 +4,7 @@ export { detectFileType, isValidSchema, getExpectedSchema } from './file-type.js'; export { validateEvalFile } from './eval-validator.js'; +export { validateCasesFile } from './cases-validator.js'; export { validateTargetsFile } from './targets-validator.js'; export { validateConfigFile } from './config-validator.js'; export { validateFileReferences } from './file-reference-validator.js'; diff --git a/packages/core/src/evaluation/validation/types.ts b/packages/core/src/evaluation/validation/types.ts index b49a0bed8..8aafe5aee 100644 --- a/packages/core/src/evaluation/validation/types.ts +++ b/packages/core/src/evaluation/validation/types.ts @@ -2,7 +2,7 @@ * Validation result types for AgentV file validation. */ -export type FileType = 'eval' | 'targets' | 'config' | 'unknown'; +export type FileType = 'eval' | 'targets' | 'config' | 'cases' | 'unknown'; export type ValidationSeverity = 'error' | 'warning'; diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts index f6b819479..7085c5fa5 100644 --- a/packages/core/test/evaluation/validation/eval-validator.test.ts +++ b/packages/core/test/evaluation/validation/eval-validator.test.ts @@ -993,7 +993,6 @@ tests: value: "world" metadata: tag: test - note: A note `, ); @@ -1051,8 +1050,8 @@ tests: }); describe('removed legacy fields', () => { - it('warns on expected_outcome as unknown field', async () => { - const filePath = path.join(tempDir, 'expected-outcome-unknown.yaml'); + it('warns on expected_outcome as deprecated field', async () => { + const filePath = path.join(tempDir, 'expected-outcome-deprecated.yaml'); await writeFile( filePath, `tests: @@ -1068,13 +1067,17 @@ tests: expect(result.valid).toBe(true); const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((e) => e.message.includes("Unknown field 'expected_outcome'"))).toBe( - true, - ); + expect( + warnings.some( + (e) => + e.message.includes("'expected_outcome' is deprecated") && + e.message.includes("'criteria'"), + ), + ).toBe(true); }); - it('warns on assert as unknown field at test level', async () => { - const filePath = path.join(tempDir, 'assert-unknown.yaml'); + it('warns on assert as deprecated field at test level', async () => { + const filePath = path.join(tempDir, 'assert-deprecated.yaml'); await writeFile( filePath, `tests: @@ -1090,11 +1093,16 @@ tests: expect(result.valid).toBe(true); const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((e) => e.message.includes("Unknown field 'assert'"))).toBe(true); + expect( + warnings.some( + (e) => + e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), + ), + ).toBe(true); }); - it('warns on assert as unknown field at top level', async () => { - const filePath = path.join(tempDir, 'assert-top-unknown.yaml'); + it('warns on assert as deprecated field at top level', async () => { + const filePath = path.join(tempDir, 'assert-top-deprecated.yaml'); await writeFile( filePath, `assert: @@ -1109,7 +1117,12 @@ tests: const result = await validateEvalFile(filePath); const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((e) => e.message.includes("Unknown field 'assert'"))).toBe(true); + expect( + warnings.some( + (e) => + e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), + ), + ).toBe(true); }); }); }); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 80dc2ebd8..d2b30313d 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -56,7 +56,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -70,20 +75,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -121,7 +136,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -135,20 +155,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -176,7 +206,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -190,20 +225,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -247,7 +292,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -321,12 +369,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -363,7 +417,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -458,7 +515,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -509,12 +569,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -525,7 +590,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -588,7 +655,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -604,7 +673,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -621,7 +693,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -638,13 +713,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -681,11 +761,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -726,7 +815,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -740,7 +834,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -751,7 +850,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -759,7 +860,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -773,7 +879,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -784,7 +895,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -821,7 +935,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -833,7 +950,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -855,17 +976,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -909,7 +1039,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -953,7 +1086,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -990,7 +1126,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -1005,7 +1144,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1042,7 +1183,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -1074,7 +1218,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1117,7 +1263,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1160,7 +1309,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1197,10 +1349,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1243,7 +1400,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -1332,7 +1492,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1342,7 +1505,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -1386,7 +1552,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -1460,12 +1629,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -1502,7 +1677,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -1597,7 +1775,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -1648,12 +1829,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1664,7 +1850,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -1727,7 +1915,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -1743,7 +1933,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -1760,7 +1953,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -1777,13 +1973,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -1820,11 +2021,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -1865,7 +2075,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1879,7 +2094,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1890,7 +2110,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -1898,7 +2120,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1912,7 +2139,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -1923,7 +2155,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -1960,7 +2195,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -1972,7 +2210,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -1994,17 +2236,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -2048,7 +2299,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -2092,7 +2346,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -2129,7 +2386,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -2144,7 +2404,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2181,7 +2443,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -2213,7 +2478,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2256,7 +2523,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2299,7 +2569,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2336,10 +2609,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2382,7 +2660,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -2471,7 +2752,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2481,7 +2765,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -2542,7 +2829,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -2616,12 +2906,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -2658,7 +2954,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -2753,7 +3052,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -2804,12 +3106,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2820,7 +3127,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -2883,7 +3192,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -2899,7 +3210,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -2916,7 +3230,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -2933,13 +3250,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -2976,11 +3298,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -3021,7 +3352,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3035,7 +3371,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3046,7 +3387,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -3054,7 +3397,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3068,7 +3416,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -3079,7 +3432,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -3116,7 +3472,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -3128,7 +3487,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -3150,17 +3513,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -3204,7 +3576,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -3248,7 +3623,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -3285,7 +3663,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -3300,7 +3681,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3337,7 +3720,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -3369,7 +3755,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3412,7 +3800,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3455,7 +3846,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3492,10 +3886,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3538,7 +3937,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -3627,7 +4029,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3637,7 +4042,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -3681,7 +4089,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -3755,12 +4166,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -3797,7 +4214,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -3892,7 +4312,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -3943,12 +4366,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -3959,7 +4387,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -4022,7 +4452,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4038,7 +4470,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4055,7 +4490,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -4072,13 +4510,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -4115,11 +4558,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -4160,7 +4612,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4174,7 +4631,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4185,7 +4647,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -4193,7 +4657,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4207,7 +4676,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -4218,7 +4692,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -4255,7 +4732,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -4267,7 +4747,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -4289,17 +4773,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -4343,7 +4836,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -4387,7 +4883,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -4424,7 +4923,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -4439,7 +4941,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4476,7 +4980,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -4508,7 +5015,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4551,7 +5060,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4594,7 +5106,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4631,10 +5146,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -4677,7 +5197,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -4766,7 +5289,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -4776,7 +5302,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -4797,7 +5326,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -4808,7 +5341,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -4841,7 +5376,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -4865,7 +5403,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -4879,7 +5420,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -4896,7 +5440,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -4960,7 +5507,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -4991,7 +5542,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5022,7 +5577,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5053,7 +5612,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -5063,7 +5626,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -5086,7 +5653,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -5102,9 +5671,6 @@ "suite": { "type": "string" }, - "note": { - "type": "string" - }, "depends_on": { "type": "array", "items": { @@ -5113,11 +5679,17 @@ }, "on_dependency_failure": { "type": "string", - "enum": ["skip", "fail", "run"] + "enum": [ + "skip", + "fail", + "run" + ] }, "mode": { "type": "string", - "enum": ["conversation"] + "enum": [ + "conversation" + ] }, "turns": { "type": "array", @@ -5141,13 +5713,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -5172,13 +5751,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -5229,7 +5815,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -5303,12 +5892,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -5345,7 +5940,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -5440,7 +6038,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -5491,12 +6092,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5507,7 +6113,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -5570,7 +6178,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -5586,7 +6196,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5603,7 +6216,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -5620,13 +6236,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -5663,7 +6284,10 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", @@ -5714,7 +6338,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5728,7 +6357,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5739,7 +6373,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -5747,7 +6383,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5761,7 +6402,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -5772,7 +6418,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -5809,7 +6458,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -5821,7 +6473,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -5843,17 +6499,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -5897,7 +6562,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -5941,7 +6609,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -5978,7 +6649,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -5993,7 +6667,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6030,7 +6706,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -6062,7 +6741,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6105,7 +6786,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6148,7 +6832,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6185,10 +6872,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6231,7 +6923,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -6320,7 +7015,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6330,7 +7028,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -6339,25 +7040,36 @@ } } }, - "required": ["input"], + "required": [ + "input" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["mean", "min", "max"] + "enum": [ + "mean", + "min", + "max" + ] }, "on_turn_failure": { "type": "string", - "enum": ["continue", "stop"] + "enum": [ + "continue", + "stop" + ] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -6392,7 +7104,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -6406,20 +7123,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -6447,7 +7174,12 @@ "properties": { "role": { "type": "string", - "enum": ["system", "user", "assistant", "tool"] + "enum": [ + "system", + "user", + "assistant", + "tool" + ] }, "content": { "anyOf": [ @@ -6461,20 +7193,30 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } ] } }, - "required": ["role", "content"], + "required": [ + "role", + "content" + ], "additionalProperties": false } } @@ -6518,7 +7260,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -6592,12 +7337,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -6634,7 +7385,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -6729,7 +7483,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -6780,12 +7537,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6796,7 +7558,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -6859,7 +7623,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -6875,7 +7641,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -6892,7 +7661,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -6909,13 +7681,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -6952,11 +7729,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -6997,7 +7783,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7011,7 +7802,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7022,7 +7818,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -7030,7 +7828,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7044,7 +7847,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -7055,7 +7863,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -7092,7 +7903,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -7104,7 +7918,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -7126,17 +7944,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -7180,7 +8007,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -7224,7 +8054,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -7261,7 +8094,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -7276,7 +8112,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7313,7 +8151,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -7345,7 +8186,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7388,7 +8231,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7431,7 +8277,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7468,10 +8317,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7514,7 +8368,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -7603,7 +8460,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7613,7 +8473,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -7657,7 +8520,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -7731,12 +8597,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -7773,7 +8645,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -7868,7 +8743,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -7919,12 +8797,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -7935,7 +8818,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -7998,7 +8883,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8014,7 +8901,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8031,7 +8921,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -8048,13 +8941,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -8091,11 +8989,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -8136,7 +9043,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8150,7 +9062,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8161,7 +9078,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -8169,7 +9088,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8183,7 +9107,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -8194,7 +9123,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -8231,7 +9163,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -8243,7 +9178,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -8265,17 +9204,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -8319,7 +9267,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -8363,7 +9314,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -8400,7 +9354,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -8415,7 +9372,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8452,7 +9411,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -8484,7 +9446,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8527,7 +9491,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8570,7 +9537,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8607,10 +9577,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -8653,7 +9628,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -8742,7 +9720,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -8752,7 +9733,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -8813,7 +9797,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -8887,12 +9874,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -8929,7 +9922,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -9024,7 +10020,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9075,12 +10074,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9091,7 +10095,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -9154,7 +10160,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9170,7 +10178,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9187,7 +10198,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -9204,13 +10218,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -9247,11 +10266,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -9292,7 +10320,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9306,7 +10339,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9317,7 +10355,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -9325,7 +10365,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9339,7 +10384,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -9350,7 +10400,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -9387,7 +10440,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -9399,7 +10455,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -9421,17 +10481,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -9475,7 +10544,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -9519,7 +10591,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -9556,7 +10631,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -9571,7 +10649,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9608,7 +10688,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -9640,7 +10723,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9683,7 +10768,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9726,7 +10814,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9763,10 +10854,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -9809,7 +10905,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -9898,7 +10997,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -9908,7 +11010,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -9952,7 +11057,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -10026,12 +11134,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -10068,7 +11182,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -10163,7 +11280,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -10214,12 +11334,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10230,7 +11355,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -10293,7 +11420,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10309,7 +11438,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10326,7 +11458,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -10343,13 +11478,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -10386,11 +11526,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -10431,7 +11580,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10445,7 +11599,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10456,7 +11615,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -10464,7 +11625,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10478,7 +11644,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -10489,7 +11660,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -10526,7 +11700,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -10538,7 +11715,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -10560,17 +11741,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -10614,7 +11804,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -10658,7 +11851,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -10695,7 +11891,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -10710,7 +11909,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10747,7 +11948,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -10779,7 +11983,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10822,7 +12028,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10865,7 +12074,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -10902,10 +12114,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -10948,7 +12165,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -11037,7 +12257,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11047,7 +12270,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -11068,7 +12294,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -11079,7 +12309,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -11112,7 +12344,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -11136,7 +12371,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -11150,7 +12388,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -11167,7 +12408,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -11231,7 +12475,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11262,7 +12510,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11293,7 +12545,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11324,7 +12580,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -11334,7 +12594,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -11357,7 +12621,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -11373,9 +12639,6 @@ "suite": { "type": "string" }, - "note": { - "type": "string" - }, "depends_on": { "type": "array", "items": { @@ -11384,11 +12647,17 @@ }, "on_dependency_failure": { "type": "string", - "enum": ["skip", "fail", "run"] + "enum": [ + "skip", + "fail", + "run" + ] }, "mode": { "type": "string", - "enum": ["conversation"] + "enum": [ + "conversation" + ] }, "turns": { "type": "array", @@ -11412,13 +12681,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -11443,13 +12719,20 @@ "properties": { "type": { "type": "string", - "enum": ["text", "file", "image"] + "enum": [ + "text", + "file", + "image" + ] }, "value": { "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false } } @@ -11500,7 +12783,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -11574,12 +12860,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -11616,7 +12908,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -11711,7 +13006,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -11762,12 +13060,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11778,7 +13081,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -11841,7 +13146,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -11857,7 +13164,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -11874,7 +13184,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -11891,13 +13204,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -11934,7 +13252,10 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", @@ -11985,7 +13306,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -11999,7 +13325,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12010,7 +13341,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -12018,7 +13351,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12032,7 +13370,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -12043,7 +13386,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -12080,7 +13426,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -12092,7 +13441,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -12114,17 +13467,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -12168,7 +13530,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -12212,7 +13577,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -12249,7 +13617,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -12264,7 +13635,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12301,7 +13674,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -12333,7 +13709,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12376,7 +13754,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12419,7 +13800,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12456,10 +13840,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12502,7 +13891,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -12591,7 +13983,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12601,7 +13996,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -12610,25 +14008,36 @@ } } }, - "required": ["input"], + "required": [ + "input" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["mean", "min", "max"] + "enum": [ + "mean", + "min", + "max" + ] }, "on_turn_failure": { "type": "string", - "enum": ["continue", "stop"] + "enum": [ + "continue", + "stop" + ] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": ["id"], + "required": [ + "id" + ], "additionalProperties": false } }, @@ -12695,7 +14104,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -12769,12 +14181,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -12811,7 +14229,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -12906,7 +14327,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -12957,12 +14381,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -12973,7 +14402,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -13036,7 +14467,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13052,7 +14485,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -13069,7 +14505,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -13086,13 +14525,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -13129,11 +14573,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -13174,7 +14627,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13188,7 +14646,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13199,7 +14662,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -13207,7 +14672,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13221,7 +14691,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -13232,7 +14707,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -13269,7 +14747,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -13281,7 +14762,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -13303,17 +14788,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -13357,7 +14851,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -13401,7 +14898,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -13438,7 +14938,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -13453,7 +14956,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13490,7 +14995,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -13522,7 +15030,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13565,7 +15075,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13608,7 +15121,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13645,10 +15161,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -13691,7 +15212,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -13780,7 +15304,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -13790,7 +15317,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -13834,7 +15364,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -13908,12 +15441,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -13950,7 +15489,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -14045,7 +15587,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -14096,12 +15641,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14112,7 +15662,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -14175,7 +15727,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14191,7 +15745,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14208,7 +15765,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -14225,13 +15785,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -14268,11 +15833,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -14313,7 +15887,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14327,7 +15906,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14338,7 +15922,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -14346,7 +15932,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14360,7 +15951,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -14371,7 +15967,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -14408,7 +16007,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -14420,7 +16022,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -14442,17 +16048,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -14496,7 +16111,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -14540,7 +16158,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -14577,7 +16198,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -14592,7 +16216,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14629,7 +16255,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -14661,7 +16290,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14704,7 +16335,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14747,7 +16381,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14784,10 +16421,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -14830,7 +16472,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -14919,7 +16564,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -14929,7 +16577,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -14950,7 +16601,11 @@ }, "strategy": { "type": "string", - "enum": ["pass_at_k", "mean", "confidence_interval"] + "enum": [ + "pass_at_k", + "mean", + "confidence_interval" + ] }, "cost_limit_usd": { "type": "number", @@ -14961,7 +16616,9 @@ "minimum": 0 } }, - "required": ["count"], + "required": [ + "count" + ], "additionalProperties": false }, "total_budget_usd": { @@ -15024,7 +16681,10 @@ }, "type": { "type": "string", - "enum": ["code-grader", "code_grader"] + "enum": [ + "code-grader", + "code_grader" + ] }, "command": { "anyOf": [ @@ -15098,12 +16758,18 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false }, { @@ -15140,7 +16806,10 @@ }, "type": { "type": "string", - "enum": ["llm-grader", "llm_grader"] + "enum": [ + "llm-grader", + "llm_grader" + ] }, "prompt": { "anyOf": [ @@ -15235,7 +16904,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -15286,12 +16958,17 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15302,7 +16979,9 @@ "minLength": 1 } }, - "required": ["include"], + "required": [ + "include" + ], "additionalProperties": false }, { @@ -15365,7 +17044,9 @@ } } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15381,7 +17062,10 @@ "maximum": 1 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -15398,7 +17082,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false }, { @@ -15415,13 +17102,18 @@ "type": "string" } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false } ] } }, - "required": ["type", "aggregator"], + "required": [ + "type", + "aggregator" + ], "additionalProperties": false }, { @@ -15458,11 +17150,20 @@ }, "type": { "type": "string", - "enum": ["tool-trajectory", "tool_trajectory"] + "enum": [ + "tool-trajectory", + "tool_trajectory" + ] }, "mode": { "type": "string", - "enum": ["any_order", "in_order", "exact", "subset", "superset"] + "enum": [ + "any_order", + "in_order", + "exact", + "subset", + "superset" + ] }, "minimums": { "type": "object", @@ -15503,7 +17204,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15517,7 +17223,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15528,7 +17239,9 @@ ] } }, - "required": ["tool"], + "required": [ + "tool" + ], "additionalProperties": false } }, @@ -15536,7 +17249,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15550,7 +17268,12 @@ "anyOf": [ { "type": "string", - "enum": ["exact", "ignore", "subset", "superset"] + "enum": [ + "exact", + "ignore", + "subset", + "superset" + ] }, { "type": "array", @@ -15561,7 +17284,10 @@ ] } }, - "required": ["type", "mode"], + "required": [ + "type", + "mode" + ], "additionalProperties": false }, { @@ -15598,7 +17324,10 @@ }, "type": { "type": "string", - "enum": ["field-accuracy", "field_accuracy"] + "enum": [ + "field-accuracy", + "field_accuracy" + ] }, "fields": { "type": "array", @@ -15610,7 +17339,11 @@ }, "match": { "type": "string", - "enum": ["exact", "numeric_tolerance", "date"] + "enum": [ + "exact", + "numeric_tolerance", + "date" + ] }, "required": { "type": "boolean" @@ -15632,17 +17365,26 @@ } } }, - "required": ["path", "match"], + "required": [ + "path", + "match" + ], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": ["weighted_average", "all_or_nothing"] + "enum": [ + "weighted_average", + "all_or_nothing" + ] } }, - "required": ["type", "fields"], + "required": [ + "type", + "fields" + ], "additionalProperties": false }, { @@ -15686,7 +17428,10 @@ "minimum": 0 } }, - "required": ["type", "threshold"], + "required": [ + "type", + "threshold" + ], "additionalProperties": false }, { @@ -15730,7 +17475,10 @@ "minimum": 0 } }, - "required": ["type", "budget"], + "required": [ + "type", + "budget" + ], "additionalProperties": false }, { @@ -15767,7 +17515,10 @@ }, "type": { "type": "string", - "enum": ["token-usage", "token_usage"] + "enum": [ + "token-usage", + "token_usage" + ] }, "max_total": { "type": "number", @@ -15782,7 +17533,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15819,7 +17572,10 @@ }, "type": { "type": "string", - "enum": ["execution-metrics", "execution_metrics"] + "enum": [ + "execution-metrics", + "execution_metrics" + ] }, "max_tool_calls": { "type": "number", @@ -15851,7 +17607,9 @@ "minimum": 0 } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -15894,7 +17652,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15937,7 +17698,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -15974,10 +17738,15 @@ }, "type": { "type": "string", - "enum": ["is-json", "is_json"] + "enum": [ + "is-json", + "is_json" + ] } }, - "required": ["type"], + "required": [ + "type" + ], "additionalProperties": false }, { @@ -16020,7 +17789,10 @@ "type": "string" } }, - "required": ["type", "value"], + "required": [ + "type", + "value" + ], "additionalProperties": false }, { @@ -16109,7 +17881,10 @@ "minLength": 1 } }, - "required": ["score_range", "outcome"], + "required": [ + "score_range", + "outcome" + ], "additionalProperties": false } } @@ -16119,7 +17894,10 @@ "minItems": 1 } }, - "required": ["type", "criteria"], + "required": [ + "type", + "criteria" + ], "additionalProperties": false } ] @@ -16148,7 +17926,10 @@ ] } }, - "required": ["type", "command"], + "required": [ + "type", + "command" + ], "additionalProperties": false } }, @@ -16162,7 +17943,10 @@ }, "isolation": { "type": "string", - "enum": ["shared", "per_test"] + "enum": [ + "shared", + "per_test" + ] }, "repos": { "type": "array", @@ -16186,7 +17970,10 @@ "format": "uri" } }, - "required": ["type", "url"], + "required": [ + "type", + "url" + ], "additionalProperties": false }, { @@ -16200,7 +17987,10 @@ "type": "string" } }, - "required": ["type", "path"], + "required": [ + "type", + "path" + ], "additionalProperties": false } ] @@ -16217,7 +18007,10 @@ }, "resolve": { "type": "string", - "enum": ["remote", "local"] + "enum": [ + "remote", + "local" + ] }, "ancestor": { "type": "integer", @@ -16281,7 +18074,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -16312,7 +18109,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -16343,7 +18144,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -16374,7 +18179,11 @@ }, "reset": { "type": "string", - "enum": ["none", "fast", "strict"] + "enum": [ + "none", + "fast", + "strict" + ] } }, "additionalProperties": false @@ -16384,7 +18193,11 @@ }, "mode": { "type": "string", - "enum": ["pooled", "temp", "static"] + "enum": [ + "pooled", + "temp", + "static" + ] }, "path": { "type": "string" @@ -16407,7 +18220,9 @@ "minimum": 0.1 } }, - "required": ["image"], + "required": [ + "image" + ], "additionalProperties": false } }, @@ -16419,7 +18234,9 @@ ] } }, - "required": ["tests"], + "required": [ + "tests" + ], "additionalProperties": false } } From c597d877cbd0ac1d33e1130b48d257e3092761cd Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 13 Apr 2026 22:23:26 +0000 Subject: [PATCH 2/3] chore: fix biome formatting Co-Authored-By: Claude Sonnet 4.6 --- .../validation/eval-validator.test.ts | 6 +- .../references/eval-schema.json | 2989 ++++------------- 2 files changed, 585 insertions(+), 2410 deletions(-) diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts index 7085c5fa5..7992160b8 100644 --- a/packages/core/test/evaluation/validation/eval-validator.test.ts +++ b/packages/core/test/evaluation/validation/eval-validator.test.ts @@ -1095,8 +1095,7 @@ tests: const warnings = result.errors.filter((e) => e.severity === 'warning'); expect( warnings.some( - (e) => - e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), + (e) => e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), ), ).toBe(true); }); @@ -1119,8 +1118,7 @@ tests: const warnings = result.errors.filter((e) => e.severity === 'warning'); expect( warnings.some( - (e) => - e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), + (e) => e.message.includes("'assert' is deprecated") && e.message.includes("'assertions'"), ), ).toBe(true); }); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index d2b30313d..885d275b3 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -56,12 +56,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -75,30 +70,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -136,12 +121,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -155,30 +135,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -206,12 +176,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -225,30 +190,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -292,10 +247,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -369,18 +321,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -417,10 +363,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -515,10 +458,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -569,17 +509,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -590,9 +525,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -655,9 +588,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -673,10 +604,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -693,10 +621,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -713,18 +638,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -761,20 +681,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -815,12 +726,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -834,12 +740,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -850,9 +751,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -860,12 +759,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -879,12 +773,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -895,10 +784,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -935,10 +821,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -950,11 +833,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -976,26 +855,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -1039,10 +909,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1086,10 +953,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -1126,10 +990,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -1144,9 +1005,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1183,10 +1042,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -1218,9 +1074,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1263,10 +1117,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1309,10 +1160,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1349,15 +1197,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1400,10 +1243,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -1492,10 +1332,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1505,10 +1342,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -1552,10 +1386,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -1629,18 +1460,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -1677,10 +1502,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -1775,10 +1597,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -1829,17 +1648,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1850,9 +1664,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -1915,9 +1727,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -1933,10 +1743,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -1953,10 +1760,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -1973,18 +1777,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -2021,20 +1820,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -2075,12 +1865,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2094,12 +1879,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2110,9 +1890,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -2120,12 +1898,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2139,12 +1912,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -2155,10 +1923,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -2195,10 +1960,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -2210,11 +1972,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -2236,26 +1994,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -2299,10 +2048,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -2346,10 +2092,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -2386,10 +2129,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -2404,9 +2144,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2443,10 +2181,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -2478,9 +2213,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2523,10 +2256,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2569,10 +2299,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2609,15 +2336,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -2660,10 +2382,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -2752,10 +2471,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -2765,10 +2481,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -2829,10 +2542,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -2906,18 +2616,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -2954,10 +2658,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -3052,10 +2753,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -3106,17 +2804,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3127,9 +2820,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -3192,9 +2883,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3210,10 +2899,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3230,10 +2916,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -3250,18 +2933,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -3298,20 +2976,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -3352,12 +3021,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3371,12 +3035,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3387,9 +3046,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -3397,12 +3054,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3416,12 +3068,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -3432,10 +3079,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -3472,10 +3116,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -3487,11 +3128,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -3513,26 +3150,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -3576,10 +3204,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -3623,10 +3248,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -3663,10 +3285,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -3681,9 +3300,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3720,10 +3337,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -3755,9 +3369,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3800,10 +3412,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3846,10 +3455,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -3886,15 +3492,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -3937,10 +3538,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -4029,10 +3627,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4042,10 +3637,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -4089,10 +3681,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -4166,18 +3755,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -4214,10 +3797,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -4312,10 +3892,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -4366,17 +3943,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4387,9 +3959,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -4452,9 +4022,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4470,10 +4038,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4490,10 +4055,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -4510,18 +4072,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -4558,20 +4115,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -4612,12 +4160,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4631,12 +4174,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4647,9 +4185,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -4657,12 +4193,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4676,12 +4207,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -4692,10 +4218,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -4732,10 +4255,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -4747,11 +4267,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -4773,26 +4289,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -4836,10 +4343,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -4883,10 +4387,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -4923,10 +4424,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -4941,9 +4439,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -4980,10 +4476,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -5015,9 +4508,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5060,10 +4551,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5106,10 +4594,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5146,15 +4631,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -5197,10 +4677,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -5289,10 +4766,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -5302,10 +4776,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -5326,11 +4797,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -5341,9 +4808,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -5376,10 +4841,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -5403,10 +4865,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -5420,10 +4879,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -5440,10 +4896,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -5507,11 +4960,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5542,11 +4991,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5577,11 +5022,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5612,11 +5053,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -5626,11 +5063,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -5653,9 +5086,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -5679,17 +5110,11 @@ }, "on_dependency_failure": { "type": "string", - "enum": [ - "skip", - "fail", - "run" - ] + "enum": ["skip", "fail", "run"] }, "mode": { "type": "string", - "enum": [ - "conversation" - ] + "enum": ["conversation"] }, "turns": { "type": "array", @@ -5713,20 +5138,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -5751,20 +5169,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -5815,10 +5226,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -5892,18 +5300,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -5940,10 +5342,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -6038,10 +5437,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -6092,17 +5488,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6113,9 +5504,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -6178,9 +5567,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6196,10 +5583,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6216,10 +5600,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -6236,18 +5617,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -6284,10 +5660,7 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", @@ -6338,12 +5711,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6357,12 +5725,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6373,9 +5736,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -6383,12 +5744,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6402,12 +5758,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -6418,10 +5769,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -6458,10 +5806,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -6473,11 +5818,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -6499,26 +5840,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -6562,10 +5894,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -6609,10 +5938,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -6649,10 +5975,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -6667,9 +5990,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6706,10 +6027,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -6741,9 +6059,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6786,10 +6102,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6832,10 +6145,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -6872,15 +6182,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -6923,10 +6228,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -7015,10 +6317,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7028,10 +6327,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -7040,36 +6336,25 @@ } } }, - "required": [ - "input" - ], + "required": ["input"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "mean", - "min", - "max" - ] + "enum": ["mean", "min", "max"] }, "on_turn_failure": { "type": "string", - "enum": [ - "continue", - "stop" - ] + "enum": ["continue", "stop"] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -7104,12 +6389,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7123,30 +6403,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7174,12 +6444,7 @@ "properties": { "role": { "type": "string", - "enum": [ - "system", - "user", - "assistant", - "tool" - ] + "enum": ["system", "user", "assistant", "tool"] }, "content": { "anyOf": [ @@ -7193,30 +6458,20 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } ] } }, - "required": [ - "role", - "content" - ], + "required": ["role", "content"], "additionalProperties": false } } @@ -7260,10 +6515,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -7337,18 +6589,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -7385,10 +6631,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -7483,10 +6726,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -7537,17 +6777,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7558,9 +6793,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -7623,9 +6856,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -7641,10 +6872,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -7661,10 +6889,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -7681,18 +6906,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -7729,20 +6949,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -7783,12 +6994,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7802,12 +7008,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7818,9 +7019,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -7828,12 +7027,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7847,12 +7041,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -7863,10 +7052,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -7903,10 +7089,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -7918,11 +7101,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -7944,26 +7123,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -8007,10 +7177,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8054,10 +7221,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -8094,10 +7258,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -8112,9 +7273,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8151,10 +7310,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -8186,9 +7342,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8231,10 +7385,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8277,10 +7428,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8317,15 +7465,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8368,10 +7511,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -8460,10 +7600,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8473,10 +7610,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -8520,10 +7654,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -8597,18 +7728,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -8645,10 +7770,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -8743,10 +7865,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -8797,17 +7916,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8818,9 +7932,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -8883,9 +7995,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -8901,10 +8011,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -8921,10 +8028,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -8941,18 +8045,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -8989,20 +8088,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -9043,12 +8133,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9062,12 +8147,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9078,9 +8158,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -9088,12 +8166,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9107,12 +8180,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -9123,10 +8191,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -9163,10 +8228,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -9178,11 +8240,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -9204,26 +8262,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -9267,10 +8316,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -9314,10 +8360,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -9354,10 +8397,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -9372,9 +8412,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9411,10 +8449,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -9446,9 +8481,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9491,10 +8524,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9537,10 +8567,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9577,15 +8604,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -9628,10 +8650,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -9720,10 +8739,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -9733,10 +8749,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -9797,10 +8810,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -9874,18 +8884,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -9922,10 +8926,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -10020,10 +9021,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -10074,17 +9072,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10095,9 +9088,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -10160,9 +9151,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10178,10 +9167,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -10198,10 +9184,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -10218,18 +9201,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -10266,20 +9244,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -10320,12 +9289,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10339,12 +9303,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10355,9 +9314,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -10365,12 +9322,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10384,12 +9336,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -10400,10 +9347,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -10440,10 +9384,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -10455,11 +9396,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -10481,26 +9418,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -10544,10 +9472,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -10591,10 +9516,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -10631,10 +9553,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -10649,9 +9568,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10688,10 +9605,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -10723,9 +9637,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10768,10 +9680,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10814,10 +9723,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10854,15 +9760,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -10905,10 +9806,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -10997,10 +9895,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11010,10 +9905,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -11057,10 +9949,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -11134,18 +10023,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -11182,10 +10065,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -11280,10 +10160,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -11334,17 +10211,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11355,9 +10227,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -11420,9 +10290,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11438,10 +10306,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11458,10 +10323,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -11478,18 +10340,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -11526,20 +10383,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -11580,12 +10428,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11599,12 +10442,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11615,9 +10453,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -11625,12 +10461,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11644,12 +10475,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -11660,10 +10486,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -11700,10 +10523,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -11715,11 +10535,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -11741,26 +10557,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -11804,10 +10611,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -11851,10 +10655,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -11891,10 +10692,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -11909,9 +10707,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -11948,10 +10744,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -11983,9 +10776,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12028,10 +10819,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12074,10 +10862,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12114,15 +10899,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -12165,10 +10945,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -12257,10 +11034,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -12270,10 +11044,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -12294,11 +11065,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -12309,9 +11076,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -12344,10 +11109,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -12371,10 +11133,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -12388,10 +11147,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -12408,10 +11164,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -12475,11 +11228,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -12510,11 +11259,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -12545,11 +11290,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -12580,11 +11321,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -12594,11 +11331,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -12621,9 +11354,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -12647,17 +11378,11 @@ }, "on_dependency_failure": { "type": "string", - "enum": [ - "skip", - "fail", - "run" - ] + "enum": ["skip", "fail", "run"] }, "mode": { "type": "string", - "enum": [ - "conversation" - ] + "enum": ["conversation"] }, "turns": { "type": "array", @@ -12681,20 +11406,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -12719,20 +11437,13 @@ "properties": { "type": { "type": "string", - "enum": [ - "text", - "file", - "image" - ] + "enum": ["text", "file", "image"] }, "value": { "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false } } @@ -12783,10 +11494,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -12860,18 +11568,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -12908,10 +11610,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -13006,10 +11705,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13060,17 +11756,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13081,9 +11772,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -13146,9 +11835,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13164,10 +11851,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13184,10 +11868,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -13204,18 +11885,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -13252,10 +11928,7 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", @@ -13306,12 +11979,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13325,12 +11993,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13341,9 +12004,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -13351,12 +12012,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13370,12 +12026,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -13386,10 +12037,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -13426,10 +12074,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -13441,11 +12086,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -13467,26 +12108,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -13530,10 +12162,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -13577,10 +12206,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -13617,10 +12243,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -13635,9 +12258,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13674,10 +12295,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -13709,9 +12327,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13754,10 +12370,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13800,10 +12413,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13840,15 +12450,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -13891,10 +12496,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -13983,10 +12585,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -13996,10 +12595,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -14008,36 +12604,25 @@ } } }, - "required": [ - "input" - ], + "required": ["input"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "mean", - "min", - "max" - ] + "enum": ["mean", "min", "max"] }, "on_turn_failure": { "type": "string", - "enum": [ - "continue", - "stop" - ] + "enum": ["continue", "stop"] }, "window_size": { "type": "integer", "minimum": 1 } }, - "required": [ - "id" - ], + "required": ["id"], "additionalProperties": false } }, @@ -14104,10 +12689,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -14181,18 +12763,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -14229,10 +12805,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -14327,10 +12900,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -14381,17 +12951,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14402,9 +12967,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -14467,9 +13030,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14485,10 +13046,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14505,10 +13063,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -14525,18 +13080,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -14573,20 +13123,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -14627,12 +13168,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14646,12 +13182,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14662,9 +13193,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -14672,12 +13201,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14691,12 +13215,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -14707,10 +13226,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -14747,10 +13263,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -14762,11 +13275,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -14788,26 +13297,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -14851,10 +13351,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -14898,10 +13395,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -14938,10 +13432,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -14956,9 +13447,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -14995,10 +13484,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -15030,9 +13516,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15075,10 +13559,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15121,10 +13602,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15161,15 +13639,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15212,10 +13685,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -15304,10 +13774,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -15317,10 +13784,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -15364,10 +13828,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -15441,18 +13902,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -15489,10 +13944,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -15587,10 +14039,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -15641,17 +14090,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15662,9 +14106,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -15727,9 +14169,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -15745,10 +14185,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -15765,10 +14202,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -15785,18 +14219,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -15833,20 +14262,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -15887,12 +14307,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15906,12 +14321,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15922,9 +14332,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -15932,12 +14340,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15951,12 +14354,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -15967,10 +14365,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -16007,10 +14402,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -16022,11 +14414,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -16048,26 +14436,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -16111,10 +14490,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -16158,10 +14534,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -16198,10 +14571,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -16216,9 +14586,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16255,10 +14623,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -16290,9 +14655,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16335,10 +14698,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16381,10 +14741,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16421,15 +14778,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16472,10 +14824,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -16564,10 +14913,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16577,10 +14923,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -16601,11 +14944,7 @@ }, "strategy": { "type": "string", - "enum": [ - "pass_at_k", - "mean", - "confidence_interval" - ] + "enum": ["pass_at_k", "mean", "confidence_interval"] }, "cost_limit_usd": { "type": "number", @@ -16616,9 +14955,7 @@ "minimum": 0 } }, - "required": [ - "count" - ], + "required": ["count"], "additionalProperties": false }, "total_budget_usd": { @@ -16681,10 +15018,7 @@ }, "type": { "type": "string", - "enum": [ - "code-grader", - "code_grader" - ] + "enum": ["code-grader", "code_grader"] }, "command": { "anyOf": [ @@ -16758,18 +15092,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false }, { @@ -16806,10 +15134,7 @@ }, "type": { "type": "string", - "enum": [ - "llm-grader", - "llm_grader" - ] + "enum": ["llm-grader", "llm_grader"] }, "prompt": { "anyOf": [ @@ -16904,10 +15229,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -16958,17 +15280,12 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -16979,9 +15296,7 @@ "minLength": 1 } }, - "required": [ - "include" - ], + "required": ["include"], "additionalProperties": false }, { @@ -17044,9 +15359,7 @@ } } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17062,10 +15375,7 @@ "maximum": 1 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -17082,10 +15392,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false }, { @@ -17102,18 +15409,13 @@ "type": "string" } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false } ] } }, - "required": [ - "type", - "aggregator" - ], + "required": ["type", "aggregator"], "additionalProperties": false }, { @@ -17150,20 +15452,11 @@ }, "type": { "type": "string", - "enum": [ - "tool-trajectory", - "tool_trajectory" - ] + "enum": ["tool-trajectory", "tool_trajectory"] }, "mode": { "type": "string", - "enum": [ - "any_order", - "in_order", - "exact", - "subset", - "superset" - ] + "enum": ["any_order", "in_order", "exact", "subset", "superset"] }, "minimums": { "type": "object", @@ -17204,12 +15497,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17223,12 +15511,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17239,9 +15522,7 @@ ] } }, - "required": [ - "tool" - ], + "required": ["tool"], "additionalProperties": false } }, @@ -17249,12 +15530,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17268,12 +15544,7 @@ "anyOf": [ { "type": "string", - "enum": [ - "exact", - "ignore", - "subset", - "superset" - ] + "enum": ["exact", "ignore", "subset", "superset"] }, { "type": "array", @@ -17284,10 +15555,7 @@ ] } }, - "required": [ - "type", - "mode" - ], + "required": ["type", "mode"], "additionalProperties": false }, { @@ -17324,10 +15592,7 @@ }, "type": { "type": "string", - "enum": [ - "field-accuracy", - "field_accuracy" - ] + "enum": ["field-accuracy", "field_accuracy"] }, "fields": { "type": "array", @@ -17339,11 +15604,7 @@ }, "match": { "type": "string", - "enum": [ - "exact", - "numeric_tolerance", - "date" - ] + "enum": ["exact", "numeric_tolerance", "date"] }, "required": { "type": "boolean" @@ -17365,26 +15626,17 @@ } } }, - "required": [ - "path", - "match" - ], + "required": ["path", "match"], "additionalProperties": false }, "minItems": 1 }, "aggregation": { "type": "string", - "enum": [ - "weighted_average", - "all_or_nothing" - ] + "enum": ["weighted_average", "all_or_nothing"] } }, - "required": [ - "type", - "fields" - ], + "required": ["type", "fields"], "additionalProperties": false }, { @@ -17428,10 +15680,7 @@ "minimum": 0 } }, - "required": [ - "type", - "threshold" - ], + "required": ["type", "threshold"], "additionalProperties": false }, { @@ -17475,10 +15724,7 @@ "minimum": 0 } }, - "required": [ - "type", - "budget" - ], + "required": ["type", "budget"], "additionalProperties": false }, { @@ -17515,10 +15761,7 @@ }, "type": { "type": "string", - "enum": [ - "token-usage", - "token_usage" - ] + "enum": ["token-usage", "token_usage"] }, "max_total": { "type": "number", @@ -17533,9 +15776,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17572,10 +15813,7 @@ }, "type": { "type": "string", - "enum": [ - "execution-metrics", - "execution_metrics" - ] + "enum": ["execution-metrics", "execution_metrics"] }, "max_tool_calls": { "type": "number", @@ -17607,9 +15845,7 @@ "minimum": 0 } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17652,10 +15888,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17698,10 +15931,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17738,15 +15968,10 @@ }, "type": { "type": "string", - "enum": [ - "is-json", - "is_json" - ] + "enum": ["is-json", "is_json"] } }, - "required": [ - "type" - ], + "required": ["type"], "additionalProperties": false }, { @@ -17789,10 +16014,7 @@ "type": "string" } }, - "required": [ - "type", - "value" - ], + "required": ["type", "value"], "additionalProperties": false }, { @@ -17881,10 +16103,7 @@ "minLength": 1 } }, - "required": [ - "score_range", - "outcome" - ], + "required": ["score_range", "outcome"], "additionalProperties": false } } @@ -17894,10 +16113,7 @@ "minItems": 1 } }, - "required": [ - "type", - "criteria" - ], + "required": ["type", "criteria"], "additionalProperties": false } ] @@ -17926,10 +16142,7 @@ ] } }, - "required": [ - "type", - "command" - ], + "required": ["type", "command"], "additionalProperties": false } }, @@ -17943,10 +16156,7 @@ }, "isolation": { "type": "string", - "enum": [ - "shared", - "per_test" - ] + "enum": ["shared", "per_test"] }, "repos": { "type": "array", @@ -17970,10 +16180,7 @@ "format": "uri" } }, - "required": [ - "type", - "url" - ], + "required": ["type", "url"], "additionalProperties": false }, { @@ -17987,10 +16194,7 @@ "type": "string" } }, - "required": [ - "type", - "path" - ], + "required": ["type", "path"], "additionalProperties": false } ] @@ -18007,10 +16211,7 @@ }, "resolve": { "type": "string", - "enum": [ - "remote", - "local" - ] + "enum": ["remote", "local"] }, "ancestor": { "type": "integer", @@ -18074,11 +16275,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18109,11 +16306,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18144,11 +16337,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18179,11 +16368,7 @@ }, "reset": { "type": "string", - "enum": [ - "none", - "fast", - "strict" - ] + "enum": ["none", "fast", "strict"] } }, "additionalProperties": false @@ -18193,11 +16378,7 @@ }, "mode": { "type": "string", - "enum": [ - "pooled", - "temp", - "static" - ] + "enum": ["pooled", "temp", "static"] }, "path": { "type": "string" @@ -18220,9 +16401,7 @@ "minimum": 0.1 } }, - "required": [ - "image" - ], + "required": ["image"], "additionalProperties": false } }, @@ -18234,9 +16413,7 @@ ] } }, - "required": [ - "tests" - ], + "required": ["tests"], "additionalProperties": false } } From 333c7ccadfd8845cf5d02ec5cb55855735b61140 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 13 Apr 2026 22:30:01 +0000 Subject: [PATCH 3/3] perf(validate): parallelize file validation and cache custom assertion discovery - Validate all files concurrently with Promise.all instead of sequential loop - Cache discoverCustomAssertionTypes() results by directory so N files in the same directory only pay for one filesystem walk (not N) Co-Authored-By: Claude Sonnet 4.6 --- .../src/commands/validate/validate-files.ts | 7 +-- .../evaluation/validation/eval-validator.ts | 54 ++++++++++++------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/apps/cli/src/commands/validate/validate-files.ts b/apps/cli/src/commands/validate/validate-files.ts index 97543bac4..f9a5021ba 100644 --- a/apps/cli/src/commands/validate/validate-files.ts +++ b/apps/cli/src/commands/validate/validate-files.ts @@ -18,12 +18,7 @@ import fg from 'fast-glob'; */ export async function validateFiles(paths: readonly string[]): Promise { const filePaths = await expandPaths(paths); - const results: ValidationResult[] = []; - - for (const filePath of filePaths) { - const result = await validateSingleFile(filePath); - results.push(result); - } + const results = await Promise.all(filePaths.map((filePath) => validateSingleFile(filePath))); const validFiles = results.filter((r) => r.valid).length; const invalidFiles = results.filter((r) => !r.valid).length; diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts index 0c8348ca0..f22f82e15 100644 --- a/packages/core/src/evaluation/validation/eval-validator.ts +++ b/packages/core/src/evaluation/validation/eval-validator.ts @@ -104,34 +104,48 @@ const NAME_PATTERN = /^[a-z0-9-]+$/; /** Script file extensions recognised as custom assertion plugins. */ const ASSERTION_SCRIPT_EXTENSIONS = new Set(['.ts', '.js', '.mts', '.mjs', '.cts', '.cjs']); +/** Cache: directory path → promise of discovered type names. */ +const customAssertionCache = new Map>>(); + /** * Walk up the directory tree from `baseDir` collecting type names from * `.agentv/assertions/` directories — mirrors the runtime discovery in * `assertion-discovery.ts`. + * + * Results are cached by directory so concurrent validation of many files + * in the same directory only does the filesystem walk once. */ -async function discoverCustomAssertionTypes(baseDir: string): Promise> { - const types = new Set(); - let dir = path.resolve(baseDir); - const root = path.parse(dir).root; - - while (dir !== root) { - const assertionsDir = path.join(dir, '.agentv', 'assertions'); - try { - const entries = await readdir(assertionsDir, { withFileTypes: true }); - for (const entry of entries) { - if (!entry.isFile()) continue; - const ext = path.extname(entry.name).toLowerCase(); - if (!ASSERTION_SCRIPT_EXTENSIONS.has(ext)) continue; - const typeName = entry.name.slice(0, -ext.length); - types.add(typeName); +function discoverCustomAssertionTypes(baseDir: string): Promise> { + const resolved = path.resolve(baseDir); + const cached = customAssertionCache.get(resolved); + if (cached) return cached; + + const promise = (async () => { + const types = new Set(); + let dir = resolved; + const root = path.parse(dir).root; + + while (dir !== root) { + const assertionsDir = path.join(dir, '.agentv', 'assertions'); + try { + const entries = await readdir(assertionsDir, { withFileTypes: true }); + for (const entry of entries) { + if (!entry.isFile()) continue; + const ext = path.extname(entry.name).toLowerCase(); + if (!ASSERTION_SCRIPT_EXTENSIONS.has(ext)) continue; + types.add(entry.name.slice(0, -ext.length)); + } + } catch { + // Directory doesn't exist — skip } - } catch { - // Directory doesn't exist — skip + dir = path.dirname(dir); } - dir = path.dirname(dir); - } - return types; + return types; + })(); + + customAssertionCache.set(resolved, promise); + return promise; } function isObject(value: unknown): value is JsonObject {