From fd3200ab91b57ffd16317a93b3a9ef8a13120f57 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 8 Apr 2026 04:37:57 +0000 Subject: [PATCH 1/2] fix: auto-derive evaluator name from type when not provided (#968) Previously, evaluators without an explicit `name` field were skipped with a warning for non-assertion types (llm-grader, code-grader, latency, etc.). Now `generateAssertionName` falls back to the type name itself, so all evaluator types work without requiring a name. Also cleans up test warnings: migrates deprecated `required_min_score` to `min_score` and suppresses intentional skip-behavior console output. Closes #968 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../evaluation/loaders/evaluator-parser.ts | 34 +++++-------------- .../loaders/evaluator-parser.test.ts | 25 +++++++++++--- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts index 79385fe81..013f29b69 100644 --- a/packages/core/src/evaluation/loaders/evaluator-parser.ts +++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts @@ -183,7 +183,7 @@ async function parseEvaluatorList( const customTypeName = isCustomType ? typeValue : undefined; - // Auto-generate name for assertion types if not provided + // Auto-generate name from type if not provided const name = rawName ?? (isCustomType ? typeValue : generateAssertionName(typeValue as EvaluatorKind, rawEvaluator)); @@ -1539,32 +1539,12 @@ export async function parsePreprocessors( return preprocessors; } -/** Assertion evaluator types that support auto-generated names. */ -const ASSERTION_TYPES = new Set([ - 'skill-trigger', - 'contains', - 'contains-any', - 'contains-all', - 'icontains', - 'icontains-any', - 'icontains-all', - 'starts-with', - 'ends-with', - 'regex', - 'is-json', - 'equals', - 'rubrics', -]); - /** - * Generate a descriptive name for assertion-type evaluators when no explicit name is given. - * Returns undefined for non-assertion types (those still require an explicit name). + * Generate a descriptive name for evaluators when no explicit name is given. + * Returns the type name as a fallback so evaluators are never skipped just + * because the author omitted `name`. */ -function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string | undefined { - if (!ASSERTION_TYPES.has(typeValue)) { - return undefined; - } - +function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string { const value = asString(rawEvaluator.value); const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : undefined; @@ -1598,7 +1578,9 @@ function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): str case 'rubrics': return 'rubrics'; default: - return undefined; + // For all other evaluator types (llm-grader, code-grader, latency, etc.), + // use the type name itself as the auto-derived name. + return typeValue; } } diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts index 2f9248750..98f9482e5 100644 --- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts +++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts @@ -1,4 +1,4 @@ -import { afterAll, beforeAll, describe, expect, it } from 'bun:test'; +import { afterAll, beforeAll, describe, expect, it, spyOn } from 'bun:test'; import { mkdir, rm, writeFile } from 'node:fs/promises'; import os from 'node:os'; import path from 'node:path'; @@ -665,7 +665,7 @@ describe('parseEvaluators - kebab-case type normalization', () => { }); describe('parseEvaluators - score_ranges rubrics', () => { - it('parses valid score_ranges with required_min_score', async () => { + it('parses valid score_ranges with min_score', async () => { const rawEvalCase = { evaluators: [ { @@ -675,7 +675,7 @@ describe('parseEvaluators - score_ranges rubrics', () => { { id: 'accuracy', weight: 2.0, - required_min_score: 7, + min_score: 0.7, score_ranges: [ { score_range: [0, 3], outcome: 'Incorrect' }, { score_range: [4, 6], outcome: 'Partially correct' }, @@ -698,6 +698,7 @@ describe('parseEvaluators - score_ranges rubrics', () => { const rubric = config.rubrics?.[0]; expect(rubric?.id).toBe('accuracy'); expect(rubric?.weight).toBe(2.0); + expect(rubric?.min_score).toBe(0.7); expect(rubric?.required_min_score).toBe(7); expect(rubric?.score_ranges).toHaveLength(4); } @@ -752,6 +753,7 @@ describe('parseEvaluators - score_ranges rubrics', () => { }); it('skips rubric items that use legacy description field without outcome', async () => { + const warnSpy = spyOn(console, 'warn').mockImplementation(() => {}); const rawEvalCase = { evaluators: [ { @@ -777,6 +779,10 @@ describe('parseEvaluators - score_ranges rubrics', () => { // Rubric should be skipped since it has no 'outcome' field expect(config.rubrics ?? []).toHaveLength(0); } + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("missing outcome"), + ); + warnSpy.mockRestore(); }); }); @@ -791,7 +797,7 @@ describe('parseEvaluators - score_ranges shorthand map', () => { { id: 'accuracy', weight: 2.0, - required_min_score: 7, + min_score: 0.7, score_ranges: { 0: 'Completely wrong', 3: 'Partially correct', @@ -813,6 +819,7 @@ describe('parseEvaluators - score_ranges shorthand map', () => { expect(config.rubrics).toHaveLength(1); const rubric = config.rubrics?.[0]; expect(rubric?.id).toBe('accuracy'); + expect(rubric?.min_score).toBe(0.7); expect(rubric?.required_min_score).toBe(7); expect(rubric?.score_ranges).toHaveLength(4); expect(rubric?.score_ranges?.[0]).toEqual({ @@ -1430,6 +1437,7 @@ describe('parseEvaluators - type: rubrics with criteria', () => { }); it('skips rubrics with empty criteria array', async () => { + const warnSpy = spyOn(console, 'warn').mockImplementation(() => {}); const evaluators = await parseEvaluators( { assertions: [ @@ -1444,9 +1452,14 @@ describe('parseEvaluators - type: rubrics with criteria', () => { 'test-1', ); expect(evaluators).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('criteria must be a non-empty array'), + ); + warnSpy.mockRestore(); }); it('skips rubrics with missing criteria', async () => { + const warnSpy = spyOn(console, 'warn').mockImplementation(() => {}); const evaluators = await parseEvaluators( { assertions: [ @@ -1460,6 +1473,10 @@ describe('parseEvaluators - type: rubrics with criteria', () => { 'test-1', ); expect(evaluators).toBeUndefined(); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('criteria must be a non-empty array'), + ); + warnSpy.mockRestore(); }); it('supports string shorthand in criteria', async () => { From 51388314c48e3d565672dba254454001fb8900d5 Mon Sep 17 00:00:00 2001 From: Christopher Date: Wed, 8 Apr 2026 04:39:21 +0000 Subject: [PATCH 2/2] style: fix lint formatting Co-Authored-By: Claude Opus 4.6 (1M context) --- .../core/test/evaluation/loaders/evaluator-parser.test.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts index 98f9482e5..00a3002b1 100644 --- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts +++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts @@ -779,9 +779,7 @@ describe('parseEvaluators - score_ranges rubrics', () => { // Rubric should be skipped since it has no 'outcome' field expect(config.rubrics ?? []).toHaveLength(0); } - expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining("missing outcome"), - ); + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('missing outcome')); warnSpy.mockRestore(); }); });