From fd3200ab91b57ffd16317a93b3a9ef8a13120f57 Mon Sep 17 00:00:00 2001
From: Christopher <christso@gmail.com>
Date: Wed, 8 Apr 2026 04:37:57 +0000
Subject: [PATCH 1/2] fix: auto-derive evaluator name from type when not
 provided (#968)

Previously, evaluators without an explicit `name` field were skipped with
a warning for non-assertion types (llm-grader, code-grader, latency, etc.).
Now `generateAssertionName` falls back to the type name itself, so all
evaluator types work without requiring a name.

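Also cleans up test warnings: migrates the deprecated `required_min_score`
field to `min_score` in the test fixtures (7 becomes 0.7), and spies on
`console.warn` in the skip-behavior tests so the expected warnings are
asserted instead of being printed to the console.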

Closes #968

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../evaluation/loaders/evaluator-parser.ts    | 34 +++++--------------
 .../loaders/evaluator-parser.test.ts          | 25 +++++++++++---
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/packages/core/src/evaluation/loaders/evaluator-parser.ts b/packages/core/src/evaluation/loaders/evaluator-parser.ts
index 79385fe81..013f29b69 100644
--- a/packages/core/src/evaluation/loaders/evaluator-parser.ts
+++ b/packages/core/src/evaluation/loaders/evaluator-parser.ts
@@ -183,7 +183,7 @@ async function parseEvaluatorList(
 
     const customTypeName = isCustomType ? typeValue : undefined;
 
-    // Auto-generate name for assertion types if not provided
+    // Auto-generate name from type if not provided
     const name =
       rawName ??
       (isCustomType ? typeValue : generateAssertionName(typeValue as EvaluatorKind, rawEvaluator));
@@ -1539,32 +1539,12 @@ export async function parsePreprocessors(
   return preprocessors;
 }
 
-/** Assertion evaluator types that support auto-generated names. */
-const ASSERTION_TYPES = new Set([
-  'skill-trigger',
-  'contains',
-  'contains-any',
-  'contains-all',
-  'icontains',
-  'icontains-any',
-  'icontains-all',
-  'starts-with',
-  'ends-with',
-  'regex',
-  'is-json',
-  'equals',
-  'rubrics',
-]);
-
 /**
- * Generate a descriptive name for assertion-type evaluators when no explicit name is given.
- * Returns undefined for non-assertion types (those still require an explicit name).
+ * Generate a descriptive name for evaluators when no explicit name is given.
+ * Returns the type name as a fallback so evaluators are never skipped just
+ * because the author omitted `name`.
  */
-function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string | undefined {
-  if (!ASSERTION_TYPES.has(typeValue)) {
-    return undefined;
-  }
-
+function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): string {
   const value = asString(rawEvaluator.value);
   const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : undefined;
 
@@ -1598,7 +1578,9 @@ function generateAssertionName(typeValue: string, rawEvaluator: JsonObject): str
     case 'rubrics':
       return 'rubrics';
     default:
-      return undefined;
+      // For all other evaluator types (llm-grader, code-grader, latency, etc.),
+      // use the type name itself as the auto-derived name.
+      return typeValue;
   }
 }
 
diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
index 2f9248750..98f9482e5 100644
--- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
@@ -1,4 +1,4 @@
-import { afterAll, beforeAll, describe, expect, it } from 'bun:test';
+import { afterAll, beforeAll, describe, expect, it, spyOn } from 'bun:test';
 import { mkdir, rm, writeFile } from 'node:fs/promises';
 import os from 'node:os';
 import path from 'node:path';
@@ -665,7 +665,7 @@ describe('parseEvaluators - kebab-case type normalization', () => {
 });
 
 describe('parseEvaluators - score_ranges rubrics', () => {
-  it('parses valid score_ranges with required_min_score', async () => {
+  it('parses valid score_ranges with min_score', async () => {
     const rawEvalCase = {
       evaluators: [
         {
@@ -675,7 +675,7 @@ describe('parseEvaluators - score_ranges rubrics', () => {
             {
               id: 'accuracy',
               weight: 2.0,
-              required_min_score: 7,
+              min_score: 0.7,
               score_ranges: [
                 { score_range: [0, 3], outcome: 'Incorrect' },
                 { score_range: [4, 6], outcome: 'Partially correct' },
@@ -698,6 +698,7 @@ describe('parseEvaluators - score_ranges rubrics', () => {
       const rubric = config.rubrics?.[0];
       expect(rubric?.id).toBe('accuracy');
       expect(rubric?.weight).toBe(2.0);
+      expect(rubric?.min_score).toBe(0.7);
       expect(rubric?.required_min_score).toBe(7);
       expect(rubric?.score_ranges).toHaveLength(4);
     }
@@ -752,6 +753,7 @@ describe('parseEvaluators - score_ranges rubrics', () => {
   });
 
   it('skips rubric items that use legacy description field without outcome', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const rawEvalCase = {
       evaluators: [
         {
@@ -777,6 +779,10 @@ describe('parseEvaluators - score_ranges rubrics', () => {
       // Rubric should be skipped since it has no 'outcome' field
       expect(config.rubrics ?? []).toHaveLength(0);
     }
+    expect(warnSpy).toHaveBeenCalledWith(
+      expect.stringContaining("missing outcome"),
+    );
+    warnSpy.mockRestore();
   });
 });
 
@@ -791,7 +797,7 @@ describe('parseEvaluators - score_ranges shorthand map', () => {
             {
               id: 'accuracy',
               weight: 2.0,
-              required_min_score: 7,
+              min_score: 0.7,
               score_ranges: {
                 0: 'Completely wrong',
                 3: 'Partially correct',
@@ -813,6 +819,7 @@ describe('parseEvaluators - score_ranges shorthand map', () => {
       expect(config.rubrics).toHaveLength(1);
       const rubric = config.rubrics?.[0];
       expect(rubric?.id).toBe('accuracy');
+      expect(rubric?.min_score).toBe(0.7);
       expect(rubric?.required_min_score).toBe(7);
       expect(rubric?.score_ranges).toHaveLength(4);
       expect(rubric?.score_ranges?.[0]).toEqual({
@@ -1430,6 +1437,7 @@ describe('parseEvaluators - type: rubrics with criteria', () => {
   });
 
   it('skips rubrics with empty criteria array', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const evaluators = await parseEvaluators(
       {
         assertions: [
@@ -1444,9 +1452,14 @@ describe('parseEvaluators - type: rubrics with criteria', () => {
       'test-1',
     );
     expect(evaluators).toBeUndefined();
+    expect(warnSpy).toHaveBeenCalledWith(
+      expect.stringContaining('criteria must be a non-empty array'),
+    );
+    warnSpy.mockRestore();
   });
 
   it('skips rubrics with missing criteria', async () => {
+    const warnSpy = spyOn(console, 'warn').mockImplementation(() => {});
     const evaluators = await parseEvaluators(
       {
         assertions: [
@@ -1460,6 +1473,10 @@ describe('parseEvaluators - type: rubrics with criteria', () => {
       'test-1',
     );
     expect(evaluators).toBeUndefined();
+    expect(warnSpy).toHaveBeenCalledWith(
+      expect.stringContaining('criteria must be a non-empty array'),
+    );
+    warnSpy.mockRestore();
   });
 
   it('supports string shorthand in criteria', async () => {

From 51388314c48e3d565672dba254454001fb8900d5 Mon Sep 17 00:00:00 2001
From: Christopher <christso@gmail.com>
Date: Wed, 8 Apr 2026 04:39:21 +0000
Subject: [PATCH 2/2] style: fix lint formatting

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../core/test/evaluation/loaders/evaluator-parser.test.ts     | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
index 98f9482e5..00a3002b1 100644
--- a/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
+++ b/packages/core/test/evaluation/loaders/evaluator-parser.test.ts
@@ -779,9 +779,7 @@ describe('parseEvaluators - score_ranges rubrics', () => {
       // Rubric should be skipped since it has no 'outcome' field
       expect(config.rubrics ?? []).toHaveLength(0);
     }
-    expect(warnSpy).toHaveBeenCalledWith(
-      expect.stringContaining("missing outcome"),
-    );
+    expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('missing outcome'));
     warnSpy.mockRestore();
   });
 });