Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions apps/cli/test/commands/eval/pipeline/input.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ describe('pipeline input', () => {
const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
expect(manifest.test_ids).toEqual(['test-01']);
expect(manifest.eval_file).toContain('input-test.eval.yaml');
});
}, 30_000);

it('writes per-test input.json with input and input_files', async () => {
const { execa } = await import('execa');
Expand All @@ -30,7 +30,7 @@ describe('pipeline input', () => {
);
expect(input.input).toHaveLength(1);
expect(input.input[0].content).toBe('hello world');
});
}, 30_000);

it('writes code_graders/<name>.json with resolved command', async () => {
const { execa } = await import('execa');
Expand All @@ -44,7 +44,7 @@ describe('pipeline input', () => {
);
expect(grader.command).toBeDefined();
expect(grader.name).toBe('contains_hello');
});
}, 30_000);

it('writes llm_graders/<name>.json with prompt content', async () => {
const { execa } = await import('execa');
Expand All @@ -58,15 +58,15 @@ describe('pipeline input', () => {
);
expect(grader.prompt_content).toBeDefined();
expect(grader.name).toBe('relevance');
});
}, 30_000);

it('writes criteria.md', async () => {
const { execa } = await import('execa');
await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);

const criteria = await readFile(join(OUT_DIR, 'input-test', 'test-01', 'criteria.md'), 'utf8');
expect(criteria).toContain('Response echoes the input');
});
}, 30_000);

it('writes invoke.json', async () => {
const { execa } = await import('execa');
Expand All @@ -76,7 +76,7 @@ describe('pipeline input', () => {
await readFile(join(OUT_DIR, 'input-test', 'test-01', 'invoke.json'), 'utf8'),
);
expect(invoke.kind).toBeDefined();
});
}, 30_000);

it('writes experiment to manifest when --experiment is provided', async () => {
const { execa } = await import('execa');
Expand All @@ -93,15 +93,15 @@ describe('pipeline input', () => {

const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
expect(manifest.experiment).toBe('without_skills');
});
}, 30_000);

it('omits experiment from manifest when --experiment is not provided', async () => {
const { execa } = await import('execa');
await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);

const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
expect(manifest.experiment).toBeUndefined();
});
}, 30_000);

it('writes code_graders/<name>.json for deterministic assertions', async () => {
const { execa } = await import('execa');
Expand All @@ -127,7 +127,7 @@ describe('pipeline input', () => {
expect(regexGrader.name).toBe('matches_pattern');
expect(regexGrader.type).toBe('regex');
expect(regexGrader.value).toBe('h[aeiou]llo');
});
}, 30_000);

it('falls back to eval file basename for suite directory when name is absent', async () => {
const { execa } = await import('execa');
Expand All @@ -141,5 +141,5 @@ describe('pipeline input', () => {

const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
expect(manifest.suite).toBe('no-name');
});
}, 30_000);
});
Loading