From 47e013eda021031f968abe81b5786a6e99dab55d Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 13 Apr 2026 09:20:48 +1000 Subject: [PATCH 1/4] fix(cli): write transcript jsonl as message lines --- apps/cli/src/commands/eval/artifact-writer.ts | 59 ++++++++----------- .../commands/eval/artifact-writer.test.ts | 54 +++++++++++++++++ 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index e2e026bf..a0678ffe 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -5,7 +5,6 @@ import { DEFAULT_THRESHOLD, type EvaluationResult, type EvaluatorResult, - type TranscriptJsonLine, } from '@agentv/core'; import { toSnakeCaseDeep } from '../../utils/case-conversion.js'; import { RESULT_INDEX_FILENAME } from './result-layout.js'; @@ -711,6 +710,30 @@ export async function writeArtifacts( return writeArtifactsFromResults(results, outputDir, options); } +function buildTranscriptMessageLines(results: readonly EvaluationResult[]): string { + const lines: string[] = []; + + for (const result of results) { + const messages = [...(result.input ?? []), ...result.output]; + + for (let index = 0; index < messages.length; index += 1) { + const message = messages[index]; + lines.push( + JSON.stringify( + toSnakeCaseDeep({ + testId: result.testId, + target: result.target, + messageIndex: index, + ...message, + }), + ), + ); + } + } + + return lines.length > 0 ? `${lines.join('\n')}\n` : ''; +} + export async function writeArtifactsFromResults( results: readonly EvaluationResult[], outputDir: string, @@ -773,39 +796,7 @@ export async function writeArtifactsFromResults( // Write transcript JSONL (auto-generated on every eval run) const transcriptPath = path.join(outputDir, 'transcript.jsonl'); - const transcriptLines: TranscriptJsonLine[] = results.map((result) => { - let inputText = ''; - if (typeof result.input === 'string') { - inputText = result.input; - } else if (Array.isArray(result.input)) { - const firstUserMsg = result.input.find((m) => m.role === 'user'); - inputText = typeof firstUserMsg?.content === 'string' ? firstUserMsg.content : ''; - } - return { - input: inputText, - output: result.output, - token_usage: result.tokenUsage - ? { - input: result.tokenUsage.input, - output: result.tokenUsage.output, - cached: result.tokenUsage.cached, - } - : undefined, - duration_ms: result.durationMs, - cost_usd: result.costUsd, - source: { - provider: result.target, - session_id: result.conversationId ?? result.testId, - timestamp: result.timestamp, - }, - }; - }); - await writeFile( - transcriptPath, - transcriptLines.map((line) => JSON.stringify(line)).join('\n') + - (transcriptLines.length ? '\n' : ''), - 'utf8', - ); + await writeFile(transcriptPath, buildTranscriptMessageLines(results), 'utf8'); return { testArtifactDir, timingPath, benchmarkPath, indexPath }; } diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts index 7aee68e6..b18731e0 100644 --- a/apps/cli/test/commands/eval/artifact-writer.test.ts +++ b/apps/cli/test/commands/eval/artifact-writer.test.ts @@ -674,6 +674,60 @@ describe('writeArtifactsFromResults', () => { expect(timingOne.duration_ms).toBe(0); }); + it('writes transcript.jsonl as one message object per line', async () => { + const results = [ + makeResult({ + testId: 'transcript-case', + target: 'codex', + input: [{ role: 'user' as const, content: 'Inspect artifact output' }], + output: [ + { + role: 'assistant' as const, + content: 'Reading artifact-writer.ts', + toolCalls: [ + { + tool: 'Read', + input: { file_path: 'apps/cli/src/commands/eval/artifact-writer.ts' }, + output: 'file contents', + }, + ], + }, + ], + }), + ]; + + await writeArtifactsFromResults(results, testDir); + + const transcriptLines = (await readFile(path.join(testDir, 'transcript.jsonl'), 'utf8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + + expect(transcriptLines).toEqual([ + { + test_id: 'transcript-case', + target: 'codex', + message_index: 0, + role: 'user', + content: 'Inspect artifact output', + }, + { + test_id: 'transcript-case', + target: 'codex', + message_index: 1, + role: 'assistant', + content: 'Reading artifact-writer.ts', + tool_calls: [ + { + tool: 'Read', + input: { file_path: 'apps/cli/src/commands/eval/artifact-writer.ts' }, + output: 'file contents', + }, + ], + }, + ]); + }); + it('sanitizes test IDs for directory names', async () => { const results = [makeResult({ testId: 'path/to:test*1' })]; await writeArtifactsFromResults(results, testDir); From 8f57dbc3b7bd8ff25c02cfb6e717e0e8c83532da Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 13 Apr 2026 09:23:12 +1000 Subject: [PATCH 2/4] style(cli): format transcript writer --- apps/cli/src/commands/eval/artifact-writer.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index a0678ffe..5c2c5d0b 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -1,11 +1,7 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises'; import path from 'node:path'; -import { - DEFAULT_THRESHOLD, - type EvaluationResult, - type EvaluatorResult, -} from '@agentv/core'; +import { DEFAULT_THRESHOLD, type EvaluationResult, type EvaluatorResult } from '@agentv/core'; import { toSnakeCaseDeep } from '../../utils/case-conversion.js'; import { RESULT_INDEX_FILENAME } from './result-layout.js'; From 70c1d28de1e4d766ad36bfe478a3a20a75282f61 Mon Sep 17 00:00:00 2001 From: Christopher Date: Sun, 12 Apr 2026 23:54:48 +0000 Subject: [PATCH 3/4] fix(core): replay grouped transcript message rows --- apps/cli/src/commands/eval/artifact-writer.ts | 41 ++-- apps/cli/src/commands/eval/run-eval.ts | 6 +- apps/cli/src/commands/import/claude.ts | 8 +- apps/cli/src/commands/import/codex.ts | 8 +- apps/cli/src/commands/import/copilot.ts | 8 +- .../commands/eval/artifact-writer.test.ts | 20 ++ packages/core/src/import/index.ts | 4 +- .../core/src/import/transcript-provider.ts | 42 ++-- packages/core/src/import/types.ts | 202 +++++++++++++++--- .../test/import/transcript-provider.test.ts | 88 ++++++++ 10 files changed, 343 insertions(+), 84 deletions(-) create mode 100644 packages/core/test/import/transcript-provider.test.ts diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index 5c2c5d0b..5813c58e 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -1,7 +1,12 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises'; import path from 'node:path'; -import { DEFAULT_THRESHOLD, type EvaluationResult, type EvaluatorResult } from '@agentv/core'; +import { + DEFAULT_THRESHOLD, + toTranscriptJsonLines, + type EvaluationResult, + type EvaluatorResult, +} from '@agentv/core'; import { toSnakeCaseDeep } from '../../utils/case-conversion.js'; import { RESULT_INDEX_FILENAME } from './result-layout.js'; @@ -710,21 +715,25 @@ function buildTranscriptMessageLines(results: readonly EvaluationResult[]): stri const lines: string[] = []; for (const result of results) { - const messages = [...(result.input ?? []), ...result.output]; - - for (let index = 0; index < messages.length; index += 1) { - const message = messages[index]; - lines.push( - JSON.stringify( - toSnakeCaseDeep({ - testId: result.testId, - target: result.target, - messageIndex: index, - ...message, - }), - ), - ); - } + const transcriptLines = toTranscriptJsonLines( + { + messages: [...(result.input ?? []), ...result.output], + source: { + provider: result.target, + sessionId: result.conversationId ?? result.testId, + startedAt: result.timestamp, + }, + tokenUsage: result.tokenUsage, + durationMs: result.durationMs, + costUsd: result.costUsd, + }, + { + testId: result.testId, + target: result.target, + }, + ); + + lines.push(...transcriptLines.map((line) => JSON.stringify(line))); } return lines.length > 0 ? `${lines.join('\n')}\n` : ''; diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts index c5aa81a2..6011c00c 100644 --- a/apps/cli/src/commands/eval/run-eval.ts +++ b/apps/cli/src/commands/eval/run-eval.ts @@ -1260,7 +1260,7 @@ export async function runEvalCommand( // Use only files that survived tag filtering (fileMetadata keys) const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f)); - // --transcript: create a shared TranscriptProvider and validate line count + // --transcript: create a shared TranscriptProvider and validate entry count let transcriptProviderFactory: | ((target: import('@agentv/core').ResolvedTarget) => import('@agentv/core').Provider) | undefined; @@ -1268,14 +1268,14 @@ export async function runEvalCommand( const { TranscriptProvider } = await import('@agentv/core'); const transcriptProvider = await TranscriptProvider.fromFile(options.transcript); - // Validate: transcript lines must match total test cases across all files + // Validate: transcript entries must match total test cases across all files const totalTests = [...fileMetadata.values()].reduce( (sum, meta) => sum + meta.testCases.length, 0, ); if (transcriptProvider.lineCount !== totalTests) { throw new Error( - `Transcript has ${transcriptProvider.lineCount} entry(s) but eval defines ${totalTests} test(s). Each transcript line maps positionally to one test case.`, + `Transcript has ${transcriptProvider.lineCount} entr${transcriptProvider.lineCount === 1 ? 'y' : 'ies'} but eval defines ${totalTests} test(s). Each transcript entry maps positionally to one test case.`, ); } diff --git a/apps/cli/src/commands/import/claude.ts b/apps/cli/src/commands/import/claude.ts index 633cbd49..e5975e43 100644 --- a/apps/cli/src/commands/import/claude.ts +++ b/apps/cli/src/commands/import/claude.ts @@ -4,7 +4,7 @@ import { discoverClaudeSessions, parseClaudeSession, readTranscriptFile, - toTranscriptJsonLine, + toTranscriptJsonLines, } from '@agentv/core'; import { command, flag, option, optional, string } from 'cmd-ts'; @@ -94,9 +94,9 @@ export const importClaudeCommand = command({ // Ensure output directory exists await mkdir(path.dirname(outputPath), { recursive: true }); - // Write transcript as JSONL (one line per test case, snake_case wire format) - const jsonLine = toTranscriptJsonLine(transcript); - await writeFile(outputPath, `${JSON.stringify(jsonLine)}\n`, 'utf8'); + // Write transcript as JSONL (one message per line, grouped by test_id) + const jsonLines = toTranscriptJsonLines(transcript); + await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/apps/cli/src/commands/import/codex.ts b/apps/cli/src/commands/import/codex.ts index a7d3cd88..367ea900 100644 --- a/apps/cli/src/commands/import/codex.ts +++ b/apps/cli/src/commands/import/codex.ts @@ -4,7 +4,7 @@ import { discoverCodexSessions, parseCodexSession, readTranscriptFile, - toTranscriptJsonLine, + toTranscriptJsonLines, } from '@agentv/core'; import { command, flag, option, optional, string } from 'cmd-ts'; @@ -91,9 +91,9 @@ export const importCodexCommand = command({ // Ensure output directory exists await mkdir(path.dirname(outputPath), { recursive: true }); - // Write transcript as JSONL (snake_case wire format) - const jsonLine = toTranscriptJsonLine(transcript); - await writeFile(outputPath, `${JSON.stringify(jsonLine)}\n`, 'utf8'); + // Write transcript as JSONL (one message per line, grouped by test_id) + const jsonLines = toTranscriptJsonLines(transcript); + await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/apps/cli/src/commands/import/copilot.ts b/apps/cli/src/commands/import/copilot.ts index 7377181c..44661ce8 100644 --- a/apps/cli/src/commands/import/copilot.ts +++ b/apps/cli/src/commands/import/copilot.ts @@ -1,6 +1,6 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises'; import path from 'node:path'; -import { discoverCopilotSessions, parseCopilotEvents, toTranscriptJsonLine } from '@agentv/core'; +import { discoverCopilotSessions, parseCopilotEvents, toTranscriptJsonLines } from '@agentv/core'; import { command, flag, option, optional, string } from 'cmd-ts'; export const importCopilotCommand = command({ @@ -99,9 +99,9 @@ export const importCopilotCommand = command({ // Ensure output directory exists await mkdir(path.dirname(outputPath), { recursive: true }); - // Write transcript as JSONL (snake_case wire format) - const jsonLine = toTranscriptJsonLine(transcript); - await writeFile(outputPath, `${JSON.stringify(jsonLine)}\n`, 'utf8'); + // Write transcript as JSONL (one message per line, grouped by test_id) + const jsonLines = toTranscriptJsonLines(transcript); + await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts index b18731e0..26cd097a 100644 --- a/apps/cli/test/commands/eval/artifact-writer.test.ts +++ b/apps/cli/test/commands/eval/artifact-writer.test.ts @@ -679,6 +679,10 @@ describe('writeArtifactsFromResults', () => { makeResult({ testId: 'transcript-case', target: 'codex', + conversationId: 'session-123', + durationMs: 4200, + costUsd: 0.25, + tokenUsage: { input: 100, output: 40, cached: 10, reasoning: 5 }, input: [{ role: 'user' as const, content: 'Inspect artifact output' }], output: [ { @@ -710,6 +714,14 @@ describe('writeArtifactsFromResults', () => { message_index: 0, role: 'user', content: 'Inspect artifact output', + transcript_token_usage: { input: 100, output: 40, cached: 10, reasoning: 5 }, + transcript_duration_ms: 4200, + transcript_cost_usd: 0.25, + source: { + provider: 'codex', + session_id: 'session-123', + timestamp: '2026-03-13T00:00:00.000Z', + }, }, { test_id: 'transcript-case', @@ -724,6 +736,14 @@ describe('writeArtifactsFromResults', () => { output: 'file contents', }, ], + transcript_token_usage: { input: 100, output: 40, cached: 10, reasoning: 5 }, + transcript_duration_ms: 4200, + transcript_cost_usd: 0.25, + source: { + provider: 'codex', + session_id: 'session-123', + timestamp: '2026-03-13T00:00:00.000Z', + }, }, ]); }); diff --git a/packages/core/src/import/index.ts b/packages/core/src/import/index.ts index 664ef534..4170f412 100644 --- a/packages/core/src/import/index.ts +++ b/packages/core/src/import/index.ts @@ -12,11 +12,13 @@ export { } from './session-discovery.js'; export { TranscriptProvider } from './transcript-provider.js'; export { + groupTranscriptJsonLines, readTranscriptFile, readTranscriptJsonl, - toTranscriptJsonLine, + toTranscriptJsonLines, type TranscriptEntry, type TranscriptJsonLine, + type TranscriptReplayEntry, type TranscriptSource, } from './types.js'; diff --git a/packages/core/src/import/transcript-provider.ts b/packages/core/src/import/transcript-provider.ts index b1c43f85..2437c1f3 100644 --- a/packages/core/src/import/transcript-provider.ts +++ b/packages/core/src/import/transcript-provider.ts @@ -15,21 +15,21 @@ */ import type { Provider, ProviderRequest, ProviderResponse } from '../evaluation/providers/types.js'; -import type { TranscriptJsonLine } from './types.js'; -import { readTranscriptJsonl } from './types.js'; +import type { TranscriptReplayEntry } from './types.js'; +import { groupTranscriptJsonLines, readTranscriptJsonl } from './types.js'; export class TranscriptProvider implements Provider { readonly id: string; readonly kind = 'transcript' as const; readonly targetName: string; - private lines: TranscriptJsonLine[]; + private entries: TranscriptReplayEntry[]; private cursor = 0; - constructor(targetName: string, lines: TranscriptJsonLine[]) { + constructor(targetName: string, entries: TranscriptReplayEntry[]) { this.targetName = targetName; this.id = `transcript:${targetName}`; - this.lines = lines; + this.entries = entries; } /** @@ -40,36 +40,38 @@ export class TranscriptProvider implements Provider { if (lines.length === 0) { throw new Error(`Transcript file is empty: ${filePath}`); } - const providerName = lines[0].source.provider ?? 'transcript'; - return new TranscriptProvider(providerName, lines); + const entries = groupTranscriptJsonLines(lines); + const providerName = entries[0]?.source.provider ?? 'transcript'; + return new TranscriptProvider(providerName, entries); } get lineCount(): number { - return this.lines.length; + return this.entries.length; } async invoke(_request: ProviderRequest): Promise { - if (this.cursor >= this.lines.length) { + if (this.cursor >= this.entries.length) { throw new Error( - `Transcript exhausted: ${this.lines.length} line(s) available but ` + - `${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`, + `Transcript exhausted: ${this.entries.length} entr${this.entries.length === 1 ? 'y' : 'ies'} available but ` + + `${this.cursor + 1} invocations attempted. Each transcript entry maps to one test case.`, ); } - const line = this.lines[this.cursor++]; + const entry = this.entries[this.cursor++]; return { - output: line.output, - tokenUsage: line.token_usage + output: entry.messages, + tokenUsage: entry.tokenUsage ? { - input: line.token_usage.input, - output: line.token_usage.output, - cached: line.token_usage.cached, + input: entry.tokenUsage.input, + output: entry.tokenUsage.output, + cached: entry.tokenUsage.cached, + reasoning: entry.tokenUsage.reasoning, } : undefined, - durationMs: line.duration_ms, - costUsd: line.cost_usd ?? undefined, - startTime: line.source.timestamp, + durationMs: entry.durationMs, + costUsd: entry.costUsd ?? undefined, + startTime: entry.source.startedAt, }; } } diff --git a/packages/core/src/import/types.ts b/packages/core/src/import/types.ts index 109fa3f1..4a69f594 100644 --- a/packages/core/src/import/types.ts +++ b/packages/core/src/import/types.ts @@ -7,15 +7,16 @@ * * Flow: * raw session JSONL → parser → TranscriptEntry (internal) - * TranscriptEntry → toTranscriptJsonLine() → JSONL on disk + * TranscriptEntry → toTranscriptJsonLines() → JSONL on disk * JSONL on disk → readTranscriptJsonl() → TranscriptJsonLine[] * * To add a new importer: write a parser that returns TranscriptEntry, - * then use toTranscriptJsonLine() to serialize. + * then use toTranscriptJsonLines() to serialize. */ import { readFile } from 'node:fs/promises'; +import { toCamelCaseDeep, toSnakeCaseDeep } from '../evaluation/case-conversion.js'; import type { Message, ProviderTokenUsage } from '../evaluation/providers/types.js'; /** @@ -46,20 +47,35 @@ export interface TranscriptSource { /** * One line in a transcript JSONL file (snake_case wire format). * - * Each line is a self-contained test case with pre-populated output. - * The `input` field is the first user message; the `output` field is the - * full conversation (Message[]). + * Each line captures one message within an ordered per-test transcript. + * Consumers group all rows with the same `test_id` into a replayable session. */ export interface TranscriptJsonLine { - readonly input: string; - readonly output: readonly Message[]; + readonly test_id: string; + readonly target: string; + readonly message_index: number; + readonly role: string; + readonly name?: string; + readonly content?: unknown; + readonly tool_calls?: readonly Record[]; + readonly start_time?: string; + readonly end_time?: string; + readonly duration_ms?: number; + readonly metadata?: Record; readonly token_usage?: { readonly input: number; readonly output: number; readonly cached?: number; + readonly reasoning?: number; }; - readonly duration_ms?: number; - readonly cost_usd?: number | null; + readonly transcript_token_usage?: { + readonly input: number; + readonly output: number; + readonly cached?: number; + readonly reasoning?: number; + }; + readonly transcript_duration_ms?: number; + readonly transcript_cost_usd?: number | null; readonly source: { readonly provider: string; readonly session_id: string; @@ -72,34 +88,156 @@ export interface TranscriptJsonLine { } /** - * Convert a parsed TranscriptEntry to the on-disk JSONL wire format. + * Grouped replayable transcript reconstructed from per-message rows. */ -export function toTranscriptJsonLine(entry: TranscriptEntry): TranscriptJsonLine { - const firstUserMessage = entry.messages.find((m) => m.role === 'user'); - const input = typeof firstUserMessage?.content === 'string' ? firstUserMessage.content : ''; +export interface TranscriptReplayEntry { + readonly testId: string; + readonly target: string; + readonly messages: readonly Message[]; + readonly tokenUsage?: ProviderTokenUsage; + readonly durationMs?: number; + readonly costUsd?: number | null; + readonly source: TranscriptSource; +} + +/** + * Convert a parsed TranscriptEntry to per-message JSONL rows. + */ +export function toTranscriptJsonLines( + entry: TranscriptEntry, + options?: { testId?: string; target?: string }, +): TranscriptJsonLine[] { + const source = { + provider: entry.source.provider, + session_id: entry.source.sessionId, + model: entry.source.model, + timestamp: entry.source.startedAt, + git_branch: entry.source.gitBranch, + cwd: entry.source.cwd ?? entry.source.projectPath, + version: entry.source.version, + }; + const transcriptTokenUsage = entry.tokenUsage + ? { + input: entry.tokenUsage.input, + output: entry.tokenUsage.output, + cached: entry.tokenUsage.cached, + reasoning: entry.tokenUsage.reasoning, + } + : undefined; + const testId = options?.testId ?? entry.source.sessionId; + const target = options?.target ?? entry.source.provider; + + return entry.messages.map((message, index) => ({ + test_id: testId, + target, + message_index: index, + ...(toSnakeCaseDeep(message) as Omit< + TranscriptJsonLine, + | 'test_id' + | 'target' + | 'message_index' + | 'source' + | 'transcript_token_usage' + | 'transcript_duration_ms' + | 'transcript_cost_usd' + >), + transcript_token_usage: transcriptTokenUsage, + transcript_duration_ms: entry.durationMs, + transcript_cost_usd: entry.costUsd, + source, + })); +} + +function buildReplayMessage(line: TranscriptJsonLine): Message { + const camelCased = toCamelCaseDeep(line) as { + role: string; + name?: string; + content?: Message['content']; + toolCalls?: Message['toolCalls']; + startTime?: string; + endTime?: string; + durationMs?: number; + metadata?: Record; + tokenUsage?: ProviderTokenUsage; + }; return { - input, - output: entry.messages, - token_usage: entry.tokenUsage + role: camelCased.role, + name: camelCased.name, + content: camelCased.content, + toolCalls: camelCased.toolCalls, + startTime: camelCased.startTime, + endTime: camelCased.endTime, + durationMs: camelCased.durationMs, + metadata: camelCased.metadata, + tokenUsage: camelCased.tokenUsage, + }; +} + +/** + * Group per-message transcript rows back into replayable conversations. + */ +export function groupTranscriptJsonLines( + lines: readonly TranscriptJsonLine[], +): TranscriptReplayEntry[] { + const grouped = new Map< + string, + { + target: string; + tokenUsage?: ProviderTokenUsage; + durationMs?: number; + costUsd?: number | null; + source: TranscriptSource; + messages: { index: number; message: Message }[]; + } + >(); + + for (const line of lines) { + const existing = grouped.get(line.test_id); + const source: TranscriptSource = { + provider: line.source.provider, + sessionId: line.source.session_id, + startedAt: line.source.timestamp, + model: line.source.model, + gitBranch: line.source.git_branch, + cwd: line.source.cwd, + version: line.source.version, + }; + const transcriptTokenUsage = line.transcript_token_usage ? { - input: entry.tokenUsage.input, - output: entry.tokenUsage.output, - cached: entry.tokenUsage.cached, + input: line.transcript_token_usage.input, + output: line.transcript_token_usage.output, + cached: line.transcript_token_usage.cached, + reasoning: line.transcript_token_usage.reasoning, } - : undefined, - duration_ms: entry.durationMs, - cost_usd: entry.costUsd, - source: { - provider: entry.source.provider, - session_id: entry.source.sessionId, - model: entry.source.model, - timestamp: entry.source.startedAt, - git_branch: entry.source.gitBranch, - cwd: entry.source.cwd ?? entry.source.projectPath, - version: entry.source.version, - }, - }; + : undefined; + + if (existing) { + existing.messages.push({ index: line.message_index, message: buildReplayMessage(line) }); + continue; + } + + grouped.set(line.test_id, { + target: line.target, + tokenUsage: transcriptTokenUsage, + durationMs: line.transcript_duration_ms, + costUsd: line.transcript_cost_usd, + source, + messages: [{ index: line.message_index, message: buildReplayMessage(line) }], + }); + } + + return [...grouped.entries()].map(([testId, entry]) => ({ + testId, + target: entry.target, + tokenUsage: entry.tokenUsage, + durationMs: entry.durationMs, + costUsd: entry.costUsd, + source: entry.source, + messages: entry.messages + .sort((first, second) => first.index - second.index) + .map((item) => item.message), + })); } /** diff --git a/packages/core/test/import/transcript-provider.test.ts b/packages/core/test/import/transcript-provider.test.ts new file mode 100644 index 00000000..0b38ea91 --- /dev/null +++ b/packages/core/test/import/transcript-provider.test.ts @@ -0,0 +1,88 @@ +import { afterEach, describe, expect, it } from 'bun:test'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { TranscriptProvider, toTranscriptJsonLines, type TranscriptEntry } from '../../src/index.js'; + +describe('TranscriptProvider', () => { + const tempDirs: string[] = []; + + afterEach(async () => { + await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true }))); + tempDirs.length = 0; + }); + + it('groups per-message transcript rows into one replay entry per test', async () => { + const dir = await mkdtemp(path.join(tmpdir(), 'agentv-transcript-provider-')); + tempDirs.push(dir); + const transcriptPath = path.join(dir, 'transcript.jsonl'); + + const transcript: TranscriptEntry = { + messages: [ + { role: 'user', content: 'Inspect the repository' }, + { + role: 'assistant', + content: 'Opening the relevant files now.', + toolCalls: [{ tool: 'read_file', input: { path: 'README.md' }, output: 'contents' }], + }, + ], + source: { + provider: 'codex', + sessionId: 'session-abc', + startedAt: '2026-03-13T00:00:00.000Z', + model: 'gpt-5.4', + }, + tokenUsage: { input: 120, output: 45, cached: 12, reasoning: 6 }, + durationMs: 3200, + costUsd: 0.0125, + }; + + const lines = toTranscriptJsonLines(transcript, { + testId: 'case-1', + target: 'offline-codex', + }); + await writeFile(transcriptPath, `${lines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); + + const provider = await TranscriptProvider.fromFile(transcriptPath); + expect(provider.lineCount).toBe(1); + expect(provider.targetName).toBe('codex'); + + const response = await provider.invoke({ question: 'ignored' }); + expect(response.output).toEqual(transcript.messages); + expect(response.tokenUsage).toEqual({ input: 120, output: 45, cached: 12, reasoning: 6 }); + expect(response.durationMs).toBe(3200); + expect(response.costUsd).toBe(0.0125); + expect(response.startTime).toBe('2026-03-13T00:00:00.000Z'); + }); + + it('counts distinct test transcripts instead of raw JSONL rows', async () => { + const dir = await mkdtemp(path.join(tmpdir(), 'agentv-transcript-provider-')); + tempDirs.push(dir); + const transcriptPath = path.join(dir, 'transcript.jsonl'); + + const first = toTranscriptJsonLines({ + messages: [ + { role: 'user', content: 'First task' }, + { role: 'assistant', content: 'First answer' }, + ], + source: { provider: 'claude', sessionId: 'one' }, + }); + const second = toTranscriptJsonLines({ + messages: [ + { role: 'user', content: 'Second task' }, + { role: 'assistant', content: 'Second answer' }, + ], + source: { provider: 'claude', sessionId: 'two' }, + }); + + await writeFile( + transcriptPath, + `${[...first, ...second].map((line) => JSON.stringify(line)).join('\n')}\n`, + 'utf8', + ); + + const provider = await TranscriptProvider.fromFile(transcriptPath); + expect(provider.lineCount).toBe(2); + }); +}); From 185660d7ac47a07f0763c45302112af50f128e4f Mon Sep 17 00:00:00 2001 From: Christopher Date: Sun, 12 Apr 2026 23:56:07 +0000 Subject: [PATCH 4/4] style: satisfy transcript migration lint --- apps/cli/src/commands/eval/artifact-writer.ts | 2 +- apps/cli/src/commands/import/claude.ts | 6 +++++- apps/cli/src/commands/import/codex.ts | 6 +++++- apps/cli/src/commands/import/copilot.ts | 6 +++++- .../core/test/import/transcript-provider.test.ts | 12 ++++++++++-- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index 5813c58e..c7844ccf 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -3,9 +3,9 @@ import path from 'node:path'; import { DEFAULT_THRESHOLD, - toTranscriptJsonLines, type EvaluationResult, type EvaluatorResult, + toTranscriptJsonLines, } from '@agentv/core'; import { toSnakeCaseDeep } from '../../utils/case-conversion.js'; import { RESULT_INDEX_FILENAME } from './result-layout.js'; diff --git a/apps/cli/src/commands/import/claude.ts b/apps/cli/src/commands/import/claude.ts index e5975e43..aca1a6a3 100644 --- a/apps/cli/src/commands/import/claude.ts +++ b/apps/cli/src/commands/import/claude.ts @@ -96,7 +96,11 @@ export const importClaudeCommand = command({ // Write transcript as JSONL (one message per line, grouped by test_id) const jsonLines = toTranscriptJsonLines(transcript); - await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); + await writeFile( + outputPath, + `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, + 'utf8', + ); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/apps/cli/src/commands/import/codex.ts b/apps/cli/src/commands/import/codex.ts index 367ea900..13305a02 100644 --- a/apps/cli/src/commands/import/codex.ts +++ b/apps/cli/src/commands/import/codex.ts @@ -93,7 +93,11 @@ export const importCodexCommand = command({ // Write transcript as JSONL (one message per line, grouped by test_id) const jsonLines = toTranscriptJsonLines(transcript); - await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); + await writeFile( + outputPath, + `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, + 'utf8', + ); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/apps/cli/src/commands/import/copilot.ts b/apps/cli/src/commands/import/copilot.ts index 44661ce8..0915693f 100644 --- a/apps/cli/src/commands/import/copilot.ts +++ b/apps/cli/src/commands/import/copilot.ts @@ -101,7 +101,11 @@ export const importCopilotCommand = command({ // Write transcript as JSONL (one message per line, grouped by test_id) const jsonLines = toTranscriptJsonLines(transcript); - await writeFile(outputPath, `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); + await writeFile( + outputPath, + `${jsonLines.map((line) => JSON.stringify(line)).join('\n')}\n`, + 'utf8', + ); const msgCount = transcript.messages.length; const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); diff --git a/packages/core/test/import/transcript-provider.test.ts b/packages/core/test/import/transcript-provider.test.ts index 0b38ea91..7bba6f38 100644 --- a/packages/core/test/import/transcript-provider.test.ts +++ b/packages/core/test/import/transcript-provider.test.ts @@ -3,7 +3,11 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import path from 'node:path'; -import { TranscriptProvider, toTranscriptJsonLines, type TranscriptEntry } from '../../src/index.js'; +import { + type TranscriptEntry, + TranscriptProvider, + toTranscriptJsonLines, +} from '../../src/index.js'; describe('TranscriptProvider', () => { const tempDirs: string[] = []; @@ -42,7 +46,11 @@ describe('TranscriptProvider', () => { testId: 'case-1', target: 'offline-codex', }); - await writeFile(transcriptPath, `${lines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8'); + await writeFile( + transcriptPath, + `${lines.map((line) => JSON.stringify(line)).join('\n')}\n`, + 'utf8', + ); const provider = await TranscriptProvider.fromFile(transcriptPath); expect(provider.lineCount).toBe(1);