/**
 * Recover the assistant's text from streamed events when the final message
 * list carries an assistant entry with empty content.
 *
 * Events are scanned newest-first. The sources of text, in order of preference:
 *  1. an assistant `message_end` whose content yields text,
 *  2. the `partial` snapshot attached to an assistant `message_update`,
 *  3. a raw `content` string on the update's `assistantMessageEvent`,
 *  4. raw `delta` fragments on the update's `assistantMessageEvent`.
 *
 * A `delta` is only one streamed fragment, so fragments are accumulated across
 * the trailing run of assistant updates and emitted in chronological order
 * instead of returning the newest fragment alone. If an older update in that
 * run supplies a snapshot or `content` string, the newer fragments are appended
 * to it.
 *
 * @param events Parsed JSONL events in emission order; entries that are not
 *   objects are ignored.
 * @returns The recovered text, or `undefined` when no event carries any.
 */
function extractAssistantContentFromEvents(events: unknown[]): string | undefined {
  // Raw fragments collected newest-first from updates that had no usable snapshot.
  const trailingFragments: string[] = [];
  const withTrailingFragments = (base: string): string =>
    base + [...trailingFragments].reverse().join('');

  for (let i = events.length - 1; i >= 0; i--) {
    const evt = events[i] as Record<string, unknown> | null;
    if (!evt || typeof evt !== 'object') continue;

    const msg = evt.message as Record<string, unknown> | undefined;
    const isAssistantMessage = msg?.role === 'assistant';

    if (evt.type !== 'message_update' || !isAssistantMessage) {
      // Any other event ends the run of updates the collected fragments belong
      // to; looking further back would mix in text from an earlier message.
      if (trailingFragments.length > 0) break;

      if (evt.type === 'message_end' && isAssistantMessage) {
        const text = extractPiTextContent(msg?.content);
        if (text) return text;
      }
      continue;
    }

    const deltaEvent = evt.assistantMessageEvent as Record<string, unknown> | undefined;
    const partial = deltaEvent?.partial as Record<string, unknown> | undefined;

    // The snapshot already holds the text accumulated up to this update.
    const partialText = extractPiTextContent(partial?.content);
    if (partialText) return withTrailingFragments(partialText);

    // Only untyped or text stream events may contribute raw strings.
    // NOTE(review): Pi's stream presumably also emits non-text events (e.g.
    // thinking / tool-call deltas) that carry `delta`/`content`; those must not
    // be reported as the assistant's answer - confirm against the Pi schema.
    const streamEventType = deltaEvent?.type;
    if (typeof streamEventType === 'string' && !streamEventType.startsWith('text')) {
      continue;
    }

    const completedText = deltaEvent?.content;
    if (typeof completedText === 'string' && completedText.length > 0) {
      return withTrailingFragments(completedText);
    }

    const fragment = deltaEvent?.delta;
    if (typeof fragment === 'string' && fragment.length > 0) {
      trailingFragments.push(fragment);
    }
  }

  return trailingFragments.length > 0 ? withTrailingFragments('') : undefined;
}