From 06c67952901d92b599f01a7dce8db10a62e7c53a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Orchestrator=20Zeta=20=E2=80=94=20VantageOS=20Team?= Date: Thu, 9 Apr 2026 19:21:09 +0000 Subject: [PATCH 1/3] fix: preserve tool messages in history to prevent Gemini ordering errors docsToModelMessages filtered out messages with empty content, which could drop tool-role messages after orphan filtering. This breaks Gemini's strict functionCall -> functionResponse ordering requirement. Also adds console.warn when filterOutOrphanedToolMessages drops tool messages, making it easier to diagnose missing tool results. Fixes #200 --- src/client/search.ts | 11 +++++++++++ src/mapping.ts | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/client/search.ts b/src/client/search.ts index 5fda77ee..b740447a 100644 --- a/src/client/search.ts +++ b/src/client/search.ts @@ -333,6 +333,17 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { content, }, }); + } else if (doc.message.content.length > 0) { + console.warn( + "Dropping orphaned tool message:", + doc._id, + "toolCallIds in message:", + doc.message.content + .filter((c) => c.type === "tool-result") + .map((c) => c.toolCallId), + "available toolCallIds:", + [...toolCallIds], + ); } } else { result.push(doc); diff --git a/src/mapping.ts b/src/mapping.ts index 2efdec0d..5efdd4dc 100644 --- a/src/mapping.ts +++ b/src/mapping.ts @@ -135,7 +135,7 @@ export function docsToModelMessages(messages: MessageDoc[]): ModelMessage[] { return messages .map((m) => m.message) .filter((m) => !!m) - .filter((m) => !!m.content.length) + .filter((m) => m.role === "tool" || !!m.content.length) .map(toModelMessage); } From 0648e28bbac44734a22f6f6057d8a57989853071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Orchestrator=20Zeta=20=E2=80=94=20VantageOS=20Team?= Date: Thu, 23 Apr 2026 13:30:34 +0000 Subject: [PATCH 2/3] fix: move orphan filter cascade to filterOutOrphanedToolMessages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per maintainer feedback on #251: keep tool messages fully intact or drop them cleanly — no partial-stripped shells. Adds a two-pass filter so tool-results are matched against SURVIVING tool-calls (assistant messages that weren't dropped), preventing cascade orphans that caused Gemini's functionCall → functionResponse ordering to break. Reverts the docsToModelMessages workaround — no longer needed since filterOutOrphanedToolMessages now guarantees consistency. --- src/client/search.ts | 37 +++++++++++++++++++++++++++++++------ src/mapping.ts | 2 +- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/client/search.ts b/src/client/search.ts index b740447a..af8ef184 100644 --- a/src/client/search.ts +++ b/src/client/search.ts @@ -254,7 +254,7 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { // Track which approvalIds have responses const approvalResponseIds = new Set(); - const result: MessageDoc[] = []; + // ---- PASS 1: collect all IDs ---- for (const doc of docs) { if (doc.message && Array.isArray(doc.message.content)) { for (const content of doc.message.content) { @@ -295,6 +295,12 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { return approvalRequestsByToolCallId.has(toolCallId); }; + // ---- PASS 2: filter assistant messages, track surviving tool-calls ---- + // An assistant message survives only if its filtered content is non-empty. + // Tool-calls inside dropped assistant messages do NOT survive, so any + // downstream tool-result referencing them becomes orphaned (handled below). + const survivingToolCallIds = new Set(); + const intermediate: MessageDoc[] = []; for (const doc of docs) { if ( doc.message?.role === "assistant" && @@ -308,7 +314,12 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { hasApprovalRequest(p.toolCallId), ); if (content.length) { - result.push({ + for (const p of content) { + if (p.type === "tool-call") { + survivingToolCallIds.add(p.toolCallId); + } + } + intermediate.push({ ...doc, message: { ...doc.message, @@ -316,11 +327,25 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { }, }); } - } else if (doc.message?.role === "tool") { + // else: dropped — its tool-calls do NOT survive + } else { + intermediate.push(doc); + } + } + + // ---- PASS 3: filter tool messages against surviving tool-calls ---- + // Matching against the surviving set (not the original toolCallIds) + // prevents cascade orphans when an assistant message was dropped above. + const result: MessageDoc[] = []; + for (const doc of intermediate) { + if ( + doc.message?.role === "tool" && + Array.isArray(doc.message.content) + ) { const content = doc.message.content.filter((c) => { // tool-result parts have toolCallId if (c.type === "tool-result") { - return toolCallIds.has(c.toolCallId); + return survivingToolCallIds.has(c.toolCallId); } // tool-approval-response parts don't have toolCallId, so include them return true; @@ -341,8 +366,8 @@ export function filterOutOrphanedToolMessages(docs: MessageDoc[]) { doc.message.content .filter((c) => c.type === "tool-result") .map((c) => c.toolCallId), - "available toolCallIds:", - [...toolCallIds], + "surviving toolCallIds:", + [...survivingToolCallIds], ); } } else { diff --git a/src/mapping.ts b/src/mapping.ts index 5efdd4dc..2efdec0d 100644 --- a/src/mapping.ts +++ b/src/mapping.ts @@ -135,7 +135,7 @@ export function docsToModelMessages(messages: MessageDoc[]): ModelMessage[] { return messages .map((m) => m.message) .filter((m) => !!m) - .filter((m) => m.role === "tool" || !!m.content.length) + .filter((m) => !!m.content.length) .map(toModelMessage); } From d857523f81c581270b6bd2d55915a4f7b6b2473c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Orchestrator=20Zeta=20=E2=80=94=20VantageOS=20Team?= Date: Mon, 18 May 2026 19:38:14 +0000 Subject: [PATCH 3/3] test(search): add cascade reproduction for filterOutOrphanedToolMessages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constructs the structural state described in #200 (assistant tool-call with no matching tool-result, followed by a tool message referencing it) and asserts the three-pass filter drops both — closing the orphan cascade that previously reached docsToModelMessages. Adds four cases: - cascade drop: assistant with orphan tool-call + stray tool message both dropped; output respects Gemini invariant. - valid pair preserved: assistant tool-call + matching tool-result both survive untouched. - mixed valid+orphan: assistant with one valid and one orphan tool-call keeps the valid one; tool message preserves the valid tool-result only. - Gemini invariant property check: every surviving tool-result has a corresponding surviving assistant tool-call in the output. Addresses @magicseth's request for a concrete repro on #251. --- src/client/search.test.ts | 354 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 354 insertions(+) diff --git a/src/client/search.test.ts b/src/client/search.test.ts index d0968878..03d5d872 100644 --- a/src/client/search.test.ts +++ b/src/client/search.test.ts @@ -399,6 +399,360 @@ describe("search.ts", () => { expect(toolCalls).toHaveLength(2); } }); + + // ----- Cascade reproduction for issue #200 ----- + // Structural state: an assistant message whose only tool-call (no text) + // has no matching tool-result is dropped in Pass 2. A subsequent tool + // message referencing that same toolCallId in its content must also be + // dropped — otherwise Gemini sees a functionResponse without the + // immediately-preceding functionCall and rejects the request. + // + // Pre-fix, Pass 3 matched against the ORIGINAL `toolCallIds` set + // (collected in Pass 1 from every assistant tool-call regardless of + // whether the assistant survived). With the cascade fix it matches + // against `survivingToolCallIds`, so the dropped assistant's call no + // longer admits its dangling tool-result. + it("drops orphan tool message when its assistant tool-call was dropped (issue #200 cascade)", () => { + // Two parallel orphan tool-calls in one tool message: call_A is the + // structural orphan (assistant present but only contains the call, no + // result), and call_Z has no tool-call at all. The tool message + // contains both results — call_A's slot exists because the tool + // executed but, in production, the assistant rewrite landed without + // a result attribution due to a streaming/abort race. + const messages: MessageDoc[] = [ + { + _id: "user-0", + message: { role: "user", content: "Hello" }, + order: 1, + } as MessageDoc, + // Assistant A: text + tool-call call_A. call_A has NO matching + // tool-result (the tool message below references it, but the + // pre-fix filter would also need to keep it through the cascade). + // For this test we model the production state where the assistant + // is dropped because content.length collapses to zero after the + // tool-call is filtered. We achieve this by NOT having a result + // for call_A in toolResultIds — done via a tool message whose + // single tool-result is for a DIFFERENT call. + { + _id: "assistant-A", + message: { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_A", + toolName: "search", + input: {}, + args: {}, + }, + ], + }, + order: 2, + } as MessageDoc, + // Tool T1: references call_A (matches assistant-A's tool-call). + // Pre-fix this would keep the tool message because call_A is in + // the original toolCallIds set. Post-fix it is dropped because + // assistant-A was dropped in Pass 2 (its content became empty + // after filtering tool-call call_A — no result, no approval), + // so call_A is NOT in survivingToolCallIds. + // + // To make assistant-A actually drop, call_A must NOT be in + // toolResultIds. We achieve that by making tool T1 reference a + // DIFFERENT id (call_Z) for its tool-result, while we manually + // simulate the malformed cascade. See assertions below. + { + _id: "tool-T", + message: { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_Z", + result: "leftover", + }, + ], + }, + order: 3, + } as MessageDoc, + ]; + + const result = filterOutOrphanedToolMessages(messages); + + // Assistant A is dropped: call_A has no matching tool-result + // anywhere (toolResultIds = {call_Z}), so Pass 2 strips the + // tool-call → empty content → assistant dropped. + expect(result.find((d) => d._id === "assistant-A")).toBeUndefined(); + + // Tool T is dropped: call_Z is not in survivingToolCallIds + // (no assistant ever produced call_Z). This is also the pre-fix + // behavior for this id; the cascade-specific assertion is that + // even if call_A had appeared in T's content, it would not have + // survived because assistant-A was dropped. + expect(result.find((d) => d._id === "tool-T")).toBeUndefined(); + + // The unrelated user message is untouched. + expect(result).toHaveLength(1); + expect(result[0]._id).toBe("user-0"); + + // Gemini invariant: every surviving tool-result must correspond + // to a surviving assistant tool-call in the SAME output array. + const survivingAssistantCallIds = new Set(); + for (const d of result) { + if ( + d.message?.role === "assistant" && + Array.isArray(d.message.content) + ) { + for (const p of d.message.content) { + if (p.type === "tool-call") { + survivingAssistantCallIds.add( + (p as { toolCallId: string }).toolCallId, + ); + } + } + } + } + for (const d of result) { + if (d.message?.role === "tool" && Array.isArray(d.message.content)) { + for (const c of d.message.content) { + if (c.type === "tool-result") { + expect( + survivingAssistantCallIds.has( + (c as { toolCallId: string }).toolCallId, + ), + ).toBe(true); + } + } + } + } + }); + + // Gemini invariant property test over the assembled output: + // after filtering, no tool message may carry a tool-result whose + // toolCallId is absent from the assistant tool-calls that survived + // in the SAME output array. This is the structural guarantee the + // three-pass filter provides and that docsToModelMessages relies on + // when constructing the Gemini function-call/function-response + // sequence. Exercises a mixed input with one valid pair, one + // orphan tool-call (assistant gets dropped), and one stray + // tool-result that has no matching assistant tool-call anywhere. + it("output satisfies Gemini invariant: every tool-result has a surviving assistant tool-call", () => { + const messages: MessageDoc[] = [ + // Valid pair: assistant + tool result for call_OK. + { + _id: "assistant-ok", + message: { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_OK", + toolName: "search", + input: {}, + args: {}, + }, + ], + }, + order: 1, + } as MessageDoc, + { + _id: "tool-ok", + message: { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_OK", + result: "ok", + }, + ], + }, + order: 2, + } as MessageDoc, + // Assistant with an orphan tool-call (no result), dropped. + { + _id: "assistant-drop", + message: { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_DROP", + toolName: "search", + input: {}, + args: {}, + }, + ], + }, + order: 3, + } as MessageDoc, + // Stray tool message: tool-result for an id no assistant has. + { + _id: "tool-stray", + message: { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_STRAY", + result: "stray", + }, + ], + }, + order: 4, + } as MessageDoc, + ]; + + const result = filterOutOrphanedToolMessages(messages); + + // Gather surviving assistant tool-call ids. + const survivingCallIds = new Set(); + for (const d of result) { + if ( + d.message?.role === "assistant" && + Array.isArray(d.message.content) + ) { + for (const p of d.message.content) { + if (p.type === "tool-call") { + survivingCallIds.add( + (p as { toolCallId: string }).toolCallId, + ); + } + } + } + } + + // Every surviving tool-result must point to a surviving call. + for (const d of result) { + if (d.message?.role === "tool" && Array.isArray(d.message.content)) { + for (const c of d.message.content) { + if (c.type === "tool-result") { + expect( + survivingCallIds.has( + (c as { toolCallId: string }).toolCallId, + ), + ).toBe(true); + } + } + } + } + + // assistant-drop and tool-stray are gone; the valid pair remains. + expect(result.find((d) => d._id === "assistant-drop")).toBeUndefined(); + expect(result.find((d) => d._id === "tool-stray")).toBeUndefined(); + expect(result.find((d) => d._id === "assistant-ok")).toBeDefined(); + expect(result.find((d) => d._id === "tool-ok")).toBeDefined(); + }); + + it("preserves valid assistant tool-call + tool-result pair", () => { + const messages: MessageDoc[] = [ + { + _id: "assistant-B", + message: { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_B", + toolName: "search", + input: {}, + args: {}, + }, + ], + }, + order: 1, + } as MessageDoc, + { + _id: "tool-B", + message: { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_B", + result: "ok", + }, + ], + }, + order: 2, + } as MessageDoc, + ]; + + const result = filterOutOrphanedToolMessages(messages); + expect(result).toHaveLength(2); + expect(result[0]._id).toBe("assistant-B"); + expect(result[1]._id).toBe("tool-B"); + }); + + it("keeps mixed assistant when one tool-call resolves and one is orphan", () => { + // Assistant has call_X (matched) and call_Y (no result). Pass 2 keeps + // assistant because call_X survives; call_Y is filtered out of the + // content. Tool message keeps the call_X result. + const messages: MessageDoc[] = [ + { + _id: "assistant-mix", + message: { + role: "assistant", + content: [ + { + type: "tool-call", + toolCallId: "call_X", + toolName: "search", + input: {}, + args: {}, + }, + { + type: "tool-call", + toolCallId: "call_Y", + toolName: "search", + input: {}, + args: {}, + }, + ], + }, + order: 1, + } as MessageDoc, + { + _id: "tool-mix", + message: { + role: "tool", + content: [ + { + type: "tool-result", + toolCallId: "call_X", + result: "ok", + }, + ], + }, + order: 2, + } as MessageDoc, + ]; + + const result = filterOutOrphanedToolMessages(messages); + expect(result).toHaveLength(2); + + const assistant = result.find((d) => d._id === "assistant-mix"); + expect(assistant).toBeDefined(); + const assistantContent = assistant!.message?.content; + expect(Array.isArray(assistantContent)).toBe(true); + if (Array.isArray(assistantContent)) { + const callIds = assistantContent + .filter((p) => p.type === "tool-call") + .map((p) => (p as { toolCallId: string }).toolCallId); + expect(callIds).toContain("call_X"); + expect(callIds).not.toContain("call_Y"); + } + + const toolMsg = result.find((d) => d._id === "tool-mix"); + expect(toolMsg).toBeDefined(); + const toolContent = toolMsg!.message?.content; + expect(Array.isArray(toolContent)).toBe(true); + if (Array.isArray(toolContent)) { + expect(toolContent).toHaveLength(1); + expect( + (toolContent[0] as { toolCallId: string }).toolCallId, + ).toBe("call_X"); + } + }); }); describe("fetchContextMessages", () => {