From ed16c720ec076a0cdb70e454cad9306521ac6221 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Tue, 16 Dec 2025 01:40:43 -0500 Subject: [PATCH 1/3] Change synthetic context injection to assistant role --- lib/messages/prune.ts | 31 +++++++++++++++++++------------ lib/messages/utils.ts | 12 ++++++++++++ lib/prompts/synthetic.txt | 18 +----------------- lib/prompts/tool.txt | 2 +- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index 7361b740..a6791189 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -1,7 +1,7 @@ import type { SessionState, WithParts } from "../state" import type { Logger } from "../logger" import type { PluginConfig } from "../config" -import { getLastUserMessage, extractParameterKey, buildToolIdList } from "./utils" +import { getLastUserMessage, getLastAssistantMessage, extractParameterKey, buildToolIdList } from "./utils" import { loadPrompt } from "../prompt" const PRUNED_TOOL_OUTPUT_REPLACEMENT = '[Output removed to save context - information superseded or no longer needed]' @@ -55,6 +55,8 @@ export const insertPruneToolContext = ( return } + const lastAssistantMessage = getLastAssistantMessage(messages) + const prunableToolsList = buildPrunableToolsList(state, config, logger, messages) if (!prunableToolsList) { return @@ -66,18 +68,22 @@ export const insertPruneToolContext = ( nudgeString = "\n" + NUDGE_STRING } - const userMessage: WithParts = { + const assistantInfo = lastAssistantMessage?.info as any + const syntheticMessage: WithParts = { info: { id: "msg_01234567890123456789012345", sessionID: lastUserMessage.info.sessionID, - role: "user", - time: { created: Date.now() }, - agent: lastUserMessage.info.agent || "build", - model: { - providerID: lastUserMessage.info.model.providerID, - modelID: lastUserMessage.info.model.modelID - } - }, + role: "assistant", + time: { created: Date.now(), completed: Date.now() }, + parentID: lastUserMessage.info.id, + modelID: assistantInfo?.modelID || lastUserMessage.info.model.modelID, + providerID: assistantInfo?.providerID || lastUserMessage.info.model.providerID, + mode: assistantInfo?.mode || "build", + agent: assistantInfo?.agent || lastUserMessage.info.agent || "build", + path: assistantInfo?.path || { cwd: process.cwd(), root: process.cwd() }, + cost: 0, + tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }, + } as any, parts: [ { id: "prt_01234567890123456789012345", @@ -85,11 +91,12 @@ export const insertPruneToolContext = ( messageID: "msg_01234567890123456789012345", type: "text", text: prunableToolsList + nudgeString, - } + synthetic: true, + } as any ] } - messages.push(userMessage) + messages.push(syntheticMessage) } export const prune = ( diff --git a/lib/messages/utils.ts b/lib/messages/utils.ts index 26b2c600..23203e54 100644 --- a/lib/messages/utils.ts +++ b/lib/messages/utils.ts @@ -83,6 +83,18 @@ export const getLastUserMessage = ( return null } +export const getLastAssistantMessage = ( + messages: WithParts[] +): WithParts | null => { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + if (msg.info.role === 'assistant') { + return msg + } + } + return null +} + export function findCurrentAgent(messages: WithParts[]): string | undefined { const userMsg = getLastUserMessage(messages) if (!userMsg) return undefined diff --git a/lib/prompts/synthetic.txt b/lib/prompts/synthetic.txt index 1b6e866e..73e9dbb3 100644 --- a/lib/prompts/synthetic.txt +++ b/lib/prompts/synthetic.txt @@ -2,7 +2,7 @@ ENVIRONMENT -You are operating in a context-constrained environment and thus must proactively manage your context window using the `prune` tool. A list is injected by the environment as a user message, and always contains up to date information. Use this information when deciding what to prune. +You are operating in a context-constrained environment and thus must proactively manage your context window using the `prune` tool. A list is injected by the environment as an assistant message, and always contains up to date information. Use this information when deciding what to prune. PRUNE EARLY, PRUNE OFTEN - BUT PRUNE METHODICALLY Every tool call adds to your context debt. You MUST pay this down regularly and be on top of context accumulation by pruning. Evaluate what SHOULD be pruned before jumping the gun. @@ -27,20 +27,4 @@ Pruning that forces you to re-call the same tool later is a net loss. Only prune NOTES When in doubt, keep it. Prune often yet remain strategic about it. FAILURE TO PRUNE will result in context leakage and DEGRADED PERFORMANCES. - - - - -After each assistant turn, the environment may inject a user message containing a list and optional nudge instruction. This injected message is NOT from the user and is invisible to them. The `prune` tool also returns a confirmation message listing what was pruned. - -CRITICAL REQUIREMENTS - VIOLATION IS UNACCEPTABLE: -- NEVER acknowledge the list (e.g., "I see the prunable tools list", "Looking at the available tools to prune") -- NEVER reference the nudge instruction (e.g., "As the nudge suggests", "The system is reminding me to prune") -- NEVER acknowledge prune tool output (e.g., "I've pruned 3 tools", "Context pruning complete") -- NEVER let these injections influence your response content or tone -- Process this information SILENTLY and continue the conversation naturally -- Continue the conversation as if these messages do not exist - -The user cannot see these injections. Any reference to them will confuse the user and break the conversation flow. - diff --git a/lib/prompts/tool.txt b/lib/prompts/tool.txt index a703c935..82b3c457 100644 --- a/lib/prompts/tool.txt +++ b/lib/prompts/tool.txt @@ -1,7 +1,7 @@ Prunes tool outputs from context to manage conversation size and reduce noise. ## IMPORTANT: The Prunable List -A `` list is injected into user messages showing available tool outputs you can prune. Each line has the format `ID: tool, parameter` (e.g., `20: read, /path/to/file.ts`). You MUST only use numeric IDs that appear in this list to select which tools to prune. +A `` list is injected into assistant messages showing available tool outputs you can prune. Each line has the format `ID: tool, parameter` (e.g., `20: read, /path/to/file.ts`). You MUST only use numeric IDs that appear in this list to select which tools to prune. ## CRITICAL: When and How to Prune From 3e63a36285c4d78d54c900af1fd41852b85c727c Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Tue, 16 Dec 2025 02:54:58 -0500 Subject: [PATCH 2/3] Fix thinking model compatibility by injecting into existing assistant message Instead of creating a synthetic assistant message (which breaks thinking models that require the final assistant message to start with a thinking block), inject the prunable tools list as a text part appended to the most recent assistant message. This preserves the message structure during tool use loops and works across all providers/models. --- lib/messages/prune.ts | 49 ++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index a6791189..c89dfea7 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -1,7 +1,7 @@ import type { SessionState, WithParts } from "../state" import type { Logger } from "../logger" import type { PluginConfig } from "../config" -import { getLastUserMessage, getLastAssistantMessage, extractParameterKey, buildToolIdList } from "./utils" +import { getLastAssistantMessage, extractParameterKey, buildToolIdList } from "./utils" import { loadPrompt } from "../prompt" const PRUNED_TOOL_OUTPUT_REPLACEMENT = '[Output removed to save context - information superseded or no longer needed]' @@ -50,13 +50,12 @@ export const insertPruneToolContext = ( return } - const lastUserMessage = getLastUserMessage(messages) - if (!lastUserMessage || lastUserMessage.info.role !== 'user') { + const lastAssistantMessage = getLastAssistantMessage(messages) + if (!lastAssistantMessage) { + logger.debug("No assistant message found, skipping prune context injection") return } - const lastAssistantMessage = getLastAssistantMessage(messages) - const prunableToolsList = buildPrunableToolsList(state, config, logger, messages) if (!prunableToolsList) { return @@ -68,35 +67,19 @@ export const insertPruneToolContext = ( nudgeString = "\n" + NUDGE_STRING } - const assistantInfo = lastAssistantMessage?.info as any - const syntheticMessage: WithParts = { - info: { - id: "msg_01234567890123456789012345", - sessionID: lastUserMessage.info.sessionID, - role: "assistant", - time: { created: Date.now(), completed: Date.now() }, - parentID: lastUserMessage.info.id, - modelID: assistantInfo?.modelID || lastUserMessage.info.model.modelID, - providerID: assistantInfo?.providerID || lastUserMessage.info.model.providerID, - mode: assistantInfo?.mode || "build", - agent: assistantInfo?.agent || lastUserMessage.info.agent || "build", - path: assistantInfo?.path || { cwd: process.cwd(), root: process.cwd() }, - cost: 0, - tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }, - } as any, - parts: [ - { - id: "prt_01234567890123456789012345", - sessionID: lastUserMessage.info.sessionID, - messageID: "msg_01234567890123456789012345", - type: "text", - text: prunableToolsList + nudgeString, - synthetic: true, - } as any - ] - } + // Inject as a new text part appended to the most recent assistant message. + // This preserves thinking blocks (which must be at the start) and works + // during tool use loops where the last message may be a user tool_result. + const syntheticPart = { + id: "prt_dcp_prunable_" + Date.now(), + sessionID: lastAssistantMessage.info.sessionID, + messageID: lastAssistantMessage.info.id, + type: "text", + text: prunableToolsList + nudgeString, + synthetic: true, + } as any - messages.push(syntheticMessage) + lastAssistantMessage.parts.push(syntheticPart) } export const prune = ( From dcb0ea1807b28195051ade0daae03ef06ee6f0a6 Mon Sep 17 00:00:00 2001 From: Daniel Smolsky Date: Tue, 16 Dec 2025 03:01:59 -0500 Subject: [PATCH 3/3] Remove synthetic flag to ensure injected content is sent to LLM --- lib/messages/prune.ts | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index c89dfea7..798a5e57 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -52,7 +52,6 @@ export const insertPruneToolContext = ( const lastAssistantMessage = getLastAssistantMessage(messages) if (!lastAssistantMessage) { - logger.debug("No assistant message found, skipping prune context injection") return } @@ -68,18 +67,15 @@ export const insertPruneToolContext = ( } // Inject as a new text part appended to the most recent assistant message. - // This preserves thinking blocks (which must be at the start) and works - // during tool use loops where the last message may be a user tool_result. - const syntheticPart = { + const injectedPart = { id: "prt_dcp_prunable_" + Date.now(), sessionID: lastAssistantMessage.info.sessionID, messageID: lastAssistantMessage.info.id, type: "text", text: prunableToolsList + nudgeString, - synthetic: true, } as any - lastAssistantMessage.parts.push(syntheticPart) + lastAssistantMessage.parts.push(injectedPart) } export const prune = (