diff --git a/README.md b/README.md index a82e839..7ee75ef 100644 --- a/README.md +++ b/README.md @@ -82,18 +82,20 @@ Or add to your opencode config: ACP hands the context-compression tool directly to the model. The model is **100% responsible** for context compression. The model's available tools are -mainly: **compress**, **decompress**, and **delete** (`mark_block` / `unmark_block`). +mainly: **compress** and **decompress**. A hardcoded 100% GC fallback acts as +a safety net when the context window is completely full. ### Lifecycle -Three operations: **compress**, **decompress**, and **delete**. Content loops -between raw and compressed, and eventually terminates in deletion: +Two operations: **compress** and **decompress**. Content loops between raw and +compressed. When context hits 100%, old-gen block summaries are truncated as +a last resort: ```mermaid stateDiagram-v2 Raw --> Compressed : compress Compressed --> Raw : decompress - Compressed --> Deleted : delete + Compressed --> Truncated : GC at 100% ``` ### Compression strategy @@ -305,7 +307,7 @@ Each level overrides the previous, so project settings take priority over global "protectedTools": [], }, }, - // Garbage collection and batch cleanup + // Garbage collection — hardcoded 100% fallback only "gc": { "algorithm": "truncate", // young → old generation promotion after this many survivals @@ -314,18 +316,8 @@ Each level overrides the previous, so project settings take priority over global "maxBlockAge": 15, // truncate old-gen summaries exceeding this length (chars) "maxOldGenSummaryLength": 3000, - // run major GC when context usage exceeds this + // run major GC when context usage exceeds this (hardcoded, not configurable) "majorGcThresholdPercent": "100%", - // Three-tier batch merge-cleanup for blocks flagged via mark_block. - // Accepts a number or "X%" of the model context window. 
- "batchCleanup": { - // At/above this usage, remind the model about marked blocks - "lowThreshold": "60%", - // At/above this usage, auto merge-compress all marked blocks into one - "highThreshold": "75%", - // At/above this usage, force-merge all old-gen blocks (before GC) - "forceThreshold": "90%", - }, }, } ``` @@ -354,7 +346,7 @@ To reset an override, delete the matching file from your overrides directory. ### Protected Tools By default, these tools are always protected from pruning: -`task`, `skill`, `todowrite`, `todoread`, `compress`, `decompress`, `mark_block`, `unmark_block`, `batch`, `plan_enter`, `plan_exit`, `write`, `edit` +`task`, `skill`, `todowrite`, `todoread`, `compress`, `decompress`, `batch`, `plan_enter`, `plan_exit`, `write`, `edit` The `protectedTools` arrays in `commands` and `strategies` add to this default list. diff --git a/README.zh-CN.md b/README.zh-CN.md index b29bd8b..c002db1 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -73,17 +73,17 @@ opencode plugin opencode-acp@latest --global ## 工作原理 -ACP 把上下文压缩工具直接交给模型。模型对上下文压缩**负全责**。模型可用的工具主要是:**compress**、**decompress** 和 **delete**(`mark_block` / `unmark_block`)。 +ACP 把上下文压缩工具直接交给模型。模型对上下文压缩**负全责**。模型可用的工具主要是:**compress** 和 **decompress**。当上下文达到 100% 时,系统自动触发 GC 截断作为兜底。 ### 生命周期 -三个操作:**压缩**、**解压缩**、**删除**。内容在原始与压缩之间循环,最终以删除终结: +两个操作:**压缩**、**解压缩**。内容在原始与压缩之间循环。当上下文达到 100% 时,GC 自动截断老年代 block 作为兜底: ```mermaid stateDiagram-v2 Raw --> Compressed : compress Compressed --> Raw : decompress - Compressed --> Deleted : delete + Compressed --> GC_Truncated : GC (100%) ``` ### 压缩策略 @@ -104,9 +104,9 @@ stateDiagram-v2 由模型决定何时解压。当上下文大到足以干扰模型的 self-attention 时,简短的 block 会让模型先压缩一部分内容,处理完紧急事务,再在后续工作中按需解压。 -### 删除策略 +### GC 兜底 -为了应对大量小块历史内容的堆积,新版本增加了删除策略。由模型决定是否删除。**一旦删除,内容不可恢复。** 这取代了原先的强制 GC,使得强制垃圾回收不再删除模型认为重要的内容。 +当上下文达到 100% 时,系统自动截断老年代 block 摘要,防止上下文溢出。这是最后的兜底机制,不影响模型的正常压缩/解压操作。 --- @@ -289,18 +289,8 @@ ACP 使用自己的配置文件,按以下顺序搜索: "maxBlockAge": 15, // 截断超过此长度(字符)的老年代摘要 "maxOldGenSummaryLength": 
3000, - // 上下文使用率超过此值时执行主 GC + // 上下文使用率超过此值时执行主 GC(兜底,硬编码为 100%) "majorGcThresholdPercent": "100%", - // 通过 mark_block 标记的块的三级批量合并清理阈值。 - // 接受数字或 "X%"(模型上下文窗口的百分比)。 - "batchCleanup": { - // 达到此使用率时,提醒模型已标记的块 - "lowThreshold": "60%", - // 达到此使用率时,自动将所有已标记块合并压缩为一个 - "highThreshold": "75%", - // 达到此使用率时,强制合并所有老年代块(GC 之前) - "forceThreshold": "90%", - }, }, } ``` @@ -329,7 +319,7 @@ ACP 暴露六个可编辑的 prompt: ### 受保护工具 默认情况下,以下工具始终受保护不被剪枝: -`task`、`skill`、`todowrite`、`todoread`、`compress`、`decompress`、`mark_block`、`unmark_block`、`batch`、`plan_enter`、`plan_exit`、`write`、`edit` +`task`、`skill`、`todowrite`、`todoread`、`compress`、`decompress`、`batch`、`plan_enter`、`plan_exit`、`write`、`edit` `commands` 和 `strategies` 中的 `protectedTools` 数组会添加到此默认列表。 diff --git a/devlog/2026-06-29_context-optimization/REQ.md b/devlog/2026-06-29_context-optimization/REQ.md new file mode 100644 index 0000000..75171a3 --- /dev/null +++ b/devlog/2026-06-29_context-optimization/REQ.md @@ -0,0 +1,31 @@ +# Context Optimization — Reduce Token Waste + +## Problem + +Session ses_102504697ffeYg89Sn0k8aknYg grew to 47% context usage. Root cause analysis revealed systematic token waste: + +1. **Compress summaries too verbose**: avg 579 chars (~145 tokens), some up to 2011 chars. Include unnecessary metrics, reviewer quotes, experimental parameters. +2. **Compress tool calls are pure overhead**: 344 calls × 813 chars avg = 280K chars. Each stores full summary in input — duplicated with block summary. +3. **Step markers waste space**: 4698 step-start/step-finish parts × ~88 chars avg = 413K chars (~103K tokens). Only mark boundaries, no useful content. +4. **Large tool outputs not compressed**: Model keeps 20-50K char outputs "just in case". +5. **No minimum compress range**: Model compresses tiny ranges (<2K chars) where overhead exceeds savings. +6. **ACP guidance too verbose**: Multi-paragraph nudge text wastes ~200 tokens/turn. + +## Requirements + +1. 
**R1**: Limit compress summary length to a configurable soft target (`maxSummaryLength`, default 200 chars) with a hard ceiling (`maxSummaryLengthHard`, default 800 chars). Reject if the hard ceiling is exceeded. +2. **R2**: ~~Truncate compress tool input after execution~~ — NOT FEASIBLE (no API to modify stored parts). +3. **R3**: Strengthen nudge to target large tool outputs (>5K chars) explicitly. +4. **R5**: Truncate step markers in context construction (skip step-start, truncate step-finish to 50 chars). +5. **R6**: Shorten ACP guidance text (pressure levels + per-message guidance). +6. **R7**: Enforce minimum compress range (default 2000 chars). Reject if below. + +## Cache Safety + +All fixes are either cache-neutral (only affect future operations) or one-time breaks that stabilize after deployment. No recurring cache breaks. + +## Non-Goals + +- Excluding old reasoning from context (causes recurring cache breaks — cancelled). +- Modifying block ID list (accuracy risk — kept as-is). +- compress tool input cleanup (not feasible with current API). diff --git a/devlog/2026-06-29_context-optimization/WORKLOG.md b/devlog/2026-06-29_context-optimization/WORKLOG.md new file mode 100644 index 0000000..738d249 --- /dev/null +++ b/devlog/2026-06-29_context-optimization/WORKLOG.md @@ -0,0 +1,47 @@ +# Worklog — Context Optimization + +## Changes (8 files, +186/-8 lines) + +### Fix 1: Summary length limit (R1) +- **config.ts**: Added `maxSummaryLength` (soft target, default 200) and `maxSummaryLengthHard` (hard ceiling, default 800) to CompressConfig +- **config-validation.ts**: Type + key validation +- **compress/message.ts, compress/range.ts**: Check `summary.length > maxSummaryLengthHard` → throw error before creating block + +### Fix 2: Compress tool cleanup (R2) — NOT FEASIBLE +- ToolContext API only allows modifying output/title/metadata, NOT input args +- Added TODO comments in both handlers noting `experimental.chat.messages.transform` as alternative +- Documented for future investigation + +### Fix 3: Nudge strengthening (R3) +- **inject/utils.ts**: Guidance text now explicitly mentions ">5000 characters" tool outputs +- Changed from generic
"compress tool outputs" to targeted "if any tool output >5000 chars and you've finished reading, compress it into a summary NOW" + +### Fix 5: Step marker truncation (R5) +- **prune.ts**: New `stripStepMarkers()` function + - Skips `step-start` parts entirely (zero-value boundary markers) + - Truncates `step-finish` reason to 50 chars (was avg 155 chars) + - Called from `prune()` before context injection +- Estimated savings: ~90K tokens per session with heavy reasoning + +### Fix 6: ACP simplification (R6) +- **system.ts**: Pressure level descriptions shortened to 1 sentence each + - Normal: "Be frugal — compress tool outputs you've finished using into summaries." + - Elevated: "Context is growing — compress larger ranges you no longer need." + - Critical: "Compress aggressively now — target the largest visible ranges first." +- **inject/utils.ts**: Per-message guidance reduced from 5+ to 3 sentences +- Block ID list: UNCHANGED (accuracy requirement) + +### Fix 7: Minimum compress range (R7) +- **config.ts**: Added `minCompressRange` (default 2000) to CompressConfig +- **config-validation.ts**: Type + key validation +- **compress/message.ts, compress/range.ts**: Calculate total message chars via `countMessageCharacters()` → throw error if < minCompressRange +- **token-utils.ts**: New `countMessageCharacters()` helper + +## Verification +- `npm run typecheck`: clean ✅ +- `npm run test`: 487 pass, 0 fail ✅ +- Block ID list: verified unchanged (empty git diff on nudge.ts) + +## Not Implemented +- **Fix 4 (exclude old reasoning)**: Cancelled — causes recurring cache breaks as reasoning crosses age threshold every turn. +- **Fix 2 (compress input cleanup)**: Not feasible with current OpenCode plugin API. Needs `experimental.chat.messages.transform` hook investigation. 
diff --git a/index.ts b/index.ts index 87a1028..970a9f3 100644 --- a/index.ts +++ b/index.ts @@ -4,8 +4,6 @@ import { createCompressMessageTool, createCompressRangeTool, createDecompressTool, - createMarkBlockTool, - createUnmarkBlockTool, } from "./lib/compress" import { compressDisabledByOpencode, @@ -91,8 +89,6 @@ const server: Plugin = (async (ctx) => { ? createCompressMessageTool(compressToolContext) : createCompressRangeTool(compressToolContext), decompress: createDecompressTool(compressToolContext), - mark_block: createMarkBlockTool(compressToolContext), - unmark_block: createUnmarkBlockTool(compressToolContext), }), }, config: async (opencodeConfig) => { @@ -113,7 +109,7 @@ const server: Plugin = (async (ctx) => { const toolsToAdd: string[] = [] if (config.compress.permission !== "deny" && !config.experimental.allowSubAgents) { - toolsToAdd.push("compress", "decompress", "mark_block", "unmark_block") + toolsToAdd.push("compress", "decompress") } if (toolsToAdd.length > 0) { diff --git a/lib/compress/index.ts b/lib/compress/index.ts index b4fe6e7..6330869 100644 --- a/lib/compress/index.ts +++ b/lib/compress/index.ts @@ -2,4 +2,3 @@ export { ToolContext } from "./types" export { createCompressMessageTool } from "./message" export { createCompressRangeTool } from "./range" export { createDecompressTool } from "./decompress" -export { createMarkBlockTool, createUnmarkBlockTool } from "./mark-block" diff --git a/lib/compress/mark-block.ts b/lib/compress/mark-block.ts deleted file mode 100644 index 11168ac..0000000 --- a/lib/compress/mark-block.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { tool } from "@opencode-ai/plugin" -import type { ToolContext } from "./types" -import { ensureSessionInitialized } from "../state" -import { saveSessionState } from "../state/persistence" -import { assignMessageRefs } from "../message-ids" -import { fetchSessionMessages } from "./search" -import { formatBlockRef, parseBlockRef } from "../message-ids" - -interface RunContext { - 
ask(input: { - permission: string - patterns: string[] - always: string[] - metadata: Record - }): Promise - metadata(input: { title: string }): void - sessionID: string -} - -async function prepareMarkSession( - ctx: ToolContext, - toolCtx: RunContext, -): Promise { - await toolCtx.ask({ - permission: "compress", - patterns: ["*"], - always: ["*"], - metadata: {}, - }) - - toolCtx.metadata({ title: "Mark block" }) - - const rawMessages = await fetchSessionMessages(ctx.client, toolCtx.sessionID) - - await ensureSessionInitialized( - ctx.client, - ctx.state, - toolCtx.sessionID, - ctx.logger, - rawMessages, - ctx.config.manualMode.enabled, - ) - - assignMessageRefs(ctx.state, rawMessages) -} - -const MARK_DESCRIPTION = `Marks a compressed block for batch merge-cleanup. - -Use this for blocks whose detailed content you no longer need, but whose summaries -you want to keep in context for now (to preserve prompt cache). Marked blocks stay -fully active with zero immediate effect on context or cache. When context pressure -rises, all marked blocks are merge-compressed together into a single summary in one -cache break, instead of being handled one at a time. - -Argument: blockId — the block reference to mark (e.g., "b1", "b3") - -Use mark_block instead of compress when you want deferred cleanup: the block keeps -serving cache hits now and gets consolidated later only if context gets tight.` - -const UNMARK_DESCRIPTION = `Removes the batch cleanup mark from a compressed block. - -Reverses mark_block. The block returns to normal handling and will not be -auto-merged during batch cleanup. 
- -Argument: blockId — the block reference to unmark (e.g., "b1", "b3")` - -function buildSchema() { - return { - blockId: tool.schema - .string() - .describe('Block reference to mark (e.g., "b1", "b3")'), - } -} - -function buildUnmarkSchema() { - return { - blockId: tool.schema - .string() - .describe('Block reference to unmark (e.g., "b1", "b3")'), - } -} - -export function createMarkBlockTool(ctx: ToolContext): ReturnType { - return tool({ - description: MARK_DESCRIPTION, - args: buildSchema(), - async execute(args, toolCtx) { - await prepareMarkSession(ctx, toolCtx) - - const targetBlockId = parseBlockRef(String(args.blockId)) - if (targetBlockId === null) { - return `Error: Invalid block ID "${args.blockId}". Use format "b0", "b1", etc.` - } - - const messagesState = ctx.state.prune.messages - const block = messagesState.blocksById.get(targetBlockId) - if (!block) { - return `Error: Block ${formatBlockRef(targetBlockId)} does not exist.` - } - - if (!block.active) { - return `Error: Block ${formatBlockRef(targetBlockId)} is not active.` - } - - messagesState.markedForCleanup.add(targetBlockId) - await saveSessionState(ctx.state, ctx.logger) - - const ref = formatBlockRef(targetBlockId) - const markedCount = messagesState.markedForCleanup.size - - ctx.logger.info("mark_block: block marked for cleanup", { - blockId: targetBlockId, - markedCount, - }) - - return `Block ${ref} marked for cleanup. It will be merge-compressed together with other marked blocks when context pressure rises. No immediate effect on context or cache. (${markedCount} block(s) currently marked.)` - }, - }) -} - -export function createUnmarkBlockTool(ctx: ToolContext): ReturnType { - return tool({ - description: UNMARK_DESCRIPTION, - args: buildUnmarkSchema(), - async execute(args, toolCtx) { - await prepareMarkSession(ctx, toolCtx) - - const targetBlockId = parseBlockRef(String(args.blockId)) - if (targetBlockId === null) { - return `Error: Invalid block ID "${args.blockId}". 
Use format "b0", "b1", etc.` - } - - const messagesState = ctx.state.prune.messages - if (!messagesState.markedForCleanup.has(targetBlockId)) { - return `Block ${formatBlockRef(targetBlockId)} was not marked for cleanup.` - } - - messagesState.markedForCleanup.delete(targetBlockId) - await saveSessionState(ctx.state, ctx.logger) - - ctx.logger.info("unmark_block: block unmarked", { - blockId: targetBlockId, - }) - - return `Block ${formatBlockRef(targetBlockId)} unmarked. It will no longer be auto-merged during batch cleanup.` - }, - }) -} diff --git a/lib/compress/message.ts b/lib/compress/message.ts index e7b8bae..d759b9f 100644 --- a/lib/compress/message.ts +++ b/lib/compress/message.ts @@ -1,6 +1,6 @@ import { tool } from "@opencode-ai/plugin" import type { ToolContext } from "./types" -import { countTokens } from "../token-utils" +import { countMessageCharacters, countTokens } from "../token-utils" import { MESSAGE_FORMAT_EXTENSION } from "../prompts/extensions/tool" import { formatIssues, formatResult, resolveMessages, validateArgs } from "./message-utils" import { finalizeSession, prepareSession, type NotificationEntry } from "./pipeline" @@ -13,7 +13,7 @@ import { } from "./state" import type { CompressMessageToolArgs } from "./types" -function buildSchema() { +function buildSchema(maxSummaryLength: number) { return { topic: tool.schema .string() @@ -31,7 +31,9 @@ function buildSchema() { .describe("Short label (3-5 words) for this one message summary"), summary: tool.schema .string() - .describe("Complete technical summary replacing that one message"), + .describe( + `Complete technical summary replacing that one message. Aim for <=${maxSummaryLength} chars; exceed only when strictly necessary to preserve critical detail (file paths, decisions, signatures, exact values). 
Never pad.`, + ), }), ) .describe("Batch of individual message summaries to create in one tool call"), @@ -44,10 +46,20 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType maxSummaryLengthHard) { + throw new Error( + `Summary too long (${entry.summary.length} chars, hard ceiling ${maxSummaryLengthHard}). Aim for <=${ctx.config.compress.maxSummaryLength}; exceed only when strictly necessary. Rewrite more concisely.`, + ) + } + } + const callId = typeof (toolCtx as unknown as { callID?: unknown }).callID === "string" ? (toolCtx as unknown as { callID: string }).callID @@ -69,6 +81,30 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType 0) { + let totalChars = 0 + const counted = new Set() + for (const plan of plans) { + for (const messageId of plan.selection.messageIds) { + if (counted.has(messageId)) continue + counted.add(messageId) + const rawMessage = searchContext.rawMessagesById.get(messageId) + if (rawMessage) { + totalChars += countMessageCharacters(rawMessage) + } + } + } + // Intentionally throws after prepareSession: the char count needs + // resolved plans + rawMessages, only available post-prepare. No state + // is persisted (finalizeSession/saveSessionState never runs). + if (totalChars < minCompressRange) { + throw new Error( + `Range too small (${totalChars} chars, min ${minCompressRange}). Not worth compressing — overhead exceeds savings.`, + ) + } + } + const notifications: NotificationEntry[] = [] const preparedPlans: Array<{ @@ -140,6 +176,14 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType !keptPlaceholderIds.has(id)) + const missingIds = strictRequiredIds.filter((id) => !keptPlaceholderIds.has(id)) + // [Plan B] Missing placeholders are non-fatal: the compress pipeline + // auto-detects every consumed block in range, so the model no longer + // needs to manually list (bN) placeholders in its summary. 
+ if (missingIds.length > 0) { + console.warn( + `[ACP] compress summary omitted placeholders for required blocks: ${missingIds + .map((id) => `b${id}`) + .join(", ")}. They will be auto-attached as consumed blocks.`, + ) + } + return missingIds } export function injectBlockPlaceholders( diff --git a/lib/compress/range.ts b/lib/compress/range.ts index c86ebc7..9534a71 100644 --- a/lib/compress/range.ts +++ b/lib/compress/range.ts @@ -1,6 +1,6 @@ import { tool } from "@opencode-ai/plugin" import type { ToolContext } from "./types" -import { countTokens } from "../token-utils" +import { countMessageCharacters, countTokens } from "../token-utils" import { RANGE_FORMAT_EXTENSION } from "../prompts/extensions/tool" import { finalizeSession, prepareSession, type NotificationEntry } from "./pipeline" import { @@ -26,7 +26,7 @@ import { } from "./state" import type { CompressRangeToolArgs } from "./types" -function buildSchema() { +function buildSchema(maxSummaryLength: number) { return { topic: tool.schema .string() @@ -44,7 +44,9 @@ function buildSchema() { .describe("Message or block ID marking the end of range (e.g. m00012, b5)"), summary: tool.schema .string() - .describe("Complete technical summary replacing all content in range"), + .describe( + `Complete technical summary replacing all content in range. Aim for <=${maxSummaryLength} chars; exceed only when strictly necessary to preserve critical detail (file paths, decisions, signatures, exact values). Never pad.`, + ), }), ) .describe( @@ -59,10 +61,20 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType maxSummaryLengthHard) { + throw new Error( + `Summary too long (${entry.summary.length} chars, hard ceiling ${maxSummaryLengthHard}). Aim for <=${ctx.config.compress.maxSummaryLength}; exceed only when strictly necessary. Rewrite more concisely.`, + ) + } + } + const callId = typeof (toolCtx as unknown as { callID?: unknown }).callID === "string" ? 
(toolCtx as unknown as { callID: string }).callID @@ -76,6 +88,30 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType 0) { + let totalChars = 0 + const counted = new Set() + for (const plan of resolvedPlans) { + for (const messageId of plan.selection.messageIds) { + if (counted.has(messageId)) continue + counted.add(messageId) + const rawMessage = searchContext.rawMessagesById.get(messageId) + if (rawMessage) { + totalChars += countMessageCharacters(rawMessage) + } + } + } + // Intentionally throws after prepareSession: the char count needs + // resolved plans + rawMessages, only available post-prepare. No state + // is persisted (finalizeSession/saveSessionState never runs). + if (totalChars < minCompressRange) { + throw new Error( + `Range too small (${totalChars} chars, min ${minCompressRange}). Not worth compressing — overhead exceeds savings.`, + ) + } + } + const notifications: NotificationEntry[] = [] const preparedPlans: Array<{ entry: (typeof resolvedPlans)[number]["entry"] @@ -138,10 +174,22 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType() + const mergeConsumedBlockIds = [ + ...plan.selection.requiredBlockIds, + ...boundaryConsumed, + ].filter((id) => { + if (seenConsumed.has(id)) return false + seenConsumed.add(id) + return true + }) preparedPlans.push({ entry: plan.entry, @@ -192,6 +240,14 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType): ValidationErro }) } + if ( + compress.maxSummaryLength !== undefined && + typeof compress.maxSummaryLength !== "number" + ) { + errors.push({ + key: "compress.maxSummaryLength", + expected: "number", + actual: typeof compress.maxSummaryLength, + }) + } + + if ( + typeof compress.maxSummaryLength === "number" && + compress.maxSummaryLength < 1 + ) { + errors.push({ + key: "compress.maxSummaryLength", + expected: "positive number (>= 1)", + actual: `${compress.maxSummaryLength}`, + }) + } + + if ( + compress.maxSummaryLengthHard !== undefined && + 
typeof compress.maxSummaryLengthHard !== "number" + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: "number", + actual: typeof compress.maxSummaryLengthHard, + }) + } + + if ( + typeof compress.maxSummaryLengthHard === "number" && + compress.maxSummaryLengthHard < 1 + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: "positive number (>= 1)", + actual: `${compress.maxSummaryLengthHard}`, + }) + } + + if ( + typeof compress.maxSummaryLength === "number" && + typeof compress.maxSummaryLengthHard === "number" && + compress.maxSummaryLengthHard < compress.maxSummaryLength + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: `>= maxSummaryLength (${compress.maxSummaryLength})`, + actual: `${compress.maxSummaryLengthHard}`, + }) + } + + if ( + compress.minCompressRange !== undefined && + typeof compress.minCompressRange !== "number" + ) { + errors.push({ + key: "compress.minCompressRange", + expected: "number", + actual: typeof compress.minCompressRange, + }) + } + + if ( + typeof compress.minCompressRange === "number" && + compress.minCompressRange < 0 + ) { + errors.push({ + key: "compress.minCompressRange", + expected: "non-negative number (>= 0)", + actual: `${compress.minCompressRange}`, + }) + } + if ( typeof compress.iterationNudgeThreshold === "number" && compress.iterationNudgeThreshold < 1 diff --git a/lib/config.ts b/lib/config.ts index c72e11c..4c7af04 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -30,6 +30,9 @@ export interface CompressConfig { protectedTools: string[] protectTags: boolean protectUserMessages: boolean + maxSummaryLength: number + maxSummaryLengthHard: number + minCompressRange: number } export interface Commands { @@ -101,8 +104,6 @@ const DEFAULT_PROTECTED_TOOLS = [ "todoread", "compress", "decompress", - "mark_block", - "unmark_block", "batch", "plan_enter", "plan_exit", @@ -196,6 +197,9 @@ const defaultConfig: PluginConfig = { protectedTools: 
[...COMPRESS_DEFAULT_PROTECTED_TOOLS], protectTags: false, protectUserMessages: false, + maxSummaryLength: 200, + maxSummaryLengthHard: 800, + minCompressRange: 2000, }, strategies: { deduplication: { @@ -403,6 +407,9 @@ function mergeCompress( protectedTools: [...new Set([...base.protectedTools, ...(override.protectedTools ?? [])])], protectTags: override.protectTags ?? base.protectTags, protectUserMessages: override.protectUserMessages ?? base.protectUserMessages, + maxSummaryLength: override.maxSummaryLength ?? base.maxSummaryLength, + maxSummaryLengthHard: override.maxSummaryLengthHard ?? base.maxSummaryLengthHard, + minCompressRange: override.minCompressRange ?? base.minCompressRange, } } diff --git a/lib/gc/merge.ts b/lib/gc/merge.ts index 0a2583d..30207bf 100644 --- a/lib/gc/merge.ts +++ b/lib/gc/merge.ts @@ -1,5 +1,5 @@ import type { CompressionBlock, SessionState, WithParts } from "../state" -import type { BatchCleanupConfig, GCConfig, PluginConfig } from "../config" +import type { PluginConfig } from "../config" import type { Logger } from "../logger" import { countTokens, getCurrentTokenUsage } from "../token-utils" import { @@ -8,7 +8,6 @@ import { allocateRunId, wrapCompressedSummary, } from "../compress/state" -import { formatBlockRef } from "../message-ids" export interface MergeMarkedResult { mergedCount: number @@ -23,33 +22,6 @@ export interface BatchCleanupResult { nudgeText?: string } -const DEFAULT_BATCH_CLEANUP: BatchCleanupConfig = { - lowThreshold: "55%", - highThreshold: "75%", - forceThreshold: "90%", -} - -/** Minimum marked-block count to trigger escalation nudge (tier 2 active compress). */ -const ESCALATE_MIN_MARKED = 3 - -/** Minimum marked/old-gen ratio to trigger escalation nudge. */ -const ESCALATE_MIN_RATIO = 0.4 - -function resolveBatchCleanup(gc: GCConfig): BatchCleanupConfig { - return gc.batchCleanup ?? 
DEFAULT_BATCH_CLEANUP -} - -function percentToTokens( - value: number | `${number}%`, - modelContextLimit: number, -): number { - if (typeof value === "number") return value - const percent = parseFloat(value.slice(0, -1)) - if (isNaN(percent)) return modelContextLimit - const clamped = Math.max(0, Math.min(100, Math.round(percent))) - return Math.round((clamped / 100) * modelContextLimit) -} - function collectActiveOldGenBlocks(state: SessionState, maxOldGenSummaryLength: number): CompressionBlock[] { const blocks: CompressionBlock[] = [] const ids = Array.from(state.prune.messages.activeBlockIds).sort((a, b) => a - b) @@ -67,28 +39,13 @@ function collectActiveOldGenBlocks(state: SessionState, maxOldGenSummaryLength: return blocks } -function collectActiveMarkedBlocks(state: SessionState): CompressionBlock[] { - const messagesState = state.prune.messages - const ids = Array.from(messagesState.markedForCleanup).sort((a, b) => a - b) - const blocks: CompressionBlock[] = [] - for (const id of ids) { - const block = messagesState.blocksById.get(id) - if (!block || !block.active) { - messagesState.markedForCleanup.delete(id) - continue - } - blocks.push(block) - } - return blocks -} - function extractSummaryBody(summary: string): string { let body = summary const headerPrefix = COMPRESSED_BLOCK_HEADER + "\n" if (body.startsWith(headerPrefix)) { body = body.slice(headerPrefix.length) } - body = body.replace(/\n]*>b\d+<\/dcp-message-id>$/, "") + body = body.replace(/\n]*>b\d+<\/dcp-message-id>$/, "") return body.trim() } @@ -228,70 +185,6 @@ export function mergeMarkedBlocks( return { mergedCount: sourceBlocks.length, savedTokens } } -function estimateTokens(blocks: CompressionBlock[]): number { - return blocks.reduce( - (sum, block) => sum + (block.summaryTokens || Math.round(block.summary.length / 4)), - 0, - ) -} - -function buildNudgeText(state: SessionState, maxMergedLength: number): string | undefined { - const marked = collectActiveMarkedBlocks(state) - const 
oldGen = collectActiveOldGenBlocks(state, maxMergedLength) - - if (oldGen.length === 0) return undefined - - const oldGenIds = new Set(oldGen.map((b) => b.blockId)) - const markedOldGen = marked.filter((b) => oldGenIds.has(b.blockId)) - const markedOldGenCount = markedOldGen.length - const oldGenCount = oldGen.length - const ratio = markedOldGenCount / oldGenCount - const ratioPct = Math.round(ratio * 100) - const escalateMinPct = Math.round(ESCALATE_MIN_RATIO * 100) - - // Escalation: enough old-gen blocks marked → urge active compress now - if (markedOldGenCount >= ESCALATE_MIN_MARKED && ratio >= ESCALATE_MIN_RATIO) { - const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ") - const firstRef = formatBlockRef(marked[0].blockId) - const lastRef = formatBlockRef(marked[marked.length - 1].blockId) - const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4)) - - return [ - `🔥 ${markedOldGenCount}/${oldGenCount} old-gen blocks marked (${ratioPct}%) — ready for batch cleanup.`, - `Compressing ${refs} (range ${firstRef}–${lastRef}) would free ~${estimatedSavings} tokens in one cache break.`, - `Call compress with this range now to consolidate them.`, - ].join(" ") - } - - // Some marks, not enough to escalate → keep marking - if (marked.length >= 1) { - const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ") - const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4)) - - return [ - `⚠️ ${marked.length} block(s) marked for batch cleanup (${refs}).`, - `Merge-compressing them would free ~${estimatedSavings} tokens.`, - marked.length >= 2 - ? "They will auto-merge when context pressure reaches the high threshold." 
- : "A single marked block won't auto-merge on its own — use compress to consolidate it, or unmark_block if no longer needed.", - `Mark more old-gen blocks (need ≥${ESCALATE_MIN_MARKED} at ≥${escalateMinPct}%) to trigger batch cleanup sooner.`, - "To act now, use compress with a range covering these blocks.", - ].join(" ") - } - - // No marks yet → guide the model to start marking (fixes chicken-and-egg deadlock) - const shown = oldGen.slice(0, 5) - const oldGenRefs = shown.map((b) => formatBlockRef(b.blockId)).join(", ") - const more = oldGenCount > 5 ? ` (+${oldGenCount - 5} more)` : "" - - return [ - `📋 Context pressure rising — ${oldGenCount} old-gen compressed block(s) occupy ~${estimateTokens(oldGen)} tokens (${oldGenRefs}${more}).`, - `Review which blocks contain information you no longer need, and use mark_block to flag them.`, - `Once enough are marked (≥${ESCALATE_MIN_MARKED} at ≥${escalateMinPct}% of old-gen), they'll be batch-merged in one cache break to preserve cache hit rate.`, - `Do NOT mark blocks you may still need.`, - ].join(" ") -} - export function runBatchCleanup( state: SessionState, config: PluginConfig, @@ -310,78 +203,37 @@ export function runBatchCleanup( } const currentTokens = getCurrentTokenUsage(state, messages) - const limit = state.modelContextLimit - const batchCleanup = resolveBatchCleanup(config.gc) - const maxMergedLength = config.gc.maxOldGenSummaryLength - - const forceTokens = percentToTokens(batchCleanup.forceThreshold, limit) - const highTokens = percentToTokens(batchCleanup.highThreshold, limit) - const lowTokens = percentToTokens(batchCleanup.lowThreshold, limit) - if (currentTokens >= forceTokens) { - const oldGenBlocks = collectActiveOldGenBlocks(state, maxMergedLength) - if (oldGenBlocks.length < 2) { - return noop - } - const ids = oldGenBlocks.map((b) => b.blockId) - const result = mergeMarkedBlocks(state, ids, maxMergedLength) - if (result.mergedCount === 0) { - return noop - } - logger.info("Batch cleanup tier 3 
(force): merged old-gen blocks", { - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - currentTokens, - forceThreshold: batchCleanup.forceThreshold, - }) - return { - tier: 3, - action: "merge", - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - } + // Only a hardcoded 100% force fallback remains. The mark_block mechanism and + // the multi-tier (low/high/force) batch-cleanup were retired; full GC removal + // is tracked separately. Threshold is intentionally NOT read from config. + if (currentTokens < state.modelContextLimit) { + return noop } - if (currentTokens >= highTokens) { - const marked = collectActiveMarkedBlocks(state) - if (marked.length >= 2) { - const ids = marked.map((b) => b.blockId) - const result = mergeMarkedBlocks(state, ids, maxMergedLength) - if (result.mergedCount > 0) { - logger.info("Batch cleanup tier 2 (high): merged marked blocks", { - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - currentTokens, - highThreshold: batchCleanup.highThreshold, - }) - return { - tier: 2, - action: "merge", - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - } - } - } - // Not enough marks or merge produced nothing — fall through to nudge + const maxMergedLength = config.gc.maxOldGenSummaryLength + const oldGenBlocks = collectActiveOldGenBlocks(state, maxMergedLength) + if (oldGenBlocks.length < 2) { + return noop } - if (currentTokens >= lowTokens) { - const nudgeText = buildNudgeText(state, maxMergedLength) - if (!nudgeText) { - return noop - } - logger.info("Batch cleanup tier 1 (low): nudge injected", { - currentTokens, - lowThreshold: batchCleanup.lowThreshold, - }) - return { - tier: 1, - action: "nudge", - mergedCount: 0, - savedTokens: 0, - nudgeText, - } + const ids = oldGenBlocks.map((b) => b.blockId) + const result = mergeMarkedBlocks(state, ids, maxMergedLength) + if (result.mergedCount === 0) { + return noop } - return noop + logger.info("Batch cleanup force 
fallback (100%): merged old-gen blocks", { + mergedCount: result.mergedCount, + savedTokens: result.savedTokens, + currentTokens, + contextLimit: state.modelContextLimit, + }) + + return { + tier: 3, + action: "merge", + mergedCount: result.mergedCount, + savedTokens: result.savedTokens, + } } diff --git a/lib/hooks.ts b/lib/hooks.ts index ff083d4..3006079 100644 --- a/lib/hooks.ts +++ b/lib/hooks.ts @@ -43,7 +43,6 @@ import { cacheSystemPromptTokens } from "./ui/utils" import { runTruncateGC, shouldRunMajorGC, getGCParams } from "./gc/truncate" import { runBatchCleanup } from "./gc/merge" import { getCurrentTokenUsage } from "./token-utils" -import { appendToLastTextPart } from "./messages/utils" const INTERNAL_AGENT_SIGNATURES = [ "You are a title generator", @@ -207,12 +206,6 @@ function runMajorGC( } } -function appendBatchCleanupNudge(messages: WithParts[], nudgeText: string): void { - const lastUser = getLastUserMessage(messages) - if (!lastUser) return - appendToLastTextPart(lastUser, nudgeText) -} - export function createChatMessageTransformHandler( client: any, state: SessionState, @@ -259,9 +252,6 @@ export function createChatMessageTransformHandler( buildToolIdList(state, output.messages) runMajorGC(state, config, logger, output.messages) const batchResult = runBatchCleanup(state, config, logger, output.messages) - if (batchResult.tier === 1 && batchResult.nudgeText) { - appendBatchCleanupNudge(output.messages, batchResult.nudgeText) - } if (batchResult.mergedCount > 0) { void saveSessionState(state, logger) } diff --git a/lib/messages/inject/inject.ts b/lib/messages/inject/inject.ts index bbcb853..7260d45 100644 --- a/lib/messages/inject/inject.ts +++ b/lib/messages/inject/inject.ts @@ -190,7 +190,15 @@ export const injectCompressNudges = ( injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit, !shouldNudge) if (config.compress.mode !== "message") { - const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { 
currentTokens, modelContextLimit, includeHint: shouldNudge }) + const visibleMessageIds = new Set( + messages.map((message) => message.info.id), + ) + const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { + currentTokens, + modelContextLimit, + includeHint: shouldNudge, + visibleMessageIds, + }) if (blockGuidance.trim() && suffixMessage) { appendToLastTextPart(suffixMessage, "\n\n" + blockGuidance) } diff --git a/lib/messages/inject/utils.ts b/lib/messages/inject/utils.ts index 605bd05..7ce4c97 100644 --- a/lib/messages/inject/utils.ts +++ b/lib/messages/inject/utils.ts @@ -402,11 +402,11 @@ export function buildContextUsageGuidance( let guidance: string if (pct < minPct) { - guidance = " 💡 Be frugal with context — if you see large completed outputs (>2000 tokens), compress them into summaries. If everything is already compressed, skip this nudge. You can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings. Compress everything else." + guidance = " 💡 Be frugal with context. If any visible tool output exceeds 5000 characters and you've finished reading it, compress it into a summary now — don't keep large outputs 'just in case'. You can decompress later if needed." } else if (pct < maxPct) { - guidance = " ⚠️ Context is growing — compress completed sections and high-token waste now. Preserve key details." + guidance = " ⚠️ Context is growing — compress completed sections and high-token waste now." } else { - guidance = " 🔥 Context is high — compress aggressively but selectively. Preserve only what is essential." + guidance = " 🔥 Context is high — compress aggressively, preserve only what is essential." 
} return `\n\n${base}${guidance}` diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index 3b71a83..14d1bc4 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -18,6 +18,7 @@ export const prune = ( messages: WithParts[], ): void => { filterCompressedRanges(state, logger, config, messages) + stripStepMarkers(messages) // [HOTFIX] Disabled pruneToolOutputs/pruneToolInputs/pruneToolErrors — they mutate // existing messages in-place, breaking GLM prefix cache. Compression still works // via filterCompressedRanges + model-initiated compress tool. @@ -26,6 +27,43 @@ export const prune = ( // pruneToolErrors(state, logger, messages) } +const MAX_STEP_FINISH_REASON = 50 + +const stripStepMarkers = (messages: WithParts[]): void => { + for (const msg of messages) { + const parts = Array.isArray(msg.parts) ? msg.parts : [] + let changed = false + const filtered: typeof parts = [] + + for (const part of parts) { + if (part.type === "step-start") { + changed = true + continue + } + + if (part.type === "step-finish") { + const reason = (part as { reason?: unknown }).reason + if (typeof reason === "string" && reason.length > MAX_STEP_FINISH_REASON) { + const truncated = reason.slice(0, MAX_STEP_FINISH_REASON) + "..." + // Skip when already truncated: keeps `changed` false on idempotent + // re-runs so the parts array reference (and prefix cache) stays stable. + if (truncated !== reason) { + filtered.push({ ...part, reason: truncated }) + changed = true + continue + } + } + } + + filtered.push(part) + } + + if (changed) { + msg.parts = filtered + } + } +} + const pruneFullTool = (state: SessionState, logger: Logger, messages: WithParts[]): void => { const messagesToRemove: string[] = [] diff --git a/lib/prompts/compress-range.ts b/lib/prompts/compress-range.ts index 2dedb3a..6e1bf70 100644 --- a/lib/prompts/compress-range.ts +++ b/lib/prompts/compress-range.ts @@ -10,33 +10,18 @@ Directly quote user messages when they are short enough to include safely. 
Direc Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal - golden nuggets of detail that preserve full understanding with zero ambiguity. COMPRESSED BLOCK PLACEHOLDERS -When the selected range includes previously compressed blocks, use this exact placeholder format when referencing one: - -- \`(bN)\` +The system auto-detects any previously compressed blocks whose anchor messages fall inside your selected range. You do NOT need to manually list \`(bN)\` placeholders in your summary — every consumed block is tracked automatically. Compressed block sections in context are clearly marked with a header: - \`[Compressed conversation section]\` -Compressed block IDs always use the \`bN\` form (never \`mNNNNN\`) and are represented in the same XML metadata tag format. - Rules: -- Include every required block placeholder exactly once. +- Write a short prose summary. The system handles block consumption automatically. - Do not invent placeholders for blocks outside the selected range. -- Treat \`(bN)\` placeholders as RESERVED TOKENS. Do not emit \`(bN)\` text anywhere except intentional placeholders. -- If you need to mention a block in prose, use plain text like \`compressed bN\` (not as a placeholder). -- Preflight check before finalizing: the set of \`(bN)\` placeholders in your summary must exactly match the required set, with no duplicates. - -These placeholders are semantic references. They will be replaced with the full stored compressed block content when the tool processes your output. - -FLOW PRESERVATION WITH PLACEHOLDERS -When you use compressed block placeholders, write the surrounding summary text so it still reads correctly AFTER placeholder expansion. - -- Treat each placeholder as a stand-in for a full conversation segment, not as a short label. -- Ensure transitions before and after each placeholder preserve chronology and causality. 
-- Do not write text that depends on the placeholder staying literal (for example, "as noted in \`(b2)\`"). -- Your final meaning must be coherent once each placeholder is replaced with its full compressed block content. +- Treat \`(bN)\` as a RESERVED TOKEN. Do not emit \`(bN)\` text anywhere in the summary. +- If you need to mention a block in prose, use plain text like \`compressed bN\` (never as a placeholder). BOUNDARY IDS You specify boundaries by ID using the injected IDs visible in the conversation: diff --git a/lib/prompts/extensions/nudge.ts b/lib/prompts/extensions/nudge.ts index a260c6c..ca472c3 100644 --- a/lib/prompts/extensions/nudge.ts +++ b/lib/prompts/extensions/nudge.ts @@ -5,6 +5,12 @@ export interface BlockGuidanceContext { currentTokens?: number modelContextLimit?: number includeHint?: boolean + /** + * Raw message IDs currently visible in the model's context window. + * When provided, the directive nudge only suggests ranges whose anchor + * messages are still visible, preventing stale-ID and backwards-range bugs. + */ + visibleMessageIds?: Set } export function buildCompressedBlockGuidance( @@ -31,7 +37,7 @@ export function buildCompressedBlockGuidance( const lines = [ "Compressed block context:", `- Active compressed blocks: ${blockCount} (${blockList})`, - "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`.", + "- System auto-detects blocks in range — no need to manually list (bN) placeholders. Just write a short prose summary.", ] if (includeHint) { @@ -39,7 +45,54 @@ export function buildCompressedBlockGuidance( } if (blockCount > 50) { - lines.push(`- 🔀 You have ${blockCount} blocks — consider merging adjacent same-topic blocks instead of finding new content to compress. 
This permanently reduces per-turn overhead.`) + const oldBlockIds = activeBlockIds.slice(0, Math.max(0, blockCount - 20)) + const allOldBlocks = oldBlockIds + .map((id) => state.prune.messages.blocksById.get(id)) + .filter((b): b is CompressionBlock => b !== undefined) + + // [Plan B] Filter to blocks whose anchor message is still visible, then + // build suggestion ranges from anchor refs (mNNNNN) instead of stored + // block startId/endId. This avoids suggesting IDs that are no longer + // visible and prevents backwards ranges (end < start). + const visibleMessageIds = context?.visibleMessageIds + const visibleOldBlocks = + visibleMessageIds === undefined + ? allOldBlocks + : allOldBlocks.filter((b) => b.anchorMessageId && visibleMessageIds.has(b.anchorMessageId)) + + if (visibleOldBlocks.length > 5) { + const blocksWithRef = visibleOldBlocks + .map((block) => { + const ref = state.messageIds.byRawId.get(block.anchorMessageId) + return ref ? { block, ref } : null + }) + .filter((x): x is { block: CompressionBlock; ref: string } => x !== null) + .sort((a, b) => a.ref.localeCompare(b.ref)) + + const totalTokens = blocksWithRef.reduce((s, x) => s + (x.block.summaryTokens ?? 0), 0) + const totalK = Math.max(1, Math.round(totalTokens / 1000)) + + const targets: string[] = [] + const chunkSize = Math.ceil(blocksWithRef.length / 3) + for (let i = 0; i < 3 && i * chunkSize < blocksWithRef.length; i++) { + const chunk = blocksWithRef.slice(i * chunkSize, (i + 1) * chunkSize) + if (chunk.length < 2) continue + // Sorted by ref above guarantees startRef <= endRef. + const startRef = chunk[0].ref + const endRef = chunk[chunk.length - 1].ref + const chunkTokens = chunk.reduce((s, x) => s + (x.block.summaryTokens ?? 
0), 0) + const chunkK = Math.max(1, Math.round(chunkTokens / 1000)) + targets.push(` • compress ${startRef}→${endRef}: ${chunk.length} blocks (~${chunkK}K tokens)`) + } + + if (targets.length > 0) { + lines.push(`- 🔀 ${blocksWithRef.length} old blocks using ~${totalK}K tokens. Consolidate into ${targets.length}:`) + lines.push(...targets) + lines.push(` System auto-detects blocks in range — no need to manually list (bN) placeholders. Just write a short prose summary.`) + } + } else { + lines.push(`- 🔀 You have ${blockCount} blocks — use compress to consolidate adjacent same-topic blocks.`) + } } // [FIX Bug 35] Only show aging warnings when context usage is above 50%. diff --git a/lib/prompts/system.ts b/lib/prompts/system.ts index 59cce62..40a80bf 100644 --- a/lib/prompts/system.ts +++ b/lib/prompts/system.ts @@ -2,7 +2,7 @@ export const SYSTEM = ` You operate in a context-constrained environment. Context management helps preserve retrieval quality, but your primary goal is completing the task at hand. Do not let context management distract from the actual work. -The tools you have for context management are \`compress\`, \`decompress\`, \`mark_block\`, and \`unmark_block\`. \`compress\` replaces older conversation content with technical summaries you produce. \`decompress\` restores previously compressed content when you need exact details. \`mark_block\` flags a compressed block for deferred batch merge-cleanup — it has zero immediate effect on context or cache, but marked blocks are merge-compressed together in a single cache break when context pressure rises. Use it for blocks you no longer need in detail but want to keep cached for now. \`unmark_block\` removes that flag. +The tools you have for context management are \`compress\` and \`decompress\`. \`compress\` replaces older conversation content with technical summaries you produce. \`decompress\` restores previously compressed content when you need exact details. 
\`\` and \`\` tags are environment-injected metadata. Do not output them. @@ -16,9 +16,9 @@ Target the largest UNCOMPRESSED content first. Savings scale with original size CONTEXT PRESSURE LEVELS -- Normal: Be frugal — compress tool outputs you've finished using into summaries. You can decompress later. Extract and keep what matters from any message; compress verbose parts — including large logs in user messages or generated code. -- Elevated: Context is growing. Compress completed sections and high-token waste more urgently. -- Critical: Compress aggressively now. Every compression should free meaningful tokens. Preserve only what is essential for the current task. +- Normal: Be frugal — compress large completed outputs into summaries. You can decompress later if needed. +- Elevated: Context is growing — compress completed sections and high-token waste now. +- Critical: Compress aggressively now — preserve only what is essential for the current task. WHAT TO COMPRESS FIRST (high value, low risk) diff --git a/lib/token-utils.ts b/lib/token-utils.ts index be54aff..01648a0 100644 --- a/lib/token-utils.ts +++ b/lib/token-utils.ts @@ -178,3 +178,18 @@ export function countAllMessageTokens(msg: WithParts): number { if (texts.length === 0) return 0 return estimateTokensBatch(texts) } + +export function countMessageCharacters(msg: WithParts): number { + const parts = Array.isArray(msg.parts) ? 
msg.parts : [] + let total = 0 + for (const part of parts) { + if (part.type === "text" && typeof part.text === "string") { + total += part.text.length + } else { + for (const content of extractToolContent(part)) { + total += content.length + } + } + } + return total +} diff --git a/tests/config-validation.test.ts b/tests/config-validation.test.ts index 1063bee..2f63959 100644 --- a/tests/config-validation.test.ts +++ b/tests/config-validation.test.ts @@ -138,3 +138,28 @@ test("validateConfigTypes returns empty for undefined optional fields", () => { const result = validateConfigTypes({}) assert.deepEqual(result, []) }) + +test("validateConfigTypes accepts numeric compress.maxSummaryLengthHard", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLengthHard: 800 }, + }) + assert.deepEqual(result, []) +}) + +test("validateConfigTypes catches wrong type for compress.maxSummaryLengthHard", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLengthHard: "800" }, + }) + assert.equal(result.length, 1) + assert.equal(result[0].key, "compress.maxSummaryLengthHard") + assert.equal(result[0].actual, "string") +}) + +test("validateConfigTypes rejects compress.maxSummaryLengthHard < maxSummaryLength", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLength: 200, maxSummaryLengthHard: 100 }, + }) + const hit = result.find((e) => e.key === "compress.maxSummaryLengthHard") + assert.ok(hit, "hard ceiling below soft target must be flagged") + assert.ok(hit!.expected.includes(">= maxSummaryLength")) +}) diff --git a/tests/gc-merge.test.ts b/tests/gc-merge.test.ts index be0627f..1c77e91 100644 --- a/tests/gc-merge.test.ts +++ b/tests/gc-merge.test.ts @@ -359,13 +359,19 @@ test("mergeMarkedBlocks: reports saved tokens as reduction from source summaries const logger = new Logger(false) -test("runBatchCleanup: below low threshold (50%) → noop tier 0", () => { +// 
===================================================================== +// runBatchCleanup — hardcoded 100% force fallback only. +// The mark_block mechanism and the multi-tier (low/high/force) batch +// cleanup were retired; only a single last-resort merge at 100% remains. +// ===================================================================== + +test("runBatchCleanup: below 100% (95%) → noop tier 0", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), + makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 500)] + const state = makeState(blocks, { modelContextLimit: 1000 }) + const messages: WithParts[] = [makeAssistantMessage("a1", 950)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 0) @@ -374,59 +380,7 @@ test("runBatchCleanup: below low threshold (50%) → noop tier 0", () => { assert.equal(state.prune.messages.activeBlockIds.size, 2) }) -test("runBatchCleanup: above low threshold (55%) with marked blocks → tier 1 nudge", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 600)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, 
"nudge") - assert.equal(result.mergedCount, 0) - assert.ok(result.nudgeText, "nudge text should be provided") - assert.ok(result.nudgeText!.includes("b1")) - assert.ok(result.nudgeText!.includes("b2")) - assert.equal(state.prune.messages.activeBlockIds.size, 2) -}) - -test("runBatchCleanup: at high threshold (75%) with >= 2 marked blocks → tier 2 merge", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 750)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 2) - assert.equal(result.action, "merge") - assert.equal(result.mergedCount, 2) - assert.ok(result.savedTokens >= 0) - assert.equal(state.prune.messages.markedForCleanup.size, 0) - assert.equal(state.prune.messages.activeBlockIds.size, 1) -}) - -test("runBatchCleanup: at high threshold (75%) with 1 marked block → tier 1 nudge (not enough for merge)", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 750)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1, "should fall through to nudge when merge conditions unmet") - assert.equal(result.action, "nudge") - assert.equal(result.mergedCount, 0) - assert.ok(result.nudgeText, "nudge text should be provided") - assert.ok(result.nudgeText!.includes("b1"), "nudge should reference the marked block") - 
assert.equal(state.prune.messages.activeBlockIds.size, 2) -}) - -test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → tier 3 force merge", () => { +test("runBatchCleanup: at 100% with >= 2 old-gen blocks → tier 3 force merge", () => { const blocks = [ makeBlock({ blockId: 1, @@ -443,7 +397,7 @@ test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → tie }), ] const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 900)] + const messages: WithParts[] = [makeAssistantMessage("a1", 1000)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 3) @@ -452,173 +406,48 @@ test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → tie assert.equal(state.prune.messages.activeBlockIds.size, 1) }) -test("runBatchCleanup: modelContextLimit undefined → noop", () => { +test("runBatchCleanup: at 100% with < 2 old-gen blocks → noop", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), ] - const state = makeState(blocks, { modelContextLimit: undefined, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 999999)] + const state = makeState(blocks, { modelContextLimit: 1000 }) + const messages: WithParts[] = [makeAssistantMessage("a1", 1000)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 0) assert.equal(result.action, "none") assert.equal(result.mergedCount, 0) + assert.equal(state.prune.messages.activeBlockIds.size, 1) }) -test("runBatchCleanup: tier ordering — force takes precedence over high and low at 95%", () => { - const blocks = [ - makeBlock({ - blockId: 1, - 
anchorMessageId: "a1", - summary: wrapCompressedSummary(1, "one"), - generation: "old", - }), - makeBlock({ - blockId: 2, - runId: 2, - anchorMessageId: "a2", - summary: wrapCompressedSummary(2, "two"), - generation: "old", - }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 950)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 3, "force tier must win over high/low when usage >= 90%") - assert.equal(result.action, "merge") -}) - -test("runBatchCleanup: at high threshold with unmarked old-gen → tier 1 nudge (mark guidance, fixes chicken-and-egg)", () => { - const blocks = [ - makeBlock({ - blockId: 1, - anchorMessageId: "a1", - summary: wrapCompressedSummary(1, "one"), - generation: "old", - }), - makeBlock({ - blockId: 2, - runId: 2, - anchorMessageId: "a2", - summary: wrapCompressedSummary(2, "two"), - generation: "old", - }), - ] - const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 800)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1, "should nudge even without marks — fixes chicken-and-egg deadlock") - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText, "nudge text should be provided") - assert.ok(result.nudgeText!.includes("mark_block"), "should guide model to use mark_block") - assert.ok(result.nudgeText!.includes("b1"), "should reference old-gen blocks") - assert.ok(result.nudgeText!.includes("b2")) -}) - -test("runBatchCleanup: tier 1b nudge — no marks, old-gen blocks → guides marking", () => { +test("runBatchCleanup: modelContextLimit undefined → noop", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: 
wrapCompressedSummary(2, "two"), generation: "old" }), - makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, "three"), generation: "old" }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), + makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), ] - const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] + const state = makeState(blocks, { modelContextLimit: undefined }) + const messages: WithParts[] = [makeAssistantMessage("a1", 999999)] const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText!.includes("mark_block")) - assert.ok(result.nudgeText!.includes("3 old-gen")) - assert.ok(!result.nudgeText!.includes("🔥"), "should not show escalation emoji without marks") + assert.equal(result.tier, 0) + assert.equal(result.action, "none") + assert.equal(result.mergedCount, 0) }) -test("runBatchCleanup: tier 1 escalation — ≥3 marked at ≥40% → urges active compress", () => { +test("runBatchCleanup: mark tiers removed — marked blocks below 100% → noop (no nudge, no merge)", () => { const blocks = [ makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, "three"), generation: "old" }), - makeBlock({ blockId: 4, runId: 4, anchorMessageId: "a4", summary: wrapCompressedSummary(4, "four"), generation: "old" }), ] + // Legacy marks that would previously have triggered tier 1/2 — now ignored. 
const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 3] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText!.includes("🔥"), "should show escalation indicator") - assert.ok(result.nudgeText!.includes("3/4"), "should show marked/total ratio") - assert.ok(result.nudgeText!.includes("75%"), "should show percentage") - assert.ok(result.nudgeText!.includes("compress"), "should urge compress action") - assert.ok(result.nudgeText!.includes("b1") && result.nudgeText!.includes("b3"), "should reference range") -}) - -test("runBatchCleanup: tier 1 count gate — 2 marked (100% ratio) but < 3 count → no escalation", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator, not escalation") - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate with only 2 marked blocks") -}) - -test("runBatchCleanup: tier 1 ratio gate — 3 marked out of 10 (30%) → no escalation", () => { - const blocks: CompressionBlock[] = [] - for (let i = 1; i <= 10; i++) { - blocks.push(makeBlock({ - blockId: i, - runId: i, - anchorMessageId: `a${i}`, - summary: wrapCompressedSummary(i, `block ${i}`), - generation: "old", - })) - } - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 3] }) - const messages: WithParts[] = 
[makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator, not escalation") - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate with 30% ratio < 40% threshold") - assert.ok(result.nudgeText!.includes("b1"), "should still reference marked blocks") -}) - -test("runBatchCleanup: young-gen block marked → escalation ratio uses old-gen subset only", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), - makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, "three"), generation: "old" }), - makeBlock({ blockId: 4, runId: 4, anchorMessageId: "a4", summary: wrapCompressedSummary(4, "four"), generation: "old" }), - makeBlock({ blockId: 5, runId: 5, anchorMessageId: "a5", summary: wrapCompressedSummary(5, "young"), generation: "young" }), - ] - // Mark 2 old-gen + 1 young-gen = 3 total, but only 2 old-gen - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 5] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] + const messages: WithParts[] = [makeAssistantMessage("a1", 800)] const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate: only 2 old-gen marked < 3 minimum") - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator") -}) - -test("collectActiveMarkedBlocks: sweeps stale marks for deactivated blocks", () => { - const block1 = makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }) - const block2 = makeBlock({ blockId: 2, runId: 
2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }) - const state = makeState([block1, block2], { modelContextLimit: 1000, marked: [1, 2] }) - - // Simulate block 2 being deactivated by something other than merge/unmark - block2.active = false - - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - runBatchCleanup(state, buildConfig(), logger, messages) - - assert.equal(state.prune.messages.markedForCleanup.has(2), false, "stale mark for deactivated block should be swept") - assert.equal(state.prune.messages.markedForCleanup.has(1), true, "active block mark should remain") + assert.equal(result.tier, 0, "no nudge/merge below 100% even with marks") + assert.equal(result.action, "none") + assert.equal(result.mergedCount, 0) + assert.ok(!result.nudgeText, "no nudge text — mark_block nudge is retired") + assert.equal(state.prune.messages.activeBlockIds.size, 3) }) diff --git a/tests/prune.test.ts b/tests/prune.test.ts index 410eb8b..e7e2be9 100644 --- a/tests/prune.test.ts +++ b/tests/prune.test.ts @@ -587,3 +587,109 @@ test("prune preserves message order for surviving messages", () => { assert.ok(m1Idx < m3Idx, "m1 should come before m3") assert.ok(m3Idx < m4Idx, "m3 should come before m4") }) + +// ===================================================================== +// stripStepMarkers — step-start removal + step-finish truncation +// ===================================================================== + +function stepStartPart(msgId: string, id: string) { + return { id, messageID: msgId, sessionID: SID, type: "step-start" as const } +} + +function stepFinishPart(msgId: string, id: string, reason: string) { + return { id, messageID: msgId, sessionID: SID, type: "step-finish" as const, reason } +} + +test("stripStepMarkers removes step-start parts entirely", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepStartPart("a1", "a1-ss"), + textPart("a1", 
"a1-t", "real content"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const types = messages[0]!.parts.map((p: any) => p.type) + assert.ok(!types.includes("step-start"), "step-start should be removed") + assert.ok(types.includes("text"), "text part should remain") +}) + +test("stripStepMarkers truncates long step-finish reason to 50 chars", () => { + const state = createSessionState() + const longReason = "x".repeat(155) + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepFinishPart("a1", "a1-sf", longReason), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const sf = messages[0]!.parts.find((p: any) => p.type === "step-finish") as any + assert.ok(sf, "step-finish part should remain") + assert.equal(sf.reason.length, 53, "reason should be 50 chars + '...'") + assert.ok(sf.reason.endsWith("..."), "truncated reason should end with '...'") +}) + +test("stripStepMarkers preserves short step-finish reason unchanged", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepFinishPart("a1", "a1-sf", "short reason"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const sf = messages[0]!.parts.find((p: any) => p.type === "step-finish") as any + assert.equal(sf.reason, "short reason", "short reason should be preserved") +}) + +test("stripStepMarkers is idempotent: second run keeps parts reference stable", () => { + const state = createSessionState() + const longReason = "y".repeat(120) + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepStartPart("a1", "a1-ss"), + stepFinishPart("a1", "a1-sf", longReason), + textPart("a1", "a1-t", "keep me"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + const partsRefAfterFirst = messages[0]!.parts + const reasonAfterFirst = (partsRefAfterFirst.find((p: any) => p.type === "step-finish") as any).reason + + // Second pass over already-stripped messages + prune(state, logger, 
buildConfig(), messages) + + // Prefix-cache invariant: parts array must NOT be reassigned on idempotent re-run + assert.equal( + messages[0]!.parts, + partsRefAfterFirst, + "parts array reference must stay stable on idempotent re-run (prefix cache)", + ) + const reasonAfterSecond = (messages[0]!.parts.find((p: any) => p.type === "step-finish") as any).reason + assert.equal(reasonAfterSecond, reasonAfterFirst, "reason must be byte-identical on re-run") +}) + +test("stripStepMarkers leaves messages without step markers untouched", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + textPart("a1", "a1-t", "plain text only"), + toolPart("call-1", "bash", "output"), + ]), + ] + const originalParts = messages[0]!.parts + + prune(state, logger, buildConfig(), messages) + + assert.equal( + messages[0]!.parts, + originalParts, + "parts array reference unchanged when no step markers present", + ) +})