From 8aa948084fc2da4b8e821ccd0814a15996c2606c Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 03:42:53 +0800 Subject: [PATCH 1/8] =?UTF-8?q?feat:=20context=20optimization=20=E2=80=94?= =?UTF-8?q?=20summary=20limits,=20step=20truncation,=20min=20range,=20nudg?= =?UTF-8?q?e=20tuning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - maxSummaryLength config (default 100): reject compress if summary exceeds limit - minCompressRange config (default 2000): reject compress if range too small - stripStepMarkers in prune: skip step-start, truncate step-finish to 50 chars - Nudge: target large tool outputs (>5000 chars) explicitly - Shorter pressure level descriptions and per-message guidance - Block ID list unchanged (accuracy requirement) 487 tests pass, typecheck clean --- devlog/2026-06-29_context-optimization/REQ.md | 31 ++++++++++++ .../WORKLOG.md | 47 +++++++++++++++++++ lib/compress/message.ts | 38 ++++++++++++++- lib/compress/range.ts | 41 +++++++++++++++- lib/config-validation.ts | 46 ++++++++++++++++++ lib/config.ts | 6 +++ lib/messages/inject/utils.ts | 6 +-- lib/messages/prune.ts | 36 ++++++++++++++ lib/prompts/extensions/nudge.ts | 2 +- lib/prompts/system.ts | 6 +-- lib/token-utils.ts | 15 ++++++ 11 files changed, 265 insertions(+), 9 deletions(-) create mode 100644 devlog/2026-06-29_context-optimization/REQ.md create mode 100644 devlog/2026-06-29_context-optimization/WORKLOG.md diff --git a/devlog/2026-06-29_context-optimization/REQ.md b/devlog/2026-06-29_context-optimization/REQ.md new file mode 100644 index 0000000..75171a3 --- /dev/null +++ b/devlog/2026-06-29_context-optimization/REQ.md @@ -0,0 +1,31 @@ +# Context Optimization — Reduce Token Waste + +## Problem + +Session ses_102504697ffeYg89Sn0k8aknYg grew to 47% context usage. Root cause analysis revealed systematic token waste: + +1. **Compress summaries too verbose**: avg 579 chars (~145 tokens), some up to 2011 chars. 
Include unnecessary metrics, reviewer quotes, experimental parameters. +2. **Compress tool calls are pure overhead**: 344 calls × 813 chars avg = 280K chars. Each stores full summary in input — duplicated with block summary. +3. **Step markers waste space**: 4698 step-start/step-finish parts × ~88 chars avg = 413K chars (~103K tokens). Only mark boundaries, no useful content. +4. **Large tool outputs not compressed**: Model keeps 20-50K char outputs "just in case". +5. **No minimum compress range**: Model compresses tiny ranges (<2K chars) where overhead exceeds savings. +6. **ACP guidance too verbose**: Multi-paragraph nudge text wastes ~200 tokens/turn. + +## Requirements + +1. **R1**: Limit compress summary length to configurable max (default 100 chars). Reject if exceeded. +2. **R2**: ~~Truncate compress tool input after execution~~ — NOT FEASIBLE (no API to modify stored parts). +3. **R3**: Strengthen nudge to target large tool outputs (>5K chars) explicitly. +4. **R5**: Truncate step markers in context construction (skip step-start, truncate step-finish to 50 chars). +5. **R6**: Shorten ACP guidance text (pressure levels + per-message guidance). +6. **R7**: Enforce minimum compress range (default 2000 chars). Reject if below. + +## Cache Safety + +All fixes are either cache-neutral (only affect future operations) or one-time breaks that stabilize after deployment. No recurring cache breaks. + +## Non-Goals + +- Excluding old reasoning from context (causes recurring cache breaks — cancelled). +- Modifying block ID list (accuracy risk — kept as-is). +- compress tool input cleanup (not feasible with current API). 
diff --git a/devlog/2026-06-29_context-optimization/WORKLOG.md b/devlog/2026-06-29_context-optimization/WORKLOG.md new file mode 100644 index 0000000..738d249 --- /dev/null +++ b/devlog/2026-06-29_context-optimization/WORKLOG.md @@ -0,0 +1,47 @@ +# Worklog — Context Optimization + +## Changes (9 files, +187/-9 lines, excluding devlog) + +### Fix 1: Summary length limit (R1) +- **config.ts**: Added `maxSummaryLength` (default 100) to CompressConfig +- **config-validation.ts**: Type + key validation +- **compress/message.ts, compress/range.ts**: Check `summary.length > maxSummaryLength` → throw error before creating block + +### Fix 2: Compress tool cleanup (R2) — NOT FEASIBLE +- ToolContext API only allows modifying output/title/metadata, NOT input args +- Added TODO comments in both handlers noting `experimental.chat.messages.transform` as alternative +- Documented for future investigation + +### Fix 3: Nudge strengthening (R3) +- **inject/utils.ts**: Guidance text now explicitly mentions ">5000 characters" tool outputs +- Changed from generic "compress tool outputs" to targeted "if any tool output >5000 chars and you've finished reading, compress it into a summary NOW" + +### Fix 5: Step marker truncation (R5) +- **prune.ts**: New `stripStepMarkers()` function + - Skips `step-start` parts entirely (zero-value boundary markers) + - Truncates `step-finish` reason to 50 chars (was avg 155 chars) + - Called from `prune()` before context injection +- Estimated savings: ~90K tokens per session with heavy reasoning + +### Fix 6: ACP simplification (R6) +- **system.ts**: Pressure level descriptions shortened to one line each + - Normal: "Be frugal — compress large completed outputs into summaries. You can decompress later if needed." + - Elevated: "Context is growing — compress completed sections and high-token waste now." + - Critical: "Compress aggressively now — preserve only what is essential for the current task." 
+- **inject/utils.ts**: Per-message guidance reduced from 5+ to 3 sentences +- Block ID list: UNCHANGED (accuracy requirement) + +### Fix 7: Minimum compress range (R7) +- **config.ts**: Added `minCompressRange` (default 2000) to CompressConfig +- **config-validation.ts**: Type + key validation +- **compress/message.ts, compress/range.ts**: Calculate total message chars via `countMessageCharacters()` → throw error if < minCompressRange +- **token-utils.ts**: New `countMessageCharacters()` helper + +## Verification +- `npm run typecheck`: clean ✅ +- `npm run test`: 487 pass, 0 fail ✅ +- Block ID list: verified unchanged (the only nudge.ts change is the >50-block consolidation hint) + +## Not Implemented +- **Fix 4 (exclude old reasoning)**: Cancelled — causes recurring cache breaks as reasoning crosses age threshold every turn. +- **Fix 2 (compress input cleanup)**: Not feasible with current OpenCode plugin API. Needs `experimental.chat.messages.transform` hook investigation. diff --git a/lib/compress/message.ts b/lib/compress/message.ts index e7b8bae..c2beff1 100644 --- a/lib/compress/message.ts +++ b/lib/compress/message.ts @@ -1,6 +1,6 @@ import { tool } from "@opencode-ai/plugin" import type { ToolContext } from "./types" -import { countTokens } from "../token-utils" +import { countMessageCharacters, countTokens } from "../token-utils" import { MESSAGE_FORMAT_EXTENSION } from "../prompts/extensions/tool" import { formatIssues, formatResult, resolveMessages, validateArgs } from "./message-utils" import { finalizeSession, prepareSession, type NotificationEntry } from "./pipeline" @@ -48,6 +48,16 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType maxSummaryLength) { + throw new Error( + `Summary too long (${entry.summary.length} chars, max ${maxSummaryLength}). Write a shorter summary focusing on key conclusions only.`, + ) + } + } + const callId = typeof (toolCtx as unknown as { callID?: unknown }).callID === "string" ? 
(toolCtx as unknown as { callID: string }).callID @@ -69,6 +79,24 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType 0) { + let totalChars = 0 + for (const plan of plans) { + for (const messageId of plan.selection.messageIds) { + const rawMessage = searchContext.rawMessagesById.get(messageId) + if (rawMessage) { + totalChars += countMessageCharacters(rawMessage) + } + } + } + if (totalChars < minCompressRange) { + throw new Error( + `Range too small (${totalChars} chars, min ${minCompressRange}). Not worth compressing — overhead exceeds savings.`, + ) + } + } + const notifications: NotificationEntry[] = [] const preparedPlans: Array<{ @@ -140,6 +168,14 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType maxSummaryLength) { + throw new Error( + `Summary too long (${entry.summary.length} chars, max ${maxSummaryLength}). Write a shorter summary focusing on key conclusions only.`, + ) + } + } + const callId = typeof (toolCtx as unknown as { callID?: unknown }).callID === "string" ? (toolCtx as unknown as { callID: string }).callID @@ -76,6 +86,27 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType 0) { + let totalChars = 0 + const counted = new Set() + for (const plan of resolvedPlans) { + for (const messageId of plan.selection.messageIds) { + if (counted.has(messageId)) continue + counted.add(messageId) + const rawMessage = searchContext.rawMessagesById.get(messageId) + if (rawMessage) { + totalChars += countMessageCharacters(rawMessage) + } + } + } + if (totalChars < minCompressRange) { + throw new Error( + `Range too small (${totalChars} chars, min ${minCompressRange}). 
Not worth compressing — overhead exceeds savings.`, + ) + } + } + const notifications: NotificationEntry[] = [] const preparedPlans: Array<{ entry: (typeof resolvedPlans)[number]["entry"] @@ -192,6 +223,14 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType): ValidationErro }) } + if ( + compress.maxSummaryLength !== undefined && + typeof compress.maxSummaryLength !== "number" + ) { + errors.push({ + key: "compress.maxSummaryLength", + expected: "number", + actual: typeof compress.maxSummaryLength, + }) + } + + if ( + typeof compress.maxSummaryLength === "number" && + compress.maxSummaryLength < 1 + ) { + errors.push({ + key: "compress.maxSummaryLength", + expected: "positive number (>= 1)", + actual: `${compress.maxSummaryLength}`, + }) + } + + if ( + compress.minCompressRange !== undefined && + typeof compress.minCompressRange !== "number" + ) { + errors.push({ + key: "compress.minCompressRange", + expected: "number", + actual: typeof compress.minCompressRange, + }) + } + + if ( + typeof compress.minCompressRange === "number" && + compress.minCompressRange < 0 + ) { + errors.push({ + key: "compress.minCompressRange", + expected: "non-negative number (>= 0)", + actual: `${compress.minCompressRange}`, + }) + } + if ( typeof compress.iterationNudgeThreshold === "number" && compress.iterationNudgeThreshold < 1 diff --git a/lib/config.ts b/lib/config.ts index c72e11c..3ea5d8d 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -30,6 +30,8 @@ export interface CompressConfig { protectedTools: string[] protectTags: boolean protectUserMessages: boolean + maxSummaryLength: number + minCompressRange: number } export interface Commands { @@ -196,6 +198,8 @@ const defaultConfig: PluginConfig = { protectedTools: [...COMPRESS_DEFAULT_PROTECTED_TOOLS], protectTags: false, protectUserMessages: false, + maxSummaryLength: 100, + minCompressRange: 2000, }, strategies: { deduplication: { @@ -403,6 +407,8 @@ function mergeCompress( protectedTools: [...new 
Set([...base.protectedTools, ...(override.protectedTools ?? [])])], protectTags: override.protectTags ?? base.protectTags, protectUserMessages: override.protectUserMessages ?? base.protectUserMessages, + maxSummaryLength: override.maxSummaryLength ?? base.maxSummaryLength, + minCompressRange: override.minCompressRange ?? base.minCompressRange, } } diff --git a/lib/messages/inject/utils.ts b/lib/messages/inject/utils.ts index 605bd05..7ce4c97 100644 --- a/lib/messages/inject/utils.ts +++ b/lib/messages/inject/utils.ts @@ -402,11 +402,11 @@ export function buildContextUsageGuidance( let guidance: string if (pct < minPct) { - guidance = " 💡 Be frugal with context — if you see large completed outputs (>2000 tokens), compress them into summaries. If everything is already compressed, skip this nudge. You can decompress later if needed. Extract and keep what matters: user intent, key decisions, file paths, and important findings. Compress everything else." + guidance = " 💡 Be frugal with context. If any visible tool output exceeds 5000 characters and you've finished reading it, compress it into a summary now — don't keep large outputs 'just in case'. You can decompress later if needed." } else if (pct < maxPct) { - guidance = " ⚠️ Context is growing — compress completed sections and high-token waste now. Preserve key details." + guidance = " ⚠️ Context is growing — compress completed sections and high-token waste now." } else { - guidance = " 🔥 Context is high — compress aggressively but selectively. Preserve only what is essential." + guidance = " 🔥 Context is high — compress aggressively, preserve only what is essential." 
} return `\n\n${base}${guidance}` diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index 3b71a83..a57603f 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -18,6 +18,7 @@ export const prune = ( messages: WithParts[], ): void => { filterCompressedRanges(state, logger, config, messages) + stripStepMarkers(messages) // [HOTFIX] Disabled pruneToolOutputs/pruneToolInputs/pruneToolErrors — they mutate // existing messages in-place, breaking GLM prefix cache. Compression still works // via filterCompressedRanges + model-initiated compress tool. @@ -26,6 +27,41 @@ export const prune = ( // pruneToolErrors(state, logger, messages) } +const MAX_STEP_FINISH_REASON = 50 + +const stripStepMarkers = (messages: WithParts[]): void => { + for (const msg of messages) { + const parts = Array.isArray(msg.parts) ? msg.parts : [] + let changed = false + const filtered: typeof parts = [] + + for (const part of parts) { + if (part.type === "step-start") { + changed = true + continue + } + + if (part.type === "step-finish") { + const reason = (part as { reason?: unknown }).reason + if (typeof reason === "string" && reason.length > MAX_STEP_FINISH_REASON) { + filtered.push({ + ...part, + reason: reason.slice(0, MAX_STEP_FINISH_REASON) + "...", + }) + changed = true + continue + } + } + + filtered.push(part) + } + + if (changed) { + msg.parts = filtered + } + } +} + const pruneFullTool = (state: SessionState, logger: Logger, messages: WithParts[]): void => { const messagesToRemove: string[] = [] diff --git a/lib/prompts/extensions/nudge.ts b/lib/prompts/extensions/nudge.ts index a260c6c..f6259b1 100644 --- a/lib/prompts/extensions/nudge.ts +++ b/lib/prompts/extensions/nudge.ts @@ -39,7 +39,7 @@ export function buildCompressedBlockGuidance( } if (blockCount > 50) { - lines.push(`- 🔀 You have ${blockCount} blocks — consider merging adjacent same-topic blocks instead of finding new content to compress. 
This permanently reduces per-turn overhead.`) + lines.push(`- 🔀 You have ${blockCount} blocks — to reduce overhead, use compress to consolidate adjacent same-topic blocks into one summary (cover the full range including old blocks).`) } // [FIX Bug 35] Only show aging warnings when context usage is above 50%. diff --git a/lib/prompts/system.ts b/lib/prompts/system.ts index 59cce62..a70ea00 100644 --- a/lib/prompts/system.ts +++ b/lib/prompts/system.ts @@ -16,9 +16,9 @@ Target the largest UNCOMPRESSED content first. Savings scale with original size CONTEXT PRESSURE LEVELS -- Normal: Be frugal — compress tool outputs you've finished using into summaries. You can decompress later. Extract and keep what matters from any message; compress verbose parts — including large logs in user messages or generated code. -- Elevated: Context is growing. Compress completed sections and high-token waste more urgently. -- Critical: Compress aggressively now. Every compression should free meaningful tokens. Preserve only what is essential for the current task. +- Normal: Be frugal — compress large completed outputs into summaries. You can decompress later if needed. +- Elevated: Context is growing — compress completed sections and high-token waste now. +- Critical: Compress aggressively now — preserve only what is essential for the current task. WHAT TO COMPRESS FIRST (high value, low risk) diff --git a/lib/token-utils.ts b/lib/token-utils.ts index be54aff..01648a0 100644 --- a/lib/token-utils.ts +++ b/lib/token-utils.ts @@ -178,3 +178,18 @@ export function countAllMessageTokens(msg: WithParts): number { if (texts.length === 0) return 0 return estimateTokensBatch(texts) } + +export function countMessageCharacters(msg: WithParts): number { + const parts = Array.isArray(msg.parts) ? 
msg.parts : [] + let total = 0 + for (const part of parts) { + if (part.type === "text" && typeof part.text === "string") { + total += part.text.length + } else { + for (const content of extractToolContent(part)) { + total += content.length + } + } + } + return total +} From 103afefa75e5f973f5b16a269edaee69083b055c Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 12:11:30 +0800 Subject: [PATCH 2/8] review fixes: maxSummaryLength 200, step-finish idempotent, prune tests, dedup - config: raise maxSummaryLength default 100 -> 200 (less aggressive) - prune: guard step-finish truncation with truncated !== reason so the parts array reference stays stable on idempotent re-runs (prefix cache) - compress message/range: dedup messageId set in minCompressRange char counting (message.ts now matches range.ts); document that the throw is intentionally placed after prepareSession with no persisted state - tests: add stripStepMarkers regression coverage (removal, truncation, short-reason preserve, idempotency, no-op on clean messages) typecheck clean, 492 tests pass --- lib/compress/message.ts | 6 ++ lib/compress/range.ts | 3 + lib/config.ts | 2 +- lib/messages/prune.ts | 14 +++-- lib/prompts/extensions/nudge.ts | 31 +++++++++- tests/prune.test.ts | 106 ++++++++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 8 deletions(-) diff --git a/lib/compress/message.ts b/lib/compress/message.ts index c2beff1..32a12dd 100644 --- a/lib/compress/message.ts +++ b/lib/compress/message.ts @@ -82,14 +82,20 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType 0) { let totalChars = 0 + const counted = new Set() for (const plan of plans) { for (const messageId of plan.selection.messageIds) { + if (counted.has(messageId)) continue + counted.add(messageId) const rawMessage = searchContext.rawMessagesById.get(messageId) if (rawMessage) { totalChars += countMessageCharacters(rawMessage) } } } + // Intentionally throws after prepareSession: the char 
count needs + // resolved plans + rawMessages, only available post-prepare. No state + // is persisted (finalizeSession/saveSessionState never runs). if (totalChars < minCompressRange) { throw new Error( `Range too small (${totalChars} chars, min ${minCompressRange}). Not worth compressing — overhead exceeds savings.`, diff --git a/lib/compress/range.ts b/lib/compress/range.ts index f4ab629..ae257b0 100644 --- a/lib/compress/range.ts +++ b/lib/compress/range.ts @@ -100,6 +100,9 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType { if (part.type === "step-finish") { const reason = (part as { reason?: unknown }).reason if (typeof reason === "string" && reason.length > MAX_STEP_FINISH_REASON) { - filtered.push({ - ...part, - reason: reason.slice(0, MAX_STEP_FINISH_REASON) + "...", - }) - changed = true - continue + const truncated = reason.slice(0, MAX_STEP_FINISH_REASON) + "..." + // Skip when already truncated: keeps `changed` false on idempotent + // re-runs so the parts array reference (and prefix cache) stays stable. + if (truncated !== reason) { + filtered.push({ ...part, reason: truncated }) + changed = true + continue + } } } diff --git a/lib/prompts/extensions/nudge.ts b/lib/prompts/extensions/nudge.ts index f6259b1..5fdf6eb 100644 --- a/lib/prompts/extensions/nudge.ts +++ b/lib/prompts/extensions/nudge.ts @@ -39,7 +39,36 @@ export function buildCompressedBlockGuidance( } if (blockCount > 50) { - lines.push(`- 🔀 You have ${blockCount} blocks — to reduce overhead, use compress to consolidate adjacent same-topic blocks into one summary (cover the full range including old blocks).`) + const oldBlockIds = activeBlockIds.slice(0, Math.max(0, blockCount - 20)) + const oldBlocks = oldBlockIds + .map((id) => state.prune.messages.blocksById.get(id)) + .filter((b): b is CompressionBlock => b !== undefined) + + if (oldBlocks.length > 5) { + const totalTokens = oldBlocks.reduce((sum, b) => sum + (b.summaryTokens ?? 
0), 0) + const totalK = Math.max(1, Math.round(totalTokens / 1000)) + + const targets: string[] = [] + const chunkSize = Math.ceil(oldBlocks.length / 3) + for (let i = 0; i < 3 && i * chunkSize < oldBlocks.length; i++) { + const chunk = oldBlocks.slice(i * chunkSize, (i + 1) * chunkSize) + if (chunk.length < 2) continue + const start = chunk[0].startId + const end = chunk[chunk.length - 1].endId + if (!start || !end) continue + const chunkTokens = chunk.reduce((s, b) => s + (b.summaryTokens ?? 0), 0) + const chunkK = Math.max(1, Math.round(chunkTokens / 1000)) + targets.push(` • compress ${start}→${end}: ${chunk.length} blocks (~${chunkK}K tokens)`) + } + + if (targets.length > 0) { + lines.push(`- 🔀 ${oldBlocks.length} old blocks using ~${totalK}K tokens. Consolidate into ${targets.length}:`) + lines.push(...targets) + lines.push(` Each summary ≤200 chars, include (bN) for consumed blocks. Cover full range in one compress call.`) + } + } else { + lines.push(`- 🔀 You have ${blockCount} blocks — use compress to consolidate adjacent same-topic blocks.`) + } } // [FIX Bug 35] Only show aging warnings when context usage is above 50%. 
diff --git a/tests/prune.test.ts b/tests/prune.test.ts index 410eb8b..e7e2be9 100644 --- a/tests/prune.test.ts +++ b/tests/prune.test.ts @@ -587,3 +587,109 @@ test("prune preserves message order for surviving messages", () => { assert.ok(m1Idx < m3Idx, "m1 should come before m3") assert.ok(m3Idx < m4Idx, "m3 should come before m4") }) + +// ===================================================================== +// stripStepMarkers — step-start removal + step-finish truncation +// ===================================================================== + +function stepStartPart(msgId: string, id: string) { + return { id, messageID: msgId, sessionID: SID, type: "step-start" as const } +} + +function stepFinishPart(msgId: string, id: string, reason: string) { + return { id, messageID: msgId, sessionID: SID, type: "step-finish" as const, reason } +} + +test("stripStepMarkers removes step-start parts entirely", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepStartPart("a1", "a1-ss"), + textPart("a1", "a1-t", "real content"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const types = messages[0]!.parts.map((p: any) => p.type) + assert.ok(!types.includes("step-start"), "step-start should be removed") + assert.ok(types.includes("text"), "text part should remain") +}) + +test("stripStepMarkers truncates long step-finish reason to 50 chars", () => { + const state = createSessionState() + const longReason = "x".repeat(155) + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepFinishPart("a1", "a1-sf", longReason), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const sf = messages[0]!.parts.find((p: any) => p.type === "step-finish") as any + assert.ok(sf, "step-finish part should remain") + assert.equal(sf.reason.length, 53, "reason should be 50 chars + '...'") + assert.ok(sf.reason.endsWith("..."), "truncated reason should end with '...'") +}) + 
+test("stripStepMarkers preserves short step-finish reason unchanged", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepFinishPart("a1", "a1-sf", "short reason"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + + const sf = messages[0]!.parts.find((p: any) => p.type === "step-finish") as any + assert.equal(sf.reason, "short reason", "short reason should be preserved") +}) + +test("stripStepMarkers is idempotent: second run keeps parts reference stable", () => { + const state = createSessionState() + const longReason = "y".repeat(120) + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + stepStartPart("a1", "a1-ss"), + stepFinishPart("a1", "a1-sf", longReason), + textPart("a1", "a1-t", "keep me"), + ]), + ] + + prune(state, logger, buildConfig(), messages) + const partsRefAfterFirst = messages[0]!.parts + const reasonAfterFirst = (partsRefAfterFirst.find((p: any) => p.type === "step-finish") as any).reason + + // Second pass over already-stripped messages + prune(state, logger, buildConfig(), messages) + + // Prefix-cache invariant: parts array must NOT be reassigned on idempotent re-run + assert.equal( + messages[0]!.parts, + partsRefAfterFirst, + "parts array reference must stay stable on idempotent re-run (prefix cache)", + ) + const reasonAfterSecond = (messages[0]!.parts.find((p: any) => p.type === "step-finish") as any).reason + assert.equal(reasonAfterSecond, reasonAfterFirst, "reason must be byte-identical on re-run") +}) + +test("stripStepMarkers leaves messages without step markers untouched", () => { + const state = createSessionState() + const messages: WithParts[] = [ + assistantMessage("a1", 1, [ + textPart("a1", "a1-t", "plain text only"), + toolPart("call-1", "bash", "output"), + ]), + ] + const originalParts = messages[0]!.parts + + prune(state, logger, buildConfig(), messages) + + assert.equal( + messages[0]!.parts, + originalParts, + "parts array reference 
unchanged when no step markers present", + ) +}) From 5915dd6b6b0b4e561eda786dc886c6eaa4c05359 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 13:24:54 +0800 Subject: [PATCH 3/8] feat: remove mark_block + unmark_block from model tools - Remove mark_block and unmark_block tool registrations (index.ts) - Remove mark_block description from system prompt (system.ts) - Keep gc/merge.ts + gc/truncate.ts as dormant safety nets - GC at 100% context remains as ultimate fallback - Model now only sees compress + decompress tools --- index.ts | 6 +----- lib/prompts/system.ts | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/index.ts b/index.ts index 87a1028..970a9f3 100644 --- a/index.ts +++ b/index.ts @@ -4,8 +4,6 @@ import { createCompressMessageTool, createCompressRangeTool, createDecompressTool, - createMarkBlockTool, - createUnmarkBlockTool, } from "./lib/compress" import { compressDisabledByOpencode, @@ -91,8 +89,6 @@ const server: Plugin = (async (ctx) => { ? createCompressMessageTool(compressToolContext) : createCompressRangeTool(compressToolContext), decompress: createDecompressTool(compressToolContext), - mark_block: createMarkBlockTool(compressToolContext), - unmark_block: createUnmarkBlockTool(compressToolContext), }), }, config: async (opencodeConfig) => { @@ -113,7 +109,7 @@ const server: Plugin = (async (ctx) => { const toolsToAdd: string[] = [] if (config.compress.permission !== "deny" && !config.experimental.allowSubAgents) { - toolsToAdd.push("compress", "decompress", "mark_block", "unmark_block") + toolsToAdd.push("compress", "decompress") } if (toolsToAdd.length > 0) { diff --git a/lib/prompts/system.ts b/lib/prompts/system.ts index a70ea00..40a80bf 100644 --- a/lib/prompts/system.ts +++ b/lib/prompts/system.ts @@ -2,7 +2,7 @@ export const SYSTEM = ` You operate in a context-constrained environment. Context management helps preserve retrieval quality, but your primary goal is completing the task at hand. 
Do not let context management distract from the actual work. -The tools you have for context management are \`compress\`, \`decompress\`, \`mark_block\`, and \`unmark_block\`. \`compress\` replaces older conversation content with technical summaries you produce. \`decompress\` restores previously compressed content when you need exact details. \`mark_block\` flags a compressed block for deferred batch merge-cleanup — it has zero immediate effect on context or cache, but marked blocks are merge-compressed together in a single cache break when context pressure rises. Use it for blocks you no longer need in detail but want to keep cached for now. \`unmark_block\` removes that flag. +The tools you have for context management are \`compress\` and \`decompress\`. \`compress\` replaces older conversation content with technical summaries you produce. \`decompress\` restores previously compressed content when you need exact details. \`\` and \`\` tags are environment-injected metadata. Do not output them. 
From b331515f928169e3a449d72b8736bb58821213c4 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 13:53:34 +0800 Subject: [PATCH 4/8] feat: auto-detect consumed blocks in compress + fix directive nudge ranges --- lib/compress/range-utils.ts | 13 ++++++++- lib/compress/range.ts | 14 ++++++++- lib/messages/inject/inject.ts | 10 ++++++- lib/prompts/compress-range.ts | 23 +++------------ lib/prompts/extensions/nudge.ts | 52 ++++++++++++++++++++++++--------- 5 files changed, 76 insertions(+), 36 deletions(-) diff --git a/lib/compress/range-utils.ts b/lib/compress/range-utils.ts index b103fe0..a405b05 100644 --- a/lib/compress/range-utils.ts +++ b/lib/compress/range-utils.ts @@ -164,7 +164,18 @@ export function validateSummaryPlaceholders( placeholders.length = 0 placeholders.push(...validPlaceholders) - return strictRequiredIds.filter((id) => !keptPlaceholderIds.has(id)) + const missingIds = strictRequiredIds.filter((id) => !keptPlaceholderIds.has(id)) + // [Plan B] Missing placeholders are non-fatal: the compress pipeline + // auto-detects every consumed block in range, so the model no longer + // needs to manually list (bN) placeholders in its summary. + if (missingIds.length > 0) { + console.warn( + `[ACP] compress summary omitted placeholders for required blocks: ${missingIds + .map((id) => `b${id}`) + .join(", ")}. 
They will be auto-attached as consumed blocks.`, + ) + } + return missingIds } export function injectBlockPlaceholders( diff --git a/lib/compress/range.ts b/lib/compress/range.ts index ae257b0..521735f 100644 --- a/lib/compress/range.ts +++ b/lib/compress/range.ts @@ -172,10 +172,22 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType() + const mergeConsumedBlockIds = [ + ...plan.selection.requiredBlockIds, + ...boundaryConsumed, + ].filter((id) => { + if (seenConsumed.has(id)) return false + seenConsumed.add(id) + return true + }) preparedPlans.push({ entry: plan.entry, diff --git a/lib/messages/inject/inject.ts b/lib/messages/inject/inject.ts index bbcb853..7260d45 100644 --- a/lib/messages/inject/inject.ts +++ b/lib/messages/inject/inject.ts @@ -190,7 +190,15 @@ export const injectCompressNudges = ( injectContextUsage(suffixMessage, config, currentTokens, modelContextLimit, !shouldNudge) if (config.compress.mode !== "message") { - const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { currentTokens, modelContextLimit, includeHint: shouldNudge }) + const visibleMessageIds = new Set( + messages.map((message) => message.info.id), + ) + const blockGuidance = buildCompressedBlockGuidance(state, config.gc, { + currentTokens, + modelContextLimit, + includeHint: shouldNudge, + visibleMessageIds, + }) if (blockGuidance.trim() && suffixMessage) { appendToLastTextPart(suffixMessage, "\n\n" + blockGuidance) } diff --git a/lib/prompts/compress-range.ts b/lib/prompts/compress-range.ts index 2dedb3a..6e1bf70 100644 --- a/lib/prompts/compress-range.ts +++ b/lib/prompts/compress-range.ts @@ -10,33 +10,18 @@ Directly quote user messages when they are short enough to include safely. Direc Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal - golden nuggets of detail that preserve full understanding with zero ambiguity. 
COMPRESSED BLOCK PLACEHOLDERS -When the selected range includes previously compressed blocks, use this exact placeholder format when referencing one: - -- \`(bN)\` +The system auto-detects any previously compressed blocks whose anchor messages fall inside your selected range. You do NOT need to manually list \`(bN)\` placeholders in your summary — every consumed block is tracked automatically. Compressed block sections in context are clearly marked with a header: - \`[Compressed conversation section]\` -Compressed block IDs always use the \`bN\` form (never \`mNNNNN\`) and are represented in the same XML metadata tag format. - Rules: -- Include every required block placeholder exactly once. +- Write a short prose summary. The system handles block consumption automatically. - Do not invent placeholders for blocks outside the selected range. -- Treat \`(bN)\` placeholders as RESERVED TOKENS. Do not emit \`(bN)\` text anywhere except intentional placeholders. -- If you need to mention a block in prose, use plain text like \`compressed bN\` (not as a placeholder). -- Preflight check before finalizing: the set of \`(bN)\` placeholders in your summary must exactly match the required set, with no duplicates. - -These placeholders are semantic references. They will be replaced with the full stored compressed block content when the tool processes your output. - -FLOW PRESERVATION WITH PLACEHOLDERS -When you use compressed block placeholders, write the surrounding summary text so it still reads correctly AFTER placeholder expansion. - -- Treat each placeholder as a stand-in for a full conversation segment, not as a short label. -- Ensure transitions before and after each placeholder preserve chronology and causality. -- Do not write text that depends on the placeholder staying literal (for example, "as noted in \`(b2)\`"). -- Your final meaning must be coherent once each placeholder is replaced with its full compressed block content. +- Treat \`(bN)\` as a RESERVED TOKEN. 
Do not emit \`(bN)\` text anywhere in the summary. +- If you need to mention a block in prose, use plain text like \`compressed bN\` (never as a placeholder). BOUNDARY IDS You specify boundaries by ID using the injected IDs visible in the conversation: diff --git a/lib/prompts/extensions/nudge.ts b/lib/prompts/extensions/nudge.ts index 5fdf6eb..ca472c3 100644 --- a/lib/prompts/extensions/nudge.ts +++ b/lib/prompts/extensions/nudge.ts @@ -5,6 +5,12 @@ export interface BlockGuidanceContext { currentTokens?: number modelContextLimit?: number includeHint?: boolean + /** + * Raw message IDs currently visible in the model's context window. + * When provided, the directive nudge only suggests ranges whose anchor + * messages are still visible, preventing stale-ID and backwards-range bugs. + */ + visibleMessageIds?: Set } export function buildCompressedBlockGuidance( @@ -31,7 +37,7 @@ export function buildCompressedBlockGuidance( const lines = [ "Compressed block context:", `- Active compressed blocks: ${blockCount} (${blockList})`, - "- If your selected compression range includes any listed block, include each required placeholder exactly once in the summary using `(bN)`.", + "- System auto-detects blocks in range — no need to manually list (bN) placeholders. Just write a short prose summary.", ] if (includeHint) { @@ -40,31 +46,49 @@ export function buildCompressedBlockGuidance( if (blockCount > 50) { const oldBlockIds = activeBlockIds.slice(0, Math.max(0, blockCount - 20)) - const oldBlocks = oldBlockIds + const allOldBlocks = oldBlockIds .map((id) => state.prune.messages.blocksById.get(id)) .filter((b): b is CompressionBlock => b !== undefined) - if (oldBlocks.length > 5) { - const totalTokens = oldBlocks.reduce((sum, b) => sum + (b.summaryTokens ?? 0), 0) + // [Plan B] Filter to blocks whose anchor message is still visible, then + // build suggestion ranges from anchor refs (mNNNNN) instead of stored + // block startId/endId. 
This avoids suggesting IDs that are no longer + // visible and prevents backwards ranges (end < start). + const visibleMessageIds = context?.visibleMessageIds + const visibleOldBlocks = + visibleMessageIds === undefined + ? allOldBlocks + : allOldBlocks.filter((b) => b.anchorMessageId && visibleMessageIds.has(b.anchorMessageId)) + + if (visibleOldBlocks.length > 5) { + const blocksWithRef = visibleOldBlocks + .map((block) => { + const ref = state.messageIds.byRawId.get(block.anchorMessageId) + return ref ? { block, ref } : null + }) + .filter((x): x is { block: CompressionBlock; ref: string } => x !== null) + .sort((a, b) => a.ref.localeCompare(b.ref)) + + const totalTokens = blocksWithRef.reduce((s, x) => s + (x.block.summaryTokens ?? 0), 0) const totalK = Math.max(1, Math.round(totalTokens / 1000)) const targets: string[] = [] - const chunkSize = Math.ceil(oldBlocks.length / 3) - for (let i = 0; i < 3 && i * chunkSize < oldBlocks.length; i++) { - const chunk = oldBlocks.slice(i * chunkSize, (i + 1) * chunkSize) + const chunkSize = Math.ceil(blocksWithRef.length / 3) + for (let i = 0; i < 3 && i * chunkSize < blocksWithRef.length; i++) { + const chunk = blocksWithRef.slice(i * chunkSize, (i + 1) * chunkSize) if (chunk.length < 2) continue - const start = chunk[0].startId - const end = chunk[chunk.length - 1].endId - if (!start || !end) continue - const chunkTokens = chunk.reduce((s, b) => s + (b.summaryTokens ?? 0), 0) + // Sorted by ref above guarantees startRef <= endRef. + const startRef = chunk[0].ref + const endRef = chunk[chunk.length - 1].ref + const chunkTokens = chunk.reduce((s, x) => s + (x.block.summaryTokens ?? 
0), 0) const chunkK = Math.max(1, Math.round(chunkTokens / 1000)) - targets.push(` • compress ${start}→${end}: ${chunk.length} blocks (~${chunkK}K tokens)`) + targets.push(` • compress ${startRef}→${endRef}: ${chunk.length} blocks (~${chunkK}K tokens)`) } if (targets.length > 0) { - lines.push(`- 🔀 ${oldBlocks.length} old blocks using ~${totalK}K tokens. Consolidate into ${targets.length}:`) + lines.push(`- 🔀 ${blocksWithRef.length} old blocks using ~${totalK}K tokens. Consolidate into ${targets.length}:`) lines.push(...targets) - lines.push(` Each summary ≤200 chars, include (bN) for consumed blocks. Cover full range in one compress call.`) + lines.push(` System auto-detects blocks in range — no need to manually list (bN) placeholders. Just write a short prose summary.`) } } else { lines.push(`- 🔀 You have ${blockCount} blocks — use compress to consolidate adjacent same-topic blocks.`) From 8d7d120027f66aa632dff80d5ce8670079b5feb1 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 14:22:42 +0800 Subject: [PATCH 5/8] refactor: retire mark_block mechanism, reduce GC to hardcoded 100% fallback - delete lib/compress/mark-block.ts + its export (tools already unregistered) - remove mark_block/unmark_block from DEFAULT_PROTECTED_TOOLS - gc/merge: remove buildNudgeText/collectActiveMarkedBlocks/multi-tier logic; runBatchCleanup now only force-merges old-gen blocks at 100% (hardcoded, not read from config). Fixes broken nudge that referenced removed tools. - hooks: drop dead tier-1 nudge branch + appendBatchCleanupNudge helper - tests: replace mark-tier runBatchCleanup tests with 100% fallback coverage (mergeMarkedBlocks primitive tests retained) Full GC config/state cleanup deferred to a follow-up. typecheck clean, 483 tests pass. 
--- lib/compress/index.ts | 1 - lib/compress/mark-block.ts | 148 ----------------------- lib/config.ts | 2 - lib/gc/merge.ts | 204 +++++-------------------------- lib/hooks.ts | 10 -- tests/gc-merge.test.ts | 239 ++++++------------------------------- 6 files changed, 62 insertions(+), 542 deletions(-) delete mode 100644 lib/compress/mark-block.ts diff --git a/lib/compress/index.ts b/lib/compress/index.ts index b4fe6e7..6330869 100644 --- a/lib/compress/index.ts +++ b/lib/compress/index.ts @@ -2,4 +2,3 @@ export { ToolContext } from "./types" export { createCompressMessageTool } from "./message" export { createCompressRangeTool } from "./range" export { createDecompressTool } from "./decompress" -export { createMarkBlockTool, createUnmarkBlockTool } from "./mark-block" diff --git a/lib/compress/mark-block.ts b/lib/compress/mark-block.ts deleted file mode 100644 index 11168ac..0000000 --- a/lib/compress/mark-block.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { tool } from "@opencode-ai/plugin" -import type { ToolContext } from "./types" -import { ensureSessionInitialized } from "../state" -import { saveSessionState } from "../state/persistence" -import { assignMessageRefs } from "../message-ids" -import { fetchSessionMessages } from "./search" -import { formatBlockRef, parseBlockRef } from "../message-ids" - -interface RunContext { - ask(input: { - permission: string - patterns: string[] - always: string[] - metadata: Record - }): Promise - metadata(input: { title: string }): void - sessionID: string -} - -async function prepareMarkSession( - ctx: ToolContext, - toolCtx: RunContext, -): Promise { - await toolCtx.ask({ - permission: "compress", - patterns: ["*"], - always: ["*"], - metadata: {}, - }) - - toolCtx.metadata({ title: "Mark block" }) - - const rawMessages = await fetchSessionMessages(ctx.client, toolCtx.sessionID) - - await ensureSessionInitialized( - ctx.client, - ctx.state, - toolCtx.sessionID, - ctx.logger, - rawMessages, - ctx.config.manualMode.enabled, 
- ) - - assignMessageRefs(ctx.state, rawMessages) -} - -const MARK_DESCRIPTION = `Marks a compressed block for batch merge-cleanup. - -Use this for blocks whose detailed content you no longer need, but whose summaries -you want to keep in context for now (to preserve prompt cache). Marked blocks stay -fully active with zero immediate effect on context or cache. When context pressure -rises, all marked blocks are merge-compressed together into a single summary in one -cache break, instead of being handled one at a time. - -Argument: blockId — the block reference to mark (e.g., "b1", "b3") - -Use mark_block instead of compress when you want deferred cleanup: the block keeps -serving cache hits now and gets consolidated later only if context gets tight.` - -const UNMARK_DESCRIPTION = `Removes the batch cleanup mark from a compressed block. - -Reverses mark_block. The block returns to normal handling and will not be -auto-merged during batch cleanup. - -Argument: blockId — the block reference to unmark (e.g., "b1", "b3")` - -function buildSchema() { - return { - blockId: tool.schema - .string() - .describe('Block reference to mark (e.g., "b1", "b3")'), - } -} - -function buildUnmarkSchema() { - return { - blockId: tool.schema - .string() - .describe('Block reference to unmark (e.g., "b1", "b3")'), - } -} - -export function createMarkBlockTool(ctx: ToolContext): ReturnType { - return tool({ - description: MARK_DESCRIPTION, - args: buildSchema(), - async execute(args, toolCtx) { - await prepareMarkSession(ctx, toolCtx) - - const targetBlockId = parseBlockRef(String(args.blockId)) - if (targetBlockId === null) { - return `Error: Invalid block ID "${args.blockId}". 
Use format "b0", "b1", etc.` - } - - const messagesState = ctx.state.prune.messages - const block = messagesState.blocksById.get(targetBlockId) - if (!block) { - return `Error: Block ${formatBlockRef(targetBlockId)} does not exist.` - } - - if (!block.active) { - return `Error: Block ${formatBlockRef(targetBlockId)} is not active.` - } - - messagesState.markedForCleanup.add(targetBlockId) - await saveSessionState(ctx.state, ctx.logger) - - const ref = formatBlockRef(targetBlockId) - const markedCount = messagesState.markedForCleanup.size - - ctx.logger.info("mark_block: block marked for cleanup", { - blockId: targetBlockId, - markedCount, - }) - - return `Block ${ref} marked for cleanup. It will be merge-compressed together with other marked blocks when context pressure rises. No immediate effect on context or cache. (${markedCount} block(s) currently marked.)` - }, - }) -} - -export function createUnmarkBlockTool(ctx: ToolContext): ReturnType { - return tool({ - description: UNMARK_DESCRIPTION, - args: buildUnmarkSchema(), - async execute(args, toolCtx) { - await prepareMarkSession(ctx, toolCtx) - - const targetBlockId = parseBlockRef(String(args.blockId)) - if (targetBlockId === null) { - return `Error: Invalid block ID "${args.blockId}". Use format "b0", "b1", etc.` - } - - const messagesState = ctx.state.prune.messages - if (!messagesState.markedForCleanup.has(targetBlockId)) { - return `Block ${formatBlockRef(targetBlockId)} was not marked for cleanup.` - } - - messagesState.markedForCleanup.delete(targetBlockId) - await saveSessionState(ctx.state, ctx.logger) - - ctx.logger.info("unmark_block: block unmarked", { - blockId: targetBlockId, - }) - - return `Block ${formatBlockRef(targetBlockId)} unmarked. 
It will no longer be auto-merged during batch cleanup.` - }, - }) -} diff --git a/lib/config.ts b/lib/config.ts index 38bfa21..14a0ee6 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -103,8 +103,6 @@ const DEFAULT_PROTECTED_TOOLS = [ "todoread", "compress", "decompress", - "mark_block", - "unmark_block", "batch", "plan_enter", "plan_exit", diff --git a/lib/gc/merge.ts b/lib/gc/merge.ts index 0a2583d..30207bf 100644 --- a/lib/gc/merge.ts +++ b/lib/gc/merge.ts @@ -1,5 +1,5 @@ import type { CompressionBlock, SessionState, WithParts } from "../state" -import type { BatchCleanupConfig, GCConfig, PluginConfig } from "../config" +import type { PluginConfig } from "../config" import type { Logger } from "../logger" import { countTokens, getCurrentTokenUsage } from "../token-utils" import { @@ -8,7 +8,6 @@ import { allocateRunId, wrapCompressedSummary, } from "../compress/state" -import { formatBlockRef } from "../message-ids" export interface MergeMarkedResult { mergedCount: number @@ -23,33 +22,6 @@ export interface BatchCleanupResult { nudgeText?: string } -const DEFAULT_BATCH_CLEANUP: BatchCleanupConfig = { - lowThreshold: "55%", - highThreshold: "75%", - forceThreshold: "90%", -} - -/** Minimum marked-block count to trigger escalation nudge (tier 2 active compress). */ -const ESCALATE_MIN_MARKED = 3 - -/** Minimum marked/old-gen ratio to trigger escalation nudge. */ -const ESCALATE_MIN_RATIO = 0.4 - -function resolveBatchCleanup(gc: GCConfig): BatchCleanupConfig { - return gc.batchCleanup ?? 
DEFAULT_BATCH_CLEANUP -} - -function percentToTokens( - value: number | `${number}%`, - modelContextLimit: number, -): number { - if (typeof value === "number") return value - const percent = parseFloat(value.slice(0, -1)) - if (isNaN(percent)) return modelContextLimit - const clamped = Math.max(0, Math.min(100, Math.round(percent))) - return Math.round((clamped / 100) * modelContextLimit) -} - function collectActiveOldGenBlocks(state: SessionState, maxOldGenSummaryLength: number): CompressionBlock[] { const blocks: CompressionBlock[] = [] const ids = Array.from(state.prune.messages.activeBlockIds).sort((a, b) => a - b) @@ -67,28 +39,13 @@ function collectActiveOldGenBlocks(state: SessionState, maxOldGenSummaryLength: return blocks } -function collectActiveMarkedBlocks(state: SessionState): CompressionBlock[] { - const messagesState = state.prune.messages - const ids = Array.from(messagesState.markedForCleanup).sort((a, b) => a - b) - const blocks: CompressionBlock[] = [] - for (const id of ids) { - const block = messagesState.blocksById.get(id) - if (!block || !block.active) { - messagesState.markedForCleanup.delete(id) - continue - } - blocks.push(block) - } - return blocks -} - function extractSummaryBody(summary: string): string { let body = summary const headerPrefix = COMPRESSED_BLOCK_HEADER + "\n" if (body.startsWith(headerPrefix)) { body = body.slice(headerPrefix.length) } - body = body.replace(/\n]*>b\d+<\/dcp-message-id>$/, "") + body = body.replace(/\n]*>b\d+<\/dcp-message-id>$/, "") return body.trim() } @@ -228,70 +185,6 @@ export function mergeMarkedBlocks( return { mergedCount: sourceBlocks.length, savedTokens } } -function estimateTokens(blocks: CompressionBlock[]): number { - return blocks.reduce( - (sum, block) => sum + (block.summaryTokens || Math.round(block.summary.length / 4)), - 0, - ) -} - -function buildNudgeText(state: SessionState, maxMergedLength: number): string | undefined { - const marked = collectActiveMarkedBlocks(state) - const 
oldGen = collectActiveOldGenBlocks(state, maxMergedLength) - - if (oldGen.length === 0) return undefined - - const oldGenIds = new Set(oldGen.map((b) => b.blockId)) - const markedOldGen = marked.filter((b) => oldGenIds.has(b.blockId)) - const markedOldGenCount = markedOldGen.length - const oldGenCount = oldGen.length - const ratio = markedOldGenCount / oldGenCount - const ratioPct = Math.round(ratio * 100) - const escalateMinPct = Math.round(ESCALATE_MIN_RATIO * 100) - - // Escalation: enough old-gen blocks marked → urge active compress now - if (markedOldGenCount >= ESCALATE_MIN_MARKED && ratio >= ESCALATE_MIN_RATIO) { - const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ") - const firstRef = formatBlockRef(marked[0].blockId) - const lastRef = formatBlockRef(marked[marked.length - 1].blockId) - const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4)) - - return [ - `🔥 ${markedOldGenCount}/${oldGenCount} old-gen blocks marked (${ratioPct}%) — ready for batch cleanup.`, - `Compressing ${refs} (range ${firstRef}–${lastRef}) would free ~${estimatedSavings} tokens in one cache break.`, - `Call compress with this range now to consolidate them.`, - ].join(" ") - } - - // Some marks, not enough to escalate → keep marking - if (marked.length >= 1) { - const refs = marked.map((b) => formatBlockRef(b.blockId)).join(", ") - const estimatedSavings = Math.max(0, estimateTokens(marked) - Math.round(maxMergedLength / 4)) - - return [ - `⚠️ ${marked.length} block(s) marked for batch cleanup (${refs}).`, - `Merge-compressing them would free ~${estimatedSavings} tokens.`, - marked.length >= 2 - ? "They will auto-merge when context pressure reaches the high threshold." 
- : "A single marked block won't auto-merge on its own — use compress to consolidate it, or unmark_block if no longer needed.", - `Mark more old-gen blocks (need ≥${ESCALATE_MIN_MARKED} at ≥${escalateMinPct}%) to trigger batch cleanup sooner.`, - "To act now, use compress with a range covering these blocks.", - ].join(" ") - } - - // No marks yet → guide the model to start marking (fixes chicken-and-egg deadlock) - const shown = oldGen.slice(0, 5) - const oldGenRefs = shown.map((b) => formatBlockRef(b.blockId)).join(", ") - const more = oldGenCount > 5 ? ` (+${oldGenCount - 5} more)` : "" - - return [ - `📋 Context pressure rising — ${oldGenCount} old-gen compressed block(s) occupy ~${estimateTokens(oldGen)} tokens (${oldGenRefs}${more}).`, - `Review which blocks contain information you no longer need, and use mark_block to flag them.`, - `Once enough are marked (≥${ESCALATE_MIN_MARKED} at ≥${escalateMinPct}% of old-gen), they'll be batch-merged in one cache break to preserve cache hit rate.`, - `Do NOT mark blocks you may still need.`, - ].join(" ") -} - export function runBatchCleanup( state: SessionState, config: PluginConfig, @@ -310,78 +203,37 @@ export function runBatchCleanup( } const currentTokens = getCurrentTokenUsage(state, messages) - const limit = state.modelContextLimit - const batchCleanup = resolveBatchCleanup(config.gc) - const maxMergedLength = config.gc.maxOldGenSummaryLength - - const forceTokens = percentToTokens(batchCleanup.forceThreshold, limit) - const highTokens = percentToTokens(batchCleanup.highThreshold, limit) - const lowTokens = percentToTokens(batchCleanup.lowThreshold, limit) - if (currentTokens >= forceTokens) { - const oldGenBlocks = collectActiveOldGenBlocks(state, maxMergedLength) - if (oldGenBlocks.length < 2) { - return noop - } - const ids = oldGenBlocks.map((b) => b.blockId) - const result = mergeMarkedBlocks(state, ids, maxMergedLength) - if (result.mergedCount === 0) { - return noop - } - logger.info("Batch cleanup tier 3 
(force): merged old-gen blocks", { - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - currentTokens, - forceThreshold: batchCleanup.forceThreshold, - }) - return { - tier: 3, - action: "merge", - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - } + // Only a hardcoded 100% force fallback remains. The mark_block mechanism and + // the multi-tier (low/high/force) batch-cleanup were retired; full GC removal + // is tracked separately. Threshold is intentionally NOT read from config. + if (currentTokens < state.modelContextLimit) { + return noop } - if (currentTokens >= highTokens) { - const marked = collectActiveMarkedBlocks(state) - if (marked.length >= 2) { - const ids = marked.map((b) => b.blockId) - const result = mergeMarkedBlocks(state, ids, maxMergedLength) - if (result.mergedCount > 0) { - logger.info("Batch cleanup tier 2 (high): merged marked blocks", { - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - currentTokens, - highThreshold: batchCleanup.highThreshold, - }) - return { - tier: 2, - action: "merge", - mergedCount: result.mergedCount, - savedTokens: result.savedTokens, - } - } - } - // Not enough marks or merge produced nothing — fall through to nudge + const maxMergedLength = config.gc.maxOldGenSummaryLength + const oldGenBlocks = collectActiveOldGenBlocks(state, maxMergedLength) + if (oldGenBlocks.length < 2) { + return noop } - if (currentTokens >= lowTokens) { - const nudgeText = buildNudgeText(state, maxMergedLength) - if (!nudgeText) { - return noop - } - logger.info("Batch cleanup tier 1 (low): nudge injected", { - currentTokens, - lowThreshold: batchCleanup.lowThreshold, - }) - return { - tier: 1, - action: "nudge", - mergedCount: 0, - savedTokens: 0, - nudgeText, - } + const ids = oldGenBlocks.map((b) => b.blockId) + const result = mergeMarkedBlocks(state, ids, maxMergedLength) + if (result.mergedCount === 0) { + return noop } - return noop + logger.info("Batch cleanup force 
fallback (100%): merged old-gen blocks", { + mergedCount: result.mergedCount, + savedTokens: result.savedTokens, + currentTokens, + contextLimit: state.modelContextLimit, + }) + + return { + tier: 3, + action: "merge", + mergedCount: result.mergedCount, + savedTokens: result.savedTokens, + } } diff --git a/lib/hooks.ts b/lib/hooks.ts index ff083d4..3006079 100644 --- a/lib/hooks.ts +++ b/lib/hooks.ts @@ -43,7 +43,6 @@ import { cacheSystemPromptTokens } from "./ui/utils" import { runTruncateGC, shouldRunMajorGC, getGCParams } from "./gc/truncate" import { runBatchCleanup } from "./gc/merge" import { getCurrentTokenUsage } from "./token-utils" -import { appendToLastTextPart } from "./messages/utils" const INTERNAL_AGENT_SIGNATURES = [ "You are a title generator", @@ -207,12 +206,6 @@ function runMajorGC( } } -function appendBatchCleanupNudge(messages: WithParts[], nudgeText: string): void { - const lastUser = getLastUserMessage(messages) - if (!lastUser) return - appendToLastTextPart(lastUser, nudgeText) -} - export function createChatMessageTransformHandler( client: any, state: SessionState, @@ -259,9 +252,6 @@ export function createChatMessageTransformHandler( buildToolIdList(state, output.messages) runMajorGC(state, config, logger, output.messages) const batchResult = runBatchCleanup(state, config, logger, output.messages) - if (batchResult.tier === 1 && batchResult.nudgeText) { - appendBatchCleanupNudge(output.messages, batchResult.nudgeText) - } if (batchResult.mergedCount > 0) { void saveSessionState(state, logger) } diff --git a/tests/gc-merge.test.ts b/tests/gc-merge.test.ts index be0627f..1c77e91 100644 --- a/tests/gc-merge.test.ts +++ b/tests/gc-merge.test.ts @@ -359,13 +359,19 @@ test("mergeMarkedBlocks: reports saved tokens as reduction from source summaries const logger = new Logger(false) -test("runBatchCleanup: below low threshold (50%) → noop tier 0", () => { +// ===================================================================== +// runBatchCleanup 
— hardcoded 100% force fallback only. +// The mark_block mechanism and the multi-tier (low/high/force) batch +// cleanup were retired; only a single last-resort merge at 100% remains. +// ===================================================================== + +test("runBatchCleanup: below 100% (95%) → noop tier 0", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), + makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 500)] + const state = makeState(blocks, { modelContextLimit: 1000 }) + const messages: WithParts[] = [makeAssistantMessage("a1", 950)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 0) @@ -374,59 +380,7 @@ test("runBatchCleanup: below low threshold (50%) → noop tier 0", () => { assert.equal(state.prune.messages.activeBlockIds.size, 2) }) -test("runBatchCleanup: above low threshold (55%) with marked blocks → tier 1 nudge", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 600)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, "nudge") - assert.equal(result.mergedCount, 0) - assert.ok(result.nudgeText, "nudge text should 
be provided") - assert.ok(result.nudgeText!.includes("b1")) - assert.ok(result.nudgeText!.includes("b2")) - assert.equal(state.prune.messages.activeBlockIds.size, 2) -}) - -test("runBatchCleanup: at high threshold (75%) with >= 2 marked blocks → tier 2 merge", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 750)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 2) - assert.equal(result.action, "merge") - assert.equal(result.mergedCount, 2) - assert.ok(result.savedTokens >= 0) - assert.equal(state.prune.messages.markedForCleanup.size, 0) - assert.equal(state.prune.messages.activeBlockIds.size, 1) -}) - -test("runBatchCleanup: at high threshold (75%) with 1 marked block → tier 1 nudge (not enough for merge)", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 750)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1, "should fall through to nudge when merge conditions unmet") - assert.equal(result.action, "nudge") - assert.equal(result.mergedCount, 0) - assert.ok(result.nudgeText, "nudge text should be provided") - assert.ok(result.nudgeText!.includes("b1"), "nudge should reference the marked block") - assert.equal(state.prune.messages.activeBlockIds.size, 2) -}) - -test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → 
tier 3 force merge", () => { +test("runBatchCleanup: at 100% with >= 2 old-gen blocks → tier 3 force merge", () => { const blocks = [ makeBlock({ blockId: 1, @@ -443,7 +397,7 @@ test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → tie }), ] const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 900)] + const messages: WithParts[] = [makeAssistantMessage("a1", 1000)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 3) @@ -452,173 +406,48 @@ test("runBatchCleanup: at force threshold (90%) with >= 2 old-gen blocks → tie assert.equal(state.prune.messages.activeBlockIds.size, 1) }) -test("runBatchCleanup: modelContextLimit undefined → noop", () => { +test("runBatchCleanup: at 100% with < 2 old-gen blocks → noop", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), ] - const state = makeState(blocks, { modelContextLimit: undefined, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 999999)] + const state = makeState(blocks, { modelContextLimit: 1000 }) + const messages: WithParts[] = [makeAssistantMessage("a1", 1000)] const result = runBatchCleanup(state, buildConfig(), logger, messages) assert.equal(result.tier, 0) assert.equal(result.action, "none") assert.equal(result.mergedCount, 0) + assert.equal(state.prune.messages.activeBlockIds.size, 1) }) -test("runBatchCleanup: tier ordering — force takes precedence over high and low at 95%", () => { - const blocks = [ - makeBlock({ - blockId: 1, - anchorMessageId: "a1", - summary: wrapCompressedSummary(1, "one"), - generation: "old", - }), - makeBlock({ - blockId: 2, - runId: 2, - 
anchorMessageId: "a2", - summary: wrapCompressedSummary(2, "two"), - generation: "old", - }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 950)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 3, "force tier must win over high/low when usage >= 90%") - assert.equal(result.action, "merge") -}) - -test("runBatchCleanup: at high threshold with unmarked old-gen → tier 1 nudge (mark guidance, fixes chicken-and-egg)", () => { - const blocks = [ - makeBlock({ - blockId: 1, - anchorMessageId: "a1", - summary: wrapCompressedSummary(1, "one"), - generation: "old", - }), - makeBlock({ - blockId: 2, - runId: 2, - anchorMessageId: "a2", - summary: wrapCompressedSummary(2, "two"), - generation: "old", - }), - ] - const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 800)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1, "should nudge even without marks — fixes chicken-and-egg deadlock") - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText, "nudge text should be provided") - assert.ok(result.nudgeText!.includes("mark_block"), "should guide model to use mark_block") - assert.ok(result.nudgeText!.includes("b1"), "should reference old-gen blocks") - assert.ok(result.nudgeText!.includes("b2")) -}) - -test("runBatchCleanup: tier 1b nudge — no marks, old-gen blocks → guides marking", () => { +test("runBatchCleanup: modelContextLimit undefined → noop", () => { const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), - makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, 
"three"), generation: "old" }), + makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one") }), + makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two") }), ] - const state = makeState(blocks, { modelContextLimit: 1000 }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] + const state = makeState(blocks, { modelContextLimit: undefined }) + const messages: WithParts[] = [makeAssistantMessage("a1", 999999)] const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText!.includes("mark_block")) - assert.ok(result.nudgeText!.includes("3 old-gen")) - assert.ok(!result.nudgeText!.includes("🔥"), "should not show escalation emoji without marks") + assert.equal(result.tier, 0) + assert.equal(result.action, "none") + assert.equal(result.mergedCount, 0) }) -test("runBatchCleanup: tier 1 escalation — ≥3 marked at ≥40% → urges active compress", () => { +test("runBatchCleanup: mark tiers removed — marked blocks below 100% → noop (no nudge, no merge)", () => { const blocks = [ makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, "three"), generation: "old" }), - makeBlock({ blockId: 4, runId: 4, anchorMessageId: "a4", summary: wrapCompressedSummary(4, "four"), generation: "old" }), ] + // Legacy marks that would previously have triggered tier 1/2 — now ignored. 
const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 3] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.equal(result.action, "nudge") - assert.ok(result.nudgeText!.includes("🔥"), "should show escalation indicator") - assert.ok(result.nudgeText!.includes("3/4"), "should show marked/total ratio") - assert.ok(result.nudgeText!.includes("75%"), "should show percentage") - assert.ok(result.nudgeText!.includes("compress"), "should urge compress action") - assert.ok(result.nudgeText!.includes("b1") && result.nudgeText!.includes("b3"), "should reference range") -}) - -test("runBatchCleanup: tier 1 count gate — 2 marked (100% ratio) but < 3 count → no escalation", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), - ] - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator, not escalation") - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate with only 2 marked blocks") -}) - -test("runBatchCleanup: tier 1 ratio gate — 3 marked out of 10 (30%) → no escalation", () => { - const blocks: CompressionBlock[] = [] - for (let i = 1; i <= 10; i++) { - blocks.push(makeBlock({ - blockId: i, - runId: i, - anchorMessageId: `a${i}`, - summary: wrapCompressedSummary(i, `block ${i}`), - generation: "old", - })) - } - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 3] }) - const messages: WithParts[] = 
[makeAssistantMessage("a1", 560)] - - const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator, not escalation") - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate with 30% ratio < 40% threshold") - assert.ok(result.nudgeText!.includes("b1"), "should still reference marked blocks") -}) - -test("runBatchCleanup: young-gen block marked → escalation ratio uses old-gen subset only", () => { - const blocks = [ - makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }), - makeBlock({ blockId: 2, runId: 2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }), - makeBlock({ blockId: 3, runId: 3, anchorMessageId: "a3", summary: wrapCompressedSummary(3, "three"), generation: "old" }), - makeBlock({ blockId: 4, runId: 4, anchorMessageId: "a4", summary: wrapCompressedSummary(4, "four"), generation: "old" }), - makeBlock({ blockId: 5, runId: 5, anchorMessageId: "a5", summary: wrapCompressedSummary(5, "young"), generation: "young" }), - ] - // Mark 2 old-gen + 1 young-gen = 3 total, but only 2 old-gen - const state = makeState(blocks, { modelContextLimit: 1000, marked: [1, 2, 5] }) - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] + const messages: WithParts[] = [makeAssistantMessage("a1", 800)] const result = runBatchCleanup(state, buildConfig(), logger, messages) - assert.equal(result.tier, 1) - assert.ok(!result.nudgeText!.includes("🔥"), "should NOT escalate: only 2 old-gen marked < 3 minimum") - assert.ok(result.nudgeText!.includes("⚠️"), "should show some-marks indicator") -}) - -test("collectActiveMarkedBlocks: sweeps stale marks for deactivated blocks", () => { - const block1 = makeBlock({ blockId: 1, anchorMessageId: "a1", summary: wrapCompressedSummary(1, "one"), generation: "old" }) - const block2 = makeBlock({ blockId: 2, runId: 
2, anchorMessageId: "a2", summary: wrapCompressedSummary(2, "two"), generation: "old" }) - const state = makeState([block1, block2], { modelContextLimit: 1000, marked: [1, 2] }) - - // Simulate block 2 being deactivated by something other than merge/unmark - block2.active = false - - const messages: WithParts[] = [makeAssistantMessage("a1", 560)] - runBatchCleanup(state, buildConfig(), logger, messages) - - assert.equal(state.prune.messages.markedForCleanup.has(2), false, "stale mark for deactivated block should be swept") - assert.equal(state.prune.messages.markedForCleanup.has(1), true, "active block mark should remain") + assert.equal(result.tier, 0, "no nudge/merge below 100% even with marks") + assert.equal(result.action, "none") + assert.equal(result.mergedCount, 0) + assert.ok(!result.nudgeText, "no nudge text — mark_block nudge is retired") + assert.equal(state.prune.messages.activeBlockIds.size, 3) }) From 2541be555273ce2aad19b88adfff2a6e19ec9582 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 14:36:52 +0800 Subject: [PATCH 6/8] =?UTF-8?q?docs:=20update=20README=20=E2=80=94=20remov?= =?UTF-8?q?e=20mark=5Fblock,=20simplify=20GC=20to=20100%=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index a82e839..7ee75ef 100644 --- a/README.md +++ b/README.md @@ -82,18 +82,20 @@ Or add to your opencode config: ACP hands the context-compression tool directly to the model. The model is **100% responsible** for context compression. The model's available tools are -mainly: **compress**, **decompress**, and **delete** (`mark_block` / `unmark_block`). +mainly: **compress** and **decompress**. A hardcoded 100% GC fallback acts as +a safety net when the context window is completely full. ### Lifecycle -Three operations: **compress**, **decompress**, and **delete**. 
Content loops -between raw and compressed, and eventually terminates in deletion: +Two operations: **compress** and **decompress**. Content loops between raw and +compressed. When context hits 100%, old-gen block summaries are truncated as +a last resort: ```mermaid stateDiagram-v2 Raw --> Compressed : compress Compressed --> Raw : decompress - Compressed --> Deleted : delete + Compressed --> Truncated : GC at 100% ``` ### Compression strategy @@ -305,7 +307,7 @@ Each level overrides the previous, so project settings take priority over global "protectedTools": [], }, }, - // Garbage collection and batch cleanup + // Garbage collection — hardcoded 100% fallback only "gc": { "algorithm": "truncate", // young → old generation promotion after this many survivals @@ -314,18 +316,8 @@ Each level overrides the previous, so project settings take priority over global "maxBlockAge": 15, // truncate old-gen summaries exceeding this length (chars) "maxOldGenSummaryLength": 3000, - // run major GC when context usage exceeds this + // run major GC when context usage exceeds this (hardcoded, not configurable) "majorGcThresholdPercent": "100%", - // Three-tier batch merge-cleanup for blocks flagged via mark_block. - // Accepts a number or "X%" of the model context window. - "batchCleanup": { - // At/above this usage, remind the model about marked blocks - "lowThreshold": "60%", - // At/above this usage, auto merge-compress all marked blocks into one - "highThreshold": "75%", - // At/above this usage, force-merge all old-gen blocks (before GC) - "forceThreshold": "90%", - }, }, } ``` @@ -354,7 +346,7 @@ To reset an override, delete the matching file from your overrides directory. 
### Protected Tools By default, these tools are always protected from pruning: -`task`, `skill`, `todowrite`, `todoread`, `compress`, `decompress`, `mark_block`, `unmark_block`, `batch`, `plan_enter`, `plan_exit`, `write`, `edit` +`task`, `skill`, `todowrite`, `todoread`, `compress`, `decompress`, `batch`, `plan_enter`, `plan_exit`, `write`, `edit` The `protectedTools` arrays in `commands` and `strategies` add to this default list. From 96b98d0d7911f0470fbcae777b71d8f6c2cd07ae Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 14:51:19 +0800 Subject: [PATCH 7/8] =?UTF-8?q?docs:=20sync=20Chinese=20README=20=E2=80=94?= =?UTF-8?q?=20remove=20mark=5Fblock,=20simplify=20GC=20to=20100%=20fallbac?= =?UTF-8?q?k?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.zh-CN.md | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/README.zh-CN.md b/README.zh-CN.md index b29bd8b..c002db1 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -73,17 +73,17 @@ opencode plugin opencode-acp@latest --global ## 工作原理 -ACP 把上下文压缩工具直接交给模型。模型对上下文压缩**负全责**。模型可用的工具主要是:**compress**、**decompress** 和 **delete**(`mark_block` / `unmark_block`)。 +ACP 把上下文压缩工具直接交给模型。模型对上下文压缩**负全责**。模型可用的工具主要是:**compress** 和 **decompress**。当上下文达到 100% 时,系统自动触发 GC 截断作为兜底。 ### 生命周期 -三个操作:**压缩**、**解压缩**、**删除**。内容在原始与压缩之间循环,最终以删除终结: +两个操作:**压缩**、**解压缩**。内容在原始与压缩之间循环。当上下文达到 100% 时,GC 自动截断老年代 block 作为兜底: ```mermaid stateDiagram-v2 Raw --> Compressed : compress Compressed --> Raw : decompress - Compressed --> Deleted : delete + Compressed --> GC_Truncated : GC (100%) ``` ### 压缩策略 @@ -104,9 +104,9 @@ stateDiagram-v2 由模型决定何时解压。当上下文大到足以干扰模型的 self-attention 时,简短的 block 会让模型先压缩一部分内容,处理完紧急事务,再在后续工作中按需解压。 -### 删除策略 +### GC 兜底 -为了应对大量小块历史内容的堆积,新版本增加了删除策略。由模型决定是否删除。**一旦删除,内容不可恢复。** 这取代了原先的强制 GC,使得强制垃圾回收不再删除模型认为重要的内容。 +当上下文达到 100% 时,系统自动截断老年代 block 摘要,防止上下文溢出。这是最后的兜底机制,不影响模型的正常压缩/解压操作。 --- @@ -289,18 +289,8 @@ ACP 使用自己的配置文件,按以下顺序搜索: 
"maxBlockAge": 15, // 截断超过此长度(字符)的老年代摘要 "maxOldGenSummaryLength": 3000, - // 上下文使用率超过此值时执行主 GC + // 上下文使用率超过此值时执行主 GC(兜底,硬编码为 100%) "majorGcThresholdPercent": "100%", - // 通过 mark_block 标记的块的三级批量合并清理阈值。 - // 接受数字或 "X%"(模型上下文窗口的百分比)。 - "batchCleanup": { - // 达到此使用率时,提醒模型已标记的块 - "lowThreshold": "60%", - // 达到此使用率时,自动将所有已标记块合并压缩为一个 - "highThreshold": "75%", - // 达到此使用率时,强制合并所有老年代块(GC 之前) - "forceThreshold": "90%", - }, }, } ``` @@ -329,7 +319,7 @@ ACP 暴露六个可编辑的 prompt: ### 受保护工具 默认情况下,以下工具始终受保护不被剪枝: -`task`、`skill`、`todowrite`、`todoread`、`compress`、`decompress`、`mark_block`、`unmark_block`、`batch`、`plan_enter`、`plan_exit`、`write`、`edit` +`task`、`skill`、`todowrite`、`todoread`、`compress`、`decompress`、`batch`、`plan_enter`、`plan_exit`、`write`、`edit` `commands` 和 `strategies` 中的 `protectedTools` 数组会添加到此默认列表。 From 10b6abd1236fa04336ef1db43dc99eacbdb268f3 Mon Sep 17 00:00:00 2001 From: ranxianglei Date: Mon, 29 Jun 2026 14:54:16 +0800 Subject: [PATCH 8/8] feat(compress): soft summary target + generous hard ceiling Replace the aggressive 200-char hard reject (which forced expensive full retries and pushed the model to drop detail) with a two-tier scheme: - maxSummaryLength (default 200, unchanged): now a SOFT target interpolated into the compress-message/compress-range tool descriptions, guiding the model to write concise summaries upfront. - maxSummaryLengthHard (default 800): the new hard ceiling. Only summaries beyond this are rejected, so reasonable 220-700 char summaries pass one-shot. Compression becomes near-retry-free. Also validates maxSummaryLengthHard >= maxSummaryLength. typecheck clean, 486 tests pass. 
--- lib/compress/message.ts | 14 +++++++------ lib/compress/range.ts | 14 +++++++------ lib/config-validation.ts | 35 +++++++++++++++++++++++++++++++++ lib/config.ts | 3 +++ tests/config-validation.test.ts | 25 +++++++++++++++++++++++ 5 files changed, 79 insertions(+), 12 deletions(-) diff --git a/lib/compress/message.ts b/lib/compress/message.ts index 32a12dd..d759b9f 100644 --- a/lib/compress/message.ts +++ b/lib/compress/message.ts @@ -13,7 +13,7 @@ import { } from "./state" import type { CompressMessageToolArgs } from "./types" -function buildSchema() { +function buildSchema(maxSummaryLength: number) { return { topic: tool.schema .string() @@ -31,7 +31,9 @@ function buildSchema() { .describe("Short label (3-5 words) for this one message summary"), summary: tool.schema .string() - .describe("Complete technical summary replacing that one message"), + .describe( + `Complete technical summary replacing that one message. Aim for <=${maxSummaryLength} chars; exceed only when strictly necessary to preserve critical detail (file paths, decisions, signatures, exact values). Never pad.`, + ), }), ) .describe("Batch of individual message summaries to create in one tool call"), @@ -44,16 +46,16 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType maxSummaryLength) { + if (entry.summary.length > maxSummaryLengthHard) { throw new Error( - `Summary too long (${entry.summary.length} chars, max ${maxSummaryLength}). Write a shorter summary focusing on key conclusions only.`, + `Summary too long (${entry.summary.length} chars, hard ceiling ${maxSummaryLengthHard}). Aim for <=${ctx.config.compress.maxSummaryLength}; exceed only when strictly necessary. 
Rewrite more concisely.`, ) } } diff --git a/lib/compress/range.ts b/lib/compress/range.ts index 521735f..9534a71 100644 --- a/lib/compress/range.ts +++ b/lib/compress/range.ts @@ -26,7 +26,7 @@ import { } from "./state" import type { CompressRangeToolArgs } from "./types" -function buildSchema() { +function buildSchema(maxSummaryLength: number) { return { topic: tool.schema .string() @@ -44,7 +44,9 @@ function buildSchema() { .describe("Message or block ID marking the end of range (e.g. m00012, b5)"), summary: tool.schema .string() - .describe("Complete technical summary replacing all content in range"), + .describe( + `Complete technical summary replacing all content in range. Aim for <=${maxSummaryLength} chars; exceed only when strictly necessary to preserve critical detail (file paths, decisions, signatures, exact values). Never pad.`, + ), }), ) .describe( @@ -59,16 +61,16 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType maxSummaryLength) { + if (entry.summary.length > maxSummaryLengthHard) { throw new Error( - `Summary too long (${entry.summary.length} chars, max ${maxSummaryLength}). Write a shorter summary focusing on key conclusions only.`, + `Summary too long (${entry.summary.length} chars, hard ceiling ${maxSummaryLengthHard}). Aim for <=${ctx.config.compress.maxSummaryLength}; exceed only when strictly necessary. 
Rewrite more concisely.`, ) } } diff --git a/lib/config-validation.ts b/lib/config-validation.ts index 34d3606..163c234 100644 --- a/lib/config-validation.ts +++ b/lib/config-validation.ts @@ -41,6 +41,7 @@ export const VALID_CONFIG_KEYS = new Set([ "compress.protectTags", "compress.protectUserMessages", "compress.maxSummaryLength", + "compress.maxSummaryLengthHard", "compress.minCompressRange", "gc", "gc.algorithm", @@ -402,6 +403,40 @@ export function validateConfigTypes(config: Record): ValidationErro }) } + if ( + compress.maxSummaryLengthHard !== undefined && + typeof compress.maxSummaryLengthHard !== "number" + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: "number", + actual: typeof compress.maxSummaryLengthHard, + }) + } + + if ( + typeof compress.maxSummaryLengthHard === "number" && + compress.maxSummaryLengthHard < 1 + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: "positive number (>= 1)", + actual: `${compress.maxSummaryLengthHard}`, + }) + } + + if ( + typeof compress.maxSummaryLength === "number" && + typeof compress.maxSummaryLengthHard === "number" && + compress.maxSummaryLengthHard < compress.maxSummaryLength + ) { + errors.push({ + key: "compress.maxSummaryLengthHard", + expected: `>= maxSummaryLength (${compress.maxSummaryLength})`, + actual: `${compress.maxSummaryLengthHard}`, + }) + } + if ( compress.minCompressRange !== undefined && typeof compress.minCompressRange !== "number" diff --git a/lib/config.ts b/lib/config.ts index 14a0ee6..4c7af04 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -31,6 +31,7 @@ export interface CompressConfig { protectTags: boolean protectUserMessages: boolean maxSummaryLength: number + maxSummaryLengthHard: number minCompressRange: number } @@ -197,6 +198,7 @@ const defaultConfig: PluginConfig = { protectTags: false, protectUserMessages: false, maxSummaryLength: 200, + maxSummaryLengthHard: 800, minCompressRange: 2000, }, strategies: { @@ -406,6 +408,7 @@ function 
mergeCompress( protectTags: override.protectTags ?? base.protectTags, protectUserMessages: override.protectUserMessages ?? base.protectUserMessages, maxSummaryLength: override.maxSummaryLength ?? base.maxSummaryLength, + maxSummaryLengthHard: override.maxSummaryLengthHard ?? base.maxSummaryLengthHard, minCompressRange: override.minCompressRange ?? base.minCompressRange, } } diff --git a/tests/config-validation.test.ts b/tests/config-validation.test.ts index 1063bee..2f63959 100644 --- a/tests/config-validation.test.ts +++ b/tests/config-validation.test.ts @@ -138,3 +138,28 @@ test("validateConfigTypes returns empty for undefined optional fields", () => { const result = validateConfigTypes({}) assert.deepEqual(result, []) }) + +test("validateConfigTypes accepts numeric compress.maxSummaryLengthHard", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLengthHard: 800 }, + }) + assert.deepEqual(result, []) +}) + +test("validateConfigTypes catches wrong type for compress.maxSummaryLengthHard", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLengthHard: "800" }, + }) + assert.equal(result.length, 1) + assert.equal(result[0].key, "compress.maxSummaryLengthHard") + assert.equal(result[0].actual, "string") +}) + +test("validateConfigTypes rejects compress.maxSummaryLengthHard < maxSummaryLength", () => { + const result = validateConfigTypes({ + compress: { maxSummaryLength: 200, maxSummaryLengthHard: 100 }, + }) + const hit = result.find((e) => e.key === "compress.maxSummaryLengthHard") + assert.ok(hit, "hard ceiling below soft target must be flagged") + assert.ok(hit!.expected.includes(">= maxSummaryLength")) +})