Skip to content

Commit 72ed36f

Browse files
authored
Merge branch 'dev' into feature/modelLimits
2 parents 76392cc + e387576 commit 72ed36f

File tree

4 files changed

+245
-51
lines changed

4 files changed

+245
-51
lines changed

lib/tools/compress.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType<typeof too
6363
throw new Error("content.summary is required and must be a non-empty string")
6464
}
6565

66-
logger.info("Compress tool invoked")
66+
// logger.info("Compress tool invoked")
6767
// logger.info(
6868
// JSON.stringify({
6969
// startString: startString?.substring(0, 50) + "...",

lib/tools/utils.ts

Lines changed: 213 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,82 +1,252 @@
1+
import { partial_ratio } from "fuzzball"
12
import type { WithParts, CompressSummary } from "../state"
23
import type { Logger } from "../logger"
34

4-
/**
5-
* Searches messages for a string and returns the message ID where it's found.
6-
* Searches in text parts, tool outputs, tool inputs, and other textual content.
7-
* Also searches through existing compress summaries to enable chained compression.
8-
* Throws an error if the string is not found or found more than once.
9-
*/
10-
export function findStringInMessages(
5+
export interface FuzzyConfig {
6+
minScore: number
7+
minGap: number
8+
}
9+
10+
export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = {
11+
minScore: 95,
12+
minGap: 15,
13+
}
14+
15+
interface MatchResult {
16+
messageId: string
17+
messageIndex: number
18+
score: number
19+
matchType: "exact" | "fuzzy"
20+
}
21+
22+
function extractMessageContent(msg: WithParts): string {
23+
const parts = Array.isArray(msg.parts) ? msg.parts : []
24+
let content = ""
25+
26+
for (const part of parts) {
27+
const p = part as Record<string, unknown>
28+
29+
switch (part.type) {
30+
case "text":
31+
case "reasoning":
32+
if (typeof p.text === "string") {
33+
content += " " + p.text
34+
}
35+
break
36+
37+
case "tool": {
38+
const state = p.state as Record<string, unknown> | undefined
39+
if (!state) break
40+
41+
// Include tool output (completed or error)
42+
if (state.status === "completed" && typeof state.output === "string") {
43+
content += " " + state.output
44+
} else if (state.status === "error" && typeof state.error === "string") {
45+
content += " " + state.error
46+
}
47+
48+
// Include tool input
49+
if (state.input) {
50+
content +=
51+
" " +
52+
(typeof state.input === "string"
53+
? state.input
54+
: JSON.stringify(state.input))
55+
}
56+
break
57+
}
58+
59+
case "compaction":
60+
if (typeof p.summary === "string") {
61+
content += " " + p.summary
62+
}
63+
break
64+
65+
case "subtask":
66+
if (typeof p.summary === "string") {
67+
content += " " + p.summary
68+
}
69+
if (typeof p.result === "string") {
70+
content += " " + p.result
71+
}
72+
break
73+
}
74+
}
75+
76+
return content
77+
}
78+
79+
function findExactMatches(
1180
messages: WithParts[],
1281
searchString: string,
13-
logger: Logger,
14-
compressSummaries: CompressSummary[] = [],
15-
stringType: "startString" | "endString",
16-
): { messageId: string; messageIndex: number } {
17-
const matches: { messageId: string; messageIndex: number }[] = []
82+
compressSummaries: CompressSummary[],
83+
): MatchResult[] {
84+
const matches: MatchResult[] = []
85+
const seenMessageIds = new Set<string>()
1886

19-
// First, search through existing compress summaries
20-
// This allows referencing text from previous compress operations
87+
// Search compress summaries first
2188
for (const summary of compressSummaries) {
2289
if (summary.summary.includes(searchString)) {
2390
const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
24-
if (anchorIndex !== -1) {
91+
if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
92+
seenMessageIds.add(summary.anchorMessageId)
2593
matches.push({
2694
messageId: summary.anchorMessageId,
2795
messageIndex: anchorIndex,
96+
score: 100,
97+
matchType: "exact",
2898
})
2999
}
30100
}
31101
}
32102

33-
// Then search through raw messages
103+
// Search raw messages
34104
for (let i = 0; i < messages.length; i++) {
35105
const msg = messages[i]
36-
const parts = Array.isArray(msg.parts) ? msg.parts : []
106+
if (seenMessageIds.has(msg.info.id)) continue
37107

38-
for (const part of parts) {
39-
let content = ""
108+
const content = extractMessageContent(msg)
109+
if (content.includes(searchString)) {
110+
seenMessageIds.add(msg.info.id)
111+
matches.push({
112+
messageId: msg.info.id,
113+
messageIndex: i,
114+
score: 100,
115+
matchType: "exact",
116+
})
117+
}
118+
}
40119

41-
if (part.type === "text" && typeof part.text === "string") {
42-
content = part.text
43-
} else if (part.type === "tool" && part.state?.status === "completed") {
44-
if (typeof part.state.output === "string") {
45-
content = part.state.output
46-
}
47-
if (part.state.input) {
48-
const inputStr =
49-
typeof part.state.input === "string"
50-
? part.state.input
51-
: JSON.stringify(part.state.input)
52-
content += " " + inputStr
53-
}
54-
}
120+
return matches
121+
}
55122

56-
if (content.includes(searchString)) {
57-
matches.push({ messageId: msg.info.id, messageIndex: i })
123+
function findFuzzyMatches(
124+
messages: WithParts[],
125+
searchString: string,
126+
compressSummaries: CompressSummary[],
127+
minScore: number,
128+
): MatchResult[] {
129+
const matches: MatchResult[] = []
130+
const seenMessageIds = new Set<string>()
131+
132+
// Search compress summaries first
133+
for (const summary of compressSummaries) {
134+
const score = partial_ratio(searchString, summary.summary)
135+
if (score >= minScore) {
136+
const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
137+
if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
138+
seenMessageIds.add(summary.anchorMessageId)
139+
matches.push({
140+
messageId: summary.anchorMessageId,
141+
messageIndex: anchorIndex,
142+
score,
143+
matchType: "fuzzy",
144+
})
58145
}
59146
}
60147
}
61148

62-
if (matches.length === 0) {
149+
// Search raw messages
150+
for (let i = 0; i < messages.length; i++) {
151+
const msg = messages[i]
152+
if (seenMessageIds.has(msg.info.id)) continue
153+
154+
const content = extractMessageContent(msg)
155+
const score = partial_ratio(searchString, content)
156+
if (score >= minScore) {
157+
seenMessageIds.add(msg.info.id)
158+
matches.push({
159+
messageId: msg.info.id,
160+
messageIndex: i,
161+
score,
162+
matchType: "fuzzy",
163+
})
164+
}
165+
}
166+
167+
return matches
168+
}
169+
170+
export function findStringInMessages(
171+
messages: WithParts[],
172+
searchString: string,
173+
logger: Logger,
174+
compressSummaries: CompressSummary[] = [],
175+
stringType: "startString" | "endString",
176+
fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG,
177+
): { messageId: string; messageIndex: number } {
178+
const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages
179+
const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined
180+
181+
const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries)
182+
183+
if (exactMatches.length === 1) {
184+
return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex }
185+
}
186+
187+
if (exactMatches.length > 1) {
63188
throw new Error(
64-
`${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`,
189+
`Found multiple matches for ${stringType}. ` +
190+
`Provide more surrounding context to uniquely identify the intended match.`,
65191
)
66192
}
67193

68-
if (matches.length > 1) {
194+
const fuzzyMatches = findFuzzyMatches(
195+
searchableMessages,
196+
searchString,
197+
compressSummaries,
198+
fuzzyConfig.minScore,
199+
)
200+
201+
if (fuzzyMatches.length === 0) {
202+
if (lastMessage) {
203+
const lastMsgContent = extractMessageContent(lastMessage)
204+
const lastMsgIndex = messages.length - 1
205+
if (lastMsgContent.includes(searchString)) {
206+
// logger.info(
207+
// `${stringType} found in last message (last resort) at index ${lastMsgIndex}`,
208+
// )
209+
return {
210+
messageId: lastMessage.info.id,
211+
messageIndex: lastMsgIndex,
212+
}
213+
}
214+
}
215+
69216
throw new Error(
70-
`Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`,
217+
`${stringType} not found in conversation. ` +
218+
`Make sure the string exists and is spelled exactly as it appears.`,
71219
)
72220
}
73221

74-
return matches[0]
222+
fuzzyMatches.sort((a, b) => b.score - a.score)
223+
224+
const best = fuzzyMatches[0]
225+
const secondBest = fuzzyMatches[1]
226+
227+
// Log fuzzy match candidates
228+
// logger.info(
229+
// `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` +
230+
// (secondBest
231+
// ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})`
232+
// : ""),
233+
// )
234+
235+
// Check confidence gap - best must be significantly better than second best
236+
if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) {
237+
throw new Error(
238+
`Found multiple matches for ${stringType}. ` +
239+
`Provide more unique surrounding context to disambiguate.`,
240+
)
241+
}
242+
243+
logger.info(
244+
`Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`,
245+
)
246+
247+
return { messageId: best.messageId, messageIndex: best.messageIndex }
75248
}
76249

77-
/**
78-
* Collects all tool callIDs from messages between start and end indices (inclusive).
79-
*/
80250
export function collectToolIdsInRange(
81251
messages: WithParts[],
82252
startIndex: number,
@@ -100,9 +270,6 @@ export function collectToolIdsInRange(
100270
return toolIds
101271
}
102272

103-
/**
104-
* Collects all message IDs from messages between start and end indices (inclusive).
105-
*/
106273
export function collectMessageIdsInRange(
107274
messages: WithParts[],
108275
startIndex: number,
@@ -120,10 +287,6 @@ export function collectMessageIdsInRange(
120287
return messageIds
121288
}
122289

123-
/**
124-
* Collects all textual content (text parts, tool inputs, and tool outputs)
125-
* from a range of messages. Used for token estimation.
126-
*/
127290
export function collectContentInRange(
128291
messages: WithParts[],
129292
startIndex: number,

package-lock.json

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
"dependencies": {
4545
"@anthropic-ai/tokenizer": "^0.0.4",
4646
"@opencode-ai/sdk": "^1.1.48",
47+
"fuzzball": "^2.2.3",
4748
"jsonc-parser": "^3.3.1",
4849
"ulid": "^3.0.2",
4950
"zod": "^4.3.6"

0 commit comments

Comments
 (0)