1+ import { partial_ratio } from "fuzzball"
12import type { WithParts , CompressSummary } from "../state"
23import type { Logger } from "../logger"
34
4- /**
5- * Searches messages for a string and returns the message ID where it's found.
6- * Searches in text parts, tool outputs, tool inputs, and other textual content.
7- * Also searches through existing compress summaries to enable chained compression.
8- * Throws an error if the string is not found or found more than once.
9- */
10- export function findStringInMessages (
/**
 * Tuning knobs for the fuzzy fallback used by `findStringInMessages`.
 */
export interface FuzzyConfig {
    /** Lowest similarity score a candidate must reach to count as a fuzzy match. */
    minScore: number
    /**
     * Minimum score difference required between the best and the second-best
     * fuzzy candidate; a smaller difference is treated as ambiguous.
     */
    minGap: number
}
9+
/**
 * Fuzzy-matching settings applied when a caller of `findStringInMessages`
 * does not pass its own configuration.
 */
export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = {
    minScore: 95,
    minGap: 15,
}
14+
/** One candidate location produced by the exact or fuzzy search helpers. */
interface MatchResult {
    /** ID of the message the match is attributed to. */
    messageId: string
    /** Index of that message within the array that was searched. */
    messageIndex: number
    /** Match score; exact matches are recorded with a score of 100. */
    score: number
    /** Which search strategy produced this candidate. */
    matchType: "exact" | "fuzzy"
}
21+
/**
 * Flattens the searchable text of a message into a single string.
 *
 * Every collected fragment is appended with a leading space, so a non-empty
 * result always starts with " ". Part types not listed below contribute nothing.
 *
 * - `text` / `reasoning`: the `text` field.
 * - `tool`: the `output` of a completed call or the `error` of a failed call,
 *   followed by the tool input (strings verbatim, anything else JSON-encoded).
 * - `compaction`: the `summary` field.
 * - `subtask`: the `summary` and `result` fields.
 *
 * @param msg Message whose parts are scanned; a non-array `parts` yields "".
 * @returns The concatenated text, or "" when nothing textual was found.
 */
function extractMessageContent(msg: WithParts): string {
    const parts = Array.isArray(msg.parts) ? msg.parts : []
    let content = ""

    for (const part of parts) {
        // Loosely typed view so fields can be probed without per-variant narrowing.
        const p = part as Record<string, unknown>

        switch (part.type) {
            case "text":
            case "reasoning":
                if (typeof p.text === "string") {
                    content += " " + p.text
                }
                break

            case "tool": {
                const state = p.state as Record<string, unknown> | undefined
                if (!state) break

                // Include tool output (completed or error)
                if (state.status === "completed" && typeof state.output === "string") {
                    content += " " + state.output
                } else if (state.status === "error" && typeof state.error === "string") {
                    content += " " + state.error
                }

                // Include tool input
                if (state.input) {
                    const serialized: unknown =
                        typeof state.input === "string"
                            ? state.input
                            : JSON.stringify(state.input)
                    // JSON.stringify yields undefined for values such as functions;
                    // skip those instead of appending the text "undefined".
                    if (typeof serialized === "string") {
                        content += " " + serialized
                    }
                }
                break
            }

            case "compaction":
                if (typeof p.summary === "string") {
                    content += " " + p.summary
                }
                break

            case "subtask":
                if (typeof p.summary === "string") {
                    content += " " + p.summary
                }
                if (typeof p.result === "string") {
                    content += " " + p.result
                }
                break
        }
    }

    return content
}
78+
/**
 * Finds every message that contains `searchString` as an exact substring.
 *
 * Compress summaries are checked first; a hit is attributed to the summary's
 * anchor message, provided that anchor is present in `messages`. Raw message
 * content is checked afterwards. Each message ID is reported at most once.
 *
 * @param messages Messages to search; returned indices refer to this array.
 * @param searchString Text that must occur verbatim.
 * @param compressSummaries Existing compress summaries to search as well.
 * @returns One entry per matching message, each with score 100 and type "exact".
 */
function findExactMatches(
    messages: WithParts[],
    searchString: string,
    compressSummaries: CompressSummary[],
): MatchResult[] {
    const matches: MatchResult[] = []
    const seenMessageIds = new Set<string>()

    // Search compress summaries first
    for (const summary of compressSummaries) {
        if (summary.summary.includes(searchString)) {
            const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
            if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
                seenMessageIds.add(summary.anchorMessageId)
                matches.push({
                    messageId: summary.anchorMessageId,
                    messageIndex: anchorIndex,
                    score: 100,
                    matchType: "exact",
                })
            }
        }
    }

    // Search raw messages
    for (let i = 0; i < messages.length; i++) {
        const msg = messages[i]
        // Already reported through a compress summary anchored at this message.
        if (seenMessageIds.has(msg.info.id)) continue

        const content = extractMessageContent(msg)
        if (content.includes(searchString)) {
            seenMessageIds.add(msg.info.id)
            matches.push({
                messageId: msg.info.id,
                messageIndex: i,
                score: 100,
                matchType: "exact",
            })
        }
    }

    return matches
}
55122
56- if ( content . includes ( searchString ) ) {
57- matches . push ( { messageId : msg . info . id , messageIndex : i } )
/**
 * Finds messages whose content approximately contains `searchString`.
 *
 * Similarity is computed with fuzzball's `partial_ratio`. Compress summaries
 * are scored first; a qualifying summary is attributed to its anchor message,
 * provided that anchor is present in `messages`. Raw message content is scored
 * afterwards. Each message ID is reported at most once.
 *
 * @param messages Messages to search; returned indices refer to this array.
 * @param searchString Text to look for.
 * @param compressSummaries Existing compress summaries to search as well.
 * @param minScore Candidates scoring below this value are discarded.
 * @returns One entry per qualifying message, each with type "fuzzy" and its score.
 */
function findFuzzyMatches(
    messages: WithParts[],
    searchString: string,
    compressSummaries: CompressSummary[],
    minScore: number,
): MatchResult[] {
    const matches: MatchResult[] = []
    const seenMessageIds = new Set<string>()

    // Search compress summaries first
    for (const summary of compressSummaries) {
        const score = partial_ratio(searchString, summary.summary)
        if (score >= minScore) {
            const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
            if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
                seenMessageIds.add(summary.anchorMessageId)
                matches.push({
                    messageId: summary.anchorMessageId,
                    messageIndex: anchorIndex,
                    score,
                    matchType: "fuzzy",
                })
            }
        }
    }

    // Search raw messages
    for (let i = 0; i < messages.length; i++) {
        const msg = messages[i]
        // Already reported through a compress summary anchored at this message.
        if (seenMessageIds.has(msg.info.id)) continue

        const content = extractMessageContent(msg)
        const score = partial_ratio(searchString, content)
        if (score >= minScore) {
            seenMessageIds.add(msg.info.id)
            matches.push({
                messageId: msg.info.id,
                messageIndex: i,
                score,
                matchType: "fuzzy",
            })
        }
    }

    return matches
}
169+
/**
 * Locates the single message that `searchString` refers to.
 *
 * Lookup order:
 * 1. Exact substring search over the compress summaries and all messages
 *    except the last one (when there is only one message, it is searched).
 * 2. Fuzzy search over the same set, used only when step 1 found nothing.
 * 3. Exact substring search in the last message, used only when step 2 also
 *    found nothing.
 *
 * NOTE(review): the reason the last message is held back until step 3 is not
 * visible in this file — confirm with the caller.
 *
 * @param messages Conversation messages; the returned index refers to this array.
 * @param searchString Text to locate.
 * @param logger Receives an info entry when a fuzzy match is accepted.
 * @param compressSummaries Existing compress summaries to search as well.
 * @param stringType Name of the argument being resolved; used in error and log text.
 * @param fuzzyConfig Thresholds for the fuzzy fallback.
 * @returns The ID and index of the matched message.
 * @throws Error when more than one exact match exists, when the best fuzzy
 *   match does not beat the runner-up by at least `fuzzyConfig.minGap`, or
 *   when nothing matches at all.
 */
export function findStringInMessages(
    messages: WithParts[],
    searchString: string,
    logger: Logger,
    compressSummaries: CompressSummary[] = [],
    stringType: "startString" | "endString",
    fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG,
): { messageId: string; messageIndex: number } {
    const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages
    const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined

    const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries)

    if (exactMatches.length === 1) {
        return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex }
    }

    if (exactMatches.length > 1) {
        throw new Error(
            `Found multiple matches for ${stringType}. ` +
                `Provide more surrounding context to uniquely identify the intended match.`,
        )
    }

    const fuzzyMatches = findFuzzyMatches(
        searchableMessages,
        searchString,
        compressSummaries,
        fuzzyConfig.minScore,
    )

    if (fuzzyMatches.length === 0) {
        // Last resort: an exact hit in the final message.
        if (lastMessage) {
            const lastMsgContent = extractMessageContent(lastMessage)
            const lastMsgIndex = messages.length - 1
            if (lastMsgContent.includes(searchString)) {
                return {
                    messageId: lastMessage.info.id,
                    messageIndex: lastMsgIndex,
                }
            }
        }

        throw new Error(
            `${stringType} not found in conversation. ` +
                `Make sure the string exists and is spelled exactly as it appears.`,
        )
    }

    // Highest score first.
    fuzzyMatches.sort((a, b) => b.score - a.score)

    const best = fuzzyMatches[0]
    const secondBest = fuzzyMatches[1]

    // Check confidence gap - best must be significantly better than second best
    if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) {
        throw new Error(
            `Found multiple matches for ${stringType}. ` +
                `Provide more unique surrounding context to disambiguate.`,
        )
    }

    logger.info(
        `Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`,
    )

    return { messageId: best.messageId, messageIndex: best.messageIndex }
}
76249
77- /**
78- * Collects all tool callIDs from messages between start and end indices (inclusive).
79- */
80250export function collectToolIdsInRange (
81251 messages : WithParts [ ] ,
82252 startIndex : number ,
@@ -100,9 +270,6 @@ export function collectToolIdsInRange(
100270 return toolIds
101271}
102272
103- /**
104- * Collects all message IDs from messages between start and end indices (inclusive).
105- */
106273export function collectMessageIdsInRange (
107274 messages : WithParts [ ] ,
108275 startIndex : number ,
@@ -120,10 +287,6 @@ export function collectMessageIdsInRange(
120287 return messageIds
121288}
122289
123- /**
124- * Collects all textual content (text parts, tool inputs, and tool outputs)
125- * from a range of messages. Used for token estimation.
126- */
127290export function collectContentInRange (
128291 messages : WithParts [ ] ,
129292 startIndex : number ,
0 commit comments