Merge branch 'dev' into feature/modelLimits

Tarquinen · web-flow · commit 72ed36f9539b · 2026-02-06T21:15:14.000-05:00
diff --git a/lib/tools/compress.ts b/lib/tools/compress.ts
@@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType<typeof too
                 throw new Error("content.summary is required and must be a non-empty string")
             }
 
-            logger.info("Compress tool invoked")
+            // logger.info("Compress tool invoked")
             // logger.info(
             //     JSON.stringify({
             //         startString: startString?.substring(0, 50) + "...",
diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts
@@ -1,82 +1,252 @@
+import { partial_ratio } from "fuzzball"
 import type { WithParts, CompressSummary } from "../state"
 import type { Logger } from "../logger"
 
-/**
- * Searches messages for a string and returns the message ID where it's found.
- * Searches in text parts, tool outputs, tool inputs, and other textual content.
- * Also searches through existing compress summaries to enable chained compression.
- * Throws an error if the string is not found or found more than once.
- */
-export function findStringInMessages(
+export interface FuzzyConfig {
+    minScore: number
+    minGap: number
+}
+
+export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = {
+    minScore: 95,
+    minGap: 15,
+}
+
+interface MatchResult {
+    messageId: string
+    messageIndex: number
+    score: number
+    matchType: "exact" | "fuzzy"
+}
+
+function extractMessageContent(msg: WithParts): string {
+    const parts = Array.isArray(msg.parts) ? msg.parts : []
+    let content = ""
+
+    for (const part of parts) {
+        const p = part as Record<string, unknown>
+
+        switch (part.type) {
+            case "text":
+            case "reasoning":
+                if (typeof p.text === "string") {
+                    content += " " + p.text
+                }
+                break
+
+            case "tool": {
+                const state = p.state as Record<string, unknown> | undefined
+                if (!state) break
+
+                // Include tool output (completed or error)
+                if (state.status === "completed" && typeof state.output === "string") {
+                    content += " " + state.output
+                } else if (state.status === "error" && typeof state.error === "string") {
+                    content += " " + state.error
+                }
+
+                // Include tool input
+                if (state.input) {
+                    content +=
+                        " " +
+                        (typeof state.input === "string"
+                            ? state.input
+                            : JSON.stringify(state.input))
+                }
+                break
+            }
+
+            case "compaction":
+                if (typeof p.summary === "string") {
+                    content += " " + p.summary
+                }
+                break
+
+            case "subtask":
+                if (typeof p.summary === "string") {
+                    content += " " + p.summary
+                }
+                if (typeof p.result === "string") {
+                    content += " " + p.result
+                }
+                break
+        }
+    }
+
+    return content
+}
+
+function findExactMatches(
     messages: WithParts[],
     searchString: string,
-    logger: Logger,
-    compressSummaries: CompressSummary[] = [],
-    stringType: "startString" | "endString",
-): { messageId: string; messageIndex: number } {
-    const matches: { messageId: string; messageIndex: number }[] = []
+    compressSummaries: CompressSummary[],
+): MatchResult[] {
+    const matches: MatchResult[] = []
+    const seenMessageIds = new Set<string>()
 
-    // First, search through existing compress summaries
-    // This allows referencing text from previous compress operations
+    // Search compress summaries first
     for (const summary of compressSummaries) {
         if (summary.summary.includes(searchString)) {
             const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
-            if (anchorIndex !== -1) {
+            if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
+                seenMessageIds.add(summary.anchorMessageId)
                 matches.push({
                     messageId: summary.anchorMessageId,
                     messageIndex: anchorIndex,
+                    score: 100,
+                    matchType: "exact",
                 })
             }
         }
     }
 
-    // Then search through raw messages
+    // Search raw messages
     for (let i = 0; i < messages.length; i++) {
         const msg = messages[i]
-        const parts = Array.isArray(msg.parts) ? msg.parts : []
+        if (seenMessageIds.has(msg.info.id)) continue
 
-        for (const part of parts) {
-            let content = ""
+        const content = extractMessageContent(msg)
+        if (content.includes(searchString)) {
+            seenMessageIds.add(msg.info.id)
+            matches.push({
+                messageId: msg.info.id,
+                messageIndex: i,
+                score: 100,
+                matchType: "exact",
+            })
+        }
+    }
 
-            if (part.type === "text" && typeof part.text === "string") {
-                content = part.text
-            } else if (part.type === "tool" && part.state?.status === "completed") {
-                if (typeof part.state.output === "string") {
-                    content = part.state.output
-                }
-                if (part.state.input) {
-                    const inputStr =
-                        typeof part.state.input === "string"
-                            ? part.state.input
-                            : JSON.stringify(part.state.input)
-                    content += " " + inputStr
-                }
-            }
+    return matches
+}
 
-            if (content.includes(searchString)) {
-                matches.push({ messageId: msg.info.id, messageIndex: i })
+function findFuzzyMatches(
+    messages: WithParts[],
+    searchString: string,
+    compressSummaries: CompressSummary[],
+    minScore: number,
+): MatchResult[] {
+    const matches: MatchResult[] = []
+    const seenMessageIds = new Set<string>()
+
+    // Search compress summaries first
+    for (const summary of compressSummaries) {
+        const score = partial_ratio(searchString, summary.summary)
+        if (score >= minScore) {
+            const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
+            if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
+                seenMessageIds.add(summary.anchorMessageId)
+                matches.push({
+                    messageId: summary.anchorMessageId,
+                    messageIndex: anchorIndex,
+                    score,
+                    matchType: "fuzzy",
+                })
             }
         }
     }
 
-    if (matches.length === 0) {
+    // Search raw messages
+    for (let i = 0; i < messages.length; i++) {
+        const msg = messages[i]
+        if (seenMessageIds.has(msg.info.id)) continue
+
+        const content = extractMessageContent(msg)
+        const score = partial_ratio(searchString, content)
+        if (score >= minScore) {
+            seenMessageIds.add(msg.info.id)
+            matches.push({
+                messageId: msg.info.id,
+                messageIndex: i,
+                score,
+                matchType: "fuzzy",
+            })
+        }
+    }
+
+    return matches
+}
+
+export function findStringInMessages(
+    messages: WithParts[],
+    searchString: string,
+    logger: Logger,
+    compressSummaries: CompressSummary[] = [],
+    stringType: "startString" | "endString",
+    fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG,
+): { messageId: string; messageIndex: number } {
+    const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages
+    const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined
+
+    const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries)
+
+    if (exactMatches.length === 1) {
+        return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex }
+    }
+
+    if (exactMatches.length > 1) {
         throw new Error(
-            `${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`,
+            `Found multiple matches for ${stringType}. ` +
+                `Provide more surrounding context to uniquely identify the intended match.`,
         )
     }
 
-    if (matches.length > 1) {
+    const fuzzyMatches = findFuzzyMatches(
+        searchableMessages,
+        searchString,
+        compressSummaries,
+        fuzzyConfig.minScore,
+    )
+
+    if (fuzzyMatches.length === 0) {
+        if (lastMessage) {
+            const lastMsgContent = extractMessageContent(lastMessage)
+            const lastMsgIndex = messages.length - 1
+            if (lastMsgContent.includes(searchString)) {
+                // logger.info(
+                //     `${stringType} found in last message (last resort) at index ${lastMsgIndex}`,
+                // )
+                return {
+                    messageId: lastMessage.info.id,
+                    messageIndex: lastMsgIndex,
+                }
+            }
+        }
+
         throw new Error(
-            `Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`,
+            `${stringType} not found in conversation. ` +
+                `Make sure the string exists and is spelled exactly as it appears.`,
         )
     }
 
-    return matches[0]
+    fuzzyMatches.sort((a, b) => b.score - a.score)
+
+    const best = fuzzyMatches[0]
+    const secondBest = fuzzyMatches[1]
+
+    // Log fuzzy match candidates
+    // logger.info(
+    //     `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` +
+    //     (secondBest
+    //         ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})`
+    //         : ""),
+    // )
+
+    // Check confidence gap - best must be significantly better than second best
+    if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) {
+        throw new Error(
+            `Found multiple matches for ${stringType}. ` +
+                `Provide more unique surrounding context to disambiguate.`,
+        )
+    }
+
+    logger.info(
+        `Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`,
+    )
+
+    return { messageId: best.messageId, messageIndex: best.messageIndex }
 }
 
-/**
- * Collects all tool callIDs from messages between start and end indices (inclusive).
- */
 export function collectToolIdsInRange(
     messages: WithParts[],
     startIndex: number,
@@ -100,9 +270,6 @@ export function collectToolIdsInRange(
     return toolIds
 }
 
-/**
- * Collects all message IDs from messages between start and end indices (inclusive).
- */
 export function collectMessageIdsInRange(
     messages: WithParts[],
     startIndex: number,
@@ -120,10 +287,6 @@ export function collectMessageIdsInRange(
     return messageIds
 }
 
-/**
- * Collects all textual content (text parts, tool inputs, and tool outputs)
- * from a range of messages. Used for token estimation.
- */
 export function collectContentInRange(
     messages: WithParts[],
     startIndex: number,
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -44,6 +44,7 @@
     "dependencies": {
         "@anthropic-ai/tokenizer": "^0.0.4",
         "@opencode-ai/sdk": "^1.1.48",
+        "fuzzball": "^2.2.3",
         "jsonc-parser": "^3.3.1",
         "ulid": "^3.0.2",
         "zod": "^4.3.6"

Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType<typeof too`
`63`	`63`	`throw new Error("content.summary is required and must be a non-empty string")`
`64`	`64`	`}`
`65`	`65`
`66`		`- logger.info("Compress tool invoked")`
	`66`	`+ // logger.info("Compress tool invoked")`
`67`	`67`	`// logger.info(`
`68`	`68`	`// JSON.stringify({`
`69`	`69`	`// startString: startString?.substring(0, 50) + "...",`