From f09ea8c85dfe7ce9dd4c8a0319c7dd871ee8def3 Mon Sep 17 00:00:00 2001 From: Hung Nguyen Date: Sat, 6 Jun 2026 08:05:57 +0700 Subject: [PATCH] fix(agent): strip anti-loop warning before cross-turn re-seed hash The injected [SYSTEM WARNING] embeds a consecutive-call count, so persisted tool results hashed differently when loopDetectorFromHistory re-seeded across turns, making the kill threshold unreachable. Strip the suffix on re-seed to restore byte-identity with the live AddCall path. --- pkg/agent/anti_loop.go | 21 ++++++++++++++++- pkg/agent/anti_loop_test.go | 46 +++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/pkg/agent/anti_loop.go b/pkg/agent/anti_loop.go index be8a5a0..d638139 100644 --- a/pkg/agent/anti_loop.go +++ b/pkg/agent/anti_loop.go @@ -3,6 +3,7 @@ package agent import ( "fmt" "hash/fnv" + "strings" "sync" "github.com/hung12ct/gopheragent/pkg/history" @@ -19,6 +20,24 @@ const loopWarnThreshold = 3 // pattern the detector cares about while keeping the struct cache-friendly. const maxRecentCalls = 30 +// loopWarnMarker is the prefix of the anti-loop warning that runToolCall +// appends to a tool result before persisting it (loop_execute.go). The live +// AddCall path hashes the raw result (the warning is appended afterwards), but +// loopDetectorFromHistory re-reads the persisted content, which carries the +// warning. Because the warning embeds the consecutive count ("3 times" vs +// "4 times"), each persisted result would otherwise hash differently, so the +// kill threshold could never be reached across turns. Stripping at this marker +// restores byte-identity with the live path. Keep in sync with the Detect +// warning format below and the append in loop_execute.go. +const loopWarnMarker = "\n\n[SYSTEM WARNING:" + +// stripLoopWarning removes the anti-loop warning suffix appended to a persisted +// tool result so its hash matches the live raw result the model first saw. +func stripLoopWarning(content string) string { + before, _, _ := strings.Cut(content, loopWarnMarker) + return before +} + // callEntry records a single tool invocation for loop detection. Hashes are // FNV-64 sums of args/result — equality is the only operation performed on // them, so cryptographic strength buys nothing. @@ -70,7 +89,7 @@ func loopDetectorFromHistory(msgs []history.Message) *loopDetector { results := make(map[string]string, len(msgs)/2+1) for i := range msgs { if msgs[i].Role == "tool" && msgs[i].ToolCallID != "" { - results[msgs[i].ToolCallID] = msgs[i].Content + results[msgs[i].ToolCallID] = stripLoopWarning(msgs[i].Content) } } // Collect entries in chronological order. We cap at maxRecentCalls diff --git a/pkg/agent/anti_loop_test.go b/pkg/agent/anti_loop_test.go index 9a8fa1d..2ce466d 100644 --- a/pkg/agent/anti_loop_test.go +++ b/pkg/agent/anti_loop_test.go @@ -178,6 +178,52 @@ func TestLoopDetectorFromHistory_DifferentTrailingToolNoFalsePositive(t *testing } } +func TestLoopDetectorFromHistory_WarningSuffixDoesNotPoisonHash(t *testing.T) { + // Regression (Phin memory_list loop): the anti-loop warning is appended to + // the tool result before it is persisted (loop_execute.go). On the next turn + // loopDetectorFromHistory re-reads that persisted content; because the warning + // embeds the live consecutive count ("3 times" vs "4 times"), each result + // would hash differently and loopKillThreshold could never be reached across + // turns — the model loops forever, only ever re-warned. Stripping the warning + // before hashing restores byte-identity with the live raw call so the kill + // fires. + const rawResult = `{"keys":[],"count":0}` + var msgs []history.Message + for i := 0; i < loopKillThreshold; i++ { + id := fmt.Sprintf("m%d", i) + content := rawResult + // Mirror loop_execute.go: the warning is appended once the count crosses + // the warn threshold, carrying the live count in its text. + if n := i + 1; n >= loopWarnThreshold { + content += fmt.Sprintf("\n\n[SYSTEM WARNING: You have called memory_list with the exact same arguments %d times consecutively. STOP doing this and try a different approach.]", n) + } + msgs = append(msgs, + history.Message{Role: "assistant", ToolCalls: []history.ToolCall{ + {ID: id, Name: "memory_list", Arguments: `{"count":3}`}, + }}, + history.Message{Role: "tool", ToolCallID: id, Content: content}, + ) + } + ld := loopDetectorFromHistory(msgs) + if got := ld.Len(); got != loopKillThreshold { + t.Fatalf("expected %d seeded entries, got %d", loopKillThreshold, got) + } + if _, err := ld.Detect(); err == nil { + t.Fatal("expected kill after loopKillThreshold identical calls; warning suffix poisoned the result hash") + } +} + +func TestStripLoopWarning(t *testing.T) { + const raw = `{"keys":[],"count":0}` + if got := stripLoopWarning(raw); got != raw { + t.Fatalf("raw result must pass through unchanged; got %q", got) + } + warned := raw + "\n\n[SYSTEM WARNING: You have called memory_list with the exact same arguments 4 times consecutively. STOP doing this and try a different approach.]" + if got := stripLoopWarning(warned); got != raw { + t.Fatalf("warning suffix must be stripped; got %q", got) + } +} + func BenchmarkLoopDetector_DetectNoLoop(b *testing.B) { ld := newLoopDetector() for i := 0; i < 10; i++ {