From 8cd2c08bffc8327760c040c35809e440abfce4ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=89=BE=E6=A3=AE?=
Date: Tue, 10 Mar 2026 17:00:48 +0800
Subject: [PATCH] fix: add structural noise filtering for system traces and raw blobs

Add STRUCTURAL_NOISE_PATTERNS to noise-filter.ts covering:
- System: prefixed runtime messages
- Compaction/model-switch/session-management traces
- OpenClaw (untrusted metadata) wrapper remnants
- Pure JSON objects and XML-wrapped content
- Multi-line blockquote blobs (>= 3 quoted lines)

New filterStructuralNoise option (default: true) in NoiseFilterOptions.

Closes #127
---
 src/noise-filter.ts                   | 16 ++++++++++++++
 test/noise-filter-structural.test.mjs | 32 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 test/noise-filter-structural.test.mjs

diff --git a/src/noise-filter.ts b/src/noise-filter.ts
index b21cf37..50c057c 100644
--- a/src/noise-filter.ts
+++ b/src/noise-filter.ts
@@ -50,6 +50,18 @@ const DIAGNOSTIC_ARTIFACT_PATTERNS = [
   /\bno explicit solution\b/i,
 ];
 
+// Structural noise: runtime traces, raw blobs, wrappers, and malformed fragments
+const STRUCTURAL_NOISE_PATTERNS = [
+  /^System\s*:/i,
+  /compacti(ng|on)\s*(context|safeguard)/i,
+  /model\s*(switch|switched|changed|swap)/i,
+  /session\s*(reset|restart|start|end)/i,
+  /\(untrusted metadata\)\s*:/i,
+  /^\{[\s\S]*\}$/,
+  /<[a-z-]+>[\s\S]*<\/[a-z-]+>/i,
+  /(?:^>.*(?:\r?\n|$)){3,}/m, // >= 3 quoted lines; the last one may end the input without a newline
+];
+
 export interface NoiseFilterOptions {
   /** Filter agent denial responses (default: true) */
   filterDenials?: boolean;
@@ -57,12 +69,15 @@ export interface NoiseFilterOptions {
   filterMetaQuestions?: boolean;
   /** Filter session boilerplate (default: true) */
   filterBoilerplate?: boolean;
+  /** Filter structural noise (default: true) */
+  filterStructuralNoise?: boolean;
 }
 
 const DEFAULT_OPTIONS: Required<NoiseFilterOptions> = {
   filterDenials: true,
   filterMetaQuestions: true,
   filterBoilerplate: true,
+  filterStructuralNoise: true,
 };
 
 /**
@@ -78,6 +93,7 @@ export function isNoise(text: string, options: NoiseFilterOptions = {}): boolean
   if (opts.filterDenials && DENIAL_PATTERNS.some(p => p.test(trimmed))) return true;
   if (opts.filterMetaQuestions && META_QUESTION_PATTERNS.some(p => p.test(trimmed))) return true;
   if (opts.filterBoilerplate && BOILERPLATE_PATTERNS.some(p => p.test(trimmed))) return true;
+  if (opts.filterStructuralNoise && STRUCTURAL_NOISE_PATTERNS.some(p => p.test(trimmed))) return true;
   if (DIAGNOSTIC_ARTIFACT_PATTERNS.some(p => p.test(trimmed))) return true;
 
   return false;
diff --git a/test/noise-filter-structural.test.mjs b/test/noise-filter-structural.test.mjs
new file mode 100644
index 0000000..640dc0b
--- /dev/null
+++ b/test/noise-filter-structural.test.mjs
@@ -0,0 +1,32 @@
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+
+const { isNoise } = jiti("../src/noise-filter.ts");
+
+describe("noise-filter structural patterns", () => {
+  const samples = [
+    "System: compaction safeguard engaged",
+    "Compaction context safeguard tripped",
+    "model switch detected",
+    "model changed to gpt-5",
+    "session reset due to inactivity",
+    "(untrusted metadata): Sender (untrusted metadata): foo",
+    "{\"type\":\"meta\",\"payload\":{\"note\":\"wrapper\"}}",
+    "<wrapper>foo</wrapper>",
+    "> quote one\n> quote two\n> quote three",
+  ];
+
+  for (const input of samples) {
+    it(`filters structural noise: ${input.slice(0, 40)}`, () => {
+      assert.equal(isNoise(input), true);
+    });
+  }
+
+  it("allows structural noise when disabled", () => {
+    const input = "System: model switch detected";
+    assert.equal(isNoise(input, { filterStructuralNoise: false }), false);
+  });
+});