Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions src/noise-filter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* Noise Filter
* Filters out low-quality memories (meta-questions, agent denials, session boilerplate)
* and strips untrusted metadata wrappers from text before storage/retrieval.
* Inspired by openclaw-plugin-continuity's noise filtering approach.
*/

// Agent-side denial patterns
Expand Down Expand Up @@ -33,6 +33,23 @@ const BOILERPLATE_PATTERNS = [
/^HEARTBEAT/i,
];

// Known noisy wrappers injected by chat transport / system envelopes.
// Every pattern carries the `g` flag so that a single replace() call removes
// all occurrences; the line-anchored ones also carry `m` so `^`/`$` match at
// each line boundary rather than only at the ends of the whole string.
const METADATA_BLOCK_PATTERNS = [
// "Conversation info (untrusted metadata):" label followed by a fenced json
// block; the lazy `[\s\S]*?` stops at the first closing fence.
/Conversation info \(untrusted metadata\):\s*```json[\s\S]*?```/gi,
// Same shape as above for the "Sender (untrusted metadata):" label.
/Sender \(untrusted metadata\):\s*```json[\s\S]*?```/gi,
// Literal banner text, matched anywhere in the input.
/\[Queued messages while agent was busy\]/gi,
// A whole line of the form "--- Queued #<digits>" (whitespace optional).
/^\s*---\s*Queued\s*#\d+\s*$/gmi,
// A whole line of the form "Queued #<digits>" without the leading dashes.
/^\s*Queued\s*#\d+\s*$/gmi,
// A line consisting only of "---". NOTE: this also matches a Markdown
// horizontal rule that appears in otherwise legitimate content.
/^\s*---\s*$/gmi,
];

// Detection-only counterparts of the wrapper patterns: used with `.test()` to
// tell whether the input contained any wrapper text at all. They are
// case-insensitive and deliberately have no `g` flag, so `.test()` keeps no
// `lastIndex` state between calls. None of them is anchored, so a marker
// matches wherever it occurs in the text.
const METADATA_MARKERS = [
/Conversation info \(untrusted metadata\)/i,
/Sender \(untrusted metadata\)/i,
/\[Queued messages while agent was busy\]/i,
/Queued\s*#\d+/i,
];

export interface NoiseFilterOptions {
/** Filter agent denial responses (default: true) */
filterDenials?: boolean;
Expand All @@ -48,19 +65,48 @@ const DEFAULT_OPTIONS: Required<NoiseFilterOptions> = {
filterBoilerplate: true,
};

/**
 * Strip transport/system wrapper text from a memory string while keeping the
 * human-readable remainder.
 *
 * Each pattern in `METADATA_BLOCK_PATTERNS` is replaced by a single space,
 * then runs of three or more newlines are collapsed to two, runs of two or
 * more spaces/tabs are collapsed to one space, and the result is trimmed.
 *
 * @param text - Raw memory text; a falsy value is treated as the empty string.
 * @returns The cleaned text, or `""` when the input is empty after trimming.
 */
export function sanitizeMemoryText(text: string): string {
  const source = (text || "").trim();
  if (source.length === 0) {
    return "";
  }

  // Apply every wrapper pattern in declaration order, feeding each result
  // into the next replacement.
  const withoutWrappers: string = METADATA_BLOCK_PATTERNS.reduce(
    (acc: string, wrapper: RegExp): string => acc.replace(wrapper, " "),
    source,
  );

  const collapsedNewlines = withoutWrappers.replace(/\n{3,}/g, "\n\n");
  const collapsedSpaces = collapsedNewlines.replace(/[ \t]{2,}/g, " ");
  return collapsedSpaces.trim();
}

/**
 * Check if a memory text is noise that should be filtered out.
 *
 * The text is trimmed, then passed through `sanitizeMemoryText` to strip
 * transport/system wrappers. It is classified as noise when:
 *  - the trimmed or the sanitized text is shorter than 5 characters;
 *  - the trimmed text contains a metadata marker and less than 35% of its
 *    length survives sanitization (i.e. it was mostly wrapper);
 *  - the sanitized text matches an enabled denial, meta-question or
 *    boilerplate pattern.
 *
 * @param text - Candidate memory text; a falsy value is treated as empty.
 * @param options - Per-call overrides merged over `DEFAULT_OPTIONS`.
 * @returns `true` if the text is noise, `false` otherwise.
 */
export function isNoise(text: string, options: NoiseFilterOptions = {}): boolean {
  const opts = { ...DEFAULT_OPTIONS, ...options };
  // Single, null-safe declaration (the block previously declared `trimmed` twice).
  const trimmed = (text || "").trim();

  if (trimmed.length < 5) return true;

  const sanitized = sanitizeMemoryText(trimmed);
  if (sanitized.length < 5) return true;

  // If text is mostly wrappers/metadata after sanitization, treat as noise.
  // Markers are tested against the unsanitized text because sanitization
  // removes them.
  const hasMetadataMarker = METADATA_MARKERS.some((p: RegExp) => p.test(trimmed));
  if (hasMetadataMarker) {
    // Math.max guards the division; trimmed.length is >= 5 at this point anyway.
    const keepRatio = sanitized.length / Math.max(1, trimmed.length);
    if (keepRatio < 0.35) return true;
  }

  // Pattern checks run once, on the sanitized text, so wrapper text in front
  // of the real content cannot defeat the `^`-anchored patterns.
  if (opts.filterDenials && DENIAL_PATTERNS.some((p: RegExp) => p.test(sanitized))) return true;
  if (opts.filterMetaQuestions && META_QUESTION_PATTERNS.some((p: RegExp) => p.test(sanitized))) return true;
  if (opts.filterBoilerplate && BOILERPLATE_PATTERNS.some((p: RegExp) => p.test(sanitized))) return true;

  return false;
}
Expand Down
30 changes: 17 additions & 13 deletions src/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { homedir } from "node:os";
import { join } from "node:path";
import type { MemoryRetriever, RetrievalResult } from "./retriever.js";
import type { MemoryStore } from "./store.js";
import { isNoise } from "./noise-filter.js";
import { isNoise, sanitizeMemoryText } from "./noise-filter.js";
import type { MemoryScopeManager } from "./scopes.js";
import type { Embedder } from "./embedder.js";
import { appendSelfImprovementEntry, ensureSelfImprovementLearningFiles } from "./self-improvement-files.js";
Expand Down Expand Up @@ -68,7 +68,7 @@ function clamp01(value: number, fallback = 0.7): number {
function sanitizeMemoryForSerialization(results: RetrievalResult[]) {
return results.map((r) => ({
id: r.entry.id,
text: r.entry.text,
text: sanitizeMemoryText(r.entry.text),
category: getDisplayCategoryTag(r.entry),
rawCategory: r.entry.category,
scope: r.entry.scope,
Expand Down Expand Up @@ -414,7 +414,8 @@ export function registerMemoryRecallTool(
if (r.sources.reranked) sources.push("reranked");

const categoryTag = getDisplayCategoryTag(r.entry);
return `${i + 1}. [${r.entry.id}] [${categoryTag}] ${r.entry.text} (${(r.score * 100).toFixed(0)}%${sources.length > 0 ? `, ${sources.join("+")}` : ""})`;
const cleanText = sanitizeMemoryText(r.entry.text) || r.entry.text;
return `${i + 1}. [${r.entry.id}] [${categoryTag}] ${cleanText} (${(r.score * 100).toFixed(0)}%${sources.length > 0 ? `, ${sources.join("+")}` : ""})`;
})
.join("\n");

Expand Down Expand Up @@ -508,21 +509,22 @@ export function registerMemoryStoreTool(
};
}

// Reject noise before wasting an embedding API call
if (isNoise(text)) {
// Strip transport/system wrappers and reject noise before embedding
const cleanedText = sanitizeMemoryText(text);
if (isNoise(cleanedText)) {
return {
content: [
{
type: "text",
text: `Skipped: text detected as noise (greeting, boilerplate, or meta-question)`,
text: `Skipped: text detected as noise (greeting, boilerplate, metadata, or meta-question)`,
},
],
details: { action: "noise_filtered", text: text.slice(0, 60) },
details: { action: "noise_filtered", text: cleanedText.slice(0, 60) },
};
}

const safeImportance = clamp01(importance, 0.7);
const vector = await context.embedder.embedPassage(text);
const vector = await context.embedder.embedPassage(cleanedText);

// Check for duplicates using raw vector similarity (bypasses importance/recency weighting)
// Fail-open by design: dedup must never block a legitimate memory write.
Expand Down Expand Up @@ -859,25 +861,27 @@ export function registerMemoryUpdateTool(
}
}

// If text changed, re-embed; reject noise
// If text changed, sanitize first, then re-embed
let newVector: number[] | undefined;
let cleanedUpdateText: string | undefined;
if (text) {
if (isNoise(text)) {
cleanedUpdateText = sanitizeMemoryText(text);
if (isNoise(cleanedUpdateText)) {
return {
content: [
{
type: "text",
text: "Skipped: updated text detected as noise",
text: "Skipped: updated text detected as noise or metadata wrapper",
},
],
details: { action: "noise_filtered" },
};
}
newVector = await context.embedder.embedPassage(text);
newVector = await context.embedder.embedPassage(cleanedUpdateText);
}

const updates: Record<string, any> = {};
if (text) updates.text = text;
if (cleanedUpdateText) updates.text = cleanedUpdateText;
if (newVector) updates.vector = newVector;
if (importance !== undefined)
updates.importance = clamp01(importance, 0.7);
Expand Down