From ee0b4e305ce4578919e46ac38923650fc3e420c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=97=B2=E9=9D=99=E9=9B=85?= Date: Wed, 11 Mar 2026 14:39:07 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20add=20degeneracy=20guard=20to=20NoisePro?= =?UTF-8?q?totypeBank=20=E2=80=94=20prevent=20false-positive=20noise=20fil?= =?UTF-8?q?tering=20with=20non-discriminative=20embeddings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the embedding model produces identical vectors for all inputs (e.g. deterministic mock embeddings in tests), every text matches every noise prototype with cosine similarity 1.0, causing the noise filter to reject all content. Smart extraction is skipped entirely, falling back to regex which also captures nothing. Add a self-diagnostic after init(): if the first two prototype vectors have cosine similarity > 0.98, the bank recognizes the embedding model is degenerate and disables itself. This fixes the flaky smart-extractor-branches.mjs test on master (all 9 scenarios now pass reliably). --- src/noise-prototypes.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/noise-prototypes.ts b/src/noise-prototypes.ts index ac62dbf..4dc8827 100644 --- a/src/noise-prototypes.ts +++ b/src/noise-prototypes.ts @@ -83,6 +83,23 @@ export class NoisePrototypeBank { } this.builtinCount = this.vectors.length; this._initialized = true; + + // Degeneracy check: if all prototype vectors are nearly identical, the + // embedding model does not produce discriminative outputs (e.g. a + // deterministic mock that ignores text). In that case the noise bank + // would flag every input as noise, so we disable ourselves. + if (this.vectors.length >= 2) { + const sim = cosine(this.vectors[0], this.vectors[1]); + if (sim > 0.98) { + this.debugLog( + `noise-prototype-bank: degenerate embeddings detected (pairwise cosine=${sim.toFixed(4)}), disabling noise filter`, + ); + this._initialized = false; + this.vectors = []; + return; + } + } + this.debugLog( `noise-prototype-bank: initialized with ${this.builtinCount} built-in prototypes`, );