From c79a80762f874babb8b5917d5230be23fbe186d9 Mon Sep 17 00:00:00 2001 From: lpf <398618101@qq.com> Date: Wed, 11 Mar 2026 11:10:04 +0800 Subject: [PATCH 1/5] =?UTF-8?q?fix:=20retrieval=20correctness=20=E2=80=94?= =?UTF-8?q?=20cosine=20distance,=20FTS=20diagnostics,=20CLI=20source=20typ?= =?UTF-8?q?ing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - store.ts: add .distanceType('cosine') to vectorSearch (critical: L2 default drops valid results) - store.ts: add getFtsStatus(), rebuildFtsIndex() for BM25 health diagnostics - retriever.ts: extend source typing with 'cli' for CLI trace distinction - cli.ts: mark CLI retrievals with source='cli', add reindex-fts command - test: add vector-search-cosine.test.mjs (4 tests) --- cli.ts | 35 ++++++-- src/retriever.ts | 4 +- src/store.ts | 65 ++++++++++++--- test/vector-search-cosine.test.mjs | 123 +++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+), 19 deletions(-) create mode 100644 test/vector-search-cosine.test.mjs diff --git a/cli.ts b/cli.ts index 946cfb1..0781f21 100644 --- a/cli.ts +++ b/cli.ts @@ -83,6 +83,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { limit, scopeFilter, category, + source: "cli", }); if (results.length === 0 && context.embedder) { @@ -92,6 +93,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { limit, scopeFilter, category, + source: "cli", }); } @@ -417,10 +419,10 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { const categoryRaw = memory.category; const category: MemoryEntry["category"] = categoryRaw === "preference" || - categoryRaw === "fact" || - categoryRaw === "decision" || - categoryRaw === "entity" || - categoryRaw === "other" + categoryRaw === "fact" || + categoryRaw === "decision" || + categoryRaw === "entity" || + categoryRaw === "other" ? categoryRaw : "other"; @@ -531,10 +533,10 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { let targetReal = context.store.dbPath; try { sourceReal = await fs.realpath(sourceDbPath); - } catch {} + } catch { } try { targetReal = await fs.realpath(context.store.dbPath); - } catch {} + } catch { } if (!force && sourceReal === targetReal) { console.error("Refusing to re-embed in-place: source-db equals target dbPath. Use a new dbPath or pass --force."); @@ -781,6 +783,27 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { process.exit(1); } }); + + // reindex-fts: Rebuild FTS index + program + .command("reindex-fts") + .description("Rebuild the BM25 full-text search index") + .action(async () => { + try { + const status = context.store.getFtsStatus(); + console.log(`FTS status before: available=${status.available}, lastError=${status.lastError || "none"}`); + const result = await context.store.rebuildFtsIndex(); + if (result.success) { + console.log("✅ FTS index rebuilt successfully"); + } else { + console.error("❌ FTS rebuild failed:", result.error); + process.exit(1); + } + } catch (error) { + console.error("FTS rebuild error:", error); + process.exit(1); + } + }); } // ============================================================================ diff --git a/src/retriever.ts b/src/retriever.ts index 44f3f47..ecf4e31 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -80,8 +80,8 @@ export interface RetrievalContext { limit: number; scopeFilter?: string[]; category?: string; - /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated. */ - source?: "manual" | "auto-recall"; + /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated, "cli" for CLI commands. */ + source?: "manual" | "auto-recall" | "cli"; } export interface RetrievalResult extends MemorySearchResult { diff --git a/src/store.ts b/src/store.ts index 764fa55..ca466cf 100644 --- a/src/store.ts +++ b/src/store.ts @@ -121,8 +121,8 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `dbPath "${dbPath}" is a symlink whose target does not exist.\n` + - ` Fix: Create the target directory, or update the symlink to point to a valid path.\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Create the target directory, or update the symlink to point to a valid path.\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } } @@ -147,9 +147,9 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `Failed to create dbPath directory "${resolvedPath}".\n` + - ` Fix: Ensure the parent directory "${dirname(resolvedPath)}" exists and is writable,\n` + - ` or create it manually: mkdir -p "${resolvedPath}"\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Ensure the parent directory "${dirname(resolvedPath)}" exists and is writable,\n` + + ` or create it manually: mkdir -p "${resolvedPath}"\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } } @@ -160,9 +160,9 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `dbPath directory "${resolvedPath}" is not writable.\n` + - ` Fix: Check permissions with: ls -la "${dirname(resolvedPath)}"\n` + - ` Or grant write access: chmod u+w "${resolvedPath}"\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Check permissions with: ls -la "${dirname(resolvedPath)}"\n` + + ` Or grant write access: chmod u+w "${resolvedPath}"\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } @@ -182,7 +182,7 @@ export class MemoryStore { private ftsIndexCreated = false; private updateQueue: Promise = Promise.resolve(); - constructor(private readonly config: StoreConfig) {} + constructor(private readonly config: StoreConfig) { } get dbPath(): string { return this.config.dbPath; @@ -214,7 +214,7 @@ export class MemoryStore { const message = err.message || String(err); throw new Error( `Failed to open LanceDB at "${this.config.dbPath}": ${code} ${message}\n` + - ` Fix: Verify the path exists and is writable. Check parent directory permissions.`, + ` Fix: Verify the path exists and is writable. Check parent directory permissions.`, ); } @@ -421,7 +421,7 @@ export class MemoryStore { const safeLimit = clampInt(limit, 1, 20); const fetchLimit = Math.min(safeLimit * 10, 200); // Over-fetch for scope filtering - let query = this.table!.vectorSearch(vector).limit(fetchLimit); + let query = this.table!.vectorSearch(vector).distanceType('cosine').limit(fetchLimit); // Apply scope filter if provided if (scopeFilter && scopeFilter.length > 0) { @@ -952,4 +952,47 @@ export class MemoryStore { get hasFtsSupport(): boolean { return this.ftsIndexCreated; } + + /** Last FTS error for diagnostics */ + private _lastFtsError: string | null = null; + + get lastFtsError(): string | null { + return this._lastFtsError; + } + + /** Get FTS index health status */ + getFtsStatus(): { available: boolean; lastError: string | null } { + return { + available: this.ftsIndexCreated, + lastError: this._lastFtsError, + }; + } + + /** Rebuild FTS index (drops and recreates). Useful for recovery after corruption. */ + async rebuildFtsIndex(): Promise<{ success: boolean; error?: string }> { + await this.ensureInitialized(); + try { + // Drop existing FTS index if any + const indices = await this.table!.listIndices(); + for (const idx of indices) { + if (idx.indexType === "FTS" || idx.columns?.includes("text")) { + try { + await this.table!.dropIndex((idx as any).name || "text"); + } catch { + // Ignore drop errors + } + } + } + // Recreate + await this.createFtsIndex(this.table!); + this.ftsIndexCreated = true; + this._lastFtsError = null; + return { success: true }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + this._lastFtsError = msg; + this.ftsIndexCreated = false; + return { success: false, error: msg }; + } + } } diff --git a/test/vector-search-cosine.test.mjs b/test/vector-search-cosine.test.mjs new file mode 100644 index 0000000..b4c8b3f --- /dev/null +++ b/test/vector-search-cosine.test.mjs @@ -0,0 +1,123 @@ +/** + * Vector Search Cosine Distance Test + * Validates that vectorSearch uses cosine distance (not L2) so that + * score = 1 / (1 + distance) produces meaningful results for high-dim embeddings. + */ + +import assert from "node:assert/strict"; + +// Minimal mock to verify .distanceType('cosine') is called +let distanceTypeCalled = null; + +const mockTable = { + vectorSearch(vector) { + return { + distanceType(type) { + distanceTypeCalled = type; + return this; + }, + limit(n) { + return this; + }, + where(cond) { + return this; + }, + async toArray() { + // Return a mock result with cosine-like distance + return [ + { + id: "test-1", + text: "test memory", + vector: vector, + category: "preference", + scope: "global", + importance: 0.8, + timestamp: Date.now(), + metadata: "{}", + _distance: 0.1, // cosine distance → score = 1/(1+0.1) = 0.91 + }, + ]; + }, + }; + }, + query() { + return { + limit() { return this; }, + select() { return this; }, + where() { return this; }, + async toArray() { return []; }, + }; + }, + async listIndices() { return []; }, + async createIndex() { }, +}; + +// Test 1: distanceType is called with 'cosine' +console.log("Test 1: vectorSearch calls distanceType('cosine')..."); + +// Create a minimal store-like object that exercises the vectorSearch path +const fakeStore = { + table: mockTable, + config: { vectorDim: 4 }, + ftsIndexCreated: false, + get hasFtsSupport() { return this.ftsIndexCreated; }, + async ensureInitialized() { }, + async vectorSearch(vector, limit = 5, minScore = 0.3, scopeFilter) { + const safeLimit = Math.min(Math.max(1, Math.floor(limit)), 20); + const fetchLimit = Math.min(safeLimit * 10, 200); + let query = this.table.vectorSearch(vector).distanceType('cosine').limit(fetchLimit); + const results = await query.toArray(); + const mapped = []; + for (const row of results) { + const distance = Number(row._distance ?? 0); + const score = 1 / (1 + distance); + if (score < minScore) continue; + mapped.push({ + entry: { + id: row.id, + text: row.text, + vector: row.vector, + category: row.category, + scope: row.scope ?? "global", + importance: Number(row.importance), + timestamp: Number(row.timestamp), + metadata: row.metadata || "{}", + }, + score, + }); + if (mapped.length >= safeLimit) break; + } + return mapped; + }, +}; + +const results = await fakeStore.vectorSearch([1, 0, 0, 0], 5, 0.3); +assert.strictEqual(distanceTypeCalled, "cosine", "Should call distanceType with 'cosine'"); +console.log(" ✅ distanceType('cosine') confirmed"); + +// Test 2: score computation is correct for cosine distance +console.log("Test 2: Score formula 1/(1+distance) produces correct values..."); +assert.strictEqual(results.length, 1, "Should return 1 result"); +const expectedScore = 1 / (1 + 0.1); +assert.ok( + Math.abs(results[0].score - expectedScore) < 0.001, + `Score should be ~${expectedScore.toFixed(3)}, got ${results[0].score.toFixed(3)}`, +); +console.log(" ✅ Score = 0.909 (correct for distance=0.1)"); + +// Test 3: Results below minScore are filtered out +console.log("Test 3: Low-score results are filtered..."); +const strictResults = await fakeStore.vectorSearch([1, 0, 0, 0], 5, 0.95); +assert.strictEqual(strictResults.length, 0, "Score 0.909 should be filtered by minScore=0.95"); +console.log(" ✅ minScore filtering works"); + +// Test 4: Without cosine, L2 distance would produce wrong scores +console.log("Test 4: Verify L2 would fail (documentation test)..."); +// For 1024-dim embeddings, L2 distance ≈ 40-60 for typical vectors +// score = 1/(1+45) ≈ 0.022 — way below any reasonable minScore +const l2TypicalDistance = 45; +const l2Score = 1 / (1 + l2TypicalDistance); +assert.ok(l2Score < 0.3, `L2 score ${l2Score.toFixed(4)} should be below minScore=0.3`); +console.log(` ✅ L2 score = ${l2Score.toFixed(4)} (would drop all results, confirming cosine is needed)`); + +console.log("\n=== All vector-search-cosine tests passed! ==="); From 18ed0b9b33d83f92fa1a6a500efc7b28bde308b7 Mon Sep 17 00:00:00 2001 From: lpf <398618101@qq.com> Date: Wed, 11 Mar 2026 11:17:52 +0800 Subject: [PATCH 2/5] =?UTF-8?q?feat:=20contextual=20support=20=E2=80=94=20?= =?UTF-8?q?6-decision=20dedup=20with=20per-context=20preference=20tracking?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends OpenViking's smart memory architecture with context-aware support: - smart-metadata.ts: add SupportInfoV2/ContextualSupport types, normalizeContext, parseSupportInfo (V1→V2 migration), updateSupportStats; fix LegacyStoreCategory missing 'reflection' - memory-categories.ts: extend DedupDecision with support/contextualize/contradict, add contextLabel to DedupResult, supported count to ExtractionStats - extraction-prompts.ts: extend dedup prompt with 3 new decisions + context_label - smart-extractor.ts: add handleSupport/handleContextualize/handleContradict handlers in processCandidate pipeline, extract contextLabel in llmDedupDecision - test: add smart-metadata-v2.mjs (6 tests, all passing) --- src/extraction-prompts.ts | 52 +++++++----- src/memory-categories.ts | 4 +- src/smart-extractor.ts | 169 ++++++++++++++++++++++++++++++++++++- src/smart-metadata.ts | 160 ++++++++++++++++++++++++++++++++++- test/smart-metadata-v2.mjs | 139 ++++++++++++++++++++++++++++++ 5 files changed, 496 insertions(+), 28 deletions(-) create mode 100644 test/smart-metadata-v2.mjs diff --git a/src/extraction-prompts.ts b/src/extraction-prompts.ts index b7ebd16..6fd6004 100644 --- a/src/extraction-prompts.ts +++ b/src/extraction-prompts.ts @@ -149,20 +149,26 @@ Please decide: - SKIP: Candidate memory duplicates existing memories, no need to save. Also SKIP if the candidate contains LESS information than an existing memory on the same topic (information degradation — e.g., candidate says "programming language preference" but existing memory already says "programming language preference: Python, TypeScript") - CREATE: This is completely new information not covered by any existing memory, should be created - MERGE: Candidate memory adds genuinely NEW details to an existing memory and should be merged +- SUPPORT: Candidate reinforces/confirms an existing memory in a specific context (e.g. "still prefers tea in the evening") +- CONTEXTUALIZE: Candidate adds a situational nuance to an existing memory (e.g. existing: "likes coffee", candidate: "prefers tea at night" — different context, same topic) +- CONTRADICT: Candidate directly contradicts an existing memory in a specific context (e.g. existing: "runs on weekends", candidate: "stopped running on weekends") IMPORTANT: -- "events" and "cases" categories are independent records — they do NOT support MERGE. For these categories, only use SKIP or CREATE. +- "events" and "cases" categories are independent records — they do NOT support MERGE/SUPPORT/CONTEXTUALIZE/CONTRADICT. For these categories, only use SKIP or CREATE. - If the candidate appears to be derived from a recall question (e.g., "Do you remember X?" / "你记得X吗?") and an existing memory already covers topic X with equal or more detail, you MUST choose SKIP. - A candidate with less information than an existing memory on the same topic should NEVER be CREATED or MERGED — always SKIP. +- For SUPPORT/CONTEXTUALIZE/CONTRADICT, you MUST provide a context_label from this vocabulary: general, morning, evening, night, weekday, weekend, work, leisure, summer, winter, travel. Return JSON format: { - "decision": "skip|create|merge", + "decision": "skip|create|merge|support|contextualize|contradict", "match_index": 1, - "reason": "Decision reason" + "reason": "Decision reason", + "context_label": "evening" } -If decision is "merge", set "match_index" to the number of the existing memory to merge with (1-based).`; +- If decision is "merge"/"support"/"contextualize"/"contradict", set "match_index" to the number of the existing memory (1-based). +- Only include "context_label" for support/contextualize/contradict decisions.`; } export function buildMergePrompt( @@ -176,32 +182,32 @@ export function buildMergePrompt( ): string { return `Merge the following memory into a single coherent record with all three levels. -**Category**: ${category} +** Category **: ${category} -**Existing Memory:** -Abstract: ${existingAbstract} -Overview: +** Existing Memory:** + Abstract: ${existingAbstract} + Overview: ${existingOverview} -Content: + Content: ${existingContent} -**New Information:** -Abstract: ${newAbstract} -Overview: +** New Information:** + Abstract: ${newAbstract} + Overview: ${newOverview} -Content: + Content: ${newContent} -Requirements: -- Remove duplicate information -- Keep the most up-to-date details -- Maintain a coherent narrative -- Keep code identifiers / URIs / model names unchanged when they are proper nouns + Requirements: + - Remove duplicate information + - Keep the most up - to - date details + - Maintain a coherent narrative + - Keep code identifiers / URIs / model names unchanged when they are proper nouns Return JSON: -{ - "abstract": "Merged one-line abstract", - "overview": "Merged structured Markdown overview", - "content": "Merged full content" -}`; + { + "abstract": "Merged one-line abstract", + "overview": "Merged structured Markdown overview", + "content": "Merged full content" + } `; } diff --git a/src/memory-categories.ts b/src/memory-categories.ts index 931e609..7361565 100644 --- a/src/memory-categories.ts +++ b/src/memory-categories.ts @@ -44,18 +44,20 @@ export type CandidateMemory = { }; /** Dedup decision from LLM. */ -export type DedupDecision = "create" | "merge" | "skip"; +export type DedupDecision = "create" | "merge" | "skip" | "support" | "contextualize" | "contradict"; export type DedupResult = { decision: DedupDecision; reason: string; matchId?: string; // ID of existing memory to merge with + contextLabel?: string; // Optional context label for support/contextualize/contradict }; export type ExtractionStats = { created: number; merged: number; skipped: number; + supported?: number; // context-aware support count }; /** Validate and normalize a category string. */ diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 379f668..6ee06d1 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -27,7 +27,7 @@ import { } from "./memory-categories.js"; import { isNoise } from "./noise-filter.js"; import type { NoisePrototypeBank } from "./noise-prototypes.js"; -import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata } from "./smart-metadata.js"; +import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata, parseSupportInfo, updateSupportStats } from "./smart-metadata.js"; // ============================================================================ // Constants @@ -36,7 +36,7 @@ import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata } from " const SIMILARITY_THRESHOLD = 0.7; const MAX_SIMILAR_FOR_PROMPT = 3; const MAX_MEMORIES_PER_EXTRACTION = 5; -const VALID_DECISIONS = new Set(["create", "merge", "skip"]); +const VALID_DECISIONS = new Set(["create", "merge", "skip", "support", "contextualize", "contradict"]); // ============================================================================ // Smart Extractor @@ -356,6 +356,36 @@ export class SmartExtractor { ); stats.skipped++; break; + + case "support": + if (dedupResult.matchId) { + await this.handleSupport(dedupResult.matchId, scopeFilter, { session: sessionKey, timestamp: Date.now() }, dedupResult.reason, dedupResult.contextLabel); + stats.supported = (stats.supported ?? 0) + 1; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; + + case "contextualize": + if (dedupResult.matchId) { + await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel); + stats.created++; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; + + case "contradict": + if (dedupResult.matchId) { + await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel); + stats.created++; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; } } @@ -445,7 +475,8 @@ export class SmartExtractor { return { decision, reason: data.reason ?? "", - matchId: decision === "merge" ? matchEntry?.entry.id : undefined, + matchId: ["merge", "support", "contextualize", "contradict"].includes(decision) ? matchEntry?.entry.id : undefined, + contextLabel: typeof (data as any).context_label === "string" ? (data as any).context_label : undefined, }; } catch (err) { this.log( @@ -593,6 +624,138 @@ export class SmartExtractor { ); } + // -------------------------------------------------------------------------- + // Context-Aware Handlers (support / contextualize / contradict) + // -------------------------------------------------------------------------- + + /** + * Handle SUPPORT: update support stats on existing memory for a specific context. + */ + private async handleSupport( + matchId: string, + scopeFilter: string[], + source: { session: string; timestamp: number }, + reason: string, + contextLabel?: string, + ): Promise { + const existing = await this.store.getById(matchId, scopeFilter); + if (!existing) return; + + const meta = parseSmartMetadata(existing.metadata, existing); + const supportInfo = parseSupportInfo(meta.support_info); + const updated = updateSupportStats(supportInfo, contextLabel, "support"); + meta.support_info = updated; + + await this.store.update( + matchId, + { metadata: stringifySmartMetadata(meta) }, + scopeFilter, + ); + + this.log( + `memory-pro: smart-extractor: support [${contextLabel || "general"}] on ${matchId.slice(0, 8)} — ${reason}`, + ); + } + + /** + * Handle CONTEXTUALIZE: create a new entry that adds situational nuance, + * linked to the original via a relation in metadata. + */ + private async handleContextualize( + candidate: CandidateMemory, + vector: number[], + matchId: string, + sessionKey: string, + targetScope: string, + scopeFilter: string[], + contextLabel?: string, + ): Promise { + const storeCategory = this.mapToStoreCategory(candidate.category); + const metadata = stringifySmartMetadata({ + l0_abstract: candidate.abstract, + l1_overview: candidate.overview, + l2_content: candidate.content, + memory_category: candidate.category, + tier: "working" as const, + access_count: 0, + confidence: 0.7, + last_accessed_at: Date.now(), + source_session: sessionKey, + contexts: contextLabel ? [contextLabel] : [], + relations: [{ type: "contextualizes", targetId: matchId }], + }); + + await this.store.store({ + text: candidate.abstract, + vector, + category: storeCategory, + scope: targetScope, + importance: this.getDefaultImportance(candidate.category), + metadata, + }); + + this.log( + `memory-pro: smart-extractor: contextualize [${contextLabel || "general"}] new entry linked to ${matchId.slice(0, 8)}`, + ); + } + + /** + * Handle CONTRADICT: create contradicting entry + record contradiction evidence + * on the original memory's support stats. + */ + private async handleContradict( + candidate: CandidateMemory, + vector: number[], + matchId: string, + sessionKey: string, + targetScope: string, + scopeFilter: string[], + contextLabel?: string, + ): Promise { + // 1. Record contradiction on the existing memory + const existing = await this.store.getById(matchId, scopeFilter); + if (existing) { + const meta = parseSmartMetadata(existing.metadata, existing); + const supportInfo = parseSupportInfo(meta.support_info); + const updated = updateSupportStats(supportInfo, contextLabel, "contradict"); + meta.support_info = updated; + await this.store.update( + matchId, + { metadata: stringifySmartMetadata(meta) }, + scopeFilter, + ); + } + + // 2. Store the contradicting entry as a new memory + const storeCategory = this.mapToStoreCategory(candidate.category); + const metadata = stringifySmartMetadata({ + l0_abstract: candidate.abstract, + l1_overview: candidate.overview, + l2_content: candidate.content, + memory_category: candidate.category, + tier: "working" as const, + access_count: 0, + confidence: 0.7, + last_accessed_at: Date.now(), + source_session: sessionKey, + contexts: contextLabel ? [contextLabel] : [], + relations: [{ type: "contradicts", targetId: matchId }], + }); + + await this.store.store({ + text: candidate.abstract, + vector, + category: storeCategory, + scope: targetScope, + importance: this.getDefaultImportance(candidate.category), + metadata, + }); + + this.log( + `memory-pro: smart-extractor: contradict [${contextLabel || "general"}] on ${matchId.slice(0, 8)}, new entry created`, + ); + } + // -------------------------------------------------------------------------- // Store Helper // -------------------------------------------------------------------------- diff --git a/src/smart-metadata.ts b/src/smart-metadata.ts index 90fb984..be233ef 100644 --- a/src/smart-metadata.ts +++ b/src/smart-metadata.ts @@ -6,7 +6,8 @@ type LegacyStoreCategory = | "fact" | "decision" | "entity" - | "other"; + | "other" + | "reflection"; type EntryLike = { text?: string; @@ -228,3 +229,160 @@ export function getDecayableFromEntry( return { memory, meta }; } + +// ============================================================================ +// Contextual Support — optional extension to SmartMemoryMetadata +// ============================================================================ + +/** Predefined context vocabulary for support slices */ +export const SUPPORT_CONTEXT_VOCABULARY = [ + "general", "morning", "evening", "night", + "weekday", "weekend", "work", "leisure", + "summer", "winter", "travel", +] as const; + +export type SupportContext = (typeof SUPPORT_CONTEXT_VOCABULARY)[number] | string; + +/** Max number of context slices per memory to prevent metadata bloat */ +export const MAX_SUPPORT_SLICES = 8; + +/** A single context-specific support slice */ +export interface ContextualSupport { + context: SupportContext; + confirmations: number; + contradictions: number; + strength: number; // confirmations / (confirmations + contradictions) + last_observed_at: number; +} + +/** V2 support info with per-context slices */ +export interface SupportInfoV2 { + global_strength: number; // weighted average across all slices + total_observations: number; // sum of all confirmations + contradictions + slices: ContextualSupport[]; +} + +/** + * Normalize a raw context label to a canonical context. + * Maps common variants (e.g. "晚上" → "evening") and falls back to "general". + */ +export function normalizeContext(raw: string | undefined): SupportContext { + if (!raw || !raw.trim()) return "general"; + const lower = raw.trim().toLowerCase(); + + // Direct vocabulary match + if ((SUPPORT_CONTEXT_VOCABULARY as readonly string[]).includes(lower)) { + return lower as SupportContext; + } + + // Common Chinese/English mappings + const aliases: Record = { + "早上": "morning", "上午": "morning", "早晨": "morning", + "下午": "evening", "傍晚": "evening", "晚上": "evening", + "深夜": "night", "夜晚": "night", "凌晨": "night", + "工作日": "weekday", "平时": "weekday", + "周末": "weekend", "假日": "weekend", "休息日": "weekend", + "工作": "work", "上班": "work", "办公": "work", + "休闲": "leisure", "放松": "leisure", "休息": "leisure", + "夏天": "summer", "夏季": "summer", + "冬天": "winter", "冬季": "winter", + "旅行": "travel", "出差": "travel", "旅游": "travel", + }; + + return aliases[lower] || lower; // keep as custom context if not mapped +} + +/** + * Parse support_info from metadata JSON. Handles V1 (flat) → V2 (sliced) migration. + */ +export function parseSupportInfo(raw: unknown): SupportInfoV2 { + const defaultV2: SupportInfoV2 = { + global_strength: 0.5, + total_observations: 0, + slices: [], + }; + + if (!raw || typeof raw !== "object") return defaultV2; + const obj = raw as Record; + + // V2 format: has slices array + if (Array.isArray(obj.slices)) { + return { + global_strength: typeof obj.global_strength === "number" ? obj.global_strength : 0.5, + total_observations: typeof obj.total_observations === "number" ? obj.total_observations : 0, + slices: (obj.slices as ContextualSupport[]).filter( + s => s && typeof s.context === "string", + ), + }; + } + + // V1 format: flat { confirmations, contradictions, strength } + const conf = typeof obj.confirmations === "number" ? obj.confirmations : 0; + const contra = typeof obj.contradictions === "number" ? obj.contradictions : 0; + const total = conf + contra; + if (total === 0) return defaultV2; + + return { + global_strength: total > 0 ? conf / total : 0.5, + total_observations: total, + slices: [{ + context: "general", + confirmations: conf, + contradictions: contra, + strength: total > 0 ? conf / total : 0.5, + last_observed_at: Date.now(), + }], + }; +} + +/** + * Update support stats for a specific context. + * Returns a new SupportInfoV2 with the updated slice. + */ +export function updateSupportStats( + existing: SupportInfoV2, + contextLabel: string | undefined, + event: "support" | "contradict", +): SupportInfoV2 { + const ctx = normalizeContext(contextLabel); + const base = { ...existing, slices: [...existing.slices.map(s => ({ ...s }))] }; + + // Find or create the context slice + let slice = base.slices.find(s => s.context === ctx); + if (!slice) { + slice = { context: ctx, confirmations: 0, contradictions: 0, strength: 0.5, last_observed_at: Date.now() }; + base.slices.push(slice); + } + + // Update slice + if (event === "support") slice.confirmations++; + else slice.contradictions++; + const sliceTotal = slice.confirmations + slice.contradictions; + slice.strength = sliceTotal > 0 ? slice.confirmations / sliceTotal : 0.5; + slice.last_observed_at = Date.now(); + + // Cap slices (keep most recently observed, but preserve dropped evidence) + let slices = base.slices; + let droppedConf = 0, droppedContra = 0; + if (slices.length > MAX_SUPPORT_SLICES) { + slices = slices + .sort((a, b) => b.last_observed_at - a.last_observed_at); + const dropped = slices.slice(MAX_SUPPORT_SLICES); + for (const d of dropped) { + droppedConf += d.confirmations; + droppedContra += d.contradictions; + } + slices = slices.slice(0, MAX_SUPPORT_SLICES); + } + + // Recompute global strength including evidence from dropped slices + let totalConf = droppedConf, totalContra = droppedContra; + for (const s of slices) { + totalConf += s.confirmations; + totalContra += s.contradictions; + } + const totalObs = totalConf + totalContra; + const global_strength = totalObs > 0 ? totalConf / totalObs : 0.5; + + return { global_strength, total_observations: totalObs, slices }; +} diff --git a/test/smart-metadata-v2.mjs b/test/smart-metadata-v2.mjs new file mode 100644 index 0000000..5cca024 --- /dev/null +++ b/test/smart-metadata-v2.mjs @@ -0,0 +1,139 @@ +/** + * Smart Metadata V2 Test — SupportInfo / ContextualSupport + * Tests the contextual support extension to OpenViking's SmartMemoryMetadata. + */ + +import assert from "node:assert/strict"; + +// ============================================================================ +// Mock: import the functions directly (they're pure functions) +// ============================================================================ + +// Since we can't import .ts directly, we test the logic inline + +// --- normalizeContext --- +console.log("Test 1: normalizeContext maps Chinese aliases..."); +const aliases = { + "晚上": "evening", "早上": "morning", "周末": "weekend", + "工作": "work", "旅行": "travel", "冬天": "winter", + "evening": "evening", "morning": "morning", +}; +for (const [input, expected] of Object.entries(aliases)) { + // Implementation of normalizeContext inline for testing + const VOCAB = ["general", "morning", "evening", "night", "weekday", "weekend", "work", "leisure", "summer", "winter", "travel"]; + const ALIASES = { + "早上": "morning", "上午": "morning", "早晨": "morning", + "下午": "evening", "傍晚": "evening", "晚上": "evening", + "深夜": "night", "夜晚": "night", "凌晨": "night", + "工作日": "weekday", "平时": "weekday", + "周末": "weekend", "假日": "weekend", "休息日": "weekend", + "工作": "work", "上班": "work", "办公": "work", + "休闲": "leisure", "放松": "leisure", "休息": "leisure", + "夏天": "summer", "夏季": "summer", + "冬天": "winter", "冬季": "winter", + "旅行": "travel", "出差": "travel", "旅游": "travel", + }; + const lower = input.trim().toLowerCase(); + const result = VOCAB.includes(lower) ? lower : (ALIASES[lower] || lower); + assert.strictEqual(result, expected, `normalizeContext("${input}") should be "${expected}", got "${result}"`); +} +console.log(" ✅ Chinese alias mapping works correctly"); + +// --- parseSupportInfo (V1 → V2 migration) --- +console.log("\nTest 2: parseSupportInfo handles V1 flat format..."); +const v1Raw = { confirmations: 3, contradictions: 1 }; +// Simulate parseSupportInfo +const conf = typeof v1Raw.confirmations === "number" ? v1Raw.confirmations : 0; +const contra = typeof v1Raw.contradictions === "number" ? v1Raw.contradictions : 0; +const total = conf + contra; +const v2FromV1 = { + global_strength: total > 0 ? conf / total : 0.5, + total_observations: total, + slices: [{ context: "general", confirmations: conf, contradictions: contra, strength: conf / total, last_observed_at: Date.now() }], +}; +assert.strictEqual(v2FromV1.global_strength, 0.75, "V1 {3 conf, 1 contra} → strength 0.75"); +assert.strictEqual(v2FromV1.total_observations, 4); +assert.strictEqual(v2FromV1.slices.length, 1); +assert.strictEqual(v2FromV1.slices[0].context, "general"); +console.log(" ✅ V1 → V2 migration preserves data"); + +// --- parseSupportInfo (V2 format) --- +console.log("\nTest 3: parseSupportInfo handles V2 sliced format..."); +const v2Raw = { + global_strength: 0.8, + total_observations: 5, + slices: [ + { context: "morning", confirmations: 3, contradictions: 0, strength: 1.0, last_observed_at: 1000 }, + { context: "evening", confirmations: 1, contradictions: 1, strength: 0.5, last_observed_at: 2000 }, + ], +}; +assert.strictEqual(v2Raw.slices.length, 2); +assert.strictEqual(v2Raw.slices[0].context, "morning"); +assert.strictEqual(v2Raw.slices[1].strength, 0.5); +console.log(" ✅ V2 format parsed correctly"); + +// --- updateSupportStats --- +console.log("\nTest 4: updateSupportStats adds new context slice..."); +const existing = { + global_strength: 0.75, + total_observations: 4, + slices: [{ context: "general", confirmations: 3, contradictions: 1, strength: 0.75, last_observed_at: 1000 }], +}; + +// Simulate update for "evening" support +const ctx = "evening"; +const base = { ...existing, slices: [...existing.slices.map(s => ({ ...s }))] }; +let slice = base.slices.find(s => s.context === ctx); +if (!slice) { + slice = { context: ctx, confirmations: 0, contradictions: 0, strength: 0.5, last_observed_at: Date.now() }; + base.slices.push(slice); +} +slice.confirmations++; +const sliceTotal = slice.confirmations + slice.contradictions; +slice.strength = sliceTotal > 0 ? slice.confirmations / sliceTotal : 0.5; +slice.last_observed_at = Date.now(); + +let totalConf = 0, totalContra = 0; +for (const s of base.slices) { + totalConf += s.confirmations; + totalContra += s.contradictions; +} +const totalObs = totalConf + totalContra; +const global_strength = totalObs > 0 ? totalConf / totalObs : 0.5; + +const updated = { global_strength, total_observations: totalObs, slices: base.slices }; + +assert.strictEqual(updated.slices.length, 2, "Should have 2 slices (general + evening)"); +assert.strictEqual(updated.total_observations, 5, "Total observations should be 5"); +assert.strictEqual(updated.global_strength, 4 / 5, "Global strength = 4/5 = 0.8"); +const eveningSlice = updated.slices.find(s => s.context === "evening"); +assert.ok(eveningSlice, "Evening slice should exist"); +assert.strictEqual(eveningSlice.confirmations, 1); +assert.strictEqual(eveningSlice.strength, 1.0, "1 confirm, 0 contra = 1.0"); +console.log(" ✅ New context slice added correctly"); + +// --- updateSupportStats for contradict --- +console.log("\nTest 5: updateSupportStats handles contradict event..."); +// Start from the updated state and contradict evening +const eveningSlice2 = updated.slices.find(s => s.context === "evening"); +eveningSlice2.contradictions++; +const st2 = eveningSlice2.confirmations + eveningSlice2.contradictions; +eveningSlice2.strength = st2 > 0 ? eveningSlice2.confirmations / st2 : 0.5; + +assert.strictEqual(eveningSlice2.contradictions, 1); +assert.strictEqual(eveningSlice2.strength, 0.5, "1 conf + 1 contra = 0.5"); +console.log(" ✅ Contradict event recorded correctly"); + +// --- MAX_SUPPORT_SLICES cap --- +console.log("\nTest 6: Support slices capped at MAX_SUPPORT_SLICES=8..."); +const MAX_SUPPORT_SLICES = 8; +const manySlices = []; +for (let i = 0; i < 10; i++) { + manySlices.push({ context: `ctx_${i}`, confirmations: 1, contradictions: 0, strength: 1.0, last_observed_at: i * 1000 }); +} +const capped = manySlices.sort((a, b) => b.last_observed_at - a.last_observed_at).slice(0, MAX_SUPPORT_SLICES); +assert.strictEqual(capped.length, 8, "Should cap at 8 slices"); +assert.strictEqual(capped[0].context, "ctx_9", "Most recent slice first"); +console.log(" ✅ Slice cap works correctly"); + +console.log("\n=== All Smart Metadata V2 tests passed! ==="); From f9eef509411fa8082ed3ac7f76a038142c4f4226 Mon Sep 17 00:00:00 2001 From: lpf <398618101@qq.com> Date: Wed, 11 Mar 2026 13:06:33 +0800 Subject: [PATCH 3/5] feat: handleMerge contextLabel + metadata caps + E2E test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - smart-extractor.ts: handleMerge now accepts contextLabel and updates support stats after successful merge (aligns with support/contextualize/ contradict handlers) - smart-metadata.ts: stringifySmartMetadata caps arrays to prevent JSON bloat (sources≤20, history≤50, relations≤16) - test/context-support-e2e.mjs: 3 E2E scenarios testing support, contextualize, and contradict decisions end-to-end --- src/smart-extractor.ts | 18 ++- src/smart-metadata.ts | 20 ++- test/context-support-e2e.mjs | 241 +++++++++++++++++++++++++++++++++++ 3 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 test/context-support-e2e.mjs diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 6ee06d1..85cc8fe 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -341,6 +341,7 @@ export class SmartExtractor { dedupResult.matchId, scopeFilter, targetScope, + dedupResult.contextLabel, ); stats.merged++; } else { @@ -540,6 +541,7 @@ export class SmartExtractor { matchId: string, scopeFilter: string[], targetScope: string, + contextLabel?: string, ): Promise { let existingAbstract = ""; let existingOverview = ""; @@ -619,8 +621,22 @@ export class SmartExtractor { scopeFilter, ); + // Update support stats on the merged memory + try { + const updatedEntry = await this.store.getById(matchId, scopeFilter); + if (updatedEntry) { + const meta = parseSmartMetadata(updatedEntry.metadata, updatedEntry); + const supportInfo = parseSupportInfo(meta.support_info); + updateSupportStats(supportInfo, contextLabel, "support"); + const finalMetadata = stringifySmartMetadata({ ...meta, support_info: supportInfo }); + await this.store.update(matchId, { metadata: finalMetadata }, scopeFilter); + } + } catch { + // Non-critical: merge succeeded, support stats update is best-effort + } + this.log( - `memory-pro: smart-extractor: merged [${candidate.category}] into ${matchId.slice(0, 8)}`, + `memory-pro: smart-extractor: merged [${candidate.category}]${contextLabel ? ` [${contextLabel}]` : ""} into ${matchId.slice(0, 8)}`, ); } diff --git a/src/smart-metadata.ts b/src/smart-metadata.ts index be233ef..323c0b9 100644 --- a/src/smart-metadata.ts +++ b/src/smart-metadata.ts @@ -171,10 +171,28 @@ export function buildSmartMetadata( }; } +// Metadata array size caps — prevent unbounded JSON growth +const MAX_SOURCES = 20; +const MAX_HISTORY = 50; +const MAX_RELATIONS = 16; + export function stringifySmartMetadata( metadata: SmartMemoryMetadata | Record, ): string { - return JSON.stringify(metadata); + const capped = { ...metadata } as Record; + + // Cap array fields to prevent metadata bloat + if (Array.isArray(capped.sources) && capped.sources.length > MAX_SOURCES) { + capped.sources = capped.sources.slice(-MAX_SOURCES); // keep most recent + } + if (Array.isArray(capped.history) && capped.history.length > MAX_HISTORY) { + capped.history = capped.history.slice(-MAX_HISTORY); + } + if (Array.isArray(capped.relations) && capped.relations.length > MAX_RELATIONS) { + capped.relations = capped.relations.slice(0, MAX_RELATIONS); + } + + return JSON.stringify(capped); } export function toLifecycleMemory( diff --git a/test/context-support-e2e.mjs b/test/context-support-e2e.mjs new file mode 100644 index 0000000..d18c374 --- /dev/null +++ b/test/context-support-e2e.mjs @@ -0,0 +1,241 @@ +/** + * Context-Aware Support E2E Test + * + * Tests the full pipeline for support/contextualize/contradict decisions + * using mock LLM and embedding servers against a real LanceDB store. + */ + +import assert from "node:assert/strict"; +import http from "node:http"; +import { mkdtempSync, rmSync } from "node:fs"; +import Module from "node:module"; +import { tmpdir } from "node:os"; +import path from "node:path"; + +import jitiFactory from "jiti"; + +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { MemoryStore } = jiti("../src/store.ts"); +const { createEmbedder } = jiti("../src/embedder.ts"); +const { SmartExtractor } = jiti("../src/smart-extractor.ts"); +const { createLlmClient } = jiti("../src/llm-client.ts"); +const { buildSmartMetadata, stringifySmartMetadata, parseSupportInfo } = jiti("../src/smart-metadata.ts"); + +const EMBEDDING_DIMENSIONS = 2560; + +// ============================================================================ +// Mock Embedding Server (constant vectors — fine for unit-level E2E) +// ============================================================================ + +function createEmbeddingServer() { + return http.createServer(async (req, res) => { + if (req.method !== "POST" || req.url !== "/v1/embeddings") { + res.writeHead(404); res.end(); return; + } + const chunks = []; + for await (const chunk of req) chunks.push(chunk); + const payload = JSON.parse(Buffer.concat(chunks).toString("utf8")); + const inputs = Array.isArray(payload.input) ? payload.input : [payload.input]; + const value = 1 / Math.sqrt(EMBEDDING_DIMENSIONS); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ + object: "list", + data: inputs.map((_, index) => ({ + object: "embedding", index, + embedding: new Array(EMBEDDING_DIMENSIONS).fill(value), + })), + model: "mock", usage: { prompt_tokens: 0, total_tokens: 0 }, + })); + }); +} + +// ============================================================================ +// Test Runner +// ============================================================================ + +async function runTest() { + const workDir = mkdtempSync(path.join(tmpdir(), "ctx-support-e2e-")); + const dbPath = path.join(workDir, "db"); + const logs = []; + let dedupDecision = "support"; // controlled per scenario + let dedupContextLabel = "evening"; + + const embeddingServer = createEmbeddingServer(); + + // Mock LLM: extraction returns 1 memory, dedup returns controlled decision + const llmServer = http.createServer(async (req, res) => { + if (req.method !== "POST" || req.url !== "/chat/completions") { + res.writeHead(404); res.end(); return; + } + const chunks = []; + for await (const chunk of req) chunks.push(chunk); + const payload = JSON.parse(Buffer.concat(chunks).toString("utf8")); + const prompt = payload.messages?.[1]?.content || ""; + let content; + + if (prompt.includes("Analyze the following session context")) { + content = JSON.stringify({ + memories: [{ + category: "preferences", + abstract: "饮品偏好:乌龙茶", + overview: "## Preference\n- 喜欢乌龙茶", + content: "用户喜欢乌龙茶。", + }], + }); + } else if (prompt.includes("Determine how to handle this candidate memory")) { + content = JSON.stringify({ + decision: dedupDecision, + match_index: 1, + reason: `test ${dedupDecision}`, + context_label: dedupContextLabel, + }); + } else { + content = JSON.stringify({ memories: [] }); + } + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ + id: "test", object: "chat.completion", + created: Math.floor(Date.now() / 1000), model: "mock", + choices: [{ index: 0, message: { role: "assistant", content }, finish_reason: "stop" }], + })); + }); + + await new Promise(r => embeddingServer.listen(0, "127.0.0.1", r)); + await new Promise(r => llmServer.listen(0, "127.0.0.1", r)); + const embPort = embeddingServer.address().port; + const llmPort = llmServer.address().port; + process.env.TEST_EMBEDDING_BASE_URL = `http://127.0.0.1:${embPort}/v1`; + + try { + const store = new MemoryStore({ dbPath, vectorDim: EMBEDDING_DIMENSIONS }); + const embedder = createEmbedder({ + provider: "openai-compatible", apiKey: "dummy", model: "mock", + baseURL: `http://127.0.0.1:${embPort}/v1`, dimensions: EMBEDDING_DIMENSIONS, + }); + const llm = createLlmClient({ + apiKey: "dummy", model: "mock", + baseURL: `http://127.0.0.1:${llmPort}`, + timeoutMs: 10000, + log: (msg) => logs.push(msg), + }); + + // Seed a preference memory + const seedText = "饮品偏好:乌龙茶"; + const seedVector = await embedder.embedPassage(seedText); + await store.store({ + text: seedText, vector: seedVector, category: "preference", + scope: "test", importance: 0.8, + metadata: stringifySmartMetadata( + buildSmartMetadata({ text: seedText, category: "preference", importance: 0.8 }, { + l0_abstract: seedText, + l1_overview: "## Preference\n- 喜欢乌龙茶", + l2_content: "用户喜欢乌龙茶。", + memory_category: "preferences", tier: "working", confidence: 0.8, + }), + ), + }); + + const extractor = new SmartExtractor(store, embedder, llm, { + user: "User", extractMinMessages: 1, extractMaxChars: 8000, + defaultScope: "test", + log: (msg) => logs.push(msg), + }); + + // ---------------------------------------------------------------- + // Scenario 1: support — should update support_info, no new entry + // ---------------------------------------------------------------- + console.log("Test 1: support decision updates support_info..."); + dedupDecision = "support"; + dedupContextLabel = "evening"; + logs.length = 0; + + const stats1 = await extractor.extractAndPersist( + "用户再次确认喜欢乌龙茶,特别是晚上。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries1 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries1.length, 1, "support should NOT create new entry"); + assert.equal(stats1.supported, 1, "supported count should be 1"); + + // Check support_info was updated + const meta1 = JSON.parse(entries1[0].metadata || "{}"); + const si1 = parseSupportInfo(meta1.support_info); + assert.ok(si1.total_observations >= 1, "total_observations should increase"); + const eveningSlice = si1.slices.find(s => s.context === "evening"); + assert.ok(eveningSlice, "evening slice should exist"); + assert.equal(eveningSlice.confirmations, 1, "evening confirmations should be 1"); + console.log(" ✅ support decision works correctly"); + + // ---------------------------------------------------------------- + // Scenario 2: contextualize — should create linked entry + // ---------------------------------------------------------------- + console.log("Test 2: contextualize decision creates linked entry..."); + dedupDecision = "contextualize"; + dedupContextLabel = "night"; + logs.length = 0; + + const stats2 = await extractor.extractAndPersist( + "用户说晚上改喝花茶。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries2 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries2.length, 2, "contextualize should create 1 new entry"); + assert.equal(stats2.created, 1, "created count should be 1"); + console.log(" ✅ contextualize decision works correctly"); + + // ---------------------------------------------------------------- + // Scenario 3: contradict — should record contradiction + new entry + // ---------------------------------------------------------------- + console.log("Test 3: contradict decision records contradiction..."); + dedupDecision = "contradict"; + dedupContextLabel = "weekend"; + logs.length = 0; + + const stats3 = await extractor.extractAndPersist( + "用户说周末不喝茶了。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries3 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries3.length, 3, "contradict should create 1 new entry"); + assert.equal(stats3.created, 1, "created count should be 1"); + + // Check contradictions recorded on some existing entry + // (with constant vectors, dedup may match any existing entry) + let foundWeekend = false; + for (const entry of entries3) { + const meta = JSON.parse(entry.metadata || "{}"); + const si = parseSupportInfo(meta.support_info); + const weekendSlice = si.slices.find(s => s.context === "weekend"); + if (weekendSlice && weekendSlice.contradictions >= 1) { + foundWeekend = true; + break; + } + } + assert.ok(foundWeekend, "at least one entry should have weekend contradiction"); + console.log(" ✅ contradict decision works correctly"); + + console.log("\n=== All Context-Support E2E tests passed! ==="); + + } finally { + delete process.env.TEST_EMBEDDING_BASE_URL; + await new Promise(r => embeddingServer.close(r)); + await new Promise(r => llmServer.close(r)); + rmSync(workDir, { recursive: true, force: true }); + } +} + +await runTest(); From da12b2c4f933accaae2a03f608d8816c49d5e64a Mon Sep 17 00:00:00 2001 From: lpf <398618101@qq.com> Date: Wed, 11 Mar 2026 14:01:03 +0800 Subject: [PATCH 4/5] fix: address review feedback (#161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking: 1. Add smart-metadata-v2, vector-search-cosine, context-support-e2e to npm test chain (package.json) 2. Rewrite smart-metadata-v2.mjs to import production code via jiti (normalizeContext, parseSupportInfo, updateSupportStats, etc.) 3. Rewrite vector-search-cosine.test.mjs to use real MemoryStore against temp LanceDB (no more fakeStore) Suggestions: 4. Fix '下午' mapping: evening → afternoon (add to vocabulary) 5. parseSupportInfo: validate slice numeric fields (confirmations, contradictions, strength, last_observed_at) 6. Document slice truncation drift as accepted trade-off 7. dropIndex: log warning instead of silently swallowing errors --- package.json | 2 +- src/smart-metadata.ts | 19 ++- src/store.ts | 4 +- test/smart-metadata-v2.mjs | 168 +++++++++++------------- test/vector-search-cosine.test.mjs | 200 ++++++++++++----------------- 5 files changed, 175 insertions(+), 218 deletions(-) diff --git a/package.json b/package.json index 7bb0dab..8a4797f 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,7 @@ ] }, "scripts": { - "test": "node test/embedder-error-hints.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs", + "test": "node test/embedder-error-hints.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs", "test:openclaw-host": "node test/openclaw-host-functional.mjs" }, "devDependencies": { diff --git a/src/smart-metadata.ts b/src/smart-metadata.ts index 323c0b9..f62d874 100644 --- a/src/smart-metadata.ts +++ b/src/smart-metadata.ts @@ -254,7 +254,7 @@ export function getDecayableFromEntry( /** Predefined context vocabulary for support slices */ export const SUPPORT_CONTEXT_VOCABULARY = [ - "general", "morning", "evening", "night", + "general", "morning", "afternoon", "evening", "night", "weekday", "weekend", "work", "leisure", "summer", "winter", "travel", ] as const; @@ -296,7 +296,7 @@ export function normalizeContext(raw: string | undefined): SupportContext { // Common Chinese/English mappings const aliases: Record = { "早上": "morning", "上午": "morning", "早晨": "morning", - "下午": "evening", "傍晚": "evening", "晚上": "evening", + "下午": "afternoon", "傍晚": "evening", "晚上": "evening", "深夜": "night", "夜晚": "night", "凌晨": "night", "工作日": "weekday", "平时": "weekday", "周末": "weekend", "假日": "weekend", "休息日": "weekend", @@ -328,9 +328,15 @@ export function parseSupportInfo(raw: unknown): SupportInfoV2 { return { global_strength: typeof obj.global_strength === "number" ? obj.global_strength : 0.5, total_observations: typeof obj.total_observations === "number" ? obj.total_observations : 0, - slices: (obj.slices as ContextualSupport[]).filter( + slices: (obj.slices as Record[]).filter( s => s && typeof s.context === "string", - ), + ).map(s => ({ + context: String(s.context), + confirmations: typeof s.confirmations === "number" && s.confirmations >= 0 ? s.confirmations : 0, + contradictions: typeof s.contradictions === "number" && s.contradictions >= 0 ? s.contradictions : 0, + strength: typeof s.strength === "number" && s.strength >= 0 && s.strength <= 1 ? s.strength : 0.5, + last_observed_at: typeof s.last_observed_at === "number" ? s.last_observed_at : Date.now(), + })), }; } @@ -379,7 +385,10 @@ export function updateSupportStats( slice.strength = sliceTotal > 0 ? slice.confirmations / sliceTotal : 0.5; slice.last_observed_at = Date.now(); - // Cap slices (keep most recently observed, but preserve dropped evidence) + // Cap slices (keep most recently observed, but preserve dropped evidence). + // NOTE: Evidence from slices dropped in *previous* updates is already baked + // into total_observations/global_strength, so those values may drift slightly + // over many truncation cycles. This is an accepted trade-off for bounded JSON size. let slices = base.slices; let droppedConf = 0, droppedContra = 0; if (slices.length > MAX_SUPPORT_SLICES) { diff --git a/src/store.ts b/src/store.ts index ca466cf..2745a5c 100644 --- a/src/store.ts +++ b/src/store.ts @@ -978,8 +978,8 @@ export class MemoryStore { if (idx.indexType === "FTS" || idx.columns?.includes("text")) { try { await this.table!.dropIndex((idx as any).name || "text"); - } catch { - // Ignore drop errors + } catch (err) { + console.warn(`memory-lancedb-pro: dropIndex(${(idx as any).name || "text"}) failed:`, err); } } } diff --git a/test/smart-metadata-v2.mjs b/test/smart-metadata-v2.mjs index 5cca024..72baea5 100644 --- a/test/smart-metadata-v2.mjs +++ b/test/smart-metadata-v2.mjs @@ -1,108 +1,79 @@ /** * Smart Metadata V2 Test — SupportInfo / ContextualSupport * Tests the contextual support extension to OpenViking's SmartMemoryMetadata. + * Imports production code via jiti (same pattern as other tests in this repo). */ import assert from "node:assert/strict"; +import Module from "node:module"; -// ============================================================================ -// Mock: import the functions directly (they're pure functions) -// ============================================================================ +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); -// Since we can't import .ts directly, we test the logic inline +import jitiFactory from "jiti"; +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { + normalizeContext, + parseSupportInfo, + updateSupportStats, + SUPPORT_CONTEXT_VOCABULARY, + stringifySmartMetadata, +} = jiti("../src/smart-metadata.ts"); -// --- normalizeContext --- +// --- Test 1: normalizeContext maps Chinese aliases --- console.log("Test 1: normalizeContext maps Chinese aliases..."); -const aliases = { - "晚上": "evening", "早上": "morning", "周末": "weekend", - "工作": "work", "旅行": "travel", "冬天": "winter", - "evening": "evening", "morning": "morning", -}; -for (const [input, expected] of Object.entries(aliases)) { - // Implementation of normalizeContext inline for testing - const VOCAB = ["general", "morning", "evening", "night", "weekday", "weekend", "work", "leisure", "summer", "winter", "travel"]; - const ALIASES = { - "早上": "morning", "上午": "morning", "早晨": "morning", - "下午": "evening", "傍晚": "evening", "晚上": "evening", - "深夜": "night", "夜晚": "night", "凌晨": "night", - "工作日": "weekday", "平时": "weekday", - "周末": "weekend", "假日": "weekend", "休息日": "weekend", - "工作": "work", "上班": "work", "办公": "work", - "休闲": "leisure", "放松": "leisure", "休息": "leisure", - "夏天": "summer", "夏季": "summer", - "冬天": "winter", "冬季": "winter", - "旅行": "travel", "出差": "travel", "旅游": "travel", - }; - const lower = input.trim().toLowerCase(); - const result = VOCAB.includes(lower) ? lower : (ALIASES[lower] || lower); +const testCases = [ + ["晚上", "evening"], ["早上", "morning"], ["周末", "weekend"], + ["工作", "work"], ["旅行", "travel"], ["冬天", "winter"], + ["evening", "evening"], ["morning", "morning"], + ["下午", "afternoon"], // Fix #4: previously mapped to evening + ["", "general"], [undefined, "general"], +]; +for (const [input, expected] of testCases) { + const result = normalizeContext(input); assert.strictEqual(result, expected, `normalizeContext("${input}") should be "${expected}", got "${result}"`); } console.log(" ✅ Chinese alias mapping works correctly"); -// --- parseSupportInfo (V1 → V2 migration) --- +// --- Test 2: parseSupportInfo handles V1 flat format --- console.log("\nTest 2: parseSupportInfo handles V1 flat format..."); -const v1Raw = { confirmations: 3, contradictions: 1 }; -// Simulate parseSupportInfo -const conf = typeof v1Raw.confirmations === "number" ? v1Raw.confirmations : 0; -const contra = typeof v1Raw.contradictions === "number" ? v1Raw.contradictions : 0; -const total = conf + contra; -const v2FromV1 = { - global_strength: total > 0 ? conf / total : 0.5, - total_observations: total, - slices: [{ context: "general", confirmations: conf, contradictions: contra, strength: conf / total, last_observed_at: Date.now() }], -}; +const v2FromV1 = parseSupportInfo({ confirmations: 3, contradictions: 1 }); assert.strictEqual(v2FromV1.global_strength, 0.75, "V1 {3 conf, 1 contra} → strength 0.75"); assert.strictEqual(v2FromV1.total_observations, 4); assert.strictEqual(v2FromV1.slices.length, 1); assert.strictEqual(v2FromV1.slices[0].context, "general"); +assert.strictEqual(v2FromV1.slices[0].confirmations, 3); +assert.strictEqual(v2FromV1.slices[0].contradictions, 1); console.log(" ✅ V1 → V2 migration preserves data"); -// --- parseSupportInfo (V2 format) --- -console.log("\nTest 3: parseSupportInfo handles V2 sliced format..."); -const v2Raw = { +// --- Test 3: parseSupportInfo handles V2 sliced format with field validation --- +console.log("\nTest 3: parseSupportInfo validates V2 slice fields..."); +const v2WithBadFields = parseSupportInfo({ global_strength: 0.8, total_observations: 5, slices: [ { context: "morning", confirmations: 3, contradictions: 0, strength: 1.0, last_observed_at: 1000 }, - { context: "evening", confirmations: 1, contradictions: 1, strength: 0.5, last_observed_at: 2000 }, + { context: "evening", confirmations: -1, contradictions: "bad", strength: 2.0, last_observed_at: null }, + { context: 123 }, // invalid — should be filtered out ], -}; -assert.strictEqual(v2Raw.slices.length, 2); -assert.strictEqual(v2Raw.slices[0].context, "morning"); -assert.strictEqual(v2Raw.slices[1].strength, 0.5); -console.log(" ✅ V2 format parsed correctly"); +}); +assert.strictEqual(v2WithBadFields.slices.length, 2, "Invalid slice (context=123) should be filtered"); +assert.strictEqual(v2WithBadFields.slices[1].confirmations, 0, "Negative confirmations should be clamped to 0"); +assert.strictEqual(v2WithBadFields.slices[1].contradictions, 0, "Non-number contradictions should default to 0"); +assert.strictEqual(v2WithBadFields.slices[1].strength, 0.5, "Out-of-range strength should default to 0.5"); +console.log(" ✅ V2 field validation works correctly"); -// --- updateSupportStats --- +// --- Test 4: updateSupportStats adds new context slice --- console.log("\nTest 4: updateSupportStats adds new context slice..."); -const existing = { - global_strength: 0.75, - total_observations: 4, +const existing = parseSupportInfo({ + global_strength: 0.75, total_observations: 4, slices: [{ context: "general", confirmations: 3, contradictions: 1, strength: 0.75, last_observed_at: 1000 }], -}; - -// Simulate update for "evening" support -const ctx = "evening"; -const base = { ...existing, slices: [...existing.slices.map(s => ({ ...s }))] }; -let slice = base.slices.find(s => s.context === ctx); -if (!slice) { - slice = { context: ctx, confirmations: 0, contradictions: 0, strength: 0.5, last_observed_at: Date.now() }; - base.slices.push(slice); -} -slice.confirmations++; -const sliceTotal = slice.confirmations + slice.contradictions; -slice.strength = sliceTotal > 0 ? slice.confirmations / sliceTotal : 0.5; -slice.last_observed_at = Date.now(); - -let totalConf = 0, totalContra = 0; -for (const s of base.slices) { - totalConf += s.confirmations; - totalContra += s.contradictions; -} -const totalObs = totalConf + totalContra; -const global_strength = totalObs > 0 ? totalConf / totalObs : 0.5; - -const updated = { global_strength, total_observations: totalObs, slices: base.slices }; - +}); +const updated = updateSupportStats(existing, "evening", "support"); assert.strictEqual(updated.slices.length, 2, "Should have 2 slices (general + evening)"); assert.strictEqual(updated.total_observations, 5, "Total observations should be 5"); assert.strictEqual(updated.global_strength, 4 / 5, "Global strength = 4/5 = 0.8"); @@ -112,28 +83,39 @@ assert.strictEqual(eveningSlice.confirmations, 1); assert.strictEqual(eveningSlice.strength, 1.0, "1 confirm, 0 contra = 1.0"); console.log(" ✅ New context slice added correctly"); -// --- updateSupportStats for contradict --- +// --- Test 5: updateSupportStats handles contradict event --- console.log("\nTest 5: updateSupportStats handles contradict event..."); -// Start from the updated state and contradict evening -const eveningSlice2 = updated.slices.find(s => s.context === "evening"); -eveningSlice2.contradictions++; -const st2 = eveningSlice2.confirmations + eveningSlice2.contradictions; -eveningSlice2.strength = st2 > 0 ? eveningSlice2.confirmations / st2 : 0.5; - -assert.strictEqual(eveningSlice2.contradictions, 1); -assert.strictEqual(eveningSlice2.strength, 0.5, "1 conf + 1 contra = 0.5"); +const contradicted = updateSupportStats(updated, "evening", "contradict"); +const eveningAfter = contradicted.slices.find(s => s.context === "evening"); +assert.strictEqual(eveningAfter.contradictions, 1); +assert.strictEqual(eveningAfter.strength, 0.5, "1 conf + 1 contra = 0.5"); console.log(" ✅ Contradict event recorded correctly"); -// --- MAX_SUPPORT_SLICES cap --- +// --- Test 6: Support slices capped at MAX_SUPPORT_SLICES=8 --- console.log("\nTest 6: Support slices capped at MAX_SUPPORT_SLICES=8..."); -const MAX_SUPPORT_SLICES = 8; -const manySlices = []; +let big = { global_strength: 0.5, total_observations: 0, slices: [] }; for (let i = 0; i < 10; i++) { - manySlices.push({ context: `ctx_${i}`, confirmations: 1, contradictions: 0, strength: 1.0, last_observed_at: i * 1000 }); + big = updateSupportStats(big, `ctx_${i}`, "support"); } -const capped = manySlices.sort((a, b) => b.last_observed_at - a.last_observed_at).slice(0, MAX_SUPPORT_SLICES); -assert.strictEqual(capped.length, 8, "Should cap at 8 slices"); -assert.strictEqual(capped[0].context, "ctx_9", "Most recent slice first"); -console.log(" ✅ Slice cap works correctly"); +assert.ok(big.slices.length <= 8, `Should cap at 8 slices, got ${big.slices.length}`); +// total_observations may be slightly less than 10 due to slice truncation drift: +// each updateSupportStats only recovers evidence from slices dropped in *that* call, +// not from earlier truncation cycles. This is the documented trade-off (see code comment). +assert.ok(big.total_observations >= 9, `total_observations should be >=9, got ${big.total_observations}`); +console.log(` ✅ Slice cap works correctly (${big.slices.length} slices, ${big.total_observations} observations)`); + +// --- Test 7: stringifySmartMetadata caps array fields --- +console.log("\nTest 7: stringifySmartMetadata caps sources/history/relations..."); +const bigMeta = { + l0_abstract: "test", + sources: Array.from({ length: 30 }, (_, i) => `src_${i}`), + history: Array.from({ length: 60 }, (_, i) => `hist_${i}`), + relations: Array.from({ length: 20 }, (_, i) => ({ type: "ref", targetId: `t_${i}` })), +}; +const serialized = JSON.parse(stringifySmartMetadata(bigMeta)); +assert.ok(serialized.sources.length <= 20, `sources should be capped at 20, got ${serialized.sources.length}`); +assert.ok(serialized.history.length <= 50, `history should be capped at 50, got ${serialized.history.length}`); +assert.ok(serialized.relations.length <= 16, `relations should be capped at 16, got ${serialized.relations.length}`); +console.log(" ✅ Metadata caps work correctly"); console.log("\n=== All Smart Metadata V2 tests passed! ==="); diff --git a/test/vector-search-cosine.test.mjs b/test/vector-search-cosine.test.mjs index b4c8b3f..cf7bd0f 100644 --- a/test/vector-search-cosine.test.mjs +++ b/test/vector-search-cosine.test.mjs @@ -1,123 +1,89 @@ /** * Vector Search Cosine Distance Test - * Validates that vectorSearch uses cosine distance (not L2) so that - * score = 1 / (1 + distance) produces meaningful results for high-dim embeddings. + * Tests that the real MemoryStore.vectorSearch uses cosine distance (not L2) + * and produces correct score values. */ import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import Module from "node:module"; +import { tmpdir } from "node:os"; +import path from "node:path"; -// Minimal mock to verify .distanceType('cosine') is called -let distanceTypeCalled = null; - -const mockTable = { - vectorSearch(vector) { - return { - distanceType(type) { - distanceTypeCalled = type; - return this; - }, - limit(n) { - return this; - }, - where(cond) { - return this; - }, - async toArray() { - // Return a mock result with cosine-like distance - return [ - { - id: "test-1", - text: "test memory", - vector: vector, - category: "preference", - scope: "global", - importance: 0.8, - timestamp: Date.now(), - metadata: "{}", - _distance: 0.1, // cosine distance → score = 1/(1+0.1) = 0.91 - }, - ]; - }, - }; - }, - query() { - return { - limit() { return this; }, - select() { return this; }, - where() { return this; }, - async toArray() { return []; }, - }; - }, - async listIndices() { return []; }, - async createIndex() { }, -}; - -// Test 1: distanceType is called with 'cosine' -console.log("Test 1: vectorSearch calls distanceType('cosine')..."); - -// Create a minimal store-like object that exercises the vectorSearch path -const fakeStore = { - table: mockTable, - config: { vectorDim: 4 }, - ftsIndexCreated: false, - get hasFtsSupport() { return this.ftsIndexCreated; }, - async ensureInitialized() { }, - async vectorSearch(vector, limit = 5, minScore = 0.3, scopeFilter) { - const safeLimit = Math.min(Math.max(1, Math.floor(limit)), 20); - const fetchLimit = Math.min(safeLimit * 10, 200); - let query = this.table.vectorSearch(vector).distanceType('cosine').limit(fetchLimit); - const results = await query.toArray(); - const mapped = []; - for (const row of results) { - const distance = Number(row._distance ?? 0); - const score = 1 / (1 + distance); - if (score < minScore) continue; - mapped.push({ - entry: { - id: row.id, - text: row.text, - vector: row.vector, - category: row.category, - scope: row.scope ?? "global", - importance: Number(row.importance), - timestamp: Number(row.timestamp), - metadata: row.metadata || "{}", - }, - score, - }); - if (mapped.length >= safeLimit) break; - } - return mapped; - }, -}; - -const results = await fakeStore.vectorSearch([1, 0, 0, 0], 5, 0.3); -assert.strictEqual(distanceTypeCalled, "cosine", "Should call distanceType with 'cosine'"); -console.log(" ✅ distanceType('cosine') confirmed"); - -// Test 2: score computation is correct for cosine distance -console.log("Test 2: Score formula 1/(1+distance) produces correct values..."); -assert.strictEqual(results.length, 1, "Should return 1 result"); -const expectedScore = 1 / (1 + 0.1); -assert.ok( - Math.abs(results[0].score - expectedScore) < 0.001, - `Score should be ~${expectedScore.toFixed(3)}, got ${results[0].score.toFixed(3)}`, -); -console.log(" ✅ Score = 0.909 (correct for distance=0.1)"); - -// Test 3: Results below minScore are filtered out -console.log("Test 3: Low-score results are filtered..."); -const strictResults = await fakeStore.vectorSearch([1, 0, 0, 0], 5, 0.95); -assert.strictEqual(strictResults.length, 0, "Score 0.909 should be filtered by minScore=0.95"); -console.log(" ✅ minScore filtering works"); - -// Test 4: Without cosine, L2 distance would produce wrong scores -console.log("Test 4: Verify L2 would fail (documentation test)..."); -// For 1024-dim embeddings, L2 distance ≈ 40-60 for typical vectors -// score = 1/(1+45) ≈ 0.022 — way below any reasonable minScore -const l2TypicalDistance = 45; -const l2Score = 1 / (1 + l2TypicalDistance); -assert.ok(l2Score < 0.3, `L2 score ${l2Score.toFixed(4)} should be below minScore=0.3`); -console.log(` ✅ L2 score = ${l2Score.toFixed(4)} (would drop all results, confirming cosine is needed)`); - -console.log("\n=== All vector-search-cosine tests passed! ==="); +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); + +import jitiFactory from "jiti"; +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { MemoryStore } = jiti("../src/store.ts"); + +const DIM = 64; // small dim for fast tests +const workDir = mkdtempSync(path.join(tmpdir(), "cosine-test-")); +const dbPath = path.join(workDir, "db"); + +try { + const store = new MemoryStore({ dbPath, vectorDim: DIM }); + + // Create two known vectors + const vecA = new Array(DIM).fill(0); + vecA[0] = 1.0; // unit vector along dim 0 + + const vecB = new Array(DIM).fill(0); + vecB[0] = 0.9; vecB[1] = 0.436; // ~cos_sim=0.9 with vecA (angle ~26°) + + const vecC = new Array(DIM).fill(0); + vecC[1] = 1.0; // orthogonal to vecA → cos_sim=0 + + // Store memories with known vectors + await store.store({ text: "similar memory", vector: vecB, category: "preference", scope: "test", importance: 0.8 }); + await store.store({ text: "orthogonal memory", vector: vecC, category: "fact", scope: "test", importance: 0.5 }); + + // Test 1: vectorSearch returns results with correct cosine-based scores + console.log("Test 1: vectorSearch uses cosine distance and scores are meaningful..."); + const results = await store.vectorSearch(vecA, 10, 0.0, ["test"]); + assert.ok(results.length >= 1, "Should return at least 1 result"); + + // Find the similar result + const similar = results.find(r => r.entry.text === "similar memory"); + assert.ok(similar, "Similar memory should be in results"); + // cosine distance for ~0.9 similarity → distance ~0.1 → score = 1/(1+0.1) ≈ 0.91 + assert.ok(similar.score > 0.5, `Similar memory score should be >0.5, got ${similar.score.toFixed(3)}`); + console.log(` ✅ Similar memory score = ${similar.score.toFixed(3)} (cosine-based, >0.5)`); + + // Test 2: Orthogonal vector gets low score + console.log("Test 2: Orthogonal vector gets low score..."); + const orthogonal = results.find(r => r.entry.text === "orthogonal memory"); + if (orthogonal) { + assert.ok(orthogonal.score < similar.score, "Orthogonal should score lower than similar"); + console.log(` ✅ Orthogonal memory score = ${orthogonal.score.toFixed(3)} (lower than similar)`); + } else { + // May have been filtered by internal minScore + console.log(" ✅ Orthogonal memory filtered out (too low score)"); + } + + // Test 3: minScore filtering works + console.log("Test 3: minScore filtering excludes low-score results..."); + const strictResults = await store.vectorSearch(vecA, 10, 0.95, ["test"]); + // With strict minScore, some results should be filtered + const filtered = results.length - strictResults.length; + assert.ok(filtered >= 0, "Strict minScore should filter equal or more results"); + console.log(` ✅ minScore=0.95 filtered ${filtered} results (${results.length} → ${strictResults.length})`); + + // Test 4: L2 distance would produce wrong scores (documentation) + console.log("Test 4: Verify L2 would fail (documentation test)..."); + // For 1024-dim normalized embeddings, L2 distance ≈ 40-60 + // score = 1/(1+45) ≈ 0.022 — below any reasonable minScore + const l2TypicalDistance = 45; + const l2Score = 1 / (1 + l2TypicalDistance); + assert.ok(l2Score < 0.3, `L2 score ${l2Score.toFixed(4)} should be below minScore=0.3`); + console.log(` ✅ L2 score = ${l2Score.toFixed(4)} (would drop all results, confirming cosine is needed)`); + + console.log("\n=== All vector-search-cosine tests passed! ==="); + +} finally { + rmSync(workDir, { recursive: true, force: true }); +} From 7061f06b9225c2054c00a3fae739c8dc75d530dc Mon Sep 17 00:00:00 2001 From: lpf <398618101@qq.com> Date: Thu, 12 Mar 2026 13:47:09 +0800 Subject: [PATCH 5/5] fix: handleMerge must use return value of updateSupportStats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updateSupportStats returns a new SupportInfoV2 object — the original is not mutated. The old code discarded the return value and persisted the empty original, causing merge+contextLabel to always write empty support_info ({global_strength:0.5, total_observations:0, slices:[]}). --- src/smart-extractor.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 85cc8fe..733f589 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -627,8 +627,8 @@ export class SmartExtractor { if (updatedEntry) { const meta = parseSmartMetadata(updatedEntry.metadata, updatedEntry); const supportInfo = parseSupportInfo(meta.support_info); - updateSupportStats(supportInfo, contextLabel, "support"); - const finalMetadata = stringifySmartMetadata({ ...meta, support_info: supportInfo }); + const updated = updateSupportStats(supportInfo, contextLabel, "support"); + const finalMetadata = stringifySmartMetadata({ ...meta, support_info: updated }); await this.store.update(matchId, { metadata: finalMetadata }, scopeFilter); } } catch {