diff --git a/cli.ts b/cli.ts index 946cfb1..0781f21 100644 --- a/cli.ts +++ b/cli.ts @@ -83,6 +83,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { limit, scopeFilter, category, + source: "cli", }); if (results.length === 0 && context.embedder) { @@ -92,6 +93,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { limit, scopeFilter, category, + source: "cli", }); } @@ -417,10 +419,10 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { const categoryRaw = memory.category; const category: MemoryEntry["category"] = categoryRaw === "preference" || - categoryRaw === "fact" || - categoryRaw === "decision" || - categoryRaw === "entity" || - categoryRaw === "other" + categoryRaw === "fact" || + categoryRaw === "decision" || + categoryRaw === "entity" || + categoryRaw === "other" ? categoryRaw : "other"; @@ -531,10 +533,10 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { let targetReal = context.store.dbPath; try { sourceReal = await fs.realpath(sourceDbPath); - } catch {} + } catch { } try { targetReal = await fs.realpath(context.store.dbPath); - } catch {} + } catch { } if (!force && sourceReal === targetReal) { console.error("Refusing to re-embed in-place: source-db equals target dbPath. Use a new dbPath or pass --force."); @@ -781,6 +783,27 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void { process.exit(1); } }); + + // reindex-fts: Rebuild FTS index + program + .command("reindex-fts") + .description("Rebuild the BM25 full-text search index") + .action(async () => { + try { + const status = context.store.getFtsStatus(); + console.log(`FTS status before: available=${status.available}, lastError=${status.lastError || "none"}`); + const result = await context.store.rebuildFtsIndex(); + if (result.success) { + console.log("✅ FTS index rebuilt successfully"); + } else { + console.error("❌ FTS rebuild failed:", result.error); + process.exit(1); + } + } catch (error) { + console.error("FTS rebuild error:", error); + process.exit(1); + } + }); } // ============================================================================ diff --git a/package.json b/package.json index 3d6582a..e05749e 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,7 @@ ] }, "scripts": { - "test": "node test/embedder-error-hints.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs", + "test": "node test/embedder-error-hints.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs", "test:openclaw-host": "node test/openclaw-host-functional.mjs" }, "devDependencies": { diff --git a/src/extraction-prompts.ts b/src/extraction-prompts.ts index b7ebd16..6fd6004 100644 --- a/src/extraction-prompts.ts +++ b/src/extraction-prompts.ts @@ -149,20 +149,26 @@ Please decide: - SKIP: Candidate memory duplicates existing memories, no need to save. Also SKIP if the candidate contains LESS information than an existing memory on the same topic (information degradation — e.g., candidate says "programming language preference" but existing memory already says "programming language preference: Python, TypeScript") - CREATE: This is completely new information not covered by any existing memory, should be created - MERGE: Candidate memory adds genuinely NEW details to an existing memory and should be merged +- SUPPORT: Candidate reinforces/confirms an existing memory in a specific context (e.g. "still prefers tea in the evening") +- CONTEXTUALIZE: Candidate adds a situational nuance to an existing memory (e.g. existing: "likes coffee", candidate: "prefers tea at night" — different context, same topic) +- CONTRADICT: Candidate directly contradicts an existing memory in a specific context (e.g. existing: "runs on weekends", candidate: "stopped running on weekends") IMPORTANT: -- "events" and "cases" categories are independent records — they do NOT support MERGE. For these categories, only use SKIP or CREATE. +- "events" and "cases" categories are independent records — they do NOT support MERGE/SUPPORT/CONTEXTUALIZE/CONTRADICT. For these categories, only use SKIP or CREATE. - If the candidate appears to be derived from a recall question (e.g., "Do you remember X?" / "你记得X吗?") and an existing memory already covers topic X with equal or more detail, you MUST choose SKIP. - A candidate with less information than an existing memory on the same topic should NEVER be CREATED or MERGED — always SKIP. +- For SUPPORT/CONTEXTUALIZE/CONTRADICT, you MUST provide a context_label from this vocabulary: general, morning, evening, night, weekday, weekend, work, leisure, summer, winter, travel. Return JSON format: { - "decision": "skip|create|merge", + "decision": "skip|create|merge|support|contextualize|contradict", "match_index": 1, - "reason": "Decision reason" + "reason": "Decision reason", + "context_label": "evening" } -If decision is "merge", set "match_index" to the number of the existing memory to merge with (1-based).`; +- If decision is "merge"/"support"/"contextualize"/"contradict", set "match_index" to the number of the existing memory (1-based). +- Only include "context_label" for support/contextualize/contradict decisions.`; } export function buildMergePrompt( @@ -176,32 +182,32 @@ export function buildMergePrompt( ): string { return `Merge the following memory into a single coherent record with all three levels. -**Category**: ${category} +** Category **: ${category} -**Existing Memory:** -Abstract: ${existingAbstract} -Overview: +** Existing Memory:** + Abstract: ${existingAbstract} + Overview: ${existingOverview} -Content: + Content: ${existingContent} -**New Information:** -Abstract: ${newAbstract} -Overview: +** New Information:** + Abstract: ${newAbstract} + Overview: ${newOverview} -Content: + Content: ${newContent} -Requirements: -- Remove duplicate information -- Keep the most up-to-date details -- Maintain a coherent narrative -- Keep code identifiers / URIs / model names unchanged when they are proper nouns + Requirements: + - Remove duplicate information + - Keep the most up - to - date details + - Maintain a coherent narrative + - Keep code identifiers / URIs / model names unchanged when they are proper nouns Return JSON: -{ - "abstract": "Merged one-line abstract", - "overview": "Merged structured Markdown overview", - "content": "Merged full content" -}`; + { + "abstract": "Merged one-line abstract", + "overview": "Merged structured Markdown overview", + "content": "Merged full content" + } `; } diff --git a/src/memory-categories.ts b/src/memory-categories.ts index 931e609..7361565 100644 --- a/src/memory-categories.ts +++ b/src/memory-categories.ts @@ -44,18 +44,20 @@ export type CandidateMemory = { }; /** Dedup decision from LLM. */ -export type DedupDecision = "create" | "merge" | "skip"; +export type DedupDecision = "create" | "merge" | "skip" | "support" | "contextualize" | "contradict"; export type DedupResult = { decision: DedupDecision; reason: string; matchId?: string; // ID of existing memory to merge with + contextLabel?: string; // Optional context label for support/contextualize/contradict }; export type ExtractionStats = { created: number; merged: number; skipped: number; + supported?: number; // context-aware support count }; /** Validate and normalize a category string. */ diff --git a/src/retriever.ts b/src/retriever.ts index 44f3f47..ecf4e31 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -80,8 +80,8 @@ export interface RetrievalContext { limit: number; scopeFilter?: string[]; category?: string; - /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated. */ - source?: "manual" | "auto-recall"; + /** Retrieval source: "manual" for user-triggered, "auto-recall" for system-initiated, "cli" for CLI commands. */ + source?: "manual" | "auto-recall" | "cli"; } export interface RetrievalResult extends MemorySearchResult { diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 379f668..85cc8fe 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -27,7 +27,7 @@ import { } from "./memory-categories.js"; import { isNoise } from "./noise-filter.js"; import type { NoisePrototypeBank } from "./noise-prototypes.js"; -import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata } from "./smart-metadata.js"; +import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata, parseSupportInfo, updateSupportStats } from "./smart-metadata.js"; // ============================================================================ // Constants @@ -36,7 +36,7 @@ import { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata } from " const SIMILARITY_THRESHOLD = 0.7; const MAX_SIMILAR_FOR_PROMPT = 3; const MAX_MEMORIES_PER_EXTRACTION = 5; -const VALID_DECISIONS = new Set(["create", "merge", "skip"]); +const VALID_DECISIONS = new Set(["create", "merge", "skip", "support", "contextualize", "contradict"]); // ============================================================================ // Smart Extractor @@ -341,6 +341,7 @@ export class SmartExtractor { dedupResult.matchId, scopeFilter, targetScope, + dedupResult.contextLabel, ); stats.merged++; } else { @@ -356,6 +357,36 @@ export class SmartExtractor { ); stats.skipped++; break; + + case "support": + if (dedupResult.matchId) { + await this.handleSupport(dedupResult.matchId, scopeFilter, { session: sessionKey, timestamp: Date.now() }, dedupResult.reason, dedupResult.contextLabel); + stats.supported = (stats.supported ?? 0) + 1; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; + + case "contextualize": + if (dedupResult.matchId) { + await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel); + stats.created++; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; + + case "contradict": + if (dedupResult.matchId) { + await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel); + stats.created++; + } else { + await this.storeCandidate(candidate, vector, sessionKey, targetScope); + stats.created++; + } + break; } } @@ -445,7 +476,8 @@ export class SmartExtractor { return { decision, reason: data.reason ?? "", - matchId: decision === "merge" ? matchEntry?.entry.id : undefined, + matchId: ["merge", "support", "contextualize", "contradict"].includes(decision) ? matchEntry?.entry.id : undefined, + contextLabel: typeof (data as any).context_label === "string" ? (data as any).context_label : undefined, }; } catch (err) { this.log( @@ -509,6 +541,7 @@ export class SmartExtractor { matchId: string, scopeFilter: string[], targetScope: string, + contextLabel?: string, ): Promise { let existingAbstract = ""; let existingOverview = ""; @@ -588,8 +621,154 @@ export class SmartExtractor { scopeFilter, ); + // Update support stats on the merged memory + try { + const updatedEntry = await this.store.getById(matchId, scopeFilter); + if (updatedEntry) { + const meta = parseSmartMetadata(updatedEntry.metadata, updatedEntry); + const supportInfo = parseSupportInfo(meta.support_info); + updateSupportStats(supportInfo, contextLabel, "support"); + const finalMetadata = stringifySmartMetadata({ ...meta, support_info: supportInfo }); + await this.store.update(matchId, { metadata: finalMetadata }, scopeFilter); + } + } catch { + // Non-critical: merge succeeded, support stats update is best-effort + } + + this.log( + `memory-pro: smart-extractor: merged [${candidate.category}]${contextLabel ? ` [${contextLabel}]` : ""} into ${matchId.slice(0, 8)}`, + ); + } + + // -------------------------------------------------------------------------- + // Context-Aware Handlers (support / contextualize / contradict) + // -------------------------------------------------------------------------- + + /** + * Handle SUPPORT: update support stats on existing memory for a specific context. + */ + private async handleSupport( + matchId: string, + scopeFilter: string[], + source: { session: string; timestamp: number }, + reason: string, + contextLabel?: string, + ): Promise { + const existing = await this.store.getById(matchId, scopeFilter); + if (!existing) return; + + const meta = parseSmartMetadata(existing.metadata, existing); + const supportInfo = parseSupportInfo(meta.support_info); + const updated = updateSupportStats(supportInfo, contextLabel, "support"); + meta.support_info = updated; + + await this.store.update( + matchId, + { metadata: stringifySmartMetadata(meta) }, + scopeFilter, + ); + + this.log( + `memory-pro: smart-extractor: support [${contextLabel || "general"}] on ${matchId.slice(0, 8)} — ${reason}`, + ); + } + + /** + * Handle CONTEXTUALIZE: create a new entry that adds situational nuance, + * linked to the original via a relation in metadata. + */ + private async handleContextualize( + candidate: CandidateMemory, + vector: number[], + matchId: string, + sessionKey: string, + targetScope: string, + scopeFilter: string[], + contextLabel?: string, + ): Promise { + const storeCategory = this.mapToStoreCategory(candidate.category); + const metadata = stringifySmartMetadata({ + l0_abstract: candidate.abstract, + l1_overview: candidate.overview, + l2_content: candidate.content, + memory_category: candidate.category, + tier: "working" as const, + access_count: 0, + confidence: 0.7, + last_accessed_at: Date.now(), + source_session: sessionKey, + contexts: contextLabel ? [contextLabel] : [], + relations: [{ type: "contextualizes", targetId: matchId }], + }); + + await this.store.store({ + text: candidate.abstract, + vector, + category: storeCategory, + scope: targetScope, + importance: this.getDefaultImportance(candidate.category), + metadata, + }); + + this.log( + `memory-pro: smart-extractor: contextualize [${contextLabel || "general"}] new entry linked to ${matchId.slice(0, 8)}`, + ); + } + + /** + * Handle CONTRADICT: create contradicting entry + record contradiction evidence + * on the original memory's support stats. + */ + private async handleContradict( + candidate: CandidateMemory, + vector: number[], + matchId: string, + sessionKey: string, + targetScope: string, + scopeFilter: string[], + contextLabel?: string, + ): Promise { + // 1. Record contradiction on the existing memory + const existing = await this.store.getById(matchId, scopeFilter); + if (existing) { + const meta = parseSmartMetadata(existing.metadata, existing); + const supportInfo = parseSupportInfo(meta.support_info); + const updated = updateSupportStats(supportInfo, contextLabel, "contradict"); + meta.support_info = updated; + await this.store.update( + matchId, + { metadata: stringifySmartMetadata(meta) }, + scopeFilter, + ); + } + + // 2. Store the contradicting entry as a new memory + const storeCategory = this.mapToStoreCategory(candidate.category); + const metadata = stringifySmartMetadata({ + l0_abstract: candidate.abstract, + l1_overview: candidate.overview, + l2_content: candidate.content, + memory_category: candidate.category, + tier: "working" as const, + access_count: 0, + confidence: 0.7, + last_accessed_at: Date.now(), + source_session: sessionKey, + contexts: contextLabel ? [contextLabel] : [], + relations: [{ type: "contradicts", targetId: matchId }], + }); + + await this.store.store({ + text: candidate.abstract, + vector, + category: storeCategory, + scope: targetScope, + importance: this.getDefaultImportance(candidate.category), + metadata, + }); + this.log( - `memory-pro: smart-extractor: merged [${candidate.category}] into ${matchId.slice(0, 8)}`, + `memory-pro: smart-extractor: contradict [${contextLabel || "general"}] on ${matchId.slice(0, 8)}, new entry created`, ); } diff --git a/src/smart-metadata.ts b/src/smart-metadata.ts index 90fb984..f62d874 100644 --- a/src/smart-metadata.ts +++ b/src/smart-metadata.ts @@ -6,7 +6,8 @@ type LegacyStoreCategory = | "fact" | "decision" | "entity" - | "other"; + | "other" + | "reflection"; type EntryLike = { text?: string; @@ -170,10 +171,28 @@ export function buildSmartMetadata( }; } +// Metadata array size caps — prevent unbounded JSON growth +const MAX_SOURCES = 20; +const MAX_HISTORY = 50; +const MAX_RELATIONS = 16; + export function stringifySmartMetadata( metadata: SmartMemoryMetadata | Record, ): string { - return JSON.stringify(metadata); + const capped = { ...metadata } as Record; + + // Cap array fields to prevent metadata bloat + if (Array.isArray(capped.sources) && capped.sources.length > MAX_SOURCES) { + capped.sources = capped.sources.slice(-MAX_SOURCES); // keep most recent + } + if (Array.isArray(capped.history) && capped.history.length > MAX_HISTORY) { + capped.history = capped.history.slice(-MAX_HISTORY); + } + if (Array.isArray(capped.relations) && capped.relations.length > MAX_RELATIONS) { + capped.relations = capped.relations.slice(0, MAX_RELATIONS); + } + + return JSON.stringify(capped); } export function toLifecycleMemory( @@ -228,3 +247,169 @@ export function getDecayableFromEntry( return { memory, meta }; } + +// ============================================================================ +// Contextual Support — optional extension to SmartMemoryMetadata +// ============================================================================ + +/** Predefined context vocabulary for support slices */ +export const SUPPORT_CONTEXT_VOCABULARY = [ + "general", "morning", "afternoon", "evening", "night", + "weekday", "weekend", "work", "leisure", + "summer", "winter", "travel", +] as const; + +export type SupportContext = (typeof SUPPORT_CONTEXT_VOCABULARY)[number] | string; + +/** Max number of context slices per memory to prevent metadata bloat */ +export const MAX_SUPPORT_SLICES = 8; + +/** A single context-specific support slice */ +export interface ContextualSupport { + context: SupportContext; + confirmations: number; + contradictions: number; + strength: number; // confirmations / (confirmations + contradictions) + last_observed_at: number; +} + +/** V2 support info with per-context slices */ +export interface SupportInfoV2 { + global_strength: number; // weighted average across all slices + total_observations: number; // sum of all confirmations + contradictions + slices: ContextualSupport[]; +} + +/** + * Normalize a raw context label to a canonical context. + * Maps common variants (e.g. "晚上" → "evening") and falls back to "general". + */ +export function normalizeContext(raw: string | undefined): SupportContext { + if (!raw || !raw.trim()) return "general"; + const lower = raw.trim().toLowerCase(); + + // Direct vocabulary match + if ((SUPPORT_CONTEXT_VOCABULARY as readonly string[]).includes(lower)) { + return lower as SupportContext; + } + + // Common Chinese/English mappings + const aliases: Record = { + "早上": "morning", "上午": "morning", "早晨": "morning", + "下午": "afternoon", "傍晚": "evening", "晚上": "evening", + "深夜": "night", "夜晚": "night", "凌晨": "night", + "工作日": "weekday", "平时": "weekday", + "周末": "weekend", "假日": "weekend", "休息日": "weekend", + "工作": "work", "上班": "work", "办公": "work", + "休闲": "leisure", "放松": "leisure", "休息": "leisure", + "夏天": "summer", "夏季": "summer", + "冬天": "winter", "冬季": "winter", + "旅行": "travel", "出差": "travel", "旅游": "travel", + }; + + return aliases[lower] || lower; // keep as custom context if not mapped +} + +/** + * Parse support_info from metadata JSON. Handles V1 (flat) → V2 (sliced) migration. + */ +export function parseSupportInfo(raw: unknown): SupportInfoV2 { + const defaultV2: SupportInfoV2 = { + global_strength: 0.5, + total_observations: 0, + slices: [], + }; + + if (!raw || typeof raw !== "object") return defaultV2; + const obj = raw as Record; + + // V2 format: has slices array + if (Array.isArray(obj.slices)) { + return { + global_strength: typeof obj.global_strength === "number" ? obj.global_strength : 0.5, + total_observations: typeof obj.total_observations === "number" ? obj.total_observations : 0, + slices: (obj.slices as Record[]).filter( + s => s && typeof s.context === "string", + ).map(s => ({ + context: String(s.context), + confirmations: typeof s.confirmations === "number" && s.confirmations >= 0 ? s.confirmations : 0, + contradictions: typeof s.contradictions === "number" && s.contradictions >= 0 ? s.contradictions : 0, + strength: typeof s.strength === "number" && s.strength >= 0 && s.strength <= 1 ? s.strength : 0.5, + last_observed_at: typeof s.last_observed_at === "number" ? s.last_observed_at : Date.now(), + })), + }; + } + + // V1 format: flat { confirmations, contradictions, strength } + const conf = typeof obj.confirmations === "number" ? obj.confirmations : 0; + const contra = typeof obj.contradictions === "number" ? obj.contradictions : 0; + const total = conf + contra; + if (total === 0) return defaultV2; + + return { + global_strength: total > 0 ? conf / total : 0.5, + total_observations: total, + slices: [{ + context: "general", + confirmations: conf, + contradictions: contra, + strength: total > 0 ? conf / total : 0.5, + last_observed_at: Date.now(), + }], + }; +} + +/** + * Update support stats for a specific context. + * Returns a new SupportInfoV2 with the updated slice. + */ +export function updateSupportStats( + existing: SupportInfoV2, + contextLabel: string | undefined, + event: "support" | "contradict", +): SupportInfoV2 { + const ctx = normalizeContext(contextLabel); + const base = { ...existing, slices: [...existing.slices.map(s => ({ ...s }))] }; + + // Find or create the context slice + let slice = base.slices.find(s => s.context === ctx); + if (!slice) { + slice = { context: ctx, confirmations: 0, contradictions: 0, strength: 0.5, last_observed_at: Date.now() }; + base.slices.push(slice); + } + + // Update slice + if (event === "support") slice.confirmations++; + else slice.contradictions++; + const sliceTotal = slice.confirmations + slice.contradictions; + slice.strength = sliceTotal > 0 ? slice.confirmations / sliceTotal : 0.5; + slice.last_observed_at = Date.now(); + + // Cap slices (keep most recently observed, but preserve dropped evidence). + // NOTE: Evidence from slices dropped in *previous* updates is already baked + // into total_observations/global_strength, so those values may drift slightly + // over many truncation cycles. This is an accepted trade-off for bounded JSON size. + let slices = base.slices; + let droppedConf = 0, droppedContra = 0; + if (slices.length > MAX_SUPPORT_SLICES) { + slices = slices + .sort((a, b) => b.last_observed_at - a.last_observed_at); + const dropped = slices.slice(MAX_SUPPORT_SLICES); + for (const d of dropped) { + droppedConf += d.confirmations; + droppedContra += d.contradictions; + } + slices = slices.slice(0, MAX_SUPPORT_SLICES); + } + + // Recompute global strength including evidence from dropped slices + let totalConf = droppedConf, totalContra = droppedContra; + for (const s of slices) { + totalConf += s.confirmations; + totalContra += s.contradictions; + } + const totalObs = totalConf + totalContra; + const global_strength = totalObs > 0 ? totalConf / totalObs : 0.5; + + return { global_strength, total_observations: totalObs, slices }; +} diff --git a/src/store.ts b/src/store.ts index 764fa55..2745a5c 100644 --- a/src/store.ts +++ b/src/store.ts @@ -121,8 +121,8 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `dbPath "${dbPath}" is a symlink whose target does not exist.\n` + - ` Fix: Create the target directory, or update the symlink to point to a valid path.\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Create the target directory, or update the symlink to point to a valid path.\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } } @@ -147,9 +147,9 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `Failed to create dbPath directory "${resolvedPath}".\n` + - ` Fix: Ensure the parent directory "${dirname(resolvedPath)}" exists and is writable,\n` + - ` or create it manually: mkdir -p "${resolvedPath}"\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Ensure the parent directory "${dirname(resolvedPath)}" exists and is writable,\n` + + ` or create it manually: mkdir -p "${resolvedPath}"\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } } @@ -160,9 +160,9 @@ export function validateStoragePath(dbPath: string): string { } catch (err: any) { throw new Error( `dbPath directory "${resolvedPath}" is not writable.\n` + - ` Fix: Check permissions with: ls -la "${dirname(resolvedPath)}"\n` + - ` Or grant write access: chmod u+w "${resolvedPath}"\n` + - ` Details: ${err.code || ""} ${err.message}`, + ` Fix: Check permissions with: ls -la "${dirname(resolvedPath)}"\n` + + ` Or grant write access: chmod u+w "${resolvedPath}"\n` + + ` Details: ${err.code || ""} ${err.message}`, ); } @@ -182,7 +182,7 @@ export class MemoryStore { private ftsIndexCreated = false; private updateQueue: Promise = Promise.resolve(); - constructor(private readonly config: StoreConfig) {} + constructor(private readonly config: StoreConfig) { } get dbPath(): string { return this.config.dbPath; @@ -214,7 +214,7 @@ export class MemoryStore { const message = err.message || String(err); throw new Error( `Failed to open LanceDB at "${this.config.dbPath}": ${code} ${message}\n` + - ` Fix: Verify the path exists and is writable. Check parent directory permissions.`, + ` Fix: Verify the path exists and is writable. Check parent directory permissions.`, ); } @@ -421,7 +421,7 @@ export class MemoryStore { const safeLimit = clampInt(limit, 1, 20); const fetchLimit = Math.min(safeLimit * 10, 200); // Over-fetch for scope filtering - let query = this.table!.vectorSearch(vector).limit(fetchLimit); + let query = this.table!.vectorSearch(vector).distanceType('cosine').limit(fetchLimit); // Apply scope filter if provided if (scopeFilter && scopeFilter.length > 0) { @@ -952,4 +952,47 @@ export class MemoryStore { get hasFtsSupport(): boolean { return this.ftsIndexCreated; } + + /** Last FTS error for diagnostics */ + private _lastFtsError: string | null = null; + + get lastFtsError(): string | null { + return this._lastFtsError; + } + + /** Get FTS index health status */ + getFtsStatus(): { available: boolean; lastError: string | null } { + return { + available: this.ftsIndexCreated, + lastError: this._lastFtsError, + }; + } + + /** Rebuild FTS index (drops and recreates). Useful for recovery after corruption. */ + async rebuildFtsIndex(): Promise<{ success: boolean; error?: string }> { + await this.ensureInitialized(); + try { + // Drop existing FTS index if any + const indices = await this.table!.listIndices(); + for (const idx of indices) { + if (idx.indexType === "FTS" || idx.columns?.includes("text")) { + try { + await this.table!.dropIndex((idx as any).name || "text"); + } catch (err) { + console.warn(`memory-lancedb-pro: dropIndex(${(idx as any).name || "text"}) failed:`, err); + } + } + } + // Recreate + await this.createFtsIndex(this.table!); + this.ftsIndexCreated = true; + this._lastFtsError = null; + return { success: true }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + this._lastFtsError = msg; + this.ftsIndexCreated = false; + return { success: false, error: msg }; + } + } } diff --git a/test/context-support-e2e.mjs b/test/context-support-e2e.mjs new file mode 100644 index 0000000..d18c374 --- /dev/null +++ b/test/context-support-e2e.mjs @@ -0,0 +1,241 @@ +/** + * Context-Aware Support E2E Test + * + * Tests the full pipeline for support/contextualize/contradict decisions + * using mock LLM and embedding servers against a real LanceDB store. + */ + +import assert from "node:assert/strict"; +import http from "node:http"; +import { mkdtempSync, rmSync } from "node:fs"; +import Module from "node:module"; +import { tmpdir } from "node:os"; +import path from "node:path"; + +import jitiFactory from "jiti"; + +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { MemoryStore } = jiti("../src/store.ts"); +const { createEmbedder } = jiti("../src/embedder.ts"); +const { SmartExtractor } = jiti("../src/smart-extractor.ts"); +const { createLlmClient } = jiti("../src/llm-client.ts"); +const { buildSmartMetadata, stringifySmartMetadata, parseSupportInfo } = jiti("../src/smart-metadata.ts"); + +const EMBEDDING_DIMENSIONS = 2560; + +// ============================================================================ +// Mock Embedding Server (constant vectors — fine for unit-level E2E) +// ============================================================================ + +function createEmbeddingServer() { + return http.createServer(async (req, res) => { + if (req.method !== "POST" || req.url !== "/v1/embeddings") { + res.writeHead(404); res.end(); return; + } + const chunks = []; + for await (const chunk of req) chunks.push(chunk); + const payload = JSON.parse(Buffer.concat(chunks).toString("utf8")); + const inputs = Array.isArray(payload.input) ? payload.input : [payload.input]; + const value = 1 / Math.sqrt(EMBEDDING_DIMENSIONS); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ + object: "list", + data: inputs.map((_, index) => ({ + object: "embedding", index, + embedding: new Array(EMBEDDING_DIMENSIONS).fill(value), + })), + model: "mock", usage: { prompt_tokens: 0, total_tokens: 0 }, + })); + }); +} + +// ============================================================================ +// Test Runner +// ============================================================================ + +async function runTest() { + const workDir = mkdtempSync(path.join(tmpdir(), "ctx-support-e2e-")); + const dbPath = path.join(workDir, "db"); + const logs = []; + let dedupDecision = "support"; // controlled per scenario + let dedupContextLabel = "evening"; + + const embeddingServer = createEmbeddingServer(); + + // Mock LLM: extraction returns 1 memory, dedup returns controlled decision + const llmServer = http.createServer(async (req, res) => { + if (req.method !== "POST" || req.url !== "/chat/completions") { + res.writeHead(404); res.end(); return; + } + const chunks = []; + for await (const chunk of req) chunks.push(chunk); + const payload = JSON.parse(Buffer.concat(chunks).toString("utf8")); + const prompt = payload.messages?.[1]?.content || ""; + let content; + + if (prompt.includes("Analyze the following session context")) { + content = JSON.stringify({ + memories: [{ + category: "preferences", + abstract: "饮品偏好:乌龙茶", + overview: "## Preference\n- 喜欢乌龙茶", + content: "用户喜欢乌龙茶。", + }], + }); + } else if (prompt.includes("Determine how to handle this candidate memory")) { + content = JSON.stringify({ + decision: dedupDecision, + match_index: 1, + reason: `test ${dedupDecision}`, + context_label: dedupContextLabel, + }); + } else { + content = JSON.stringify({ memories: [] }); + } + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ + id: "test", object: "chat.completion", + created: Math.floor(Date.now() / 1000), model: "mock", + choices: [{ index: 0, message: { role: "assistant", content }, finish_reason: "stop" }], + })); + }); + + await new Promise(r => embeddingServer.listen(0, "127.0.0.1", r)); + await new Promise(r => llmServer.listen(0, "127.0.0.1", r)); + const embPort = embeddingServer.address().port; + const llmPort = llmServer.address().port; + process.env.TEST_EMBEDDING_BASE_URL = `http://127.0.0.1:${embPort}/v1`; + + try { + const store = new MemoryStore({ dbPath, vectorDim: EMBEDDING_DIMENSIONS }); + const embedder = createEmbedder({ + provider: "openai-compatible", apiKey: "dummy", model: "mock", + baseURL: `http://127.0.0.1:${embPort}/v1`, dimensions: EMBEDDING_DIMENSIONS, + }); + const llm = createLlmClient({ + apiKey: "dummy", model: "mock", + baseURL: `http://127.0.0.1:${llmPort}`, + timeoutMs: 10000, + log: (msg) => logs.push(msg), + }); + + // Seed a preference memory + const seedText = "饮品偏好:乌龙茶"; + const seedVector = await embedder.embedPassage(seedText); + await store.store({ + text: seedText, vector: seedVector, category: "preference", + scope: "test", importance: 0.8, + metadata: stringifySmartMetadata( + buildSmartMetadata({ text: seedText, category: "preference", importance: 0.8 }, { + l0_abstract: seedText, + l1_overview: "## Preference\n- 喜欢乌龙茶", + l2_content: "用户喜欢乌龙茶。", + memory_category: "preferences", tier: "working", confidence: 0.8, + }), + ), + }); + + const extractor = new SmartExtractor(store, embedder, llm, { + user: "User", extractMinMessages: 1, extractMaxChars: 8000, + defaultScope: "test", + log: (msg) => logs.push(msg), + }); + + // ---------------------------------------------------------------- + // Scenario 1: support — should update support_info, no new entry + // ---------------------------------------------------------------- + console.log("Test 1: support decision updates support_info..."); + dedupDecision = "support"; + dedupContextLabel = "evening"; + logs.length = 0; + + const stats1 = await extractor.extractAndPersist( + "用户再次确认喜欢乌龙茶,特别是晚上。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries1 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries1.length, 1, "support should NOT create new entry"); + assert.equal(stats1.supported, 1, "supported count should be 1"); + + // Check support_info was updated + const meta1 = JSON.parse(entries1[0].metadata || "{}"); + const si1 = parseSupportInfo(meta1.support_info); + assert.ok(si1.total_observations >= 1, "total_observations should increase"); + const eveningSlice = si1.slices.find(s => s.context === "evening"); + assert.ok(eveningSlice, "evening slice should exist"); + assert.equal(eveningSlice.confirmations, 1, "evening confirmations should be 1"); + console.log(" ✅ support decision works correctly"); + + // ---------------------------------------------------------------- + // Scenario 2: contextualize — should create linked entry + // ---------------------------------------------------------------- + console.log("Test 2: contextualize decision creates linked entry..."); + dedupDecision = "contextualize"; + dedupContextLabel = "night"; + logs.length = 0; + + const stats2 = await extractor.extractAndPersist( + "用户说晚上改喝花茶。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries2 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries2.length, 2, "contextualize should create 1 new entry"); + assert.equal(stats2.created, 1, "created count should be 1"); + console.log(" ✅ contextualize decision works correctly"); + + // ---------------------------------------------------------------- + // Scenario 3: contradict — should record contradiction + new entry + // ---------------------------------------------------------------- + console.log("Test 3: contradict decision records contradiction..."); + dedupDecision = "contradict"; + dedupContextLabel = "weekend"; + logs.length = 0; + + const stats3 = await extractor.extractAndPersist( + "用户说周末不喝茶了。", + "test-session", + { scope: "test", scopeFilter: ["test"] }, + ); + + const entries3 = await store.list(["test"], undefined, 10, 0); + assert.equal(entries3.length, 3, "contradict should create 1 new entry"); + assert.equal(stats3.created, 1, "created count should be 1"); + + // Check contradictions recorded on some existing entry + // (with constant vectors, dedup may match any existing entry) + let foundWeekend = false; + for (const entry of entries3) { + const meta = JSON.parse(entry.metadata || "{}"); + const si = parseSupportInfo(meta.support_info); + const weekendSlice = si.slices.find(s => s.context === "weekend"); + if (weekendSlice && weekendSlice.contradictions >= 1) { + foundWeekend = true; + break; + } + } + assert.ok(foundWeekend, "at least one entry should have weekend contradiction"); + console.log(" ✅ contradict decision works correctly"); + + console.log("\n=== All Context-Support E2E tests passed! ==="); + + } finally { + delete process.env.TEST_EMBEDDING_BASE_URL; + await new Promise(r => embeddingServer.close(r)); + await new Promise(r => llmServer.close(r)); + rmSync(workDir, { recursive: true, force: true }); + } +} + +await runTest(); diff --git a/test/smart-metadata-v2.mjs b/test/smart-metadata-v2.mjs new file mode 100644 index 0000000..72baea5 --- /dev/null +++ b/test/smart-metadata-v2.mjs @@ -0,0 +1,121 @@ +/** + * Smart Metadata V2 Test — SupportInfo / ContextualSupport + * Tests the contextual support extension to OpenViking's SmartMemoryMetadata. + * Imports production code via jiti (same pattern as other tests in this repo). + */ + +import assert from "node:assert/strict"; +import Module from "node:module"; + +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); + +import jitiFactory from "jiti"; +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { + normalizeContext, + parseSupportInfo, + updateSupportStats, + SUPPORT_CONTEXT_VOCABULARY, + stringifySmartMetadata, +} = jiti("../src/smart-metadata.ts"); + +// --- Test 1: normalizeContext maps Chinese aliases --- +console.log("Test 1: normalizeContext maps Chinese aliases..."); +const testCases = [ + ["晚上", "evening"], ["早上", "morning"], ["周末", "weekend"], + ["工作", "work"], ["旅行", "travel"], ["冬天", "winter"], + ["evening", "evening"], ["morning", "morning"], + ["下午", "afternoon"], // Fix #4: previously mapped to evening + ["", "general"], [undefined, "general"], +]; +for (const [input, expected] of testCases) { + const result = normalizeContext(input); + assert.strictEqual(result, expected, `normalizeContext("${input}") should be "${expected}", got "${result}"`); +} +console.log(" ✅ Chinese alias mapping works correctly"); + +// --- Test 2: parseSupportInfo handles V1 flat format --- +console.log("\nTest 2: parseSupportInfo handles V1 flat format..."); +const v2FromV1 = parseSupportInfo({ confirmations: 3, contradictions: 1 }); +assert.strictEqual(v2FromV1.global_strength, 0.75, "V1 {3 conf, 1 contra} → strength 0.75"); +assert.strictEqual(v2FromV1.total_observations, 4); +assert.strictEqual(v2FromV1.slices.length, 1); +assert.strictEqual(v2FromV1.slices[0].context, "general"); +assert.strictEqual(v2FromV1.slices[0].confirmations, 3); +assert.strictEqual(v2FromV1.slices[0].contradictions, 1); +console.log(" ✅ V1 → V2 migration preserves data"); + +// --- Test 3: parseSupportInfo handles V2 sliced format with field validation --- +console.log("\nTest 3: parseSupportInfo validates V2 slice fields..."); +const v2WithBadFields = parseSupportInfo({ + global_strength: 0.8, + total_observations: 5, + slices: [ + { context: "morning", confirmations: 3, contradictions: 0, strength: 1.0, last_observed_at: 1000 }, + { context: "evening", confirmations: -1, contradictions: "bad", strength: 2.0, last_observed_at: null }, + { context: 123 }, // invalid — should be filtered out + ], +}); +assert.strictEqual(v2WithBadFields.slices.length, 2, "Invalid slice (context=123) should be filtered"); +assert.strictEqual(v2WithBadFields.slices[1].confirmations, 0, "Negative confirmations should be clamped to 0"); +assert.strictEqual(v2WithBadFields.slices[1].contradictions, 0, "Non-number contradictions should default to 0"); +assert.strictEqual(v2WithBadFields.slices[1].strength, 0.5, "Out-of-range strength should default to 0.5"); +console.log(" ✅ V2 field validation works correctly"); + +// --- Test 4: updateSupportStats adds new context slice --- +console.log("\nTest 4: updateSupportStats adds new context slice..."); +const existing = parseSupportInfo({ + global_strength: 0.75, total_observations: 4, + slices: [{ context: "general", confirmations: 3, contradictions: 1, strength: 0.75, last_observed_at: 1000 }], +}); +const updated = updateSupportStats(existing, "evening", "support"); +assert.strictEqual(updated.slices.length, 2, "Should have 2 slices (general + evening)"); +assert.strictEqual(updated.total_observations, 5, "Total observations should be 5"); +assert.strictEqual(updated.global_strength, 4 / 5, "Global strength = 4/5 = 0.8"); +const eveningSlice = updated.slices.find(s => s.context === "evening"); +assert.ok(eveningSlice, "Evening slice should exist"); +assert.strictEqual(eveningSlice.confirmations, 1); +assert.strictEqual(eveningSlice.strength, 1.0, "1 confirm, 0 contra = 1.0"); +console.log(" ✅ New context slice added correctly"); + +// --- Test 5: updateSupportStats handles contradict event --- +console.log("\nTest 5: updateSupportStats handles contradict event..."); +const contradicted = updateSupportStats(updated, "evening", "contradict"); +const eveningAfter = contradicted.slices.find(s => s.context === "evening"); +assert.strictEqual(eveningAfter.contradictions, 1); +assert.strictEqual(eveningAfter.strength, 0.5, "1 conf + 1 contra = 0.5"); +console.log(" ✅ Contradict event recorded correctly"); + +// --- Test 6: Support slices capped at MAX_SUPPORT_SLICES=8 --- +console.log("\nTest 6: Support slices capped at MAX_SUPPORT_SLICES=8..."); +let big = { global_strength: 0.5, total_observations: 0, slices: [] }; +for (let i = 0; i < 10; i++) { + big = updateSupportStats(big, `ctx_${i}`, "support"); +} +assert.ok(big.slices.length <= 8, `Should cap at 8 slices, got ${big.slices.length}`); +// total_observations may be slightly less than 10 due to slice truncation drift: +// each updateSupportStats only recovers evidence from slices dropped in *that* call, +// not from earlier truncation cycles. This is the documented trade-off (see code comment). +assert.ok(big.total_observations >= 9, `total_observations should be >=9, got ${big.total_observations}`); +console.log(` ✅ Slice cap works correctly (${big.slices.length} slices, ${big.total_observations} observations)`); + +// --- Test 7: stringifySmartMetadata caps array fields --- +console.log("\nTest 7: stringifySmartMetadata caps sources/history/relations..."); +const bigMeta = { + l0_abstract: "test", + sources: Array.from({ length: 30 }, (_, i) => `src_${i}`), + history: Array.from({ length: 60 }, (_, i) => `hist_${i}`), + relations: Array.from({ length: 20 }, (_, i) => ({ type: "ref", targetId: `t_${i}` })), +}; +const serialized = JSON.parse(stringifySmartMetadata(bigMeta)); +assert.ok(serialized.sources.length <= 20, `sources should be capped at 20, got ${serialized.sources.length}`); +assert.ok(serialized.history.length <= 50, `history should be capped at 50, got ${serialized.history.length}`); +assert.ok(serialized.relations.length <= 16, `relations should be capped at 16, got ${serialized.relations.length}`); +console.log(" ✅ Metadata caps work correctly"); + +console.log("\n=== All Smart Metadata V2 tests passed! ==="); diff --git a/test/vector-search-cosine.test.mjs b/test/vector-search-cosine.test.mjs new file mode 100644 index 0000000..cf7bd0f --- /dev/null +++ b/test/vector-search-cosine.test.mjs @@ -0,0 +1,89 @@ +/** + * Vector Search Cosine Distance Test + * Tests that the real MemoryStore.vectorSearch uses cosine distance (not L2) + * and produces correct score values. + */ + +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import Module from "node:module"; +import { tmpdir } from "node:os"; +import path from "node:path"; + +process.env.NODE_PATH = [ + process.env.NODE_PATH, + "/opt/homebrew/lib/node_modules/openclaw/node_modules", + "/opt/homebrew/lib/node_modules", +].filter(Boolean).join(":"); +Module._initPaths(); + +import jitiFactory from "jiti"; +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { MemoryStore } = jiti("../src/store.ts"); + +const DIM = 64; // small dim for fast tests +const workDir = mkdtempSync(path.join(tmpdir(), "cosine-test-")); +const dbPath = path.join(workDir, "db"); + +try { + const store = new MemoryStore({ dbPath, vectorDim: DIM }); + + // Create two known vectors + const vecA = new Array(DIM).fill(0); + vecA[0] = 1.0; // unit vector along dim 0 + + const vecB = new Array(DIM).fill(0); + vecB[0] = 0.9; vecB[1] = 0.436; // ~cos_sim=0.9 with vecA (angle ~26°) + + const vecC = new Array(DIM).fill(0); + vecC[1] = 1.0; // orthogonal to vecA → cos_sim=0 + + // Store memories with known vectors + await store.store({ text: "similar memory", vector: vecB, category: "preference", scope: "test", importance: 0.8 }); + await store.store({ text: "orthogonal memory", vector: vecC, category: "fact", scope: "test", importance: 0.5 }); + + // Test 1: vectorSearch returns results with correct cosine-based scores + console.log("Test 1: vectorSearch uses cosine distance and scores are meaningful..."); + const results = await store.vectorSearch(vecA, 10, 0.0, ["test"]); + assert.ok(results.length >= 1, "Should return at least 1 result"); + + // Find the similar result + const similar = results.find(r => r.entry.text === "similar memory"); + assert.ok(similar, "Similar memory should be in results"); + // cosine distance for ~0.9 similarity → distance ~0.1 → score = 1/(1+0.1) ≈ 0.91 + assert.ok(similar.score > 0.5, `Similar memory score should be >0.5, got ${similar.score.toFixed(3)}`); + console.log(` ✅ Similar memory score = ${similar.score.toFixed(3)} (cosine-based, >0.5)`); + + // Test 2: Orthogonal vector gets low score + console.log("Test 2: Orthogonal vector gets low score..."); + const orthogonal = results.find(r => r.entry.text === "orthogonal memory"); + if (orthogonal) { + assert.ok(orthogonal.score < similar.score, "Orthogonal should score lower than similar"); + console.log(` ✅ Orthogonal memory score = ${orthogonal.score.toFixed(3)} (lower than similar)`); + } else { + // May have been filtered by internal minScore + console.log(" ✅ Orthogonal memory filtered out (too low score)"); + } + + // Test 3: minScore filtering works + console.log("Test 3: minScore filtering excludes low-score results..."); + const strictResults = await store.vectorSearch(vecA, 10, 0.95, ["test"]); + // With strict minScore, some results should be filtered + const filtered = results.length - strictResults.length; + assert.ok(filtered >= 0, "Strict minScore should filter equal or more results"); + console.log(` ✅ minScore=0.95 filtered ${filtered} results (${results.length} → ${strictResults.length})`); + + // Test 4: L2 distance would produce wrong scores (documentation) + console.log("Test 4: Verify L2 would fail (documentation test)..."); + // For 1024-dim normalized embeddings, L2 distance ≈ 40-60 + // score = 1/(1+45) ≈ 0.022 — below any reasonable minScore + const l2TypicalDistance = 45; + const l2Score = 1 / (1 + l2TypicalDistance); + assert.ok(l2Score < 0.3, `L2 score ${l2Score.toFixed(4)} should be below minScore=0.3`); + console.log(` ✅ L2 score = ${l2Score.toFixed(4)} (would drop all results, confirming cosine is needed)`); + + console.log("\n=== All vector-search-cosine tests passed! ==="); + +} finally { + rmSync(workDir, { recursive: true, force: true }); +}