diff --git a/README.md b/README.md
index 58aa74f..16f3cc8 100644
--- a/README.md
+++ b/README.md
@@ -259,6 +259,8 @@ docker exec -it hermes-memory hermes
 | `storeBackend` | `"sqlite"` | Storage backend: `sqlite` |
 | `recall.strategy` | `"hybrid"` | Recall strategy: `keyword` / `embedding` / `hybrid` (RRF fusion, recommended) |
 | `recall.maxResults` | `5` | Number of items returned per recall |
+| `recall.maxCharsPerMemory` | `0` | Max characters injected for one recalled L1 memory; `0` disables this guard |
+| `recall.maxTotalRecallChars` | `0` | Total character budget for auto-recalled L1 memories; `0` disables this guard |
 | `pipeline.everyNConversations` | `5` | Trigger an L1 memory extraction every N turns |
 | `extraction.maxMemoriesPerSession` | `20` | Max memories extracted per L1 pass |
 | `persona.triggerEveryN` | `50` | Generate the user persona every N new memories |
diff --git a/README_CN.md b/README_CN.md
index c1180c5..8fc60e8 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -262,6 +262,8 @@ docker exec -it hermes-memory hermes
 | `storeBackend` | `"sqlite"` | 存储后端:`sqlite` |
 | `recall.strategy` | `"hybrid"` | 召回策略:`keyword` / `embedding` / `hybrid`(RRF 融合,推荐) |
 | `recall.maxResults` | `5` | 每次召回条数 |
+| `recall.maxCharsPerMemory` | `0` | 单条 L1 记忆注入的最大字符数;`0` 表示不限制 |
+| `recall.maxTotalRecallChars` | `0` | 每轮 auto-recall 注入的 L1 记忆总字符预算;`0` 表示不限制 |
 | `pipeline.everyNConversations` | `5` | 每 N 轮对话触发一次 L1 记忆提取 |
 | `extraction.maxMemoriesPerSession` | `20` | 单次 L1 最多提取多少条 |
 | `persona.triggerEveryN` | `50` | 每 N 条新记忆触发用户画像生成 |
diff --git a/openclaw.plugin.json b/openclaw.plugin.json
index f6ea5fd..f0b82a0 100644
--- a/openclaw.plugin.json
+++ b/openclaw.plugin.json
@@ -69,6 +69,8 @@
         "properties": {
           "enabled": { "type": "boolean", "default": true, "description": "是否启用自动召回" },
           "maxResults": { "type": "number", "default": 5, "description": "召回最大结果数" },
+          "maxCharsPerMemory": { "type": "number", "default": 0, "description": "单条 L1 记忆注入的最大字符数;填 0 表示不限制" },
+          "maxTotalRecallChars": { "type": "number", "default": 0, "description": "本轮 auto-recall 注入的 L1 记忆总字符预算;填 0 表示不限制" },
           "scoreThreshold": { "type": "number", "default": 0.3, "description": "最低分数阈值" },
           "strategy": { "type": "string", "enum": ["embedding", "keyword", "hybrid"], "default": "hybrid", "description": "搜索策略:keyword(关键词)、embedding(向量)、hybrid(混合RRF融合,推荐)" },
           "timeoutMs": { "type": "number", "default": 5000, "description": "召回整体超时(毫秒),超时后跳过记忆注入并打印警告日志" }
diff --git a/src/config.ts b/src/config.ts
index e09cff5..184874b 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -80,6 +80,10 @@ export interface RecallConfig {
   enabled: boolean;
   /** Max results to return (default: 5) */
   maxResults: number;
+  /** Max characters injected for a single recalled L1 memory. 0 disables the per-memory limit. */
+  maxCharsPerMemory: number;
+  /** Max total characters injected for all recalled L1 memories. 0 disables the total limit. */
+  maxTotalRecallChars: number;
   /** Minimum score threshold (default: 0.3) */
   scoreThreshold: number;
   /** Search strategy (default: "hybrid") */
@@ -486,6 +490,8 @@ export function parseConfig(raw: Record | undefined): MemoryTda
     recall: {
       enabled: bool(recallGroup, "enabled") ?? true,
       maxResults: num(recallGroup, "maxResults") ?? 5,
+      maxCharsPerMemory: num(recallGroup, "maxCharsPerMemory") ?? 0,
+      maxTotalRecallChars: num(recallGroup, "maxTotalRecallChars") ?? 0,
       scoreThreshold: num(recallGroup, "scoreThreshold") ?? 0.3,
       strategy: validateStrategy(str(recallGroup, "strategy")) ?? "hybrid",
       timeoutMs: num(recallGroup, "timeoutMs") ?? 5000,
diff --git a/src/core/hooks/auto-recall.ts b/src/core/hooks/auto-recall.ts
index cccb864..5ddb9b6 100644
--- a/src/core/hooks/auto-recall.ts
+++ b/src/core/hooks/auto-recall.ts
@@ -22,6 +22,9 @@ import type { EmbeddingService, EmbeddingCallOptions } from "../store/embedding.
 import { sanitizeText } from "../../utils/sanitize.js";
 
 const TAG = "[memory-tdai] [recall]";
+const RECALL_TRUNCATION_SUFFIX = "…(已截断;可用 tdai_memory_search 或 tdai_conversation_search 查看详情)";
+const MIN_TRUNCATED_RECALL_LINE_CHARS = 40;
+const RECALL_LINE_SEPARATOR = "\n";
 
 /**
  * Memory tools usage guide — injected at the end of memory context so the
@@ -127,6 +130,7 @@ async function performAutoRecallInner(params: {
       const searchResult = await searchMemories(userText, pluginDataDir, cfg, logger, effectiveStrategy as "keyword" | "embedding" | "hybrid", vectorStore, embeddingService);
       memoryLines = searchResult.lines;
       searchTiming = searchResult.timing;
+      memoryLines = applyRecallBudget(memoryLines, cfg.recall, logger);
 
       // Extract structured RecalledMemory from formatted lines for metric reporting
       recalledL1Memories = memoryLines.map((line) => {
@@ -206,7 +210,7 @@ async function performAutoRecallInner(params: {
   let prependContext: string | undefined;
   if (memoryLines.length > 0) {
     prependContext =
-      `\n以下是当前对话召回的相关记忆,不代表当前任务进程,仅作为参考:\n\n${memoryLines.join("\n")}\n`;
+      `\n以下是当前对话召回的相关记忆,不代表当前任务进程,仅作为参考:\n\n${memoryLines.join(RECALL_LINE_SEPARATOR)}\n`;
   }
 
   // Append memory tools usage guide to the stable part so the agent knows
@@ -706,6 +710,128 @@ function formatMemoryLine(m: FormatableMemory): string {
   return line;
 }
 
+/**
+ * Enforce the configured character budgets on formatted recall lines.
+ *
+ * Lines are handled in order. Each one is first capped at
+ * `recall.maxCharsPerMemory` (when set). When `recall.maxTotalRecallChars` is
+ * set, lines are then accepted while the running total (including one
+ * RECALL_LINE_SEPARATOR between accepted lines) stays within it. The first line
+ * that does not fit ends the loop: it is truncated into the remaining space if
+ * at least MIN_TRUNCATED_RECALL_LINE_CHARS remain, otherwise dropped, and every
+ * later line is dropped.
+ *
+ * @param lines Formatted memory lines to budget.
+ * @param recall Recall config; a limit that is missing, non-finite or <= 0 is disabled.
+ * @param logger Optional logger; gets a debug summary when anything was truncated or dropped.
+ * @returns `lines` itself when both limits are disabled, otherwise a new array.
+ */
+function applyRecallBudget(
+  lines: string[],
+  recall: MemoryTdaiConfig["recall"],
+  logger?: Logger,
+): string[] {
+  const maxCharsPerMemory = normalizeBudgetLimit(recall.maxCharsPerMemory);
+  const maxTotalRecallChars = normalizeBudgetLimit(recall.maxTotalRecallChars);
+
+  if (!maxCharsPerMemory && !maxTotalRecallChars) {
+    return lines;
+  }
+
+  const budgeted: string[] = [];
+  let usedChars = 0;
+  let truncatedCount = 0;
+  let droppedCount = 0;
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const perMemoryBounded = maxCharsPerMemory
+      ? truncateRecallLine(line, maxCharsPerMemory)
+      : line;
+    let wasTruncated = perMemoryBounded !== line;
+
+    // Only the per-memory cap is active: there is no running total to track.
+    if (!maxTotalRecallChars) {
+      budgeted.push(perMemoryBounded);
+      if (wasTruncated) truncatedCount++;
+      continue;
+    }
+
+    // The separator is only paid for from the second accepted line onward.
+    const separatorChars = budgeted.length > 0 ? RECALL_LINE_SEPARATOR.length : 0;
+    const remainingChars = maxTotalRecallChars - usedChars - separatorChars;
+    if (remainingChars <= 0) {
+      droppedCount += lines.length - i;
+      break;
+    }
+
+    // First line that overflows the total budget: squeeze it in if worthwhile, then stop.
+    if (perMemoryBounded.length > remainingChars) {
+      const canFit = remainingChars >= MIN_TRUNCATED_RECALL_LINE_CHARS;
+      if (canFit) {
+        const totalBounded = truncateRecallLine(perMemoryBounded, remainingChars);
+        budgeted.push(totalBounded);
+        usedChars += separatorChars + totalBounded.length;
+        wasTruncated ||= totalBounded !== perMemoryBounded;
+        if (wasTruncated) truncatedCount++;
+      }
+      droppedCount += lines.length - i - (canFit ? 1 : 0);
+      break;
+    }
+
+    budgeted.push(perMemoryBounded);
+    usedChars += separatorChars + perMemoryBounded.length;
+    if (wasTruncated) truncatedCount++;
+  }
+
+  if (truncatedCount > 0 || droppedCount > 0) {
+    logger?.debug?.(
+      `${TAG} Recall budget applied: input=${lines.length}, output=${budgeted.length}, ` +
+      `truncated=${truncatedCount}, dropped=${droppedCount}, ` +
+      `maxCharsPerMemory=${recall.maxCharsPerMemory}, maxTotalRecallChars=${recall.maxTotalRecallChars}`,
+    );
+  }
+
+  return budgeted;
+}
+
+/**
+ * Normalize a configured limit: `undefined` (disabled) for a missing,
+ * non-finite, zero or negative value, otherwise the value rounded down.
+ */
+function normalizeBudgetLimit(value: number | undefined): number | undefined {
+  if (value == null || !Number.isFinite(value) || value <= 0) return undefined;
+  return Math.floor(value);
+}
+
+/**
+ * Shorten `line` to at most `maxChars` UTF-16 code units.
+ *
+ * When there is room, the tail is replaced by RECALL_TRUNCATION_SUFFIX; when
+ * `maxChars` is not larger than the suffix, the line is cut without a marker.
+ * Cuts never leave half of a surrogate pair at the end of the kept text.
+ */
+function truncateRecallLine(line: string, maxChars: number): string {
+  if (line.length <= maxChars) return line;
+  if (maxChars <= RECALL_TRUNCATION_SUFFIX.length) {
+    return sliceAtCodePointBoundary(line, maxChars);
+  }
+  const head = sliceAtCodePointBoundary(line, maxChars - RECALL_TRUNCATION_SUFFIX.length);
+  return `${head.trimEnd()}${RECALL_TRUNCATION_SUFFIX}`;
+}
+
+/**
+ * `text.slice(0, end)`, backing off by one code unit when the cut would leave a
+ * dangling high surrogate (e.g. half of an emoji) at the end of the result.
+ */
+function sliceAtCodePointBoundary(text: string, end: number): string {
+  if (end > 0 && end < text.length) {
+    const lastUnit = text.charCodeAt(end - 1);
+    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) return text.slice(0, end - 1);
+  }
+  return text.slice(0, end);
+}
+
 /**
  * Format an ISO 8601 timestamp to a concise date or datetime string.
  * - If the time part is 00:00:00 → show date only (e.g. "2025-03-01")