Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ docker exec -it hermes-memory hermes
| `pipeline.l1IdleTimeoutSeconds` | `600` | Trigger L1 after the user has been idle for this many seconds |
| `pipeline.l2MinIntervalSeconds` | `900` | Minimum interval between two L2 passes within the same session |
| `recall.timeoutMs` | `5000` | Recall timeout; on timeout, skip injection without blocking the conversation |
| `recall.rerank.enabled` | `false` | Optional remote rerank for L1 recall candidates; falls back to the original order on timeout or API failure |
| `extraction.enableDedup` | `true` | L1 vector dedup / conflict detection |
| `capture.excludeAgents` | `[]` | Glob patterns to exclude specific agents (e.g. `bench-judge-*`) |
| `capture.l0l1RetentionDays` | `0` | Local retention days for L0 / L1 files; `0` = never clean up |
Expand All @@ -291,6 +292,7 @@ docker exec -it hermes-memory hermes
For all fields, types, and constraints see [`openclaw.plugin.json`](./openclaw.plugin.json).

- `embedding.*` — remote embedding service (OpenAI-compatible API)
- `recall.rerank.*` — remote rerank service compatible with `/rerank` APIs
- `llm.*` — standalone LLM mode (bypass OpenClaw's built-in model and run L1/L2/L3 with a designated API)
- `offload.backendUrl / backendApiKey` — offload the L1/L1.5/L2/L4 flow to a backend service
- `report.*` — metrics reporting
Expand Down
2 changes: 2 additions & 0 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ docker exec -it hermes-memory hermes
| `pipeline.l1IdleTimeoutSeconds` | `600` | 用户停止对话多久后触发 L1 |
| `pipeline.l2MinIntervalSeconds` | `900` | 同 session 两次 L2 之间的最小间隔 |
| `recall.timeoutMs` | `5000` | 召回超时阈值,超时跳过注入不阻塞对话 |
| `recall.rerank.enabled` | `false` | 可选远程 rerank,用于重排 L1 召回候选;超时或 API 失败时回退原排序 |
| `extraction.enableDedup` | `true` | L1 向量去重 / 冲突检测 |
| `capture.excludeAgents` | `[]` | Glob 模式排除特定 Agent(如 `bench-judge-*`) |
| `capture.l0l1RetentionDays` | `0` | L0/L1 本地文件保留天数,`0` = 永不清理 |
Expand All @@ -295,6 +296,7 @@ docker exec -it hermes-memory hermes
完整字段、类型、约束见 [`openclaw.plugin.json`](./openclaw.plugin.json) 。

- `embedding.*` — 远程 embedding 服务(OpenAI 兼容 API)
- `recall.rerank.*` — 兼容 `/rerank` API 的远程 rerank 服务
- `llm.*` — 独立 LLM 模式(绕过 OpenClaw 内置模型,用指定 API 跑 L1/L2/L3)
- `offload.backendUrl / backendApiKey` — 将 L1/L1.5/L2/L4 offload 流程卸载到后端服务
- `report.*` — 指标上报
Expand Down
14 changes: 13 additions & 1 deletion openclaw.plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,19 @@
"maxResults": { "type": "number", "default": 5, "description": "召回最大结果数" },
"scoreThreshold": { "type": "number", "default": 0.3, "description": "最低分数阈值" },
"strategy": { "type": "string", "enum": ["embedding", "keyword", "hybrid"], "default": "hybrid", "description": "搜索策略:keyword(关键词)、embedding(向量)、hybrid(混合RRF融合,推荐)" },
"timeoutMs": { "type": "number", "default": 5000, "description": "召回整体超时(毫秒),超时后跳过记忆注入并打印警告日志" }
"timeoutMs": { "type": "number", "default": 5000, "description": "召回整体超时(毫秒),超时后跳过记忆注入并打印警告日志" },
"rerank": {
"type": "object",
"description": "远程 rerank 设置。默认关闭;开启后会先多召回候选,再调用兼容 /rerank 的云端 API 重排,失败时自动回退原排序",
"properties": {
"enabled": { "type": "boolean", "default": false, "description": "是否启用远程 rerank" },
"baseUrl": { "type": "string", "description": "Rerank API Base URL;如果未以 /rerank 结尾,会自动追加 /rerank" },
"apiKey": { "type": "string", "description": "Rerank API Key" },
"model": { "type": "string", "description": "Rerank 模型名称" },
"timeoutMs": { "type": "number", "default": 3000, "description": "Rerank 请求超时(毫秒),超时后回退原排序" },
"candidateMultiplier": { "type": "number", "default": 3, "description": "Rerank 前候选召回倍数,相对于 recall.maxResults;范围建议 1-10" }
}
}
}
},
"embedding": {
Expand Down
26 changes: 26 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,21 @@ export interface PipelineTriggerConfig {
}

/**
 * Remote rerank settings for L1 recall candidates (`recall.rerank.*`).
 *
 * `baseUrl`, `apiKey` and `model` are optional at the type level; the config
 * parser leaves them `undefined` when they are not configured.
 */
export interface RecallRerankConfig {
  /** Enable remote rerank for L1 recall candidates (default: false). */
  enabled: boolean;
  /** Base URL of a `/rerank`-compatible API. "/rerank" is appended when the URL does not already end with it. */
  baseUrl?: string;
  /** API key for the rerank provider. */
  apiKey?: string;
  /** Rerank model name. */
  model?: string;
  /** Request timeout in milliseconds (default: 3000). */
  timeoutMs: number;
  /** Candidate multiplier before rerank, relative to recall.maxResults (default: 3). */
  candidateMultiplier: number;
}

/** Recall settings — controls memory retrieval for context injection. */
export interface RecallConfig {
/** Enable auto-recall (default: true) */
enabled: boolean;
Expand All @@ -86,6 +101,8 @@ export interface RecallConfig {
strategy: "embedding" | "keyword" | "hybrid";
/** Overall recall timeout in milliseconds (default: 5000). When exceeded, recall is skipped with a warning. */
timeoutMs: number;
/** Optional remote rerank configuration. Disabled by default. */
rerank: RecallRerankConfig;
}

/** Embedding service configuration for vector search. */
Expand Down Expand Up @@ -322,6 +339,7 @@ export function parseConfig(raw: Record<string, unknown> | undefined): MemoryTda

// --- Recall ---
const recallGroup = obj(c, "recall");
const rerankGroup = obj(recallGroup, "rerank");

// --- Embedding ---
const embeddingGroup = obj(c, "embedding");
Expand Down Expand Up @@ -489,6 +507,14 @@ export function parseConfig(raw: Record<string, unknown> | undefined): MemoryTda
scoreThreshold: num(recallGroup, "scoreThreshold") ?? 0.3,
strategy: validateStrategy(str(recallGroup, "strategy")) ?? "hybrid",
timeoutMs: num(recallGroup, "timeoutMs") ?? 5000,
rerank: {
enabled: bool(rerankGroup, "enabled") ?? false,
baseUrl: str(rerankGroup, "baseUrl"),
apiKey: str(rerankGroup, "apiKey"),
model: str(rerankGroup, "model"),
timeoutMs: num(rerankGroup, "timeoutMs") ?? 3000,
candidateMultiplier: num(rerankGroup, "candidateMultiplier") ?? 3,
},
},
embedding: {
enabled: embeddingEnabled,
Expand Down
101 changes: 101 additions & 0 deletions src/core/hooks/auto-recall.rerank.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";

import { parseConfig } from "../../config.js";
import type { IMemoryStore, L1FtsResult } from "../store/types.js";
import { performAutoRecall } from "./auto-recall.js";

describe("performAutoRecall rerank", () => {
  // Temporary plugin data directory created by a test; removed again in afterEach.
  let dataDir: string | undefined;

  afterEach(() => {
    // Undo the global `fetch` stub so it cannot leak into other test files.
    vi.unstubAllGlobals();
    if (dataDir) {
      rmSync(dataDir, { recursive: true, force: true });
      dataDir = undefined;
    }
  });

  it("reranks over-retrieved L1 candidates before injecting top results", async () => {
    dataDir = mkdtempSync(path.join(tmpdir(), "memory-tdai-rerank-"));

    // Stubbed rerank endpoint: promotes candidate #2 ("c") above candidate #0
    // ("a") and omits candidate #1 ("b") from the returned ranking.
    vi.stubGlobal("fetch", vi.fn(async () => new Response(
      JSON.stringify({
        results: [
          { index: 2, relevance_score: 0.95 },
          { index: 0, relevance_score: 0.52 },
        ],
      }),
      { status: 200 },
    )));

    // Keep a handle on the mock so the requested limit can be asserted AFTER
    // the recall call. An `expect` thrown inside the mock would be caught by
    // the search pipeline's own error handling and resurface only as a
    // confusing "missing content" failure further down.
    const searchL1Fts = vi.fn(async (_query: string, _limit?: number): Promise<L1FtsResult[]> => [
      makeFtsResult("a", "无关的天气记录", 0.9),
      makeFtsResult("b", "用户喜欢 Python", 0.89),
      makeFtsResult("c", "用户明确偏好 TypeScript", 0.88),
    ]);
    const store = {
      isFtsAvailable: () => true,
      searchL1Fts,
    } as unknown as IMemoryStore;

    const cfg = parseConfig({
      recall: {
        strategy: "keyword",
        maxResults: 2,
        scoreThreshold: 0,
        rerank: {
          enabled: true,
          baseUrl: "https://api.example.com/v1",
          apiKey: "test-key",
          model: "bge-reranker-v2-m3",
          candidateMultiplier: 3,
        },
      },
    });

    const result = await performAutoRecall({
      userText: "TypeScript 偏好",
      actorId: "user",
      sessionKey: "session",
      cfg,
      pluginDataDir: dataDir,
      vectorStore: store,
    });

    // Over-retrieval: every FTS query must ask for at least
    // maxResults (2) * candidateMultiplier (3) = 6 candidates.
    expect(searchL1Fts).toHaveBeenCalled();
    for (const [, limit] of searchL1Fts.mock.calls) {
      expect(limit).toBeGreaterThanOrEqual(6);
    }

    // Injection: only the two reranked candidates appear, in reranked order.
    const injected = extractRelevantMemoryLines(result?.prependContext);
    expect(injected).toContain("用户明确偏好 TypeScript");
    expect(injected).toContain("无关的天气记录");
    expect(injected).not.toContain("用户喜欢 Python");
    expect(injected.indexOf("用户明确偏好 TypeScript")).toBeLessThan(
      injected.indexOf("无关的天气记录"),
    );
  });
});

/**
 * Test fixture factory: builds an FTS hit whose id, content and score come
 * from the arguments while every other field carries a fixed placeholder.
 */
function makeFtsResult(id: string, content: string, score: number): L1FtsResult {
  const noTimestamp = "";
  const sessionKey = "session";

  const fixture: L1FtsResult = {
    record_id: id,
    content: content,
    type: "episodic",
    priority: 80,
    scene_name: "test",
    score: score,
    timestamp_str: noTimestamp,
    timestamp_start: noTimestamp,
    timestamp_end: noTimestamp,
    session_key: sessionKey,
    session_id: `${sessionKey}-1`,
    metadata_json: "{}",
  };
  return fixture;
}

/**
 * Pull the memory lines out of a `<relevant-memories>` block.
 *
 * Returns the text between the first blank line inside the block and the
 * closing tag, or an empty string when there is no context or no match.
 */
function extractRelevantMemoryLines(prependContext: string | undefined): string {
  if (prependContext == null) return "";

  const blockPattern = /<relevant-memories>[\s\S]*?\n\n([\s\S]*?)\n<\/relevant-memories>/;
  const captured = blockPattern.exec(prependContext);
  if (captured === null) return "";

  return captured[1] ?? "";
}
67 changes: 58 additions & 9 deletions src/core/hooks/auto-recall.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import type { IMemoryStore, L1SearchResult, L1FtsResult } from "../store/types.j
import { buildFtsQuery } from "../store/sqlite.js";
import type { EmbeddingService, EmbeddingCallOptions } from "../store/embedding.js";
import { sanitizeText } from "../../utils/sanitize.js";
import { getRerankCandidateLimit, rerankTextCandidates } from "../recall/reranker.js";

const TAG = "[memory-tdai] [recall]";

Expand Down Expand Up @@ -331,7 +332,8 @@ async function searchMemories(
);
}

const maxResults = cfg.recall.maxResults ?? 5;
const maxResults = normalizePositiveInt(cfg.recall.maxResults, 5);
const candidateLimit = getRerankCandidateLimit(maxResults, cfg.recall.rerank);
const threshold = cfg.recall.scoreThreshold ?? 0.3;

const embeddingAvailable = !!vectorStore && !!embeddingService;
Expand All @@ -340,7 +342,7 @@ async function searchMemories(
`${TAG} [searchMemories] strategy=${strategy}, embeddingAvailable=${embeddingAvailable}, ` +
`vectorStore=${vectorStore ? "available" : "UNAVAILABLE"}, ` +
`embeddingService=${embeddingService ? "available" : "UNAVAILABLE"}, ` +
`maxResults=${maxResults}, threshold=${threshold}`,
`maxResults=${maxResults}, candidateLimit=${candidateLimit}, threshold=${threshold}`,
);

// Determine effective strategy (fall back to keyword if embedding not available)
Expand All @@ -362,36 +364,78 @@ async function searchMemories(
try {
if (effectiveStrategy === "keyword") {
const tFts = performance.now();
const lines = await searchByKeyword(cleanText, pluginDataDir, maxResults, threshold, logger, vectorStore);
return { lines, timing: { ftsMs: performance.now() - tFts, embeddingMs: 0, ftsHits: lines.length, embeddingHits: 0 } };
const lines = await searchByKeyword(cleanText, pluginDataDir, candidateLimit, threshold, logger, vectorStore);
return await finalizeSearchResult(
{ lines, timing: { ftsMs: performance.now() - tFts, embeddingMs: 0, ftsHits: lines.length, embeddingHits: 0 } },
cleanText,
cfg,
logger,
maxResults,
);
}

if (effectiveStrategy === "embedding") {
const tEmb = performance.now();
const lines = await searchByEmbedding(cleanText, maxResults, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
return { lines, timing: { ftsMs: 0, embeddingMs: performance.now() - tEmb, ftsHits: 0, embeddingHits: lines.length } };
const lines = await searchByEmbedding(cleanText, candidateLimit, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
return await finalizeSearchResult(
{ lines, timing: { ftsMs: 0, embeddingMs: performance.now() - tEmb, ftsHits: 0, embeddingHits: lines.length } },
cleanText,
cfg,
logger,
maxResults,
);
}

// Hybrid: if the store natively supports hybrid search (e.g. TCVDB does
// server-side dense + sparse + RRF in a single API call), short-circuit
// to avoid a redundant second HTTP request and a wasted local embed().
if (vectorStore?.getCapabilities().nativeHybridSearch) {
const tNative = performance.now();
const results = await vectorStore.searchL1Hybrid({ query: cleanText, topK: maxResults });
const results = await vectorStore.searchL1Hybrid({ query: cleanText, topK: candidateLimit });
const nativeMs = performance.now() - tNative;
logger?.debug?.(`${TAG} [hybrid-native] Single-call hybrid: ${results.length} results in ${nativeMs.toFixed(0)}ms`);
const lines = results.map((r) => formatMemoryLine(vectorResultToFormatable(r)));
return { lines, timing: { ftsMs: 0, embeddingMs: nativeMs, ftsHits: 0, embeddingHits: results.length } };
return await finalizeSearchResult(
{ lines, timing: { ftsMs: 0, embeddingMs: nativeMs, ftsHits: 0, embeddingHits: results.length } },
cleanText,
cfg,
logger,
maxResults,
);
}

// Fallback: run keyword + embedding in parallel, merge with client-side RRF (SQLite path)
return await searchHybrid(cleanText, pluginDataDir, maxResults, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
return await finalizeSearchResult(
await searchHybrid(cleanText, pluginDataDir, candidateLimit, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts),
cleanText,
cfg,
logger,
maxResults,
);
} catch (err) {
logger?.warn?.(`${TAG} Memory search failed (strategy=${effectiveStrategy}): ${err instanceof Error ? err.message : String(err)}`);
return emptyResult;
}
}

/**
 * Post-process one strategy's search result before it is returned to the caller.
 *
 * Passes the candidate lines to `rerankTextCandidates` together with the
 * rerank configuration, then keeps at most `maxResults` of the lines it
 * returns. All other fields of `result` (including `timing`) are carried over
 * unchanged, so hit counts keep describing the candidate set, not the final
 * selection.
 *
 * @param result     Candidate lines plus timing from the search strategy.
 * @param query      Query text the candidates are reranked against.
 * @param cfg        Plugin configuration; only `cfg.recall.rerank` is read here.
 * @param logger     Optional logger forwarded to the rerank helper.
 * @param maxResults Upper bound on the number of lines in the returned result.
 * @returns A copy of `result` whose `lines` hold at most `maxResults` entries.
 */
async function finalizeSearchResult(
  result: SearchResult,
  query: string,
  cfg: MemoryTdaiConfig,
  logger: Logger | undefined,
  maxResults: number,
): Promise<SearchResult> {
  // No candidates: nothing to rerank, return the input untouched.
  if (result.lines.length === 0) return result;

  const reranked = await rerankTextCandidates({
    query,
    documents: result.lines,
    topN: maxResults,
    config: cfg.recall.rerank,
    logger,
  });

  // Callers may have over-retrieved candidates for reranking. The helper is
  // expected to honor `topN`, but its fallback path is not visible from here,
  // so enforce the bound locally as well.
  const lines = reranked.length > maxResults ? reranked.slice(0, maxResults) : reranked;
  return { ...result, lines };
}

// ============================
// Strategy: Keyword (FTS5 BM25, no in-memory fallback)
// ============================
Expand Down Expand Up @@ -725,6 +769,11 @@ function formatTimestamp(ts: string | undefined): string | undefined {
return `${datePart} ${timePart}`;
}

/**
 * Coerce a numeric setting to a positive integer.
 *
 * The value is floored BEFORE the positivity check so that fractions in
 * (0, 1) fall back instead of collapsing to 0.
 *
 * @param value    Raw configured value; may be missing, NaN, infinite or fractional.
 * @param fallback Returned when `value` is missing, not finite, or floors below 1.
 * @returns `Math.floor(value)` when that is at least 1, otherwise `fallback`.
 */
function normalizePositiveInt(value: number | undefined, fallback: number): number {
  if (value == null || !Number.isFinite(value)) return fallback;
  const floored = Math.floor(value);
  return floored >= 1 ? floored : fallback;
}

/**
* Build a FormatableMemory from a full MemoryRecord (keyword search path).
* Handles empty metadata, empty timestamps array gracefully.
Expand Down
Loading
Loading