Tencent · YOMXXX · May 26, 2026
diff --git a/README.md b/README.md
@@ -275,6 +275,7 @@ docker exec -it hermes-memory hermes
 | `pipeline.l1IdleTimeoutSeconds` | `600` | Trigger L1 after the user has been idle for this many seconds |
 | `pipeline.l2MinIntervalSeconds` | `900` | Minimum interval between two L2 passes within the same session |
 | `recall.timeoutMs` | `5000` | Recall timeout; on timeout, skip injection without blocking the conversation |
+| `recall.rerank.enabled` | `false` | Optional remote rerank for L1 recall candidates; falls back to the original order on timeout or API failure |
 | `extraction.enableDedup` | `true` | L1 vector dedup / conflict detection |
 | `capture.excludeAgents` | `[]` | Glob patterns to exclude specific agents (e.g. `bench-judge-*`) |
 | `capture.l0l1RetentionDays` | `0` | Local retention days for L0 / L1 files; `0` = never clean up |
@@ -291,6 +292,7 @@ docker exec -it hermes-memory hermes
 For all fields, types, and constraints see [`openclaw.plugin.json`](./openclaw.plugin.json)。
 
 - `embedding.*` — remote embedding service (OpenAI-compatible API)
+- `recall.rerank.*` — remote rerank service compatible with `/rerank` APIs
 - `llm.*` — standalone LLM mode (bypass OpenClaw's built-in model and run L1/L2/L3 with a designated API)
 - `offload.backendUrl / backendApiKey` — offload the L1/L1.5/L2/L4 flow to a backend service
 - `report.*` — metrics reporting

diff --git a/README_CN.md b/README_CN.md
@@ -279,6 +279,7 @@ docker exec -it hermes-memory hermes
 | `pipeline.l1IdleTimeoutSeconds` | `600` | 用户停止对话多久后触发 L1 |
 | `pipeline.l2MinIntervalSeconds` | `900` | 同 session 两次 L2 之间的最小间隔 |
 | `recall.timeoutMs` | `5000` | 召回超时阈值，超时跳过注入不阻塞对话 |
+| `recall.rerank.enabled` | `false` | 可选远程 rerank，用于重排 L1 召回候选；超时或 API 失败时回退原排序 |
 | `extraction.enableDedup` | `true` | L1 向量去重 / 冲突检测 |
 | `capture.excludeAgents` | `[]` | Glob 模式排除特定 Agent（如 `bench-judge-*`） |
 | `capture.l0l1RetentionDays` | `0` | L0/L1 本地文件保留天数，`0` = 永不清理 |
@@ -295,6 +296,7 @@ docker exec -it hermes-memory hermes
 完整字段、类型、约束见 [`openclaw.plugin.json`](./openclaw.plugin.json) 。
 
 - `embedding.*` — 远程 embedding 服务（OpenAI 兼容 API）
+- `recall.rerank.*` — 兼容 `/rerank` API 的远程 rerank 服务
 - `llm.*` — 独立 LLM 模式（绕过 OpenClaw 内置模型，用指定 API 跑 L1/L2/L3）
 - `offload.backendUrl / backendApiKey` — 将 L1/L1.5/L2/L4 offload 流程卸载到后端服务
 - `report.*` — 指标上报

diff --git a/openclaw.plugin.json b/openclaw.plugin.json
@@ -71,7 +71,19 @@
           "maxResults": { "type": "number", "default": 5, "description": "召回最大结果数" },
           "scoreThreshold": { "type": "number", "default": 0.3, "description": "最低分数阈值" },
           "strategy": { "type": "string", "enum": ["embedding", "keyword", "hybrid"], "default": "hybrid", "description": "搜索策略：keyword(关键词)、embedding(向量)、hybrid(混合RRF融合，推荐)" },
-          "timeoutMs": { "type": "number", "default": 5000, "description": "召回整体超时（毫秒），超时后跳过记忆注入并打印警告日志" }
+          "timeoutMs": { "type": "number", "default": 5000, "description": "召回整体超时（毫秒），超时后跳过记忆注入并打印警告日志" },
+          "rerank": {
+            "type": "object",
+            "description": "远程 rerank 设置。默认关闭；开启后会先多召回候选，再调用兼容 /rerank 的云端 API 重排，失败时自动回退原排序",
+            "properties": {
+              "enabled": { "type": "boolean", "default": false, "description": "是否启用远程 rerank" },
+              "baseUrl": { "type": "string", "description": "Rerank API Base URL；如果未以 /rerank 结尾，会自动追加 /rerank" },
+              "apiKey": { "type": "string", "description": "Rerank API Key" },
+              "model": { "type": "string", "description": "Rerank 模型名称" },
+              "timeoutMs": { "type": "number", "default": 3000, "description": "Rerank 请求超时（毫秒），超时后回退原排序" },
+              "candidateMultiplier": { "type": "number", "default": 3, "description": "Rerank 前候选召回倍数，相对于 recall.maxResults；范围建议 1-10" }
+            }
+          }
         }
       },
       "embedding": {

diff --git a/src/config.ts b/src/config.ts
@@ -75,6 +75,21 @@ export interface PipelineTriggerConfig {
 }
 
 /** Recall settings — controls memory retrieval for context injection. */
+export interface RecallRerankConfig {
+  /** Enable remote rerank for L1 recall candidates (default: false). */
+  enabled: boolean;
+  /** OpenAI-compatible rerank API base URL. "/rerank" is appended when omitted. */
+  baseUrl?: string;
+  /** API key for the rerank provider. */
+  apiKey?: string;
+  /** Rerank model name. */
+  model?: string;
+  /** Request timeout in milliseconds (default: 3000). */
+  timeoutMs: number;
+  /** Candidate multiplier before rerank, relative to recall.maxResults (default: 3). */
+  candidateMultiplier: number;
+}
+
 export interface RecallConfig {
   /** Enable auto-recall (default: true) */
   enabled: boolean;
@@ -86,6 +101,8 @@ export interface RecallConfig {
   strategy: "embedding" | "keyword" | "hybrid";
   /** Overall recall timeout in milliseconds (default: 5000). When exceeded, recall is skipped with a warning. */
   timeoutMs: number;
+  /** Optional remote rerank configuration. Disabled by default. */
+  rerank: RecallRerankConfig;
 }
 
 /** Embedding service configuration for vector search. */
@@ -322,6 +339,7 @@ export function parseConfig(raw: Record<string, unknown> | undefined): MemoryTda
 
   // --- Recall ---
   const recallGroup = obj(c, "recall");
+  const rerankGroup = obj(recallGroup, "rerank");
 
   // --- Embedding ---
   const embeddingGroup = obj(c, "embedding");
@@ -489,6 +507,14 @@ export function parseConfig(raw: Record<string, unknown> | undefined): MemoryTda
       scoreThreshold: num(recallGroup, "scoreThreshold") ?? 0.3,
       strategy: validateStrategy(str(recallGroup, "strategy")) ?? "hybrid",
       timeoutMs: num(recallGroup, "timeoutMs") ?? 5000,
+      rerank: {
+        enabled: bool(rerankGroup, "enabled") ?? false,
+        baseUrl: str(rerankGroup, "baseUrl"),
+        apiKey: str(rerankGroup, "apiKey"),
+        model: str(rerankGroup, "model"),
+        timeoutMs: num(rerankGroup, "timeoutMs") ?? 3000,
+        candidateMultiplier: num(rerankGroup, "candidateMultiplier") ?? 3,
+      },
     },
     embedding: {
       enabled: embeddingEnabled,

diff --git a/src/core/hooks/auto-recall.rerank.test.ts b/src/core/hooks/auto-recall.rerank.test.ts
@@ -0,0 +1,101 @@
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { parseConfig } from "../../config.js";
+import type { IMemoryStore, L1FtsResult } from "../store/types.js";
+import { performAutoRecall } from "./auto-recall.js";
+
+describe("performAutoRecall rerank", () => { // keyword recall + stubbed rerank API, checked via the injected context
+  let dataDir: string | undefined; // temp plugin data dir created by the test; removed in afterEach
+
+  afterEach(() => {
+    vi.unstubAllGlobals(); // undo the global fetch stub installed by the test
+    if (dataDir) {
+      rmSync(dataDir, { recursive: true, force: true });
+      dataDir = undefined;
+    }
+  });
+
+  it("reranks over-retrieved L1 candidates before injecting top results", async () => {
+    dataDir = mkdtempSync(path.join(tmpdir(), "memory-tdai-rerank-"));
+    vi.stubGlobal("fetch", vi.fn(async () => new Response( // canned reply standing in for the rerank API
+      JSON.stringify({
+        results: [
+          { index: 2, relevance_score: 0.95 }, // presumably the third candidate ("c", TypeScript) ranked first
+          { index: 0, relevance_score: 0.52 }, // presumably the first candidate ("a", weather); "b" is left out
+        ],
+      }),
+      { status: 200 },
+    )));
+
+    const store = { // partial IMemoryStore stub: only these two members are defined, hence the cast below
+      isFtsAvailable: () => true,
+      searchL1Fts: vi.fn(async (_query: string, limit?: number): Promise<L1FtsResult[]> => {
+        expect(limit).toBeGreaterThanOrEqual(6); // 6 = maxResults 2 × candidateMultiplier 3 from the config below (presumably how the limit is derived)
+        return [
+          makeFtsResult("a", "无关的天气记录", 0.9), // "unrelated weather record"
+          makeFtsResult("b", "用户喜欢 Python", 0.89), // "user likes Python"
+          makeFtsResult("c", "用户明确偏好 TypeScript", 0.88), // "user explicitly prefers TypeScript"
+        ];
+      }),
+    } as unknown as IMemoryStore;
+
+    const cfg = parseConfig({
+      recall: {
+        strategy: "keyword", // keyword strategy, so only the FTS stub above is needed
+        maxResults: 2,
+        scoreThreshold: 0,
+        rerank: {
+          enabled: true,
+          baseUrl: "https://api.example.com/v1",
+          apiKey: "test-key",
+          model: "bge-reranker-v2-m3",
+          candidateMultiplier: 3,
+        },
+      },
+    });
+
+    const result = await performAutoRecall({
+      userText: "TypeScript 偏好", // "TypeScript preference"
+      actorId: "user",
+      sessionKey: "session",
+      cfg,
+      pluginDataDir: dataDir,
+      vectorStore: store,
+    });
+
+    const injected = extractRelevantMemoryLines(result?.prependContext);
+    expect(injected).toContain("用户明确偏好 TypeScript"); // listed in the stubbed rerank reply
+    expect(injected).toContain("无关的天气记录"); // also listed in the reply, with the lower score
+    expect(injected).not.toContain("用户喜欢 Python"); // absent from the reply, so it must not be injected
+    expect(injected.indexOf("用户明确偏好 TypeScript")).toBeLessThan( // injected order follows the reply order
+      injected.indexOf("无关的天气记录"),
+    );
+  });
+});
+
+function makeFtsResult(id: string, content: string, score: number): L1FtsResult { // fixture builder for FTS hits
+  return {
+    record_id: id, // caller-supplied
+    content, // caller-supplied
+    type: "episodic", // fixed placeholder, like every field below except score
+    priority: 80,
+    scene_name: "test",
+    score, // caller-supplied
+    timestamp_str: "", // the three timestamp fields are left empty in this fixture
+    timestamp_start: "",
+    timestamp_end: "",
+    session_key: "session",
+    session_id: "session-1",
+    metadata_json: "{}", // empty JSON object, serialized
+  };
+}
+
+/** Text after the first blank line inside <relevant-memories>, up to the line break before its closing tag; "" if absent. */
+function extractRelevantMemoryLines(prependContext: string | undefined): string {
+  const blockPattern = /<relevant-memories>[\s\S]*?\n\n([\s\S]*?)\n<\/relevant-memories>/;
+  const captured = prependContext == null ? null : blockPattern.exec(prependContext);
+  return captured?.[1] ?? "";
+}
diff --git a/src/core/hooks/auto-recall.ts b/src/core/hooks/auto-recall.ts
@@ -20,6 +20,7 @@ import type { IMemoryStore, L1SearchResult, L1FtsResult } from "../store/types.j
 import { buildFtsQuery } from "../store/sqlite.js";
 import type { EmbeddingService, EmbeddingCallOptions } from "../store/embedding.js";
 import { sanitizeText } from "../../utils/sanitize.js";
+import { getRerankCandidateLimit, rerankTextCandidates } from "../recall/reranker.js";
 
 const TAG = "[memory-tdai] [recall]";
 
@@ -331,7 +332,8 @@ async function searchMemories(
     );
   }
 
-  const maxResults = cfg.recall.maxResults ?? 5;
+  const maxResults = normalizePositiveInt(cfg.recall.maxResults, 5);
+  const candidateLimit = getRerankCandidateLimit(maxResults, cfg.recall.rerank);
   const threshold = cfg.recall.scoreThreshold ?? 0.3;
 
   const embeddingAvailable = !!vectorStore && !!embeddingService;
@@ -340,7 +342,7 @@ async function searchMemories(
     `${TAG} [searchMemories] strategy=${strategy}, embeddingAvailable=${embeddingAvailable}, ` +
     `vectorStore=${vectorStore ? "available" : "UNAVAILABLE"}, ` +
     `embeddingService=${embeddingService ? "available" : "UNAVAILABLE"}, ` +
-    `maxResults=${maxResults}, threshold=${threshold}`,
+    `maxResults=${maxResults}, candidateLimit=${candidateLimit}, threshold=${threshold}`,
   );
 
   // Determine effective strategy (fall back to keyword if embedding not available)
@@ -362,36 +364,78 @@ async function searchMemories(
   try {
     if (effectiveStrategy === "keyword") {
       const tFts = performance.now();
-      const lines = await searchByKeyword(cleanText, pluginDataDir, maxResults, threshold, logger, vectorStore);
-      return { lines, timing: { ftsMs: performance.now() - tFts, embeddingMs: 0, ftsHits: lines.length, embeddingHits: 0 } };
+      const lines = await searchByKeyword(cleanText, pluginDataDir, candidateLimit, threshold, logger, vectorStore);
+      return await finalizeSearchResult(
+        { lines, timing: { ftsMs: performance.now() - tFts, embeddingMs: 0, ftsHits: lines.length, embeddingHits: 0 } },
+        cleanText,
+        cfg,
+        logger,
+        maxResults,
+      );
     }
 
     if (effectiveStrategy === "embedding") {
       const tEmb = performance.now();
-      const lines = await searchByEmbedding(cleanText, maxResults, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
-      return { lines, timing: { ftsMs: 0, embeddingMs: performance.now() - tEmb, ftsHits: 0, embeddingHits: lines.length } };
+      const lines = await searchByEmbedding(cleanText, candidateLimit, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
+      return await finalizeSearchResult(
+        { lines, timing: { ftsMs: 0, embeddingMs: performance.now() - tEmb, ftsHits: 0, embeddingHits: lines.length } },
+        cleanText,
+        cfg,
+        logger,
+        maxResults,
+      );
     }
 
     // Hybrid: if the store natively supports hybrid search (e.g. TCVDB does
     // server-side dense + sparse + RRF in a single API call), short-circuit
     // to avoid a redundant second HTTP request and a wasted local embed().
     if (vectorStore?.getCapabilities().nativeHybridSearch) {
       const tNative = performance.now();
-      const results = await vectorStore.searchL1Hybrid({ query: cleanText, topK: maxResults });
+      const results = await vectorStore.searchL1Hybrid({ query: cleanText, topK: candidateLimit });
       const nativeMs = performance.now() - tNative;
       logger?.debug?.(`${TAG} [hybrid-native] Single-call hybrid: ${results.length} results in ${nativeMs.toFixed(0)}ms`);
       const lines = results.map((r) => formatMemoryLine(vectorResultToFormatable(r)));
-      return { lines, timing: { ftsMs: 0, embeddingMs: nativeMs, ftsHits: 0, embeddingHits: results.length } };
+      return await finalizeSearchResult(
+        { lines, timing: { ftsMs: 0, embeddingMs: nativeMs, ftsHits: 0, embeddingHits: results.length } },
+        cleanText,
+        cfg,
+        logger,
+        maxResults,
+      );
     }
 
     // Fallback: run keyword + embedding in parallel, merge with client-side RRF (SQLite path)
-    return await searchHybrid(cleanText, pluginDataDir, maxResults, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts);
+    return await finalizeSearchResult(
+      await searchHybrid(cleanText, pluginDataDir, candidateLimit, threshold, vectorStore!, embeddingService!, logger, embeddingCallOpts),
+      cleanText,
+      cfg,
+      logger,
+      maxResults,
+    );
   } catch (err) {
     logger?.warn?.(`${TAG} Memory search failed (strategy=${effectiveStrategy}): ${err instanceof Error ? err.message : String(err)}`);
     return emptyResult;
   }
 }
 
+/**
+ * Sends non-empty candidate lines through rerankTextCandidates, requesting
+ * maxResults entries, and returns a copy of the result carrying the returned lines.
+ */
+async function finalizeSearchResult(
+  result: SearchResult,
+  query: string,
+  cfg: MemoryTdaiConfig,
+  logger: Logger | undefined,
+  maxResults: number,
+): Promise<SearchResult> {
+  const candidates = result.lines;
+  if (candidates.length === 0) return result; // nothing to rerank; hand the result back untouched
+  const rerankConfig = cfg.recall.rerank;
+  const reranked = await rerankTextCandidates({ query, documents: candidates, topN: maxResults, config: rerankConfig, logger });
+  return { ...result, lines: reranked };
+}
+
 // ============================
 // Strategy: Keyword (FTS5 BM25, no in-memory fallback)
 // ============================
@@ -725,6 +769,11 @@ function formatTimestamp(ts: string | undefined): string | undefined {
   return `${datePart} ${timePart}`;
 }
 
+function normalizePositiveInt(value: number | undefined, fallback: number): number {
+  const floored = Math.floor(value ?? NaN); // floor BEFORE validating: a fraction in (0, 1) must not collapse to 0
+  return Number.isFinite(floored) && floored >= 1 ? floored : fallback; // missing/NaN/±Infinity/<1 → fallback
+}
+
 /**
  * Build a FormatableMemory from a full MemoryRecord (keyword search path).
  * Handles empty metadata, empty timestamps array gracefully.