diff --git a/packages/adapter-openclaw/src/ChatTurnWriter.ts b/packages/adapter-openclaw/src/ChatTurnWriter.ts index 1f4556543..83242c388 100644 --- a/packages/adapter-openclaw/src/ChatTurnWriter.ts +++ b/packages/adapter-openclaw/src/ChatTurnWriter.ts @@ -739,6 +739,7 @@ export class ChatTurnWriter { const sessionId = this.deriveSessionId(ctx); if (!sessionId) return; const identity = this.identityFieldsFromPayload(ctx); + const stripChannelMetadataFromUserText = this.shouldStripChannelMetadataForChannel(identity.channelId); const externalCursorKey = this.externalCursorKeyFromSessionKey(identity.sessionKey); const typedW4bCursorKeys = this.typedW4bMarkerCursorKeys(identity); const w4bInflightSessionIds = this.w4bInflightGuardSessionIds(identity, sessionId); @@ -769,6 +770,7 @@ export class ChatTurnWriter { typedW4bCursorKeys, w4bInflightSessionIds, w4aCrossPathSessionIds, + stripChannelMetadataFromUserText, ); }); this.w4aSessionChains.set(sessionId, work); @@ -791,6 +793,7 @@ export class ChatTurnWriter { typedW4bCursorKeys: string[] = [], w4bInflightSessionIds: string[] = [sessionId], w4aCrossPathSessionIds: string[] = [sessionId], + stripChannelMetadataFromUserText = false, ): Promise { try { // R18.2 — Take the MAX of W4a's pair-indexed watermark and W4b's @@ -804,7 +807,7 @@ export class ChatTurnWriter { const w4aWatermark = this.loadWatermark(sessionId); const w4bCount = this.w4bSessionCounts.get(sessionId) ?? 0; const savedUpTo = Math.max(w4aWatermark, w4bCount - 1); - const pairs = this.computeDelta(event.messages, savedUpTo); + const pairs = this.computeDelta(event.messages, savedUpTo, stripChannelMetadataFromUserText); if (pairs.length === 0) return; // T362 — Cold-start clamp. When no prior watermark exists for this // session (savedUpTo === -1) AND `messages[]` carries more than one @@ -1440,16 +1443,19 @@ export class ChatTurnWriter { void pendingReset.then(() => this.onMessageReceived(ev)).catch(() => undefined); return; } - const text = readEventText(ev); + const rawText = readEventText(ev); + const text = this.shouldStripChannelMetadataForChannel(channelId) + ? this.stripChannelMetadata(rawText) + : rawText; // R15.2 — Skip attachment-only / non-text inbound events. `readEventText` // returns "" when the envelope carries no text payload (e.g. an image // upload from Telegram). Enqueueing an empty string here would let the // next `message:sent` pair its assistant reply with a blank user side, // persisting an assistant-only turn for a conversation that had no - // textual inbound. Drop until we add a recoverable representation for - // attachment-only turns. + // textual inbound. Leading channel metadata that strips to empty is + // treated the same way because it is runtime-only context, not user text. if (!text) return; - const inboundDedupKey = this.messageHookDedupKey("inbound", ev, text); + const inboundDedupKey = this.messageHookDedupKey("inbound", ev, rawText); if (inboundDedupKey) { const existingConversationKey = this.messageHookInboundQueueKeys.get(inboundDedupKey); if (existingConversationKey && existingConversationKey !== conversationKey) { @@ -1514,9 +1520,10 @@ export class ChatTurnWriter { // Strip injected `` from assistant text — the model may // echo the auto-recall block, and if we persist the raw version here // while the W4a path persists the stripped version, the two turnIds - // diverge and cross-path dedup misses. User text is NOT stripped: - // legitimate pastes (XML, logs) containing the tag would otherwise be - // silently corrupted. + // diverge and cross-path dedup misses. Telegram user text was already + // normalized at enqueue time by `stripChannelMetadata`, and user text is + // not passed through `stripRecalledMemory`: legitimate user pastes + // containing XML/log tags must survive verbatim. const assistantText = this.stripRecalledMemory(readEventText(ev)); // R20.1 — Compute `assistantText` BEFORE consuming the pending user. // A `message:sent` with `success: true` but no textual content @@ -2288,6 +2295,7 @@ export class ChatTurnWriter { private computeDelta( messages: ChatTurnMessage[], savedUpTo: number, + stripChannelMetadataFromUserText = false, ): ComputedChatTurnPair[] { const pairs: ComputedChatTurnPair[] = []; // R19.1 — Queue of unmatched user messages. Two transcript shapes @@ -2351,7 +2359,10 @@ export class ChatTurnWriter { // that semantic in `computeDelta` or it produces an // assistant-only pair (`{ user: "", assistant: reply }`) // for any image-only user message followed by a reply. - const userText = this.extractText(msg.content); + const extractedUserText = this.extractText(msg.content); + const userText = stripChannelMetadataFromUserText + ? this.stripChannelMetadata(extractedUserText) + : extractedUserText; if (userText) { pendingUsers.push({ text: userText, @@ -2437,6 +2448,50 @@ export class ChatTurnWriter { ); } + /** + * Strip leading runtime-only channel metadata blocks from persisted user text. + * + * OpenClaw's Telegram channel plugin can prepend fenced JSON context for the + * agent (Telegram conversation/sender details). + * That metadata is useful before the model responds, but persisting it as user + * text pollutes recall and may leak sender/chat identifiers. + * + * Call this only after trusted channel context says the source is Telegram; + * the labels below are user-writable text without that channel context. Keep + * the recognized labels to the concrete Telegram wrapper shape; broader + * channel/message labels need their own trusted source before they can be + * stripped safely. Only contiguous, non-repeated labels at the very start are + * removed so a user-pasted metadata example after the runtime wrapper remains + * verbatim. Separator blank lines after stripped blocks are removed, but + * indentation on the first real user line is kept. + * + * Because W4a and W4b both call this before turnId/content hashing, metadata + * changes do not create distinct persisted turn identities for the same user + * utterance. Existing historical turns are not rewritten. + */ + private shouldStripChannelMetadataForChannel(channelId?: unknown): boolean { + return typeof channelId === "string" && channelId.trim().toLowerCase() === "telegram"; + } + + private stripChannelMetadata(text: string): string { + if (!text) return ""; + const metadataBlock = + /^(Conversation info|Sender) \(untrusted metadata\):[ \t]*\r?\n[ \t]*```json[ \t]*\r?\n[\s\S]*?\r?\n[ \t]*```[ \t]*(?:[ \t]*\r?\n)*/; + let out = text; + let stripped = false; + const strippedLabels = new Set(); + while (true) { + const match = metadataBlock.exec(out); + if (!match) break; + const label = match[1]; + if (strippedLabels.has(label)) break; + strippedLabels.add(label); + stripped = true; + out = out.slice(match[0].length); + } + return stripped ? out : text; + } + /** * Strip the auto-injected `` block from assistant text * before persistence. Prevents the per-turn auto-recall block from diff --git a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts index 8afb10f44..156a31274 100644 --- a/packages/adapter-openclaw/test/ChatTurnWriter.test.ts +++ b/packages/adapter-openclaw/test/ChatTurnWriter.test.ts @@ -8,6 +8,46 @@ import type { AgentEndContext, InternalMessageEvent } from "../src/ChatTurnWrite /** Wait long enough for fire-and-forget persistOne() to complete. */ const flushMicrotasks = () => new Promise((r) => setTimeout(r, 20)); +const conversationInfoMetadataBlock = [ + "Conversation info (untrusted metadata):", + "```json", + "{", + " \"chat_id\": \"telegram:1417780778\",", + " \"message_id\": \"1021\",", + " \"sender_id\": \"1417780778\",", + " \"sender\": \"Jurij\",", + " \"timestamp\": \"Mon 2026-05-04 13:08 GMT+2\"", + "}", + "```", +].join("\n"); + +const senderMetadataBlock = [ + "Sender (untrusted metadata):", + "```json", + "{", + " \"label\": \"Jurij (1417780778)\",", + " \"id\": \"1417780778\",", + " \"name\": \"Jurij\",", + " \"username\": \"Jurij_89\"", + "}", + "```", +].join("\n"); + +const channelContextMetadataBlock = [ + "Channel context (untrusted metadata):", + "```json", + "{", + " \"example\": \"user-pasted channel context\"", + "}", + "```", +].join("\n"); + +function telegramWrappedUserText(userText: string, opts: { sender?: boolean } = {}): string { + const blocks = [conversationInfoMetadataBlock]; + if (opts.sender !== false) blocks.push(senderMetadataBlock); + return [...blocks, userText].join("\n\n"); +} + describe("ChatTurnWriter", () => { let writer: ChatTurnWriter; let mockClient: { storeChatTurn: ReturnType }; @@ -556,6 +596,193 @@ describe("ChatTurnWriter", () => { expect(call[2]).toBe("answer text"); }); + it("T380 - W4a strips leading Conversation info and Sender metadata from persisted user text", async () => { + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText("hello") }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe("hello"); + expect(persistedUser).not.toContain("Conversation info"); + expect(persistedUser).not.toContain("Sender"); + expect(persistedUser).not.toContain("chat_id"); + expect(persistedUser).not.toContain("sender_id"); + expect(persistedUser).not.toContain("username"); + expect(persistedUser).not.toContain("timestamp"); + }); + + it("T380 - W4a strips leading Conversation info-only metadata", async () => { + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText("hello", { sender: false }) }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe("hello"); + }); + + it("T380 - W4a leaves pure user text unchanged", async () => { + const pureUserText = " plain user text without channel metadata"; + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: pureUserText }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(pureUserText); + }); + + it("T380 - W4a preserves multi-line user text after leading metadata blocks", async () => { + const actualUserText = "hello\nline two\n\nline four"; + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText(actualUserText) }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + }); + + it("T380 - W4a preserves indentation on the first user line after metadata blocks", async () => { + const actualUserText = " const answer = 42;\n return answer;"; + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText(actualUserText) }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + }); + + it("T380 - W4a preserves metadata-shaped text pasted in the middle", async () => { + const pastedMetadata = [ + "Please inspect this payload:", + conversationInfoMetadataBlock, + "The block above is part of my message.", + ].join("\n"); + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: pastedMetadata }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(pastedMetadata); + expect(persistedUser).toContain("Conversation info (untrusted metadata):"); + }); + + it("T380 - W4a preserves leading metadata-shaped text outside Telegram context", async () => { + const pastedMetadata = telegramWrappedUserText("This block is part of my message"); + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: pastedMetadata }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "discord", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(pastedMetadata); + expect(persistedUser).toContain("Conversation info (untrusted metadata):"); + }); + + it("T380 - W4a preserves leading metadata-shaped user text after Telegram wrapper", async () => { + const actualUserText = [conversationInfoMetadataBlock, "This block is part of my message"].join("\n"); + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText(actualUserText) }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + expect(persistedUser).toContain("Conversation info (untrusted metadata):"); + }); + + it("T380 - W4a preserves non-Telegram metadata labels after Telegram wrapper", async () => { + const actualUserText = [channelContextMetadataBlock, "This block is part of my message"].join("\n"); + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText(actualUserText) }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + expect(persistedUser).toContain("Channel context (untrusted metadata):"); + }); + + it("T380 - W4a turnId hashing uses stripped user text", async () => { + const event: AgentEndContext = { + sessionId: "test", + messages: [ + { role: "user", content: telegramWrappedUserText("hello") }, + { role: "assistant", content: "reply" }, + ], + }; + + writer.onAgentEnd(event, { channelId: "telegram", sessionKey: "sk" }); + await flushMicrotasks(); + + const call = mockClient.storeChatTurn.mock.calls[0]; + expect(call[1]).toBe("hello"); + expect(call[3]?.turnId).toBe( + (writer as any).deterministicTurnId("openclaw:telegram:::sk", "hello", "reply", 0), + ); + expect(call[3]?.turnId).not.toBe( + (writer as any).deterministicTurnId("openclaw:telegram:::sk", telegramWrappedUserText("hello"), "reply", 0), + ); + }); + it("stores user message on onMessageReceived", () => { writer.onMessageReceived({ sessionKey: "session-123", @@ -580,6 +807,170 @@ describe("ChatTurnWriter", () => { expect(mockClient.storeChatTurn).toHaveBeenCalled(); }); + it("T380 - W4b onMessageReceived strips leading OpenClaw metadata before persist", async () => { + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: telegramWrappedUserText("hello"), + ...({ context: { channelId: "telegram" } } as any), + } as any); + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + text: "response", + ...({ context: { success: true, channelId: "telegram" } } as any), + } as any); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe("hello"); + expect(persistedUser).not.toContain("chat_id"); + expect(persistedUser).not.toContain("username"); + }); + + it("T380 - W4b persists canonical internal Telegram envelopes and logs success", async () => { + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + context: { + channelId: "telegram", + accountId: "bot", + conversationId: "telegram:1417780778", + content: telegramWrappedUserText("hello"), + messageId: "in-1021", + }, + } as any); + + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + context: { + channelId: "telegram", + accountId: "bot", + conversationId: "telegram:1417780778", + content: "response", + success: true, + messageId: "out-1022", + }, + } as any); + await writer.flush(); + + expect(mockClient.storeChatTurn).toHaveBeenCalledTimes(1); + const [sessionId, persistedUser, persistedAssistant] = mockClient.storeChatTurn.mock.calls[0]; + expect(sessionId).toBe("openclaw:telegram:bot:telegram%3A1417780778:key123"); + expect(persistedUser).toBe("hello"); + expect(persistedAssistant).toBe("response"); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.stringContaining("[ChatTurnWriter] Persisted turn"), + ); + }); + + it("T380 - W4b preserves leading metadata-shaped text outside Telegram context", async () => { + const pastedMetadata = telegramWrappedUserText("This block is part of my message"); + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: pastedMetadata, + ...({ context: { channelId: "discord" } } as any), + } as any); + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + text: "response", + ...({ context: { success: true, channelId: "discord" } } as any), + } as any); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(pastedMetadata); + expect(persistedUser).toContain("Conversation info (untrusted metadata):"); + }); + + it("T380 - W4b preserves leading metadata-shaped user text after Telegram wrapper", async () => { + const actualUserText = [conversationInfoMetadataBlock, "This block is part of my message"].join("\n"); + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: telegramWrappedUserText(actualUserText), + ...({ context: { channelId: "telegram" } } as any), + } as any); + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + text: "response", + ...({ context: { success: true, channelId: "telegram" } } as any), + } as any); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + expect(persistedUser).toContain("Conversation info (untrusted metadata):"); + }); + + it("T380 - W4b preserves non-Telegram metadata labels after Telegram wrapper", async () => { + const actualUserText = [channelContextMetadataBlock, "This block is part of my message"].join("\n"); + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: telegramWrappedUserText(actualUserText), + ...({ context: { channelId: "telegram" } } as any), + } as any); + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + text: "response", + ...({ context: { success: true, channelId: "telegram" } } as any), + } as any); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe(actualUserText); + expect(persistedUser).toContain("Channel context (untrusted metadata):"); + }); + + it("T380 - W4b strips each queued inbound before joining", async () => { + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: telegramWrappedUserText("first"), + ...({ context: { channelId: "telegram", messageId: "in-1" } } as any), + } as any); + writer.onMessageReceived({ + sessionKey: "key123", + direction: "inbound", + text: telegramWrappedUserText("second"), + ...({ context: { channelId: "telegram", messageId: "in-2" } } as any), + } as any); + await writer.onMessageSent({ + sessionKey: "key123", + direction: "outbound", + text: "response", + ...({ context: { success: true, channelId: "telegram", messageId: "out-1" } } as any), + } as any); + await flushMicrotasks(); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe("first\nsecond"); + }); + + it("T380 - typed Telegram W4b path strips leading metadata via normalization", async () => { + writer.onTypedMessageReceived( + { + from: "user-1", + content: telegramWrappedUserText("hello"), + metadata: { chatId: "chat-1", messageId: "typed-in-1" }, + }, + { channelId: "telegram", accountId: "bot", conversationId: "chat-1" }, + ); + await writer.onTypedMessageSent( + { to: "user-1", content: "typed response", success: true, metadata: { messageId: "typed-out-1" } }, + { channelId: "telegram", accountId: "bot", conversationId: "chat-1" }, + ); + + const [, persistedUser] = mockClient.storeChatTurn.mock.calls[0]; + expect(persistedUser).toBe("hello"); + }); + it("T359 - typed message hooks persist one Telegram turn without internal sessionKey", async () => { writer.onTypedMessageReceived( { from: "user-1", content: "typed hello", metadata: { messageId: "typed-in-1" } },