Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 64 additions & 9 deletions packages/adapter-openclaw/src/ChatTurnWriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,7 @@ export class ChatTurnWriter {
const sessionId = this.deriveSessionId(ctx);
if (!sessionId) return;
const identity = this.identityFieldsFromPayload(ctx);
const stripChannelMetadataFromUserText = this.shouldStripChannelMetadataForChannel(identity.channelId);
const externalCursorKey = this.externalCursorKeyFromSessionKey(identity.sessionKey);
const typedW4bCursorKeys = this.typedW4bMarkerCursorKeys(identity);
const w4bInflightSessionIds = this.w4bInflightGuardSessionIds(identity, sessionId);
Expand Down Expand Up @@ -769,6 +770,7 @@ export class ChatTurnWriter {
typedW4bCursorKeys,
w4bInflightSessionIds,
w4aCrossPathSessionIds,
stripChannelMetadataFromUserText,
);
});
this.w4aSessionChains.set(sessionId, work);
Expand All @@ -791,6 +793,7 @@ export class ChatTurnWriter {
typedW4bCursorKeys: string[] = [],
w4bInflightSessionIds: string[] = [sessionId],
w4aCrossPathSessionIds: string[] = [sessionId],
stripChannelMetadataFromUserText = false,
): Promise<void> {
try {
// R18.2 — Take the MAX of W4a's pair-indexed watermark and W4b's
Expand All @@ -804,7 +807,7 @@ export class ChatTurnWriter {
const w4aWatermark = this.loadWatermark(sessionId);
const w4bCount = this.w4bSessionCounts.get(sessionId) ?? 0;
const savedUpTo = Math.max(w4aWatermark, w4bCount - 1);
const pairs = this.computeDelta(event.messages, savedUpTo);
const pairs = this.computeDelta(event.messages, savedUpTo, stripChannelMetadataFromUserText);
if (pairs.length === 0) return;
// T362 — Cold-start clamp. When no prior watermark exists for this
// session (savedUpTo === -1) AND `messages[]` carries more than one
Expand Down Expand Up @@ -1440,16 +1443,19 @@ export class ChatTurnWriter {
void pendingReset.then(() => this.onMessageReceived(ev)).catch(() => undefined);
return;
}
const text = readEventText(ev);
const rawText = readEventText(ev);
const text = this.shouldStripChannelMetadataForChannel(channelId)
? this.stripChannelMetadata(rawText)
: rawText;
// R15.2 — Skip attachment-only / non-text inbound events. `readEventText`
// returns "" when the envelope carries no text payload (e.g. an image
// upload from Telegram). Enqueueing an empty string here would let the
// next `message:sent` pair its assistant reply with a blank user side,
// persisting an assistant-only turn for a conversation that had no
// textual inbound. Drop until we add a recoverable representation for
// attachment-only turns.
// textual inbound. Leading channel metadata that strips to empty is
// treated the same way because it is runtime-only context, not user text.
if (!text) return;
const inboundDedupKey = this.messageHookDedupKey("inbound", ev, text);
const inboundDedupKey = this.messageHookDedupKey("inbound", ev, rawText);
if (inboundDedupKey) {
const existingConversationKey = this.messageHookInboundQueueKeys.get(inboundDedupKey);
if (existingConversationKey && existingConversationKey !== conversationKey) {
Expand Down Expand Up @@ -1514,9 +1520,10 @@ export class ChatTurnWriter {
// Strip injected `<recalled-memory>` from assistant text — the model may
// echo the auto-recall block, and if we persist the raw version here
// while the W4a path persists the stripped version, the two turnIds
// diverge and cross-path dedup misses. User text is NOT stripped:
// legitimate pastes (XML, logs) containing the tag would otherwise be
// silently corrupted.
// diverge and cross-path dedup misses. Telegram user text was already
// normalized at enqueue time by `stripChannelMetadata`, and user text is
// not passed through `stripRecalledMemory`: legitimate user pastes
// containing XML/log tags must survive verbatim.
const assistantText = this.stripRecalledMemory(readEventText(ev));
// R20.1 — Compute `assistantText` BEFORE consuming the pending user.
// A `message:sent` with `success: true` but no textual content
Expand Down Expand Up @@ -2288,6 +2295,7 @@ export class ChatTurnWriter {
private computeDelta(
messages: ChatTurnMessage[],
savedUpTo: number,
stripChannelMetadataFromUserText = false,
): ComputedChatTurnPair[] {
const pairs: ComputedChatTurnPair[] = [];
// R19.1 — Queue of unmatched user messages. Two transcript shapes
Expand Down Expand Up @@ -2351,7 +2359,10 @@ export class ChatTurnWriter {
// that semantic in `computeDelta` or it produces an
// assistant-only pair (`{ user: "", assistant: reply }`)
// for any image-only user message followed by a reply.
const userText = this.extractText(msg.content);
const extractedUserText = this.extractText(msg.content);
const userText = stripChannelMetadataFromUserText
? this.stripChannelMetadata(extractedUserText)
: extractedUserText;
if (userText) {
pendingUsers.push({
text: userText,
Expand Down Expand Up @@ -2437,6 +2448,50 @@ export class ChatTurnWriter {
);
}

/**
 * Decide whether user text arriving on `channelId` is eligible for
 * channel-metadata stripping. Only a string equal to "telegram" after
 * trimming and lower-casing qualifies; any non-string value (including
 * `undefined`) yields `false`.
 */
private shouldStripChannelMetadataForChannel(channelId?: unknown): boolean {
  if (typeof channelId !== "string") return false;
  const normalizedChannel = channelId.trim().toLowerCase();
  return normalizedChannel === "telegram";
}

/**
 * Strip leading runtime-only channel metadata blocks from persisted user text.
 *
 * OpenClaw's Telegram channel plugin can prepend fenced JSON context for the
 * agent (Telegram conversation/sender details).
 * That metadata is useful before the model responds, but persisting it as user
 * text pollutes recall and may leak sender/chat identifiers.
 *
 * Call this only after trusted channel context says the source is Telegram;
 * the labels below are user-writable text without that channel context. Keep
 * the recognized labels to the concrete Telegram wrapper shape; broader
 * channel/message labels need their own trusted source before they can be
 * stripped safely. Only contiguous, non-repeated labels at the very start are
 * removed so a user-pasted metadata example after the runtime wrapper remains
 * verbatim. Separator blank lines after stripped blocks are removed, but
 * indentation on the first real user line is kept.
 *
 * Because W4a and W4b both call this before turnId/content hashing, metadata
 * changes do not create distinct persisted turn identities for the same user
 * utterance. Existing historical turns are not rewritten.
 */
private stripChannelMetadata(text: string): string {
// Removes leading labelled fenced-JSON wrapper blocks from `text` and returns
// the remainder; input with no leading wrapper block is returned unchanged.
// Empty input short-circuits to "".
if (!text) return "";
// One wrapper block, anchored at the start of the string: a
// "Conversation info" / "Sender" label line, an opening json fence, a lazily
// matched body that ends at the first closing fence, then any blank
// separator lines. Capture group 1 is the label.
const metadataBlock =
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug: This heuristic strips any Telegram message that starts with one of these fenced blocks, even if the user authored it intentionally. A user debugging Telegram metadata and pasting a single Conversation info ... block as the first part of their message will have that content silently removed from persistence. Please gate stripping on a trusted adapter signal (or validate the parsed block against envelope metadata) instead of matching only on user-writable text.

/^(Conversation info|Sender) \(untrusted metadata\):[ \t]*\r?\n[ \t]*```json[ \t]*\r?\n[\s\S]*?\r?\n[ \t]*```[ \t]*(?:[ \t]*\r?\n)*/;
Comment thread
Jurij89 marked this conversation as resolved.
let out = text;
let stripped = false;
// Labels removed so far; each label is stripped at most once.
const strippedLabels = new Set<string>();
while (true) {
// The pattern is start-anchored and non-global, so each pass only tests the
// current head of `out`.
const match = metadataBlock.exec(out);
if (!match) break;
const label = match[1];
// A repeated label ends stripping, leaving the second occurrence in the
// returned text.
if (strippedLabels.has(label)) break;
strippedLabels.add(label);
stripped = true;
// Drop the matched block (including trailing blank lines) and re-test the
// remainder.
out = out.slice(match[0].length);
}
// When nothing was stripped, `out` still equals `text`.
return stripped ? out : text;
}

/**
* Strip the auto-injected `<recalled-memory>` block from assistant text
* before persistence. Prevents the per-turn auto-recall block from
Expand Down
Loading
Loading