diff --git a/.changeset/telegram-markdown-v2-escape.md b/.changeset/telegram-markdown-v2-escape.md new file mode 100644 index 00000000..02e4dabb --- /dev/null +++ b/.changeset/telegram-markdown-v2-escape.md @@ -0,0 +1,16 @@ +--- +"@chat-adapter/telegram": minor +--- + +Fix Telegram rejecting messages with `Bad Request: can't parse entities` when the text contained reserved characters like `.`, `(`, `)`, `-`, `|`, `!`, `+`, `=`, `{`, `}`, `#`. This happened on almost every LLM reply because periods and parentheses appear in normal prose. + +The adapter now uses `parse_mode: "MarkdownV2"` (the modern Telegram parse mode) and walks the mdast AST directly to emit properly escaped output: + +- **Regular text** — escapes all 18 MarkdownV2 reserved characters: `` _ * [ ] ( ) ~ ` > # + - = | { } . ! `` +- **Inline and fenced code** — escapes only `` ` `` and `\` (per spec) +- **Link URLs** — escapes only `)` and `\` inside the `(...)` portion +- **Formatting entities** — bold `*…*`, italic `_…_`, strikethrough `~…~`; headings render as bold +- **Lists** — bullets emitted as `\-`, ordered numerals as `N\.` +- **Thematic breaks** — emitted as `\-\-\-` + +Reference: [Telegram Bot API — Formatting options](https://core.telegram.org/bots/api#formatting-options). See `packages/adapter-telegram/docs/markdown-v2.md` for the full rule set. 
diff --git a/packages/adapter-telegram/src/index.test.ts b/packages/adapter-telegram/src/index.test.ts index f96a773b..46148720 100644 --- a/packages/adapter-telegram/src/index.test.ts +++ b/packages/adapter-telegram/src/index.test.ts @@ -973,7 +973,7 @@ describe("TelegramAdapter", () => { String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) ) as { parse_mode?: string }; - expect(sendMessageBody.parse_mode).toBe("Markdown"); + expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); }); it("posts cards with inline keyboard buttons", async () => { @@ -1023,6 +1023,7 @@ describe("TelegramAdapter", () => { const sendMessageBody = JSON.parse( String((mockFetch.mock.calls[1]?.[1] as RequestInit).body) ) as { + parse_mode?: string; reply_markup?: { inline_keyboard: Array< Array<{ text: string; callback_data?: string; url?: string }> @@ -1032,7 +1033,7 @@ describe("TelegramAdapter", () => { const row = sendMessageBody.reply_markup?.inline_keyboard[0]; expect(row).toBeDefined(); - expect(sendMessageBody.parse_mode).toBe("Markdown"); + expect(sendMessageBody.parse_mode).toBe("MarkdownV2"); expect(row?.[0]).toEqual({ text: "Approve", callback_data: encodeTelegramCallbackData("approve", "request-123"), diff --git a/packages/adapter-telegram/src/index.ts b/packages/adapter-telegram/src/index.ts index 3993ac7b..4e767ebe 100644 --- a/packages/adapter-telegram/src/index.ts +++ b/packages/adapter-telegram/src/index.ts @@ -64,7 +64,7 @@ const TELEGRAM_MESSAGE_LIMIT = 4096; const TELEGRAM_CAPTION_LIMIT = 1024; const TELEGRAM_SECRET_TOKEN_HEADER = "x-telegram-bot-api-secret-token"; const MESSAGE_ID_PATTERN = /^([^:]+):(\d+)$/; -const TELEGRAM_MARKDOWN_PARSE_MODE = "Markdown"; +const TELEGRAM_MARKDOWN_PARSE_MODE = "MarkdownV2"; const trimTrailingSlashes = (url: string): string => { let end = url.length; while (end > 0 && url[end - 1] === "/") { @@ -1813,6 +1813,7 @@ export type { TelegramMessage, TelegramMessageReactionUpdated, TelegramRawMessage, + TelegramReactionType, 
TelegramThreadId, TelegramUpdate, TelegramUser, diff --git a/packages/adapter-telegram/src/markdown.test.ts b/packages/adapter-telegram/src/markdown.test.ts index 748f3aba..98be237e 100644 --- a/packages/adapter-telegram/src/markdown.test.ts +++ b/packages/adapter-telegram/src/markdown.test.ts @@ -6,48 +6,67 @@ const TABLE_PIPE_PATTERN = /\|.*Name.*\|/; describe("TelegramFormatConverter", () => { const converter = new TelegramFormatConverter(); - describe("fromAst (AST -> markdown string)", () => { + describe("fromAst (AST -> MarkdownV2 string)", () => { it("should convert a plain text paragraph", () => { const ast = converter.toAst("Hello world"); const result = converter.fromAst(ast); - expect(result).toContain("Hello world"); + expect(result).toBe("Hello world"); + }); + + it("should escape reserved characters in plain text", () => { + const ast = converter.toAst("Hello (world). Path: src/foo.ts!"); + const result = converter.fromAst(ast); + expect(result).toBe("Hello \\(world\\)\\. Path: src/foo\\.ts\\!"); }); - it("should convert bold", () => { + it("should escape dashes at the start of a sentence", () => { + const ast = converter.toAst("- first\n- second"); + const result = converter.fromAst(ast); + expect(result).toContain("\\- first"); + expect(result).toContain("\\- second"); + }); + + it("should convert bold using MarkdownV2 single asterisks", () => { const ast = converter.toAst("**bold text**"); const result = converter.fromAst(ast); - expect(result).toContain("**bold text**"); + expect(result).toBe("*bold text*"); }); - it("should convert italic", () => { + it("should convert italic using MarkdownV2 underscores", () => { const ast = converter.toAst("*italic text*"); const result = converter.fromAst(ast); - expect(result).toContain("*italic text*"); + expect(result).toBe("_italic text_"); }); - it("should convert strikethrough", () => { + it("should convert strikethrough using a single tilde", () => { const ast = converter.toAst("~~strikethrough~~"); 
const result = converter.fromAst(ast); - expect(result).toContain("~~strikethrough~~"); + expect(result).toBe("~strikethrough~"); }); - it("should convert links", () => { - const ast = converter.toAst("[link text](https://example.com)"); + it("should convert links and escape reserved URL chars", () => { + const ast = converter.toAst("[link text](https://example.com/a(b))"); const result = converter.fromAst(ast); - expect(result).toContain("[link text](https://example.com)"); + expect(result).toBe("[link text](https://example.com/a(b\\))"); }); - it("should preserve inline code", () => { + it("should preserve and escape inline code", () => { const ast = converter.toAst("Use `const x = 1`"); const result = converter.fromAst(ast); expect(result).toContain("`const x = 1`"); }); - it("should handle code blocks", () => { + it("should escape backticks and backslashes inside inline code", () => { + const ast = converter.toAst("Run `echo \\`hi\\``"); + const result = converter.fromAst(ast); + expect(result).toContain("\\`"); + }); + + it("should handle fenced code blocks", () => { const input = "```js\nconst x = 1;\n```"; const ast = converter.toAst(input); const output = converter.fromAst(ast); - expect(output).toContain("```"); + expect(output).toContain("```js"); expect(output).toContain("const x = 1;"); }); @@ -61,6 +80,18 @@ describe("TelegramFormatConverter", () => { expect(result).toContain("Alice"); expect(result).not.toMatch(TABLE_PIPE_PATTERN); }); + + it("should render headings as bold", () => { + const ast = converter.toAst("# Title"); + const result = converter.fromAst(ast); + expect(result).toBe("*Title*"); + }); + + it("should handle blockquotes with line prefixes", () => { + const ast = converter.toAst("> quoted line"); + const result = converter.fromAst(ast); + expect(result).toBe(">quoted line"); + }); }); describe("toAst (markdown -> AST)", () => { @@ -90,9 +121,9 @@ describe("TelegramFormatConverter", () => { }); describe("renderPostable", () => { - 
it("should return a plain string as-is", () => { - const result = converter.renderPostable("Hello world"); - expect(result).toBe("Hello world"); + it("should escape reserved chars in plain strings", () => { + const result = converter.renderPostable("Hello (world)."); + expect(result).toBe("Hello \\(world\\)\\."); }); it("should return an empty string unchanged", () => { @@ -100,28 +131,28 @@ describe("TelegramFormatConverter", () => { expect(result).toBe(""); }); - it("should render a raw message directly", () => { - const result = converter.renderPostable({ raw: "raw content" }); - expect(result).toBe("raw content"); + it("should render a raw message directly without escaping", () => { + const result = converter.renderPostable({ raw: "raw (content)." }); + expect(result).toBe("raw (content)."); }); it("should render a markdown message", () => { const result = converter.renderPostable({ markdown: "**bold** text" }); - expect(result).toContain("bold"); + expect(result).toContain("*bold*"); }); it("should render an AST message", () => { const ast = converter.toAst("Hello from AST"); const result = converter.renderPostable({ ast }); - expect(result).toContain("Hello from AST"); + expect(result).toBe("Hello from AST"); }); it("should render markdown with bold and italic", () => { const result = converter.renderPostable({ markdown: "**bold** and *italic*", }); - expect(result).toContain("**bold**"); - expect(result).toContain("*italic*"); + expect(result).toContain("*bold*"); + expect(result).toContain("_italic_"); }); it("should render markdown table as code block", () => { @@ -185,37 +216,17 @@ describe("TelegramFormatConverter", () => { }); }); - describe("roundtrip", () => { - it("should preserve plain text through toAst -> fromAst", () => { - const input = "Hello world"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("Hello world"); - }); - - it("should preserve bold through toAst -> fromAst", () => { - const input = "**bold 
text**"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("**bold text**"); - }); - - it("should preserve links through toAst -> fromAst", () => { - const input = "[click here](https://example.com)"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("[click here](https://example.com)"); - }); - - it("should preserve code blocks through toAst -> fromAst", () => { - const input = "```\nconst x = 1;\n```"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("const x = 1;"); - }); - - it("should convert table to code block on roundtrip", () => { - const input = "| Col1 | Col2 |\n|------|------|\n| A | B |"; - const result = converter.fromAst(converter.toAst(input)); - expect(result).toContain("```"); - expect(result).toContain("Col1"); - expect(result).toContain("A"); + describe("MarkdownV2 escape coverage", () => { + it("should escape every reserved character in regular text", () => { + // `[` and `]` are consumed by the markdown parser as link syntax, so + // they never reach the text converter unescaped. The remaining 16 + // reserved characters must all be escaped. + const reserved = "_*()~`>#+-=|{}.!"; + const ast = converter.toAst(`word ${reserved} word`); + const result = converter.fromAst(ast); + for (const char of reserved) { + expect(result).toContain(`\\${char}`); + } }); }); }); diff --git a/packages/adapter-telegram/src/markdown.ts b/packages/adapter-telegram/src/markdown.ts index 0720415f..28e82d52 100644 --- a/packages/adapter-telegram/src/markdown.ts +++ b/packages/adapter-telegram/src/markdown.ts @@ -1,38 +1,71 @@ /** * Telegram format conversion. * - * Telegram supports Markdown/HTML parse modes, but to avoid - * platform-specific escaping pitfalls this adapter emits normalized - * markdown text as plain message text. 
+ * Telegram's `MarkdownV2` parse mode requires every occurrence of the + * reserved characters `_ * [ ] ( ) ~ ` > # + - = | { } . !` to be + * escaped with a preceding `\` outside of formatting entities. The + * plain markdown produced by `remark-stringify` does not satisfy this + * rule, which made Telegram reject messages that contained perfectly + * ordinary punctuation (periods, parentheses, dashes, pipes, …). + * + * This converter walks the mdast AST directly and emits MarkdownV2 + * with context-aware escaping so the resulting string is always safe + * to send with `parse_mode: "MarkdownV2"`. */ import { type AdapterPostableMessage, BaseFormatConverter, type Content, + getNodeChildren, + isBlockquoteNode, + isCodeNode, + isDeleteNode, + isEmphasisNode, + isInlineCodeNode, + isLinkNode, + isListNode, + isParagraphNode, + isStrongNode, isTableNode, + isTextNode, parseMarkdown, type Root, - stringifyMarkdown, tableToAscii, - walkAst, } from "chat"; +// Reserved MarkdownV2 characters that must be escaped in regular text. +// Order matters for the regex: backslash is handled separately first so +// we don't double-escape already-escaped characters we emit ourselves. +const MARKDOWN_V2_RESERVED = /[_*[\]()~`>#+\-=|{}.!]/g; + +// Inside `code` and `pre` entities only `\` and `` ` `` need escaping. +const MARKDOWN_V2_CODE_RESERVED = /[`\\]/g; + +// Inside the `(url)` portion of a link only `)` and `\` need escaping. 
+const MARKDOWN_V2_LINK_URL_RESERVED = /[)\\]/g; + +/** Escapes regular text: every backslash is doubled first, then each character matched by MARKDOWN_V2_RESERVED gets a leading backslash. */ function escapeText(text: string): string { + return text + .replace(/\\/g, "\\\\") + .replace(MARKDOWN_V2_RESERVED, (char) => `\\${char}`); +} + +/** Escapes code content: each character matched by MARKDOWN_V2_CODE_RESERVED gets a leading backslash. */ function escapeCode(text: string): string { + return text.replace(MARKDOWN_V2_CODE_RESERVED, (char) => `\\${char}`); +} + +/** Escapes a link target: each closing parenthesis or backslash (MARKDOWN_V2_LINK_URL_RESERVED) gets a leading backslash. */ function escapeLinkUrl(url: string): string { + return url.replace(MARKDOWN_V2_LINK_URL_RESERVED, (char) => `\\${char}`); +} + export class TelegramFormatConverter extends BaseFormatConverter { fromAst(ast: Root): string { - // Check for table nodes and replace them with code blocks, - // since Telegram renders raw pipe syntax as garbled text. - const transformed = walkAst(structuredClone(ast), (node: Content) => { - if (isTableNode(node)) { - return { - type: "code" as const, - value: tableToAscii(node), - lang: undefined, - } as Content; - } - return node; - }); - return stringifyMarkdown(transformed).trim(); + /* Each top-level node is rendered separately; the pieces are joined with a blank line and the result is trimmed. */ const parts: string[] = []; + for (const node of ast.children) { + parts.push(this.nodeToMarkdownV2(node as Content)); + } + return parts.join("\n\n").trim(); } toAst(text: string): Root { @@ -41,7 +74,7 @@ export class TelegramFormatConverter extends BaseFormatConverter { override renderPostable(message: AdapterPostableMessage): string { if (typeof message === "string") { - return message; + /* A bare string is treated as literal text, so it is escaped rather than parsed. */ return escapeText(message); } if ("raw" in message) { return message.raw; @@ -54,4 +87,124 @@ export class TelegramFormatConverter extends BaseFormatConverter { } return super.renderPostable(message); } + + /** Renders a single mdast node as a MarkdownV2 string, recursing into children for container nodes. Text goes through escapeText, inline and fenced code through escapeCode, link targets through escapeLinkUrl; lists are delegated to renderMarkdownV2List. */ private nodeToMarkdownV2(node: Content): string { + if (isParagraphNode(node)) { + return getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + } + + if (isTextNode(node)) { + return escapeText(node.value); + } + + if (isStrongNode(node)) { + const content = getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + return `*${content}*`; + } + + if (isEmphasisNode(node)) { + const content = 
getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + return `_${content}_`; + } + + if (isDeleteNode(node)) { + const content = getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + return `~${content}~`; + } + + if (isInlineCodeNode(node)) { + return `\`${escapeCode(node.value)}\``; + } + + if (isCodeNode(node)) { + const lang = node.lang ?? ""; + return `\`\`\`${lang}\n${escapeCode(node.value)}\n\`\`\``; + } + + if (isLinkNode(node)) { + const linkText = getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + return `[${linkText}](${escapeLinkUrl(node.url)})`; + } + + /* Render the children, then prefix every resulting line with a greater-than sign. */ if (isBlockquoteNode(node)) { + return getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join("\n") + .split("\n") + .map((line) => `>${line}`) + .join("\n"); + } + + if (isListNode(node)) { + return this.renderMarkdownV2List(node, 0); + } + + if (node.type === "break") { + return "\n"; + } + + if (node.type === "thematicBreak") { + return "\\-\\-\\-"; + } + + /* Tables are emitted as a fenced block holding the output of tableToAscii. */ if (isTableNode(node)) { + return `\`\`\`\n${escapeCode(tableToAscii(node))}\n\`\`\``; + } + + /* Headings use the same single-asterisk wrapper as strong nodes. */ if (node.type === "heading") { + const content = getNodeChildren(node) + .map((child) => this.nodeToMarkdownV2(child)) + .join(""); + return `*${content}*`; + } + + /* Any other node type: extract its text via defaultNodeToText and escape it as regular text. */ return escapeText( + this.defaultNodeToText(node, (child) => { + if (isTextNode(child)) { + return child.value; + } + return this.defaultNodeToText(child, () => ""); + }) + ); + } + + /** Renders a list node as one entry per rendered child, joined with newlines; returns an empty string for non-list input. Ordered items are numbered from node.start (1 when absent) followed by an escaped dot, unordered items use an escaped dash, and depth sets how many times the indent unit is repeated in front of each entry. */ private renderMarkdownV2List(node: Content, depth: number): string { + if (!isListNode(node)) { + return ""; + } + const indent = " ".repeat(depth); + const start = node.start ?? 1; + const lines: string[] = []; + for (const [i, item] of getNodeChildren(node).entries()) { + const prefix = node.ordered ? 
`${start + i}\\.` : "\\-"; + let isFirstContent = true; + for (const child of getNodeChildren(item)) { + /* Nested lists recurse one level deeper and never carry this item's marker. */ if (isListNode(child)) { + lines.push(this.renderMarkdownV2List(child, depth + 1)); + continue; + } + const rendered = this.nodeToMarkdownV2(child); + /* Skip children that render to nothing but whitespace. */ if (!rendered.trim()) { + continue; + } + /* Only the first rendered child of an item gets the marker; later ones are indented continuation entries. */ if (isFirstContent) { + lines.push(`${indent}${prefix} ${rendered}`); + isFirstContent = false; + } else { + lines.push(`${indent} ${rendered}`); + } + } + } + return lines.join("\n"); + } } diff --git a/packages/adapter-telegram/src/types.ts b/packages/adapter-telegram/src/types.ts index 250c8aeb..0121f6c1 100644 --- a/packages/adapter-telegram/src/types.ts +++ b/packages/adapter-telegram/src/types.ts @@ -154,6 +154,7 @@ export interface TelegramMessage { video?: TelegramFile & { width?: number; height?: number; + duration?: number; mime_type?: string; file_name?: string; };