diff --git a/eslint.config.js b/eslint.config.js
index c8452e0..e089a0a 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -5,7 +5,7 @@ export default antfu({
     indent: 2,
     semi: false,
     quotes: 'single',
-    jsx: true,
+    jsx: false,
   },
   formatters: true,
   typescript: true,
@@ -15,5 +15,8 @@ export default antfu({
   rules: {
     'ts/no-empty-object-type': 'off',
     'ts/no-namespace': 'off',
+    'style/brace-style': ['error', '1tbs', { allowSingleLine: false }],
+    'style/arrow-parens': ['error', 'always'],
+    'curly': ['error', 'all'],
   },
 })
diff --git a/src/index.ts b/src/index.ts
index 1ceffe9..c8f32ea 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,4 +1,5 @@
 export * from './jsx.ts'
 export { Line } from './Line.ts'
+export * from './parse.ts'
 export * from './render.ts'
 export * from './types.ts'
diff --git a/src/jsx.ts b/src/jsx.ts
index 9fd6bdb..ab6dfb1 100644
--- a/src/jsx.ts
+++ b/src/jsx.ts
@@ -28,8 +28,9 @@ export function createElement(
     })
   }
 
-  if (typeof type === 'function')
+  if (typeof type === 'function') {
     return type({ ...props, children })
+  }
 
   throw new Error(`Invalid JSX component: ${type}.`)
 }
@@ -49,11 +50,13 @@ function elementsFromNode(node: TgxNode): TgxElement[] {
     return [{ type: 'plain', value: node }]
   }
 
-  if (node == null)
+  if (node == null) {
     return [{ type: 'plain', value: node }]
+  }
 
-  if (Array.isArray(node))
-    return node.flatMap(child => elementsFromNode(child))
+  if (Array.isArray(node)) {
+    return node.flatMap((child) => elementsFromNode(child))
+  }
 
   return [node]
 }
diff --git a/src/parse.ts b/src/parse.ts
new file mode 100644
index 0000000..5363e34
--- /dev/null
+++ b/src/parse.ts
@@ -0,0 +1,324 @@
+import type { TextEntity, TgxElement, TgxElementText } from './types.ts'
+
+/**
+ * A Telegram message entity: a typed range of the message text, measured in
+ * UTF-16 code units, plus the optional payload some entity types carry.
+ *
+ * @see https://core.telegram.org/bots/api#messageentity
+ */
+export interface MessageEntity {
+  type:
+    | 'mention'
+    | 'hashtag'
+    | 'cashtag'
+    | 'bot_command'
+    | 'url'
+    | 'email'
+    | 'phone_number'
+    | 'bold'
+    | 'italic'
+    | 'underline'
+    | 'strikethrough'
+    | 'spoiler'
+    | 'code'
+    | 'pre'
+    | 'blockquote'
+    | 'expandable_blockquote'
+    | 'text_link'
+    | 'text_mention'
+    | 'custom_emoji'
+    | 'date_time'
+  offset: number
+  length: number
+  url?: string
+  language?: string
+  custom_emoji_id?: string
+  unix_time?: number
+  date_time_format?: string
+}
+
+/**
+ * Converts formatted Telegram text with message entities to a {@link TgxElement}.
+ *
+ * Entities that are out of range, unsupported, or that cross an already
+ * accepted entity are dropped. Adjacent equal entities are merged where that
+ * keeps the nesting intact, then the text is walked boundary by boundary to
+ * build the element tree.
+ *
+ * @param text Message text; entity offsets index its UTF-16 code units.
+ * @param entities Entities describing the formatting of `text`.
+ * @returns A fragment whose subelements mirror the entity nesting.
+ */
+export function parseEntities(
+  text: string,
+  entities: ReadonlyArray<MessageEntity> = [],
+): TgxElement {
+  const parsedEntities = mergeAdjacentEntities(collectValidEntities(text, entities))
+  const root: TgxElement[] = []
+  const openStack: OpenEntity[] = []
+  const boundaries = buildBoundaries(parsedEntities, text.length)
+
+  let cursor = 0
+  for (const boundary of boundaries) {
+    // Flush the text between the previous boundary and this one.
+    const chunk = text.slice(cursor, boundary)
+    if (chunk && !isSuppressedChunk(openStack)) {
+      const target = openStack.length > 0 ? openStack.at(-1)!.node.subelements : root
+      target.push({ type: 'plain', value: chunk })
+    }
+
+    // Close every entity that ends here before opening the ones that start here.
+    while (openStack.length > 0 && openStack.at(-1)!.end === boundary) {
+      openStack.pop()
+    }
+
+    // Entities are sorted outermost-first, so opening them in order nests them.
+    for (const item of parsedEntities) {
+      if (item.start !== boundary) {
+        continue
+      }
+
+      const node: TgxElementText = { type: 'text', entity: item.entity, subelements: [] }
+      const target = openStack.length > 0 ? openStack.at(-1)!.node.subelements : root
+      target.push(node)
+      openStack.push({ end: item.end, node })
+    }
+
+    cursor = boundary
+  }
+
+  return { type: 'fragment', subelements: root }
+}
+
+/** A validated entity expressed as a half-open `[start, end)` range of the text. */
+interface ParsedEntity {
+  start: number
+  end: number
+  entity: TextEntity
+}
+
+/** An entity node that is currently open while the text is being walked. */
+interface OpenEntity {
+  end: number
+  node: TgxElementText
+}
+
+/**
+ * Merges adjacent entities of the same type that exist at the same nesting
+ * level. Two entities A and B (A.end === B.start) can be merged when no other
+ * entity C creates a nesting boundary exactly between them:
+ * - Condition 1: no C with C.start ≤ A.start and C.end === A.end
+ *   (C ends exactly at the boundary — merged M would escape C)
+ * - Condition 2: no C with C.start === B.start and C.end > B.end
+ *   (C starts at the boundary and extends further — merged M would cross C)
+ *
+ * Custom emoji are never merged: each one is a separate emoji, so merging
+ * two equal neighbours would drop one of them from the output.
+ *
+ * The process is repeated until no more merges are possible.
+ */
+function mergeAdjacentEntities(entities: ParsedEntity[]): ParsedEntity[] {
+  let result = entities.slice()
+  let pair = findMergePair(result)
+  while (pair !== null) {
+    const [i, j] = pair
+    const A = result[i]!
+    const B = result[j]!
+    const merged: ParsedEntity = { start: A.start, end: B.end, entity: A.entity }
+    result = result.filter((_, k) => k !== i && k !== j)
+    result.push(merged)
+    // Restore the outermost-first order: by start, then longest range first.
+    result.sort((a, b) => (a.start !== b.start ? a.start - b.start : b.end - a.end))
+    pair = findMergePair(result)
+  }
+  return result
+}
+
+/**
+ * Finds the first ordered pair of entities that can be merged.
+ *
+ * @returns Indices `[i, j]` such that `entities[i]` ends where `entities[j]`
+ * starts and the two may be merged, or `null` when there is no such pair.
+ */
+function findMergePair(entities: ParsedEntity[]): [number, number] | null {
+  for (let i = 0; i < entities.length; i++) {
+    for (let j = 0; j < entities.length; j++) {
+      if (i === j) {
+        continue
+      }
+      const A = entities[i]!
+      const B = entities[j]!
+      if (A.end !== B.start || !entitiesDeepEqual(A.entity, B.entity)) {
+        continue
+      }
+      // A custom emoji is atomic and its `alt` comes from the text it covers,
+      // so merging two equal neighbours would silently drop the second emoji.
+      if (A.entity.type === 'custom-emoji') {
+        continue
+      }
+      if (canMergeAdjacentEntities(A, B, entities)) {
+        return [i, j]
+      }
+    }
+  }
+  return null
+}
+
+/**
+ * Checks that merging adjacent entities `A` and `B` keeps the nesting valid,
+ * i.e. that no other entity in `all` ends with `A` or starts with `B` in a way
+ * the merged range would cross.
+ */
+function canMergeAdjacentEntities(
+  A: ParsedEntity,
+  B: ParsedEntity,
+  all: ParsedEntity[],
+): boolean {
+  for (const C of all) {
+    if (C === A || C === B) {
+      continue
+    }
+    // C wraps A and ends with it: the merged range would escape C.
+    if (C.start <= A.start && C.end === A.end) {
+      return false
+    }
+    // C starts with B but is longer: the merged range would cross C.
+    if (C.start === B.start && C.end > B.end) {
+      return false
+    }
+  }
+  return true
+}
+
+/** Compares two entities by their JSON serialisation. */
+function entitiesDeepEqual(a: TextEntity, b: TextEntity): boolean {
+  return JSON.stringify(a) === JSON.stringify(b)
+}
+
+/**
+ * Tells whether plain text must be skipped because the innermost open entity
+ * is a custom emoji, which already carries the covered text as its `alt`.
+ */
+function isSuppressedChunk(openStack: Array<OpenEntity>): boolean {
+  return openStack.at(-1)?.node.entity.type === 'custom-emoji'
+}
+
+/**
+ * Collects every offset at which an entity starts or ends, plus the end of the
+ * text, as a sorted list without duplicates.
+ */
+function buildBoundaries(entities: Array<ParsedEntity>, textLength: number): number[] {
+  const boundaries = new Set([textLength])
+  for (const entity of entities) {
+    boundaries.add(entity.start)
+    boundaries.add(entity.end)
+  }
+  return Array.from(boundaries).sort((a, b) => a - b)
+}
+
+/**
+ * Maps the raw entities to {@link ParsedEntity} items, keeping only those that
+ * have a valid range, a supported type and do not cross an entity accepted
+ * earlier. The result is ordered by start, longest range first.
+ */
+function collectValidEntities(
+  text: string,
+  entities: readonly MessageEntity[],
+): ParsedEntity[] {
+  const sorted = entities.toSorted((a, b) => {
+    if (a.offset !== b.offset) {
+      return a.offset - b.offset
+    }
+    return b.length - a.length
+  })
+
+  const valid: ParsedEntity[] = []
+  // End offsets of the accepted entities that enclose the current position.
+  const stack: number[] = []
+
+  for (const entity of sorted) {
+    const start = entity.offset
+    const end = entity.offset + entity.length
+
+    if (!isValidRange(start, end, text.length)) {
+      continue
+    }
+
+    const mapped = mapEntity(text.slice(start, end), entity)
+    if (!mapped) {
+      continue
+    }
+
+    // Leave every enclosing entity that ended at or before this start.
+    while (stack.length > 0 && start >= stack.at(-1)!) {
+      stack.pop()
+    }
+
+    // Skip an entity that would run past the end of its enclosing entity.
+    if (stack.length > 0 && end > stack.at(-1)!) {
+      continue
+    }
+
+    valid.push({ start, end, entity: mapped })
+    stack.push(end)
+  }
+
+  return valid
+}
+
+/** Accepts only non-empty safe-integer ranges that lie within the text. */
+function isValidRange(start: number, end: number, textLength: number): boolean {
+  if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end)) {
+    return false
+  }
+  if (start < 0 || end < 0 || end <= start) {
+    return false
+  }
+  return end <= textLength
+}
+
+/**
+ * Translates a Telegram entity to its {@link TextEntity} counterpart.
+ *
+ * @param content The slice of text covered by the entity.
+ * @returns The mapped entity, or `null` for unsupported types and for
+ * entities that lack their required payload.
+ */
+function mapEntity(
+  content: string,
+  entity: MessageEntity,
+): TextEntity | null {
+  switch (entity.type) {
+    case 'bold':
+      return { type: 'bold' }
+    case 'italic':
+      return { type: 'italic' }
+    case 'underline':
+      return { type: 'underline' }
+    case 'strikethrough':
+      return { type: 'strikethrough' }
+    case 'spoiler':
+      return { type: 'spoiler' }
+    case 'code':
+      return { type: 'code' }
+    case 'pre':
+      return { type: 'codeblock', language: entity.language }
+    case 'text_link':
+      return entity.url
+        ? { type: 'link', url: entity.url }
+        : null
+    case 'custom_emoji':
+      return entity.custom_emoji_id
+        ? { type: 'custom-emoji', id: entity.custom_emoji_id, alt: content }
+        : null
+    case 'blockquote':
+      return { type: 'blockquote', expandable: false }
+    case 'expandable_blockquote':
+      return { type: 'blockquote', expandable: true }
+    case 'date_time':
+      return entity.unix_time != null
+        ? { type: 'date-time', unix: entity.unix_time, format: entity.date_time_format }
+        : null
+  }
+  return null
+}
diff --git a/src/render.ts b/src/render.ts
index ec4a4ea..1c4c898 100644
--- a/src/render.ts
+++ b/src/render.ts
@@ -53,8 +53,9 @@ function renderTextElement(el: TgxElementText): string {
 }
 
 function renderPlainElement({ value }: TgxElementPlain): string {
-  if (value == null || typeof value === 'boolean')
+  if (value == null || typeof value === 'boolean') {
     return ''
+  }
 
   return sanitize(String(value))
 }
diff --git a/test/jsx.test.tsx b/test/jsx.test.tsx
index 3273ba0..dc22bc1 100644
--- a/test/jsx.test.tsx
+++ b/test/jsx.test.tsx
@@ -2,7 +2,6 @@ import { describe, expect, it } from 'vitest'
 
 describe('jsx', () => {
   it('should transform fragments', () => {
-    // deno-lint-ignore jsx-no-useless-fragment
     expect(<>).toEqual({
       type: 'fragment',
       subelements: [],
diff --git a/test/parse.test.tsx b/test/parse.test.tsx
new file mode 100644
index 0000000..4398b47
--- /dev/null
+++ b/test/parse.test.tsx
@@ -0,0 +1,185 @@
+import type { MessageEntity, TgxElement } from '../src/index.ts'
+import { describe, expect, it } from 'vitest'
+import { parseEntities } from '../src/index.ts'
+
+describe('parseEntities', () => {
+  const TEST_CASES: Array<{
+    name: string
+    text: string
+    entities?: Array<MessageEntity>
+    expected: TgxElement
+  }> = [
+    {
+      name: 'plain text w/o entities',
+      text: 'hello',
+      entities: [],
+      expected: <>{'hello'},
+    },
+    {
+      name: 'nested entities with stable structure',
+      text: 'hello world',
+      entities: [
+        { type: 'bold', offset: 0, length: 11 },
+        { type: 'italic', offset: 6, length: 5 },
+      ],
+      expected: <>{'hello '}{'world'},
+    },
+    {
+      name: 'adjacent entities',
+      text: 'abcd',
+      entities: [
+        { type: 'bold', offset: 0, length: 2 },
+        { type: 'italic', offset: 2, length: 2 },
+      ],
+      expected: <>{'ab'}{'cd'},
+    },
+    {
+      name: 'adjacent identical custom emoji without merging them',
+      text: '🙂🙂',
+      entities: [
+        { type: 'custom_emoji', offset: 0, length: 2, custom_emoji_id: '42' },
+        { type: 'custom_emoji', offset: 2, length: 2, custom_emoji_id: '42' },
+      ],
+      expected: {
+        type: 'fragment',
+        subelements: [
+          { type: 'text', entity: { type: 'custom-emoji', id: '42', alt: '🙂' }, subelements: [] },
+          { type: 'text', entity: { type: 'custom-emoji', id: '42', alt: '🙂' }, subelements: [] },
+        ],
+      },
+    },
+    {
+      name: 'link, codeblock and custom emoji',
+      text: 'ab🙂de',
+      entities: [
+        { type: 'text_link', offset: 0, length: 2, url: 'https://example.com' },
+        { type: 'custom_emoji', offset: 2, length: 2, custom_emoji_id: '42' },
+        { type: 'pre', offset: 4, length: 1, language: 'ts' },
+      ],
+      expected: (
+        <>
+          {'ab'}
+
+          {'d'}
+          {'e'}
+
+      ),
+    },
+    {
+      name: 'ignoring invalid and crossing entities',
+      text: 'abcdef',
+      entities: [
+        { type: 'bold', offset: -1, length: 1 },
+        { type: 'italic', offset: 0, length: 0 },
+        { type: 'underline', offset: 0, length: 4 },
+        { type: 'strikethrough', offset: 2, length: 4 }, // crossing with underline, should be skipped
+        { type: 'code', offset: 4, length: 2 },
+        { type: 'mention', offset: 0, length: 2 }, // unsupported, ignored
+      ],
+      expected: (
+        <>
+          {'abcd'}
+          {'ef'}
+
+      ),
+    },
+    {
+      name: 'UTF-16 offsets for ZWJ emoji sequences',
+      text: `x${'👨‍👩‍👧‍👦'}y`,
+      entities: [
+        { type: 'custom_emoji', offset: 1, length: 11, custom_emoji_id: 'family-1' },
+        { type: 'code', offset: 12, length: 1 },
+      ],
+      expected: <>{'x'}{'y'},
+    },
+    {
+      name: 'surrogate pairs',
+      text: 'A🙂e\u0301Ж',
+      entities: [
+        { offset: 1, length: 2, type: 'bold' },
+        { offset: 3, length: 2, type: 'italic' },
+        { offset: 5, length: 1, type: 'underline' },
+      ],
+      expected: <>{'A'}{'🙂'}{'e\u0301'}{'Ж'},
+    },
+    {
+      name: 'complex example',
+      text: '😮‍💨ПРИМЕР🚫\n\n🇷🇺 - 俄罗斯\n🇺🇸 – Америка\n🇨🇳 — China\n\nмоноширинный ↔️ text\n\n\nconsole.log("👋 Hi there!")\n\n\nCopy this code → الرمز الترويجي ← for nothing.\n\nExpandable blockquote with bold, italic, spoiler, strikethrough, underline, and bold italic spoiler strikethrough underline text!\n\nTomorrow at 12:34.',
+      entities: [
+        { offset: 0, length: 5, type: 'custom_emoji', custom_emoji_id: '5192886773948107844' },
+        { offset: 5, length: 6, type: 'bold' },
+        { offset: 11, length: 2, type: 'custom_emoji', custom_emoji_id: '5776064103483184336' },
+        { offset: 15, length: 4, type: 'text_link', url: 'https://en.wikipedia.org/wiki/Russia' },
+        { offset: 26, length: 4, type: 'text_link', url: 'https://en.wikipedia.org/wiki/United_States' },
+        { offset: 41, length: 4, type: 'text_link', url: 'https://en.wikipedia.org/wiki/China' },
+        { offset: 55, length: 20, type: 'code' },
+        { offset: 77, length: 29, type: 'pre', language: 'js' },
+        { offset: 125, length: 14, type: 'code' },
+        { offset: 156, length: 129, type: 'expandable_blockquote' },
+        { offset: 183, length: 4, type: 'bold' },
+        { offset: 189, length: 6, type: 'italic' },
+        { offset: 197, length: 7, type: 'spoiler' },
+        { offset: 206, length: 13, type: 'strikethrough' },
+        { offset: 221, length: 9, type: 'underline' },
+        { offset: 236, length: 5, type: 'bold' },
+        { offset: 241, length: 7, type: 'bold' },
+        { offset: 241, length: 7, type: 'italic' },
+        { offset: 248, length: 8, type: 'bold' },
+        { offset: 248, length: 8, type: 'italic' },
+        { offset: 248, length: 8, type: 'spoiler' },
+        { offset: 256, length: 14, type: 'bold' },
+        { offset: 256, length: 14, type: 'italic' },
+        { offset: 256, length: 14, type: 'strikethrough' },
+        { offset: 256, length: 14, type: 'spoiler' },
+        { offset: 270, length: 14, type: 'bold' },
+        { offset: 270, length: 14, type: 'italic' },
+        { offset: 270, length: 14, type: 'underline' },
+        { offset: 270, length: 14, type: 'strikethrough' },
+        { offset: 270, length: 14, type: 'spoiler' },
+        { offset: 287, length: 17, type: 'date_time', unix_time: 1774510496, date_time_format: 'dt' },
+      ],
+      expected: (
+        <>
+
+          ПРИМЕР
+
+          {'\n\n'}
+          🇷🇺
+          {' - 俄罗斯\n'}
+          🇺🇸
+          {' – Америка\n'}
+          🇨🇳
+          {' — China\n\n'}
+          моноширинный ↔️ text
+          {'\n\n'}
+          {'\nconsole.log("👋 Hi there!")\n'}
+          {'\n\nCopy this code → '}
+          {'الرمز الترويجي'}
+          {' ← for nothing.\n\n'}
+
+            {'Expandable blockquote with '}
+            bold
+            {', '}
+            italic
+            {', '}
+            spoiler
+            {', '}
+            strikethrough
+            {', '}
+            underline
+            {', and '}
+            bold italic spoiler strikethrough underline text
+            {'!'}
+
+          {'\n\n'}
+
+          {'.'}
+
+      ),
+    },
+  ]
+
+  it.each(TEST_CASES)('should parse $name', ({ text, entities, expected }) => {
+    expect(parseEntities(text, entities)).toEqual(expected)
+  })
+})
diff --git a/tsconfig.lib.json b/tsconfig.lib.json
index ac9e52c..13e334c 100644
--- a/tsconfig.lib.json
+++ b/tsconfig.lib.json
@@ -2,7 +2,7 @@
   "compilerOptions": {
     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.lib.tsbuildinfo",
     "target": "es2022",
-    "lib": ["ES2022"],
+    "lib": ["ESNext"],
     "moduleDetection": "force",
     "rootDir": "./src/",
     "module": "node20",