Skip to content

Commit 04dd510

Browse files
committed
improve tests & implementation
1 parent a8a24da commit 04dd510

6 files changed

Lines changed: 432 additions & 383 deletions

File tree

src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
 export * from './jsx.ts'
 export { Line } from './Line.ts'
-export * from './parser.ts'
+export * from './parse.ts'
 export * from './render.ts'
 export * from './types.ts'

src/parse.ts

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
import type { TextEntity, TgxElement, TgxElementText } from './types.ts'
2+
3+
/**
 * Shape of a Telegram message entity as accepted by {@link parseEntities}.
 *
 * @see https://core.telegram.org/bots/api#messageentity
 */
export interface MessageEntity {
  /**
   * Kind of the entity. Kinds without a formatting counterpart in `mapEntity`
   * are ignored by the parser and their text stays plain.
   */
  type:
    | 'mention'
    | 'hashtag'
    | 'cashtag'
    | 'bot_command'
    | 'url'
    | 'email'
    | 'phone_number'
    | 'bold'
    | 'italic'
    | 'underline'
    | 'strikethrough'
    | 'spoiler'
    | 'code'
    | 'pre'
    | 'blockquote'
    | 'expandable_blockquote'
    | 'text_link'
    | 'text_mention'
    | 'custom_emoji'
    | 'date_time'
  /**
   * Index of the first covered unit of the message text. The parser passes it
   * to `String.prototype.slice`, i.e. it is counted in UTF-16 code units.
   */
  offset: number
  /** Number of units covered, counted the same way as {@link offset}. */
  length: number
  /** Link target; read for `text_link` entities. */
  url?: string
  /** Programming language; read for `pre` entities. */
  language?: string
  /** Custom emoji identifier; read for `custom_emoji` entities. */
  custom_emoji_id?: string
  /** Timestamp; read for `date_time` entities. */
  unix_time?: number
  /** Display format; read for `date_time` entities. */
  date_time_format?: string
}
36+
37+
/**
 * Converts formatted Telegram text with message entities to a {@link TgxElement}.
 *
 * Entities are first validated and merged, then the text is walked from one
 * entity boundary to the next while a stack tracks the elements that are
 * currently open. Text between two boundaries is appended to the innermost
 * open element, or to the root when nothing is open.
 *
 * @param text - Message text that the entity offsets refer to.
 * @param entities - Entities describing the formatting of `text`; defaults to none.
 * @returns A `fragment` element whose sub-elements mirror the entity nesting.
 */
export function parseEntities(
  text: string,
  entities: ReadonlyArray<MessageEntity> = [],
): TgxElement {
  const parsedEntities = mergeAdjacentEntities(collectValidEntities(text, entities))
  const root: TgxElement[] = []
  const openStack: OpenEntity[] = []

  // New children belong to the innermost open element, or to the root.
  const currentChildren = () => openStack.at(-1)?.node.subelements ?? root

  let cursor = 0
  for (const boundary of buildBoundaries(parsedEntities, text.length)) {
    // Emit the text accumulated since the previous boundary.
    const chunk = text.slice(cursor, boundary)
    if (chunk && !isSuppressedChunk(openStack)) {
      currentChildren().push({ type: 'plain', value: chunk })
    }

    // Close every element that ends here, innermost first.
    while (openStack.at(-1)?.end === boundary) {
      openStack.pop()
    }

    // Open every element that starts here; the list order puts wider
    // entities first, so they end up as the outer elements.
    for (const item of parsedEntities) {
      if (item.start !== boundary) {
        continue
      }

      const node: TgxElementText = { type: 'text', entity: item.entity, subelements: [] }
      currentChildren().push(node)
      openStack.push({ end: item.end, node })
    }

    cursor = boundary
  }

  return { type: 'fragment', subelements: root }
}
77+
78+
/** A validated message entity expressed as a half-open `[start, end)` range of the text. */
interface ParsedEntity {
  /** Index of the first covered unit (inclusive). */
  start: number
  /** Index just past the last covered unit (exclusive). */
  end: number
  /** Formatting applied to the covered range. */
  entity: TextEntity
}
83+
84+
/** An element that was opened while walking the text and has not been closed yet. */
interface OpenEntity {
  /** Text index at which the element closes (the exclusive end of its entity). */
  end: number
  /** Element under construction; nested content is appended to its `subelements`. */
  node: TgxElementText
}
88+
89+
/**
 * Merges adjacent entities of the same type that exist at the same nesting
 * level. Two entities A and B (A.end === B.start) can be merged when no other
 * entity C creates a nesting boundary exactly between them:
 * - Condition 1: no C with C.start ≤ A.start and C.end === A.end
 *   (C ends exactly at the boundary — merged M would escape C)
 * - Condition 2: no C with C.start === B.start and C.end > B.end
 *   (C starts at the boundary and extends further — merged M would cross C)
 *
 * The process is repeated until no more merges are possible.
 *
 * @param entities - Entities to merge; the array itself is not modified.
 * @returns A new array in which every mergeable pair has been collapsed.
 */
function mergeAdjacentEntities(entities: ParsedEntity[]): ParsedEntity[] {
  let result = entities.slice()
  for (let pair = findMergePair(result); pair !== null; pair = findMergePair(result)) {
    const [leftIndex, rightIndex] = pair
    // Both indices come straight from findMergePair, so the lookups succeed.
    const left = result[leftIndex]!
    const right = result[rightIndex]!
    const merged: ParsedEntity = { start: left.start, end: right.end, entity: left.entity }

    result = result.filter((_, index) => index !== leftIndex && index !== rightIndex)
    result.push(merged)
    // Restore the "start ascending, longer range first" order after appending.
    result.sort((a, b) => (a.start !== b.start ? a.start - b.start : b.end - a.end))
  }
  return result
}
115+
116+
/**
 * Finds the first pair of entities that can be merged.
 *
 * @param entities - Candidate entities.
 * @returns `[i, j]` such that `entities[i]` ends exactly where `entities[j]`
 * starts, both carry equal formatting and `canMergeAdjacentEntities` allows
 * the merge; `null` when no such pair exists.
 */
function findMergePair(entities: ParsedEntity[]): [number, number] | null {
  for (let i = 0; i < entities.length; i++) {
    const A = entities[i]!
    for (let j = 0; j < entities.length; j++) {
      if (i === j) {
        continue
      }
      const B = entities[j]!
      const touching = A.end === B.start
      if (
        touching &&
        entitiesDeepEqual(A.entity, B.entity) &&
        canMergeAdjacentEntities(A, B, entities)
      ) {
        return [i, j]
      }
    }
  }
  return null
}
134+
135+
/**
 * Tells whether two touching entities may be merged without breaking the
 * nesting of the remaining entities.
 *
 * @param A - Left entity; it ends where `B` starts.
 * @param B - Right entity.
 * @param all - Every entity under consideration; `A` and `B` themselves are
 * skipped by identity.
 * @returns `false` when some other entity ends exactly at the seam while
 * starting at or before `A`, or starts exactly at the seam while reaching
 * past `B`; `true` otherwise.
 */
function canMergeAdjacentEntities(
  A: ParsedEntity,
  B: ParsedEntity,
  all: ParsedEntity[],
): boolean {
  return all.every((C) => {
    if (C === A || C === B) {
      return true
    }
    // C wraps A and stops at the seam: the merged entity would escape C.
    const endsAtSeam = C.start <= A.start && C.end === A.end
    // C begins at the seam and outlives B: the merged entity would cross C.
    const startsAtSeam = C.start === B.start && C.end > B.end
    return !endsAtSeam && !startsAtSeam
  })
}
153+
154+
/**
 * Structural equality for two text entities.
 *
 * Both values are serialised to JSON with object keys sorted, so the result
 * does not depend on property insertion order. As with a plain
 * `JSON.stringify` comparison, properties whose value is `undefined` are
 * ignored.
 *
 * @param a - First entity.
 * @param b - Second entity.
 * @returns `true` when both entities serialise to the same canonical JSON.
 */
function entitiesDeepEqual(a: TextEntity, b: TextEntity): boolean {
  return JSON.stringify(a, sortKeysReplacer) === JSON.stringify(b, sortKeysReplacer)
}

/** `JSON.stringify` replacer that re-emits non-array objects with their keys in sorted order. */
function sortKeysReplacer(_key: string, value: unknown): unknown {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return value
  }
  const entries = Object.entries(value).sort(([x], [y]) => (x < y ? -1 : x > y ? 1 : 0))
  return Object.fromEntries(entries)
}
157+
158+
/**
 * Tells whether plain text must not be emitted at the current position.
 *
 * Text is suppressed when the innermost open element is a custom emoji.
 * NOTE(review): only the top of the stack is inspected, so text inside an
 * entity nested within a custom emoji is still emitted — confirm that this
 * is intended.
 *
 * @param openStack - Currently open elements, outermost first.
 * @returns `true` when the innermost open element is a custom emoji.
 */
function isSuppressedChunk(openStack: Array<OpenEntity>): boolean {
  const innermost = openStack.at(-1)
  return innermost?.node.entity.type === 'custom-emoji'
}
161+
162+
/**
 * Collects every position at which the set of open entities can change.
 *
 * @param entities - Entities whose start and end positions become boundaries.
 * @param textLength - Length of the text; always included so that trailing
 * text after the last entity is covered.
 * @returns The distinct boundary positions in ascending order.
 */
function buildBoundaries(entities: Array<ParsedEntity>, textLength: number): number[] {
  const points = new Set<number>([textLength])
  for (const { start, end } of entities) {
    points.add(start)
    points.add(end)
  }
  return [...points].sort((a, b) => a - b)
}
170+
171+
/**
 * Keeps the entities that can be represented as a properly nested tree.
 *
 * Entities are visited by ascending offset, longer ones first on ties. An
 * entity is dropped when its range is invalid for `text`, when `mapEntity`
 * has no formatting for it, or when it starts inside an already accepted
 * entity but ends after it.
 *
 * @param text - Message text the offsets refer to.
 * @param entities - Raw entities; the array is not modified.
 * @returns The accepted entities in visiting order.
 */
function collectValidEntities(
  text: string,
  entities: readonly MessageEntity[],
): ParsedEntity[] {
  const sorted = entities.toSorted((a, b) =>
    a.offset !== b.offset ? a.offset - b.offset : b.length - a.length,
  )

  const valid: ParsedEntity[] = []
  // End positions of the accepted entities that enclose the current position.
  const enclosingEnds: number[] = []

  for (const entity of sorted) {
    const start = entity.offset
    const end = entity.offset + entity.length

    if (!isValidRange(start, end, text.length)) {
      continue
    }

    const mapped = mapEntity(text.slice(start, end), entity)
    if (!mapped) {
      continue
    }

    // Leave every enclosing entity that has ended at or before `start`.
    let innermostEnd = enclosingEnds.at(-1)
    while (innermostEnd !== undefined && start >= innermostEnd) {
      enclosingEnds.pop()
      innermostEnd = enclosingEnds.at(-1)
    }

    // Starts inside the innermost enclosing entity but ends after it: skip.
    if (innermostEnd !== undefined && end > innermostEnd) {
      continue
    }

    valid.push({ start, end, entity: mapped })
    enclosingEnds.push(end)
  }

  return valid
}
212+
213+
/**
 * Checks that `[start, end)` is a non-empty range inside a text of the given length.
 *
 * @param start - Inclusive start position.
 * @param end - Exclusive end position.
 * @param textLength - Length of the text the range refers to.
 * @returns `true` when both bounds are safe integers and
 * `0 <= start < end <= textLength`.
 */
function isValidRange(start: number, end: number, textLength: number): boolean {
  if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end)) {
    return false
  }
  return start >= 0 && start < end && end <= textLength
}
222+
223+
/**
 * Translates a Telegram message entity into its {@link TextEntity} counterpart.
 *
 * @param content - Text covered by the entity; used as the `alt` of a custom emoji.
 * @param entity - Raw message entity.
 * @returns The formatting descriptor, or `null` when the entity kind is not
 * handled here or a field that the kind needs (`url`, `custom_emoji_id`,
 * `unix_time`) is missing.
 */
function mapEntity(
  content: string,
  entity: MessageEntity,
): TextEntity | null {
  switch (entity.type) {
    // Kinds that map one-to-one and carry no extra data.
    case 'bold':
    case 'italic':
    case 'underline':
    case 'strikethrough':
    case 'spoiler':
    case 'code':
      return { type: entity.type }
    case 'pre':
      return { type: 'codeblock', language: entity.language }
    case 'text_link':
      // An absent or empty URL cannot produce a link.
      return entity.url
        ? { type: 'link', url: entity.url }
        : null
    case 'custom_emoji':
      return entity.custom_emoji_id
        ? { type: 'custom-emoji', id: entity.custom_emoji_id, alt: content }
        : null
    case 'blockquote':
      return { type: 'blockquote', expandable: false }
    case 'expandable_blockquote':
      return { type: 'blockquote', expandable: true }
    case 'date_time':
      // `!= null` keeps a timestamp of 0 valid.
      return entity.unix_time != null
        ? { type: 'date-time', unix: entity.unix_time, format: entity.date_time_format }
        : null
    default:
      return null
  }
}

0 commit comments

Comments
 (0)