1- import type { TelegramMessageEntity } from '../src/parser.ts'
1+ import type { MessageEntity } from '../src/parser.ts'
22import { describe , expect , it } from 'vitest'
33import { parseMessageEntities } from '../src/parser.ts'
44
/**
 * Returns the length of `value` measured in UTF-16 code units.
 *
 * `String.prototype.length` counts code units rather than code points or
 * grapheme clusters, so an astral character such as an emoji contributes 2.
 * The tests in this file use it to derive entity `offset` / `length` values
 * from readable string prefixes instead of hand-counted numbers.
 *
 * @param value - The string to measure.
 * @returns The number of UTF-16 code units in `value`.
 */
function cu(value: string): number {
  return value.length
}
8+
59describe ( 'parseMessageEntities' , ( ) => {
610 it ( 'returns plain fragment when entities are missing' , ( ) => {
711 expect ( parseMessageEntities ( 'hello' ) ) . toEqual ( {
@@ -12,7 +16,7 @@ describe('parseMessageEntities', () => {
1216
1317 it ( 'parses nested entities with stable structure' , ( ) => {
1418 const text = 'hello world'
15- const entities : TelegramMessageEntity [ ] = [
19+ const entities : MessageEntity [ ] = [
1620 { type : 'bold' , offset : 0 , length : 11 } ,
1721 { type : 'italic' , offset : 6 , length : 5 } ,
1822 ]
@@ -36,7 +40,7 @@ describe('parseMessageEntities', () => {
3640
3741 it ( 'parses adjacent entities correctly' , ( ) => {
3842 const text = 'abcd'
39- const entities : TelegramMessageEntity [ ] = [
43+ const entities : MessageEntity [ ] = [
4044 { type : 'bold' , offset : 0 , length : 2 } ,
4145 { type : 'italic' , offset : 2 , length : 2 } ,
4246 ]
@@ -60,7 +64,7 @@ describe('parseMessageEntities', () => {
6064
6165 it ( 'maps link, codeblock and custom emoji entities' , ( ) => {
6266 const text = 'ab🙂de'
63- const entities : TelegramMessageEntity [ ] = [
67+ const entities : MessageEntity [ ] = [
6468 { type : 'text_link' , offset : 0 , length : 2 , url : 'https://example.com' } ,
6569 { type : 'custom_emoji' , offset : 2 , length : 2 , custom_emoji_id : '42' } ,
6670 { type : 'pre' , offset : 4 , length : 1 , language : 'ts' } ,
@@ -91,7 +95,7 @@ describe('parseMessageEntities', () => {
9195
9296 it ( 'ignores invalid and crossing entities' , ( ) => {
9397 const text = 'abcdef'
94- const entities : TelegramMessageEntity [ ] = [
98+ const entities : MessageEntity [ ] = [
9599 { type : 'bold' , offset : - 1 , length : 1 } ,
96100 { type : 'italic' , offset : 0 , length : 0 } ,
97101 { type : 'underline' , offset : 0 , length : 4 } ,
@@ -116,4 +120,83 @@ describe('parseMessageEntities', () => {
116120 ] ,
117121 } )
118122 } )
123+
// Covers characters whose UTF-16 length differs from their visible width:
// a surrogate pair (🙂 = 2 units), a base letter plus combining mark
// (e + U+0301 = 2 units) and a single-unit non-ASCII letter (Ж = 1 unit).
it('handles UTF-16 offsets for surrogate pairs and combining marks', () => {
  const text = 'A🙂e\u0301Ж'

  // Offsets are derived from the prefix that precedes each segment so they
  // stay in sync with `text` if the fixture is ever edited.
  const emojiOffset = cu('A')
  const emojiLength = cu('🙂')
  const combiningOffset = cu('A🙂')
  const combiningLength = cu('e\u0301')
  const cyrillicOffset = cu('A🙂e\u0301')
  const cyrillicLength = cu('Ж')

  // Sanity-check the fixture itself before exercising the parser, so a
  // broken literal fails here rather than as a confusing parser mismatch.
  expect(emojiOffset).toBe(1)
  expect(emojiLength).toBe(2)
  expect(combiningOffset).toBe(3)
  expect(combiningLength).toBe(2)
  expect(cyrillicOffset).toBe(5)
  expect(cyrillicLength).toBe(1)

  const entities: MessageEntity[] = [
    { type: 'bold', offset: emojiOffset, length: emojiLength }, // 🙂
    { type: 'italic', offset: combiningOffset, length: combiningLength }, // e + combining acute
    { type: 'underline', offset: cyrillicOffset, length: cyrillicLength }, // Ж
  ]

  expect(parseMessageEntities(text, entities)).toEqual({
    type: 'fragment',
    subelements: [
      { type: 'plain', value: 'A' },
      {
        type: 'text',
        entity: { type: 'bold' },
        subelements: [{ type: 'plain', value: '🙂' }],
      },
      {
        type: 'text',
        entity: { type: 'italic' },
        subelements: [{ type: 'plain', value: 'e\u0301' }],
      },
      {
        type: 'text',
        entity: { type: 'underline' },
        subelements: [{ type: 'plain', value: 'Ж' }],
      },
    ],
  })
})
168+
// Covers a multi-code-point emoji joined by ZERO WIDTH JOINER (U+200D):
// four 2-unit emoji plus three 1-unit joiners = 11 UTF-16 code units.
it('handles UTF-16 offsets for ZWJ emoji sequences', () => {
  // The joiners are written as explicit escapes: U+200D is invisible and is
  // easily stripped by editors or copy/paste, which would silently shrink
  // the fixture to 8 code units and break the length assertions below.
  const family = '👨\u200D👩\u200D👧\u200D👦'
  const text = `x${family}y`

  const familyOffset = cu('x')
  const familyLength = cu(family)
  const tailOffset = cu(`x${family}`)

  // Sanity-check the fixture before exercising the parser.
  expect(familyOffset).toBe(1)
  expect(familyLength).toBe(11)
  expect(tailOffset).toBe(12)

  const entities: MessageEntity[] = [
    { type: 'custom_emoji', offset: familyOffset, length: familyLength, custom_emoji_id: 'family-1' },
    { type: 'code', offset: tailOffset, length: cu('y') },
  ]

  expect(parseMessageEntities(text, entities)).toEqual({
    type: 'fragment',
    subelements: [
      { type: 'plain', value: 'x' },
      {
        // The whole ZWJ sequence is expected as the `alt` text, with no
        // child elements under the custom-emoji node.
        type: 'text',
        entity: { type: 'custom-emoji', id: 'family-1', alt: family },
        subelements: [],
      },
      {
        type: 'text',
        entity: { type: 'code' },
        subelements: [{ type: 'plain', value: 'y' }],
      },
    ],
  })
})
119202} )
0 commit comments