33import { OBJECT , BLOCK_MAXSIZE , TOTAL_OVERHEAD } from "./rt/common" ;
44import { compareImpl , strtol , strtod , isSpace , isAscii , isFinalSigma , toLower8 , toUpper8 } from "./util/string" ;
55import { SPECIALS_UPPER , casemap , bsearch } from "./util/casemap" ;
6- import { E_INDEXOUTOFRANGE , E_INVALIDLENGTH } from "./util/error" ;
6+ import { E_INDEXOUTOFRANGE , E_INVALIDLENGTH , E_UNPAIRED_SURROGATE } from "./util/error" ;
77import { idof } from "./builtins" ;
88import { Array } from "./array" ;
99
@@ -661,6 +661,12 @@ export namespace String {
661661
662662 export namespace UTF8 {
663663
664+ export const enum ErrorMode { // Selects how the UTF-8 encoder treats a surrogate code unit that is not part of a valid lead/trail pair.
665+ WTF8 , // Default mode: the unpaired surrogate is left unchanged and encoded like any other code unit.
666+ REPLACE , // The unpaired surrogate is substituted with U+FFFD before being encoded.
667+ ERROR // Encoding throws an Error carrying E_UNPAIRED_SURROGATE.
668+ }
669+
664670 export function byteLength ( str : string , nullTerminated : bool = false ) : i32 {
665671 var strOff = changetype < usize > ( str ) ;
666672 var strEnd = strOff + < usize > changetype < OBJECT > ( changetype < usize > ( str ) - TOTAL_OVERHEAD ) . rtSize ;
@@ -687,15 +693,15 @@ export namespace String {
687693 return bufLen ;
688694 }
689695
690- export function encode ( str : string , nullTerminated : bool = false ) : ArrayBuffer {
696+ export function encode ( str : string , nullTerminated : bool = false , errorMode : ErrorMode = ErrorMode . WTF8 ) : ArrayBuffer { // Encodes `str` into a newly created ArrayBuffer; `errorMode` defaults to ErrorMode.WTF8 and is forwarded to encodeUnsafe.
691697 var buf = changetype < ArrayBuffer > ( __new ( < usize > byteLength ( str , nullTerminated ) , idof < ArrayBuffer > ( ) ) ) ; // Buffer size comes from byteLength(str, nullTerminated); presumably __new allocates that many bytes - confirm against the runtime.
692- encodeUnsafe ( changetype < usize > ( str ) , str . length , changetype < usize > ( buf ) , nullTerminated ) ;
698+ encodeUnsafe ( changetype < usize > ( str ) , str . length , changetype < usize > ( buf ) , nullTerminated , errorMode ) ; // Writes the encoded bytes into buf; the returned usize is ignored here.
693699 return buf ; // Returns the filled buffer.
694700 }
695701
696702 // @ts-ignore: decorator
697703 @unsafe
698- export function encodeUnsafe ( str : usize , len : i32 , buf : usize , nullTerminated : bool = false ) : usize {
704+ export function encodeUnsafe ( str : usize , len : i32 , buf : usize , nullTerminated : bool = false , errorMode : ErrorMode = ErrorMode . WTF8 ) : usize {
699705 var strEnd = str + ( < usize > len << 1 ) ;
700706 var bufOff = buf ;
701707 while ( str < strEnd ) {
@@ -709,17 +715,29 @@ export namespace String {
709715 store < u16 > ( bufOff , b1 << 8 | b0 ) ;
710716 bufOff += 2 ;
711717 } else {
712- if ( ( c1 & 0xFC00 ) == 0xD800 && str + 2 < strEnd ) {
713- let c2 = < u32 > load < u16 > ( str , 2 ) ;
714- if ( ( c2 & 0xFC00 ) == 0xDC00 ) {
715- c1 = 0x10000 + ( ( c1 & 0x03FF ) << 10 ) | ( c2 & 0x03FF ) ;
716- let b0 = c1 >> 18 | 240 ;
717- let b1 = c1 >> 12 & 63 | 128 ;
718- let b2 = c1 >> 6 & 63 | 128 ;
719- let b3 = c1 & 63 | 128 ;
720- store < u32 > ( bufOff , b3 << 24 | b2 << 16 | b1 << 8 | b0 ) ;
721- bufOff += 4 ; str += 4 ;
722- continue ;
718+ // D800: 11011 0 0000000000 Lead
719+ // DBFF: 11011 0 1111111111
720+ // DC00: 11011 1 0000000000 Trail
721+ // DFFF: 11011 1 1111111111
722+ // F800: 11111 0 0000000000 Mask
723+ // FC00: 11111 1 0000000000
724+ if ( ( c1 & 0xF800 ) == 0xD800 ) {
725+ if ( c1 < 0xDC00 && str + 2 < strEnd ) {
726+ let c2 = < u32 > load < u16 > ( str , 2 ) ;
727+ if ( ( c2 & 0xFC00 ) == 0xDC00 ) {
728+ c1 = 0x10000 + ( ( c1 & 0x03FF ) << 10 ) | ( c2 & 0x03FF ) ;
729+ let b0 = c1 >> 18 | 240 ;
730+ let b1 = c1 >> 12 & 63 | 128 ;
731+ let b2 = c1 >> 6 & 63 | 128 ;
732+ let b3 = c1 & 63 | 128 ;
733+ store < u32 > ( bufOff , b3 << 24 | b2 << 16 | b1 << 8 | b0 ) ;
734+ bufOff += 4 ; str += 4 ;
735+ continue ;
736+ }
737+ }
738+ if ( errorMode != ErrorMode . WTF8 ) { // unlikely
739+ if ( errorMode == ErrorMode . ERROR ) throw new Error ( E_UNPAIRED_SURROGATE ) ;
740+ c1 = 0xFFFD ;
723741 }
724742 }
725743 let b0 = c1 >> 12 | 224 ;
0 commit comments