Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/bxstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ bxstr_t *bxs_from_unicode(uint32_t *pInput)
bxstr_t *result = (bxstr_t *) calloc(1, sizeof(bxstr_t));
result->memory = u32_strdup(pInput);
result->num_chars = u32_strlen(pInput);
size_t ascii_len = ((size_t) u32_strwidth(pInput, encoding)) + 1;
size_t ascii_len = ((size_t) u32_strwidth(pInput, encoding)) + count_vs16_promotions(pInput) + 1;
result->ascii = (char *) calloc(ascii_len, sizeof(char));
size_t map_size = 5;
result->first_char = (size_t *) calloc(map_size, sizeof(size_t));
Expand Down Expand Up @@ -129,15 +129,21 @@ bxstr_t *bxs_from_unicode(uint32_t *pInput)
else {
int cols = 1;
if (is_ascii_printable(c)) {
*ascii_ptr = c & 0xff;
++ascii_ptr;
if (rest[1] == VARIATION_SELECTOR_16) {
cols = 2;
}
memset(ascii_ptr, c & 0xff, cols);
ascii_ptr += cols;
}
else if (c == char_tab) {
*ascii_ptr = ' ';
++ascii_ptr;
}
else {
cols = BMAX(0, uc_width(c, encoding));
if (cols < 2 && c != VARIATION_SELECTOR_16 && rest[1] == VARIATION_SELECTOR_16) {
cols = 2;
}
if (cols > 0) {
memset(ascii_ptr, (int) (uc_is_blank(c) ? ' ' : 'x'), cols);
ascii_ptr += cols;
Expand Down
17 changes: 13 additions & 4 deletions src/tools.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, s
return 0;
}

size_t buflen = (size_t) u32_strwidth(s, encoding) + 1;
size_t buflen = (size_t) u32_strwidth(s, encoding) + count_vs16_promotions(s) + 1;
size_t map_size = BMAX((size_t) 5, buflen);
size_t map_idx = 0;
size_t *map = (size_t *) calloc(map_size, sizeof(size_t)); /* might not be enough if many double-wide chars */
Expand All @@ -517,12 +517,21 @@ size_t count_invisible_chars(const uint32_t *s, size_t *num_esc, char **ascii, s
(*num_esc)++;
}
else if (is_ascii_printable(c)) {
*p = c & 0xff;
map[map_idx++] = mb_idx;
++p;
int cols = 1;
if (rest[1] == VARIATION_SELECTOR_16) {
cols = 2;
}
memset(p, c & 0xff, cols);
for (int i = 0; i < cols; i++) {
map[map_idx++] = mb_idx;
}
p += cols;
}
else {
int cols = uc_width(c, encoding);
if (cols < 2 && c != VARIATION_SELECTOR_16 && rest[1] == VARIATION_SELECTOR_16) {
cols = 2;
}
if (cols > 0) {
memset(p, (int) 'x', cols);
for (int i = 0; i < cols; i++) {
Expand Down
12 changes: 12 additions & 0 deletions src/unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,16 @@ void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n)
}


/**
 * Tally the U+FE0F (VS-16) code points contained in a UTF-32 string.
 *
 * @param s the UTF-32 string to scan; scanning stops at the first NUL character
 * @return how many code points in `s` are equal to VARIATION_SELECTOR_16
 */
size_t count_vs16_promotions(const uint32_t *s)
{
    size_t total = 0;
    const uint32_t *cursor = s;

    while (*cursor != char_nul) {
        if (*cursor == VARIATION_SELECTOR_16) {
            ++total;
        }
        ++cursor;
    }
    return total;
}


/* vim: set cindent sw=4: */
13 changes: 13 additions & 0 deletions src/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ extern const ucs4_t char_esc;
/** ucs4_t character '\0' (zero) */
extern const ucs4_t char_nul;

/** Code point U+FE0F (Variation Selector 16) — switches the preceding character to emoji presentation */
#define VARIATION_SELECTOR_16 0xFE0F


/**
* Check whether the character at the given index has the given value.
Expand Down Expand Up @@ -243,6 +246,16 @@ uint32_t *u32_strnrstr(const uint32_t *haystack, const uint32_t *needle, const s
void u32_insert_space_at(uint32_t **s, const size_t idx, const size_t n);


/**
 * Count occurrences of U+FE0F (VS-16) in `s`. Used to estimate extra buffer space needed for characters promoted from
 * text presentation (1 column) to emoji presentation (2 columns). Every VS-16 is counted, whether or not it actually
 * promotes a character, so the result may slightly overcount, which is safe for sizing.
 *
 * NOTE(review): the callers promote any character narrower than 2 columns that is followed by a VS-16. A zero-width
 * character would then gain 2 columns while only 1 is reserved per VS-16 -- confirm the estimate is sufficient there.
 *
 * @param s the UTF-32 string to scan; must be terminated by a NUL character
 * @return the number of VS-16 codepoints found
 */
size_t count_vs16_promotions(const uint32_t *s);


#endif

/* vim: set cindent sw=4: */
143 changes: 143 additions & 0 deletions utest/bxstring_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1499,4 +1499,147 @@ void test_bxs_concat_nullarg(void **state)
}


/**
 * U+26A0 (WARNING SIGN) followed by VS-16: the pair is expected to occupy two columns ("xx") and to be counted as two
 * visible characters.
 */
void test_ansi_unicode_vs16_warning(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+26A0 (WARNING SIGN) followed by U+FE0F (VS-16) */
    const char *utf8 = "\xe2\x9a\xa0\xef\xb8\x8f";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);
    assert_non_null(actual->memory);

    /* column layout */
    assert_string_equal("xx", actual->ascii);
    assert_int_equal(0, (int) actual->indent);
    assert_int_equal(2, (int) actual->num_columns);

    /* character accounting */
    assert_int_equal(2, (int) actual->num_chars);
    assert_int_equal(2, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}



/**
 * U+26A0 (WARNING SIGN) on its own, without VS-16: expected to stay at a single column ("x") and a single visible
 * character.
 */
void test_ansi_unicode_no_vs16_warning(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+26A0 (WARNING SIGN), no variation selector */
    const char *utf8 = "\xe2\x9a\xa0";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);
    assert_non_null(actual->memory);

    /* column layout */
    assert_string_equal("x", actual->ascii);
    assert_int_equal(0, (int) actual->indent);
    assert_int_equal(1, (int) actual->num_columns);

    /* character accounting */
    assert_int_equal(1, (int) actual->num_chars);
    assert_int_equal(1, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}



/**
 * U+1F4A1 without any variation selector: a single character that is expected to occupy two columns ("xx").
 */
void test_ansi_unicode_wide_bulb(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+1F4A1 (inherently wide, East Asian Width W) */
    const char *utf8 = "\xf0\x9f\x92\xa1";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);
    assert_non_null(actual->memory);

    /* column layout */
    assert_string_equal("xx", actual->ascii);
    assert_int_equal(0, (int) actual->indent);
    assert_int_equal(2, (int) actual->num_columns);

    /* character accounting */
    assert_int_equal(1, (int) actual->num_chars);
    assert_int_equal(1, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}



/**
 * Keycap sequence: digit '1', VS-16, then U+20E3. The ASCII digit is expected to be widened to two columns ("11"),
 * with all three code points counted as visible characters.
 */
void test_ansi_unicode_vs16_keycap(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+0031 (digit '1') + U+FE0F (VS-16) + U+20E3 (combining enclosing keycap) */
    const char *utf8 = "\x31\xef\xb8\x8f\xe2\x83\xa3";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);
    assert_non_null(actual->memory);

    /* column layout */
    assert_string_equal("11", actual->ascii);
    assert_int_equal(0, (int) actual->indent);
    assert_int_equal(2, (int) actual->num_columns);

    /* character accounting */
    assert_int_equal(3, (int) actual->num_chars);
    assert_int_equal(3, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}



/**
 * Pathological input made of three VS-16 code points in a row: conversion must succeed and report zero columns.
 */
void test_ansi_unicode_vs16_consecutive(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+FE0F U+FE0F U+FE0F -- must not overflow */
    const char *utf8 = "\xef\xb8\x8f\xef\xb8\x8f\xef\xb8\x8f";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);

    /* selectors with nothing to modify contribute no width */
    assert_int_equal(0, (int) actual->num_columns);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}



/**
 * Mixed line containing a VS-16 sequence, plain ASCII words and a wide emoji: expected to span 11 columns for
 * 10 code points.
 */
void test_ansi_unicode_vs16_mixed(void **state)
{
    UNUSED(state);

    /* UTF-8 bytes of U+26A0 U+FE0F ' ' 'h' 'i' ' ' U+1F4A1 ' ' 'o' 'k' */
    const char *utf8 = "\xe2\x9a\xa0\xef\xb8\x8f hi \xf0\x9f\x92\xa1 ok";

    uint32_t *ustr32 = u32_strconv_from_arg(utf8, "UTF-8");
    assert_non_null(ustr32);

    bxstr_t *actual = bxs_from_unicode(ustr32);
    assert_non_null(actual);
    assert_non_null(actual->memory);

    /* column layout */
    assert_string_equal("xx hi xx ok", actual->ascii);
    assert_int_equal(0, (int) actual->indent);
    assert_int_equal(11, (int) actual->num_columns);

    /* character accounting */
    assert_int_equal(10, (int) actual->num_chars);
    assert_int_equal(10, (int) actual->num_chars_visible);
    assert_int_equal(0, (int) actual->num_chars_invisible);
    assert_int_equal(0, (int) actual->trailing);

    /* release the converted input and the string object built from it */
    BFREE(ustr32);
    bxs_free(actual);
}


/* vim: set cindent sw=4: */
7 changes: 7 additions & 0 deletions utest/bxstring_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ void test_bxs_free_null(void **state);
void test_bxs_concat(void **state);
void test_bxs_concat_nullarg(void **state);

/* Column-width handling of U+FE0F (VS-16) sequences and wide emoji in bxs_from_unicode() */
void test_ansi_unicode_vs16_warning(void **state);
void test_ansi_unicode_no_vs16_warning(void **state);
void test_ansi_unicode_wide_bulb(void **state);
void test_ansi_unicode_vs16_keycap(void **state);
void test_ansi_unicode_vs16_consecutive(void **state);
void test_ansi_unicode_vs16_mixed(void **state);


#endif

Expand Down
8 changes: 7 additions & 1 deletion utest/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,13 @@ int main(void)
cmocka_unit_test_setup(test_bxs_valid_in_filename_error, beforeTest),
cmocka_unit_test_setup(test_bxs_free_null, beforeTest),
cmocka_unit_test_setup(test_bxs_concat, beforeTest),
cmocka_unit_test_setup(test_bxs_concat_nullarg, beforeTest)
cmocka_unit_test_setup(test_bxs_concat_nullarg, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_vs16_warning, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_no_vs16_warning, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_wide_bulb, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_vs16_keycap, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_vs16_consecutive, beforeTest),
cmocka_unit_test_setup(test_ansi_unicode_vs16_mixed, beforeTest)
};

const struct CMUnitTest remove_tests[] = {
Expand Down
Loading