Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/CI/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ static_libs:
- llvm
- llvm-15
- llvm-bolt
- musl
- ocaml
- ocamlbuild
- opencolorio
Expand Down
4 changes: 4 additions & 0 deletions packages/m/musl/abi_symbols
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ libc.so:__fwritable
libc.so:__fwriting
libc.so:__fxstat
libc.so:__fxstatat
libc.so:__getauxval
libc.so:__h_errno_location
libc.so:__isalnum_l
libc.so:__isalpha_l
Expand Down Expand Up @@ -228,6 +229,7 @@ libc.so:cfgetospeed
libc.so:cfmakeraw
libc.so:cfsetispeed
libc.so:cfsetospeed
libc.so:cfsetspeed
libc.so:chdir
libc.so:chmod
libc.so:chown
Expand Down Expand Up @@ -857,6 +859,7 @@ libc.so:popen
libc.so:posix_close
libc.so:posix_fadvise
libc.so:posix_fallocate
libc.so:posix_getdents
libc.so:posix_madvise
libc.so:posix_memalign
libc.so:posix_openpt
Expand Down Expand Up @@ -1035,6 +1038,7 @@ libc.so:remquof
libc.so:remquol
libc.so:rename
libc.so:renameat
libc.so:renameat2
libc.so:res_init
libc.so:res_query
libc.so:res_querydomain
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
>From 67219f0130ec7c876ac0b299046460fad31caabf Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias () aerifal cx>
Date: Mon, 30 Mar 2026 16:00:50 -0400
Subject: [PATCH] fix pathological slowness & incorrect mappings in iconv
gb18030 decoder

in order to implement the "UTF" aspect of gb18030 (ability to
represent arbitrary unicode characters not present in the 2-byte
mapping), we have to apply the index obtained from the encoded 4-byte
sequence into the set of unmapped characters. this was done by
scanning repeatedly over the table of mapped characters, counting
off the mapped characters below a running index and then advancing
the running index by that count on each iteration. this iterative process eventually
leaves us with the value of the Nth unmapped character replacing the
index, but depending on which particular character that is, the number
of iterations needed to find it can be in the tens of thousands, and
each iteration traverses the whole 126x190 table in the inner loop.
this can lead to run times exceeding an entire second per character on
moderate-speed machines.

on top of that, the transformation logic produced wrong results for
BMP characters above the surrogate range, as a result of not
correctly accounting for it being excluded, and for characters outside
the BMP, as a result of a misunderstanding of how gb18030 encodes
them.

this patch replaces the unmapped character lookup with a single linear
search of a list of unmapped ranges. there are only 206 such ranges,
and these are permanently assigned and unchangeable as a consequence
of the character encoding having to be stable, so a simple array of
16-bit start/length values for each range consumes only 824 bytes, a
very reasonable size cost here.

this new table accounts for the previously-incorrect surrogate
handling, and non-BMP characters are handled correctly by a single
offset, without the need for any unmapped-range search.

there are still a small number of mappings that are incorrect due to
late changes made in the definition of gb18030, swapping PUA
codepoints with proper Unicode characters. correcting these requires a
postprocessing step that will be added later.
---
src/locale/gb18030utf.h | 206 ++++++++++++++++++++++++++++++++++++++++
src/locale/iconv.c | 33 +++++--
2 files changed, 230 insertions(+), 9 deletions(-)
create mode 100644 src/locale/gb18030utf.h

diff --git a/src/locale/gb18030utf.h b/src/locale/gb18030utf.h
new file mode 100644
index 00000000..322a2440
--- /dev/null
+++ b/src/locale/gb18030utf.h
@@ -0,0 +1,206 @@
+{ 0x80, 36 },
+{ 0xa5, 2 },
+{ 0xa9, 7 },
+{ 0xb2, 5 },
+{ 0xb8, 31 },
+{ 0xd8, 8 },
+{ 0xe2, 6 },
+{ 0xeb, 1 },
+{ 0xee, 4 },
+{ 0xf4, 3 },
+{ 0xf8, 1 },
+{ 0xfb, 1 },
+{ 0xfd, 4 },
+{ 0x102, 17 },
+{ 0x114, 7 },
+{ 0x11c, 15 },
+{ 0x12c, 24 },
+{ 0x145, 3 },
+{ 0x149, 4 },
+{ 0x14e, 29 },
+{ 0x16c, 98 },
+{ 0x1cf, 1 },
+{ 0x1d1, 1 },
+{ 0x1d3, 1 },
+{ 0x1d5, 1 },
+{ 0x1d7, 1 },
+{ 0x1d9, 1 },
+{ 0x1db, 1 },
+{ 0x1dd, 28 },
+{ 0x1fa, 87 },
+{ 0x252, 15 },
+{ 0x262, 101 },
+{ 0x2c8, 1 },
+{ 0x2cc, 13 },
+{ 0x2da, 183 },
+{ 0x3a2, 1 },
+{ 0x3aa, 7 },
+{ 0x3c2, 1 },
+{ 0x3ca, 55 },
+{ 0x402, 14 },
+{ 0x450, 1 },
+{ 0x452, 7102 },
+{ 0x2011, 2 },
+{ 0x2017, 1 },
+{ 0x201a, 2 },
+{ 0x201e, 7 },
+{ 0x2027, 9 },
+{ 0x2031, 1 },
+{ 0x2034, 1 },
+{ 0x2036, 5 },
+{ 0x203c, 112 },
+{ 0x20ad, 86 },
+{ 0x2104, 1 },
+{ 0x2106, 3 },
+{ 0x210a, 12 },
+{ 0x2117, 10 },
+{ 0x2122, 62 },
+{ 0x216c, 4 },
+{ 0x217a, 22 },
+{ 0x2194, 2 },
+{ 0x219a, 110 },
+{ 0x2209, 6 },
+{ 0x2210, 1 },
+{ 0x2212, 3 },
+{ 0x2216, 4 },
+{ 0x221b, 2 },
+{ 0x2221, 2 },
+{ 0x2224, 1 },
+{ 0x2226, 1 },
+{ 0x222c, 2 },
+{ 0x222f, 5 },
+{ 0x2238, 5 },
+{ 0x223e, 10 },
+{ 0x2249, 3 },
+{ 0x224d, 5 },
+{ 0x2253, 13 },
+{ 0x2262, 2 },
+{ 0x2268, 6 },
+{ 0x2270, 37 },
+{ 0x2296, 3 },
+{ 0x229a, 11 },
+{ 0x22a6, 25 },
+{ 0x22c0, 82 },
+{ 0x2313, 333 },
+{ 0x246a, 10 },
+{ 0x249c, 100 },
+{ 0x254c, 4 },
+{ 0x2574, 13 },
+{ 0x2590, 3 },
+{ 0x2596, 10 },
+{ 0x25a2, 16 },
+{ 0x25b4, 8 },
+{ 0x25be, 8 },
+{ 0x25c8, 3 },
+{ 0x25cc, 2 },
+{ 0x25d0, 18 },
+{ 0x25e6, 31 },
+{ 0x2607, 2 },
+{ 0x260a, 54 },
+{ 0x2641, 1 },
+{ 0x2643, 2110 },
+{ 0x2e82, 2 },
+{ 0x2e85, 3 },
+{ 0x2e89, 2 },
+{ 0x2e8d, 10 },
+{ 0x2e98, 15 },
+{ 0x2ea8, 2 },
+{ 0x2eab, 3 },
+{ 0x2eaf, 4 },
+{ 0x2eb4, 2 },
+{ 0x2eb8, 3 },
+{ 0x2ebc, 14 },
+{ 0x2ecb, 293 },
+{ 0x2ffc, 4 },
+{ 0x3004, 1 },
+{ 0x3018, 5 },
+{ 0x301f, 2 },
+{ 0x302a, 20 },
+{ 0x303f, 2 },
+{ 0x3094, 7 },
+{ 0x309f, 2 },
+{ 0x30f7, 5 },
+{ 0x30ff, 6 },
+{ 0x312a, 246 },
+{ 0x322a, 7 },
+{ 0x3232, 113 },
+{ 0x32a4, 234 },
+{ 0x3390, 12 },
+{ 0x339f, 2 },
+{ 0x33a2, 34 },
+{ 0x33c5, 9 },
+{ 0x33cf, 2 },
+{ 0x33d3, 2 },
+{ 0x33d6, 113 },
+{ 0x3448, 43 },
+{ 0x3474, 298 },
+{ 0x359f, 111 },
+{ 0x360f, 11 },
+{ 0x361b, 765 },
+{ 0x3919, 85 },
+{ 0x396f, 96 },
+{ 0x39d1, 14 },
+{ 0x39e0, 147 },
+{ 0x3a74, 218 },
+{ 0x3b4f, 287 },
+{ 0x3c6f, 113 },
+{ 0x3ce1, 885 },
+{ 0x4057, 264 },
+{ 0x4160, 471 },
+{ 0x4338, 116 },
+{ 0x43ad, 4 },
+{ 0x43b2, 43 },
+{ 0x43de, 248 },
+{ 0x44d7, 373 },
+{ 0x464d, 20 },
+{ 0x4662, 193 },
+{ 0x4724, 5 },
+{ 0x472a, 82 },
+{ 0x477d, 16 },
+{ 0x478e, 441 },
+{ 0x4948, 50 },
+{ 0x497b, 2 },
+{ 0x497e, 4 },
+{ 0x4984, 1 },
+{ 0x4987, 20 },
+{ 0x499c, 3 },
+{ 0x49a0, 22 },
+{ 0x49b8, 703 },
+{ 0x4c78, 39 },
+{ 0x4ca4, 111 },
+{ 0x4d1a, 148 },
+{ 0x4daf, 81 },
+{ 0x9fa6, 14426 },
+{ 0xe76c, 1 },
+{ 0xe7c8, 1 },
+{ 0xe7e7, 13 },
+{ 0xe815, 1 },
+{ 0xe819, 5 },
+{ 0xe81f, 7 },
+{ 0xe827, 4 },
+{ 0xe82d, 4 },
+{ 0xe833, 8 },
+{ 0xe83c, 7 },
+{ 0xe844, 16 },
+{ 0xe856, 14 },
+{ 0xe865, 4295 },
+{ 0xf92d, 76 },
+{ 0xf97a, 27 },
+{ 0xf996, 81 },
+{ 0xf9e8, 9 },
+{ 0xf9f2, 26 },
+{ 0xfa10, 1 },
+{ 0xfa12, 1 },
+{ 0xfa15, 3 },
+{ 0xfa19, 6 },
+{ 0xfa22, 1 },
+{ 0xfa25, 2 },
+{ 0xfa2a, 1030 },
+{ 0xfe32, 1 },
+{ 0xfe45, 4 },
+{ 0xfe53, 1 },
+{ 0xfe58, 1 },
+{ 0xfe67, 1 },
+{ 0xfe6c, 149 },
+{ 0xff5f, 129 },
+{ 0xffe6, 26 },
diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 52178950..4151411d 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -74,6 +74,10 @@ static const unsigned short gb18030[126][190] = {
#include "gb18030.h"
};

+static const unsigned short gb18030utf[][2] = {
+#include "gb18030utf.h"
+};
+
static const unsigned short big5[89][157] = {
#include "big5.h"
};
@@ -224,6 +228,8 @@ static unsigned uni_to_jis(unsigned c)
}
}

+#define countof(a) (sizeof (a) / sizeof *(a))
+
size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
{
size_t x=0;
@@ -430,15 +436,24 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri
d = *((unsigned char *)*in + 3);
if (d-'0'>9) goto ilseq;
c += d-'0';
- c += 128;
- for (d=0; d<=c; ) {
- k = 0;
- for (int i=0; i<126; i++)
- for (int j=0; j<190; j++)
- if (gb18030[i][j]-d <= c-d)
- k++;
- d = c+1;
- c += k;
+ /* Starting at 90 30 81 30 (189000), mapping is
+ * linear without gaps, to U+10000 and up. */
+ if (c >= 189000) {
+ c -= 189000;
+ c += 0x10000;
+ if (c >= 0x110000) goto ilseq;
+ break;
+ }
+ /* Otherwise we must process an index into set
+ * of characters unmapped by 2-byte table. */
+ for (int i=0; ; i++) {
+ if (i==countof(gb18030utf))
+ goto ilseq;
+ if (c<gb18030utf[i][1]) {
+ c += gb18030utf[i][0];
+ break;
+ }
+ c -= gb18030utf[i][1];
}
break;
}
--
2.21.0

1 change: 1 addition & 0 deletions packages/m/musl/files/series
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
security/0001-fix-pathological-slowness-incorrect-mappings-in-icon.patch
0001-Support-stateless-etc-shells-file.patch
7 changes: 4 additions & 3 deletions packages/m/musl/package.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# yaml-language-server: $schema=/usr/share/ypkg/schema/schema.json
name : musl
version : 1.2.5
release : 11
version : 1.2.6
release : 12
source :
- https://musl.libc.org/releases/musl-1.2.5.tar.gz : a9a118bbe84d8764da0ea0d28b3ab3fae8477fc7e4085d90102b8596fc7c75e4
- https://musl.libc.org/releases/musl-1.2.6.tar.gz : d585fd3b613c66151fc3249e8ed44f77020cb5e6c1e635a616d3f9f82460512a
license : MIT
component : programming.library
homepage : https://musl.libc.org/
Expand Down Expand Up @@ -35,3 +35,4 @@ build : |
%make
install : |
%make_install
%install_license COPYRIGHT
Loading
Loading