From 7b727534ca34061614402dcf10e97edeafe81d7a Mon Sep 17 00:00:00 2001 From: andreadellacorte Date: Wed, 13 May 2026 10:37:05 +0100 Subject: [PATCH 1/4] xm-darwin: force 32-bit HOST_WIDE_INT to match Linux i386 reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macOS build of cc1 produces non-bit-identical machine code vs the Linux i386 reference build for the same C input. Root cause: machmode.h derives HOST_WIDE_INT from `long`, which is 64-bit on darwin LP64 and 32-bit on i386-host Linux (where the reference is built with -m32): #if HOST_BITS_PER_LONG > HOST_BITS_PER_INT #define HOST_WIDE_INT long /* 64-bit on darwin */ #else #define HOST_WIDE_INT int /* 32-bit on i386 */ #endif cc1's internal integer-constant arithmetic (constant folding, RTL constants, shifts, sign-extension) operates entirely on HOST_WIDE_INT. With 64-bit width, intermediate values keep precision the source never asked for, propagating into different code generation in non-trivial ways. Override HOST_WIDE_INT to 32-bit in xm-darwin.h. cc1 itself remains a 64-bit Mach-O binary; only target-constant width is constrained, mirroring the i386-host Linux build. Host pointer/size_t semantics are unaffected. Add a regression test (tests/host_wide_int.c) that compiles a function returning `1u << 31` and greps for `# 0x80000000$` in the output. cc1 prints constants at HOST_WIDE_INT width, so a 64-bit-host build emits `# 0xffffffff80000000` (sign-extended); the test catches the divergence without needing to compare bit-exact machine code. Wired into all three macos.sh build scripts and the corresponding Linux Dockerfiles for parity. Concrete validation: rood-reverse (https://github.com/ser-pounce/rood-reverse) goes from 8 PRG mismatches in `make check` to all-match with this patch. The size of the codegen drift before the fix was substantial — e.g. 
MENU8.PRG/88.o was 44 bytes shorter than the Linux reference, which cascaded into VMA shifts across multiple downstream sections. --- gcc-2.7.2-cdk-macos.sh | 3 +++ gcc-2.7.2-cdk.Dockerfile | 5 +++++ gcc-2.7.2-psx-macos.sh | 5 ++++- gcc-2.7.2-psx.Dockerfile | 7 ++++++- gcc-2.8.1-psx-macos.sh | 5 ++++- gcc-2.8.1-psx.Dockerfile | 7 ++++++- patches/xm-darwin.h | 13 +++++++++++++ tests/host_wide_int.c | 3 +++ 8 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 tests/host_wide_int.c diff --git a/gcc-2.7.2-cdk-macos.sh b/gcc-2.7.2-cdk-macos.sh index 0709a80..c987b01 100755 --- a/gcc-2.7.2-cdk-macos.sh +++ b/gcc-2.7.2-cdk-macos.sh @@ -82,6 +82,9 @@ make cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null +# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. +./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s +grep -E '# 0x80000000$' host_wide_int.s mkdir -p "$OUTDIR" cp cpp cc1 xgcc cc1plus g++ "$OUTDIR/" diff --git a/gcc-2.7.2-cdk.Dockerfile b/gcc-2.7.2-cdk.Dockerfile index ef2317e..a932b56 100644 --- a/gcc-2.7.2-cdk.Dockerfile +++ b/gcc-2.7.2-cdk.Dockerfile @@ -31,6 +31,11 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c +# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT +# width. The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host +# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's +# internal integer arithmetic and machine code will diverge from the reference. 
+RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s RUN mv xgcc gcc RUN mkdir /build && cp cpp cc1 gcc cc1plus g++ /build/ diff --git a/gcc-2.7.2-psx-macos.sh b/gcc-2.7.2-psx-macos.sh index 31d20be..bdbbc80 100755 --- a/gcc-2.7.2-psx-macos.sh +++ b/gcc-2.7.2-psx-macos.sh @@ -71,7 +71,10 @@ make --jobs "$(sysctl -n hw.ncpu)" cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null -./cc1 -quiet -help &1 | grep -- -msoft-float +# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. +./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s +grep -E '# 0x80000000$' host_wide_int.s +./cc1 -quiet -help&1 | grep -- -msoft-float mkdir -p "$OUTDIR" cp cpp cc1 xgcc cc1plus g++ "$OUTDIR/" diff --git a/gcc-2.7.2-psx.Dockerfile b/gcc-2.7.2-psx.Dockerfile index 0cf30ef..5aa6b44 100644 --- a/gcc-2.7.2-psx.Dockerfile +++ b/gcc-2.7.2-psx.Dockerfile @@ -37,7 +37,12 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c -RUN ./cc1 -quiet -help &1 | grep -- -msoft-float +# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT +# width. The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host +# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's +# internal integer arithmetic and machine code will diverge from the reference. 
+RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s +RUN ./cc1 -quiet -help&1 | grep -- -msoft-float RUN mv xgcc gcc RUN mkdir /build && cp cpp cc1 gcc cc1plus g++ /build/ diff --git a/gcc-2.8.1-psx-macos.sh b/gcc-2.8.1-psx-macos.sh index 961eca5..4cbb7f6 100755 --- a/gcc-2.8.1-psx-macos.sh +++ b/gcc-2.8.1-psx-macos.sh @@ -82,7 +82,10 @@ make --jobs "$(sysctl -n hw.ncpu)" cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null -./cc1 -version &1 | grep -- -msoft-float +# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. +./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s +grep -E '# 0x80000000$' host_wide_int.s +./cc1 -version&1 | grep -- -msoft-float ./cc1 -version &1 | grep -- -msplit-addresses ./cc1 -version &1 | grep -- -mgpopt diff --git a/gcc-2.8.1-psx.Dockerfile b/gcc-2.8.1-psx.Dockerfile index 3da13ee..102865d 100644 --- a/gcc-2.8.1-psx.Dockerfile +++ b/gcc-2.8.1-psx.Dockerfile @@ -37,7 +37,12 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c -RUN ./cc1 -version &1 | grep -- -msoft-float +# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT +# width. The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host +# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's +# internal integer arithmetic and machine code will diverge from the reference. 
+RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s +RUN ./cc1 -version&1 | grep -- -msoft-float RUN ./cc1 -version &1 | grep -- -msplit-addresses RUN ./cc1 -version &1 | grep -- -mgpopt diff --git a/patches/xm-darwin.h b/patches/xm-darwin.h index 8197624..69aaeb9 100644 --- a/patches/xm-darwin.h +++ b/patches/xm-darwin.h @@ -22,4 +22,17 @@ /* macOS provides bcopy/bcmp/bzero via . */ #define BSTRING +/* Force 32-bit HOST_WIDE_INT to match the Linux i386 -m32 reference build. + Without this override, machmode.h derives HOST_WIDE_INT from `long` + (64-bit on darwin LP64), which changes cc1's internal integer-constant + arithmetic (constant folding, RTL constants, shift/sign-extend) vs the + reference Linux build. The result is non-bit-identical machine code for + the same input C — verified against rood-reverse where this caused 8 + PRGs to fail `make check`. + + cc1 remains a 64-bit Mach-O binary; only the target-constant width is + constrained, mirroring what the i386-host Linux cc1 does. 
*/ +#define HOST_BITS_PER_WIDE_INT 32 +#define HOST_WIDE_INT int + #include "tm.h" diff --git a/tests/host_wide_int.c b/tests/host_wide_int.c new file mode 100644 index 0000000..6a20b62 --- /dev/null +++ b/tests/host_wide_int.c @@ -0,0 +1,3 @@ +unsigned int signbit_mask(void) { + return 1u << 31; +} From 74f8d9b01bb052193a457f9b0eb9f1cd110b2d48 Mon Sep 17 00:00:00 2001 From: Andrea Della Corte Date: Wed, 13 May 2026 11:14:50 +0100 Subject: [PATCH 2/4] Update gcc-2.7.2-cdk-macos.sh Co-authored-by: Luciano Ciccariello --- gcc-2.7.2-cdk-macos.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc-2.7.2-cdk-macos.sh b/gcc-2.7.2-cdk-macos.sh index c987b01..91deac1 100755 --- a/gcc-2.7.2-cdk-macos.sh +++ b/gcc-2.7.2-cdk-macos.sh @@ -82,7 +82,6 @@ make cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null -# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s grep -E '# 0x80000000$' host_wide_int.s From 6b63e707304785c6c972137bef07131256b71ab6 Mon Sep 17 00:00:00 2001 From: Andrea Della Corte Date: Wed, 13 May 2026 11:14:56 +0100 Subject: [PATCH 3/4] Update gcc-2.7.2-cdk.Dockerfile Co-authored-by: Luciano Ciccariello --- gcc-2.7.2-cdk.Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gcc-2.7.2-cdk.Dockerfile b/gcc-2.7.2-cdk.Dockerfile index a932b56..37ff7a6 100644 --- a/gcc-2.7.2-cdk.Dockerfile +++ b/gcc-2.7.2-cdk.Dockerfile @@ -31,10 +31,6 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c -# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT -# width. 
The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host -# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's -# internal integer arithmetic and machine code will diverge from the reference. RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s RUN mv xgcc gcc From 1af52ffe2673c31ab4941b8e6e47417762b8fd54 Mon Sep 17 00:00:00 2001 From: andreadellacorte Date: Wed, 13 May 2026 11:17:28 +0100 Subject: [PATCH 4/4] Strip explanatory comments per review feedback --- gcc-2.7.2-psx-macos.sh | 1 - gcc-2.7.2-psx.Dockerfile | 4 ---- gcc-2.8.1-psx-macos.sh | 1 - gcc-2.8.1-psx.Dockerfile | 4 ---- patches/xm-darwin.h | 10 ---------- 5 files changed, 20 deletions(-) diff --git a/gcc-2.7.2-psx-macos.sh b/gcc-2.7.2-psx-macos.sh index bdbbc80..888e883 100755 --- a/gcc-2.7.2-psx-macos.sh +++ b/gcc-2.7.2-psx-macos.sh @@ -71,7 +71,6 @@ make --jobs "$(sysctl -n hw.ncpu)" cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null -# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s grep -E '# 0x80000000$' host_wide_int.s ./cc1 -quiet -help&1 | grep -- -msoft-float diff --git a/gcc-2.7.2-psx.Dockerfile b/gcc-2.7.2-psx.Dockerfile index 5aa6b44..7bc6bc2 100644 --- a/gcc-2.7.2-psx.Dockerfile +++ b/gcc-2.7.2-psx.Dockerfile @@ -37,10 +37,6 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c -# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT -# width. 
The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host -# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's -# internal integer arithmetic and machine code will diverge from the reference. RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s RUN ./cc1 -quiet -help&1 | grep -- -msoft-float diff --git a/gcc-2.8.1-psx-macos.sh b/gcc-2.8.1-psx-macos.sh index 4cbb7f6..41f8a05 100755 --- a/gcc-2.8.1-psx-macos.sh +++ b/gcc-2.8.1-psx-macos.sh @@ -82,7 +82,6 @@ make --jobs "$(sysctl -n hw.ncpu)" cpp cc1 xgcc cc1plus g++ \ ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/little_endian.c" -o little_endian.s grep -E 'lbu\s\$2,0\(\$4\)' little_endian.s ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/section_attribute.c" -o /dev/null -# Regression test for HOST_WIDE_INT width — see tests/host_wide_int.c. ./cc1 -quiet -O2 "$SCRIPT_DIR/tests/host_wide_int.c" -o host_wide_int.s grep -E '# 0x80000000$' host_wide_int.s ./cc1 -version&1 | grep -- -msoft-float diff --git a/gcc-2.8.1-psx.Dockerfile b/gcc-2.8.1-psx.Dockerfile index 102865d..36cd135 100644 --- a/gcc-2.8.1-psx.Dockerfile +++ b/gcc-2.8.1-psx.Dockerfile @@ -37,10 +37,6 @@ RUN make --jobs $(nproc) cpp cc1 xgcc cc1plus g++ CFLAGS="-std=gnu89 -m32 -stati COPY tests /work/tests RUN ./cc1 -quiet -O2 /work/tests/little_endian.c && grep -E 'lbu\s\$2,0\(\$4\)' /work/tests/little_endian.s RUN ./cc1 -quiet -O2 /work/tests/section_attribute.c -# Regression test for HOST_WIDE_INT width: cc1 prints constants at HOST_WIDE_INT -# width. The i386 reference build (32-bit) prints `# 0x80000000`; a 64-bit-host -# build prints `# 0xffffffff80000000`, indicating long has leaked into cc1's -# internal integer arithmetic and machine code will diverge from the reference. 
RUN ./cc1 -quiet -O2 /work/tests/host_wide_int.c && grep -E '# 0x80000000$' /work/tests/host_wide_int.s RUN ./cc1 -version&1 | grep -- -msoft-float RUN ./cc1 -version &1 | grep -- -msplit-addresses diff --git a/patches/xm-darwin.h b/patches/xm-darwin.h index 69aaeb9..1df192a 100644 --- a/patches/xm-darwin.h +++ b/patches/xm-darwin.h @@ -22,16 +22,6 @@ /* macOS provides bcopy/bcmp/bzero via . */ #define BSTRING -/* Force 32-bit HOST_WIDE_INT to match the Linux i386 -m32 reference build. - Without this override, machmode.h derives HOST_WIDE_INT from `long` - (64-bit on darwin LP64), which changes cc1's internal integer-constant - arithmetic (constant folding, RTL constants, shift/sign-extend) vs the - reference Linux build. The result is non-bit-identical machine code for - the same input C — verified against rood-reverse where this caused 8 - PRGs to fail `make check`. - - cc1 remains a 64-bit Mach-O binary; only the target-constant width is - constrained, mirroring what the i386-host Linux cc1 does. */ #define HOST_BITS_PER_WIDE_INT 32 #define HOST_WIDE_INT int