From 3c9f5ebadcc5fc83b59aa0f2802817a9a627b3ae Mon Sep 17 00:00:00 2001
From: Trevor Gross <tg@trevorgross.com>
Date: Mon, 13 Apr 2026 23:44:54 -0400
Subject: [PATCH] fuzz: Add host operations for testing x87_f80

---
 fuzz/src/host.rs | 344 ++++++++++++++++++++++++++++++++++++++++++++++-
 fuzz/src/main.rs | 137 ++++++++++++++-----
 2 files changed, 441 insertions(+), 40 deletions(-)

diff --git a/fuzz/src/host.rs b/fuzz/src/host.rs
index e6e566f..2005ab0 100644
--- a/fuzz/src/host.rs
+++ b/fuzz/src/host.rs
@@ -116,14 +116,20 @@ impl_host_float!(f128, u128);
 /// Assembly implementations on x86 which respect rounding mode.
 #[cfg(x86_sse2)]
 #[cfg(target_has_reliable_f16)]
-mod x86 {
+pub mod x86 {
+    use std::mem::offset_of;
+
+    use rustc_apfloat::{Float, ieee::X87DoubleExtended};
+
     use super::*;
 
-    /// Given a rounding mode, assembly operation, and assembly configuration,
+    /// Given a rounding mode, assembly operations, and assembly configuration, invoke the
+    /// operations within a context that has the correct rounding mode set for SSE. Returns
+    /// the status registers.
     macro_rules! with_fp_env {
         (
             $rm:ident,
-            $op:literal,
+            $($op:literal,)+
             $( $name:ident = $dir:ident($kind:ident) $val:expr $(=> $dst:ident)?, )+
         ) => {{
             let mut csr_stash = 0u32;
@@ -134,7 +140,8 @@ mod x86 {
                 "stmxcsr [{csr_stash}]",
                 // set the control state we want, clears flags
                 "ldmxcsr [{csr}]",
-                $op,
+                // Per-invocation instructions
+                $($op,)+
                 // get the new control state
                 "stmxcsr [{csr}]",
                 // restore the original control state
@@ -146,7 +153,49 @@ mod x86 {
             );
 
             check_exceptions(csr)
-        }}
+        }};
+    }
+
+    /// Same as `with_fp_env` but for x87: runs the ops under `$rm` and yields the x87 status.
+    macro_rules! with_x87_env {
+        (
+            $rm:ident,
+            $($op:literal,)+
+            $( $name:ident = $dir:ident($kind:ident) $val:expr $(=> $dst:ident)?, )+
+        ) => {{
+            let mut orig_env = X87FpEnv::default();
+            let mut env = X87FpEnv::default();
+            let cw = make_x87_cw($rm, X87Precision::Extended)?;
+
+            core::arch::asm!(
+                // We need to use fnstenv since there is no way to load only the status word. We
+                // take two copies and modify one.
+                "fnstenv [{orig_env}]",
+                "fnstenv [{env}]",
+                // Set the control word, clear stale exception flags, and load the modified env
+                "mov word ptr [{env}], {cw:x}",
+                "and word ptr [{env} + {STATUS_OFFSET}], {STATUS_MASK}",
+                "fldenv [{env}]",
+                // Per-invocation instructions
+                $($op,)+
+                // Capture the resulting status word, then restore the original environment
+                "fnstsw word ptr [{env} + {STATUS_OFFSET}]",
+                "fldenv [{orig_env}]",
+                orig_env = in(reg) &mut orig_env,
+                env = in(reg) &mut env,
+                cw = in(reg) cw,
+                STATUS_OFFSET = const offset_of!(X87FpEnv, status),
+                STATUS_MASK = const !X87_STATUS_EXCEPTIONS,
+                $( $name = $dir($kind) $val $(=> $dst)?, )+
+                out("st(0)") _, out("st(1)") _,
+                out("st(2)") _, out("st(3)") _,
+                out("st(4)") _, out("st(5)") _,
+                out("st(6)") _, out("st(7)") _,
+                options(nostack),
+            );
+
+            check_exceptions(env.status.into())
+        }};
     }
 
     impl HostFloat for f16 {
@@ -604,11 +653,291 @@ mod x86 {
         }
     }
 
-    /// Make a control word or return `None` if the rounding mode is not supported.
+    /// Proxy for running x87 operations on the host without a builtin type.
+    #[derive(Clone, Copy)]
+    #[repr(transparent)]
+    #[allow(non_camel_case_types)]
+    pub struct x87_f80(u128);
+
+    impl fmt::Display for x87_f80 {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            // Display via apfloat.
+            X87DoubleExtended::from_bits(self.0).fmt(f)
+        }
+    }
+
+    impl fmt::Debug for x87_f80 {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            write!(f, "{:#022x}", self.0)
+        }
+    }
+
+    impl HostFloat for x87_f80 {
+        type UInt = u128;
+
+        fn from_bits(bits: Self::UInt) -> Self {
+            Self(bits)
+        }
+
+        fn to_bits(self) -> Self::UInt {
+            self.0
+        }
+
+        fn neg(mut self) -> Self {
+            unsafe {
+                core::arch::asm!(
+                    // Load the operand, flip the sign, and store it back.
+                    "fld tbyte ptr [{x}]",
+                    "fchs",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    out("st(0)") _, out("st(1)") _,
+                    out("st(2)") _, out("st(3)") _,
+                    out("st(4)") _, out("st(5)") _,
+                    out("st(6)") _, out("st(7)") _,
+                    options(nostack),
+                );
+            }
+            self
+        }
+
+        fn add_r(mut self, other: Self, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fld tbyte ptr [{y}]",
+                    "faddp st(1), st",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    y = in(reg) &other,
+                );
+                Some(status.and(self))
+            }
+        }
+
+        fn sub_r(mut self, other: Self, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fld tbyte ptr [{y}]",
+                    "fsubp st(1), st",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    y = in(reg) &other,
+                );
+                Some(status.and(self))
+            }
+        }
+
+        fn mul_r(mut self, other: Self, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fld tbyte ptr [{y}]",
+                    "fmulp st(1), st",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    y = in(reg) &other,
+                );
+                Some(status.and(self))
+            }
+        }
+
+        fn div_r(mut self, other: Self, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fld tbyte ptr [{y}]",
+                    "fdivp st(1), st",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    y = in(reg) &other,
+                );
+                Some(status.and(self))
+            }
+        }
+
+        fn rem(mut self, other: Self) -> Self {
+            unsafe {
+                core::arch::asm!(
+                    "fld tbyte ptr [{y}]",
+                    "fld tbyte ptr [{x}]",
+                    // fprem must be repeated until C2 is cleared. Based on assembly from
+                    // musl fmodl.
+                    "2:",
+                        "fprem",
+                        "fnstsw ax",
+                        // Check if 0x0400 is set
+                        "test ah, 4",
+                        "jne 2b",
+                    "fstp st(1)",
+                    "fstp tbyte ptr [{x}]",
+                    x = in(reg) &mut self,
+                    y = in(reg) &other,
+                    out("ax") _,
+                    out("st(0)") _, out("st(1)") _,
+                    out("st(2)") _, out("st(3)") _,
+                    out("st(4)") _, out("st(5)") _,
+                    out("st(6)") _, out("st(7)") _,
+                    options(nostack),
+                );
+            }
+            self
+        }
+
+        fn mul_add_r(self, _mul: Self, _add: Self, _rm: Round) -> Option<StatusAnd<Self>> {
+            None
+        }
+
+        /* We could call `__fixxfti` and similar, but that may not be available or may not
+         * respect rounding modes. */
+
+        fn to_i128_r(self, _rm: Round) -> Option<StatusAnd<i128>> {
+            None
+        }
+
+        fn to_u128_r(self, _rm: Round) -> Option<StatusAnd<u128>> {
+            None
+        }
+
+        fn from_i128_r(_x: i128, _rm: Round) -> Option<StatusAnd<Self>> {
+            None
+        }
+
+        fn from_u128_r(_x: u128, _rm: Round) -> Option<StatusAnd<Self>> {
+            None
+        }
+
+        /* A load + store with the correct access sizes handles width conversions */
+
+        fn to_double_r(self, rm: Round) -> Option<StatusAnd<f64>> {
+            unsafe {
+                let mut dst: f64 = 0.0;
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fstp qword ptr [{y}]",
+                    x = in(reg) &self,
+                    y = in(reg) &mut dst,
+                );
+                Some(status.and(dst))
+            }
+        }
+
+        fn from_double_r(x: f64, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let mut dst = x87_f80(0);
+                let status = with_x87_env!(
+                    rm,
+                    "fld qword ptr [{x}]",
+                    "fstp tbyte ptr [{y}]",
+                    x = in(reg) &x,
+                    y = in(reg) &mut dst,
+                );
+                Some(status.and(dst))
+            }
+        }
+
+        fn to_single_r(self, rm: Round) -> Option<StatusAnd<f32>> {
+            unsafe {
+                let mut dst: f32 = 0.0;
+                let status = with_x87_env!(
+                    rm,
+                    "fld tbyte ptr [{x}]",
+                    "fstp dword ptr [{y}]",
+                    x = in(reg) &self,
+                    y = in(reg) &mut dst,
+                );
+                Some(status.and(dst))
+            }
+        }
+
+        fn from_single_r(x: f32, rm: Round) -> Option<StatusAnd<Self>> {
+            unsafe {
+                let mut dst = x87_f80(0);
+                let status = with_x87_env!(
+                    rm,
+                    "fld dword ptr [{x}]",
+                    "fstp tbyte ptr [{y}]",
+                    x = in(reg) &x,
+                    y = in(reg) &mut dst,
+                );
+                Some(status.and(dst))
+            }
+        }
+    }
+
+    /// 28-byte x87 environment image written by `fnstenv` and read back by `fldenv`.
+    ///
+    /// See: Intel 64 and IA-32 Architectures Software Developer's Manual Volume 1:
+    /// Basic Architecture, section 8.1.10 Saving the x87 FPU State, figure 8-9.
+    #[repr(C)]
+    #[derive(Clone, Copy, Debug, Default)]
+    struct X87FpEnv {
+        control: u16,
+        reserved0: u16,
+        status: u16,
+        reserved1: u16,
+        tag: u16,
+        reserved2: u16,
+        iptr_offset: u32,
+        iptr_sel: u16,
+        opcode: u16,
+        dptr_offset: u32,
+        dptr_sel: u16,
+        reserved3: u16,
+    }
+
+    /// Exception flag bits (low six bits) of the x87 status word; invert to clear them.
+    const X87_STATUS_EXCEPTIONS: u16 = 0b111111;
+
+    #[expect(unused)] // only extended is used
+    #[derive(Clone, Copy, Debug, PartialEq)]
+    enum X87Precision {
+        Single,
+        Double,
+        Extended,
+    }
+
+    /// Build the x87 control word for `round` and `prec`; `None` if `round` is unsupported.
+    ///
+    /// See: Intel 64 and IA-32 Architectures Software Developer's Manual Volume 1:
+    /// Basic Architecture, section 8.1.5 x87 FPU Control Word.
+    const fn make_x87_cw(round: Round, prec: X87Precision) -> Option<u16> {
+        // Bits 0..=5: set all exception masks so fp status doesn't turn into SIGFPE
+        const EXCEPTION_MASKS: u16 = 0b0011_1111;
+        // Bit 6: reserved field, usually set by default
+        const RESERVED: u16 = 0b0100_0000;
+
+        // Precision control, placed at bits 8..=9
+        let pc: u16 = match prec {
+            X87Precision::Single => 0b00,
+            X87Precision::Double => 0b10,
+            X87Precision::Extended => 0b11,
+        };
+
+        // Rounding control, placed at bits 10..=11
+        let rc: u16 = match round {
+            Round::NearestTiesToEven => 0b00,
+            Round::TowardNegative => 0b01,
+            Round::TowardPositive => 0b10,
+            Round::TowardZero => 0b11,
+            Round::NearestTiesToAway => return None,
+        };
+
+        let cw = EXCEPTION_MASKS | RESERVED | (pc << 8) | (rc << 10);
+        Some(cw)
+    }
+
+    /// Make a SSE control word or return `None` if the rounding mode is not supported.
     fn make_mxcsr_cw(round: Round) -> Option<u32> {
         // Default: Clear exception flags, no DAZ, no FTZ
         let mut csr = 0u32;
-        // Set all masks so fp status doesn't turn into SIGFPE
+        // Set all exception masks so fp status doesn't turn into SIGFPE
         csr |= 0b111111 << 7;
 
         let rc = match round {
@@ -623,6 +952,7 @@ mod x86 {
         Some(csr)
     }
 
+    /// Turn status flags from a register into a `Status`. Works for both x87 and SSE.
     fn check_exceptions(csr: u32) -> Status {
         let mut status = Status::OK;
 
diff --git a/fuzz/src/main.rs b/fuzz/src/main.rs
index 1f26e93..68831f4 100644
--- a/fuzz/src/main.rs
+++ b/fuzz/src/main.rs
@@ -10,6 +10,7 @@ mod host;
 use io::IsTerminal;
 use io::Read;
 use std::io;
+use std::marker::PhantomData;
 use std::path::PathBuf;
 use std::{fmt, fs};
 
@@ -202,7 +203,10 @@ macro_rules! float_reprs {
     ($($name:ident($repr:ty) {
         type RustcApFloat = $rs_apf_ty:ty;
         extern fn = $cxx_apf_eval_fuzz_op:ident;
-        $(type HardFloat = $hard_float_ty:ty;)?
+        $(
+            $(#[$($hard_float_cfg:tt)*])?
+            type HardFloat = $hard_float_ty:ty;
+        )?
     })+) => {
         macro_rules! for_each_repr {
             (for $ty_var:ident in all_reprs!() $block:block) => {
@@ -267,13 +271,18 @@ macro_rules! float_reprs {
                     status.and(Self(out))
                 }
 
-                #[allow(unused_variables)]
+                #[allow(unused)]
                 fn host_eval_fuzz_op_if_supported(
                     op: Op, rm: Round, a: Self, b: Self, c: Self
                 ) -> Option<StatusAnd<Self>> {
-                    None $(.or(
-                        Some(eval_host::<$hard_float_ty>(op, rm, a.0, b.0, c.0)?.map(Self))
-                    ))?
+                    let mut ret = None;
+                    $(
+                        $(#[$($hard_float_cfg)*])?
+                        {
+                            ret = Some(eval_host::<$hard_float_ty>(op, rm, a.0, b.0, c.0)?.map(Self));
+                        }
+                    )?
+                    ret
                 }
             }
 
@@ -345,6 +354,8 @@ float_reprs! {
     X87_F80(u128) {
         type RustcApFloat = rustc_apfloat::ieee::X87DoubleExtended;
         extern fn = cxx_apf_eval_op_x87_f80;
+        #[cfg(all(x86_sse2, target_has_reliable_f16))] // same gates as `host::x86`
+        type HardFloat = host::x86::x87_f80;
     }
 }
 
@@ -860,7 +871,7 @@ fn ignore_cxx<F: FloatRepr>(
         return None;
     }
 
-    let Masks { qnan_bit_mask, .. } = Masks::for_float::<F>();
+    let Masks { qnan_bit_mask, .. } = Masks::<F>::new();
 
     // For the F1->F2->F1 conversions where F1 and F2 are the same type, it seems like LLVM
     // doesn't actually do a conversion which means that sNaNs do not wind up quiet.
@@ -890,21 +901,20 @@ fn ignore_host<F: FloatRepr>(
         return None;
     }
 
-    let Masks {
-        sign_bit_mask,
-        exp_mask,
-        sig_mask,
-        qnan_bit_mask,
-    } = Masks::for_float::<F>();
-
-    let is_nan = |bits| {
-        let is_nan = (bits & exp_mask) == exp_mask && (bits & sig_mask) != 0;
-        assert_eq!(F::RustcApFloat::from_bits(bits).is_nan(), is_nan);
-        is_nan
-    };
+    let masks = Masks::<F>::new();
+
+    // FIXME: APFloat will implicitly normalize x87 pseudo subnormals, but `fneg` is a bitwise
+    // operation on the sign bit.
+    if F::KIND == FpKind::X87_F80
+        && cfg.op == Op::Neg
+        && masks.is_x87_pseudo_subnormal(a.to_bits_u128())
+        && rs_apf_bits == (host_bits | (masks.explicit_one_mask.unwrap() << 1))
+    {
+        return Some("ignoring normalization of pseudo subnormals in `neg`");
+    }
 
     // Everything else is for handling NaNs.
-    if !(is_nan(host_bits) && is_nan(rs_apf_bits)) {
+    if !(masks.is_nan(host_bits) && masks.is_nan(rs_apf_bits)) {
         return None;
     }
 
@@ -913,7 +923,7 @@ fn ignore_host<F: FloatRepr>(
     let zero_sign_mask = if cfg.cli_strict_host_nan_sign {
         u128::MAX
     } else {
-        !sign_bit_mask
+        !masks.sign_bit_mask
     };
 
     let host_zero_sign = host_bits & zero_sign_mask;
@@ -939,14 +949,14 @@ fn ignore_host<F: FloatRepr>(
             // `INFINITY.to_bits() | qnan_bit_mask == NAN.to_bits()`,
             // i.e. seeting the QNaN is more than enough to turn
             // a non-NaN (infinities, specifically) into a NaN.
-            if !is_nan(in_bits) {
+            if !masks.is_nan(in_bits) {
                 continue;
             }
 
             // Make sure to "quiet" (i.e. turn SNaN into QNaN)
             // the input first, as propagation does (in the
             // default exception handling mode, at least).
-            let in_quiet = in_bits | qnan_bit_mask;
+            let in_quiet = in_bits | masks.qnan_bit_mask;
             let in_zero_sign = in_quiet & zero_sign_mask;
 
             if in_zero_sign == host_zero_sign {
@@ -970,10 +980,10 @@ fn ignore_host<F: FloatRepr>(
     // existing NaN, but APFloat returns the fresh default NaN instead).
     if cfg.cli_ignore_fma_nan_generate_vs_propagate {
         if cfg.op == Op::MulAdd
-            && !is_nan(a.to_bits_u128())
-            && !is_nan(b.to_bits_u128())
-            && is_nan(c.to_bits_u128())
-            && host_zero_sign == (c.to_bits_u128() | qnan_bit_mask) & zero_sign_mask
+            && !masks.is_nan(a.to_bits_u128())
+            && !masks.is_nan(b.to_bits_u128())
+            && masks.is_nan(c.to_bits_u128())
+            && host_zero_sign == (c.to_bits_u128() | masks.qnan_bit_mask) & zero_sign_mask
             && rs_apf_bits == F::RustcApFloat::NAN.to_bits()
         {
             return Some("fresh NaN from FMA");
@@ -1097,30 +1107,84 @@ fn round_to_u8(rm: Round) -> u8 {
 
 /// Masks for bitwise operations.
 #[derive(Clone, Copy, Debug)]
-struct Masks {
+struct Masks<F> {
     sign_bit_mask: u128,
     exp_mask: u128,
+    /// Significand including the explicit integer bit, if applicable.
+    #[cfg_attr(not(test), expect(unused))]
     sig_mask: u128,
+    /// Significand excluding the explicit integer bit, if applicable.
+    frac_mask: u128,
     qnan_bit_mask: u128,
+    explicit_one_mask: Option<u128>,
+    float: PhantomData<F>,
 }
 
-impl Masks {
-    fn for_float<F: FloatRepr>() -> Self {
+impl<F: FloatRepr> Masks<F> {
+    /// Note: for x87 double extended the masks alone are not enough, due to the explicit bit
+    /// and pseudo numbers (apfloat treats "pseudoinfinity" and "unnormal" as NaN); see `is_nan`.
+    fn new() -> Self {
         // HACK(eddyb) to avoid putting this behind a `HasHardFloat` bound,
         // we hardcode some aspects of the IEEE binary float representation,
         // relying on `rustc_apfloat`-provided constants as a source of truth.
         let sign_bit_mask = 1 << (F::BIT_WIDTH - 1);
-        let exp_mask = F::RustcApFloat::INFINITY.to_bits();
-        let sig_mask = (1 << exp_mask.trailing_zeros()) - 1;
+        let mut exp_mask = F::RustcApFloat::INFINITY.to_bits();
+        let mut sig_mask = (1 << exp_mask.trailing_zeros()) - 1;
+        let frac_mask = sig_mask;
         let qnan_bit_mask = (sig_mask + 1) >> 1;
+        let mut explicit_one_mask = None;
+
+        if F::KIND == FpKind::X87_F80 {
+            let top = 1u128 << 63;
+            sig_mask |= top;
+            exp_mask &= !top;
+            explicit_one_mask = Some(top);
+        }
 
         Self {
             sign_bit_mask,
             exp_mask,
             sig_mask,
+            frac_mask,
             qnan_bit_mask,
+            explicit_one_mask,
+            float: PhantomData,
         }
     }
+
+    /// Check whether `bits` is a NaN using the masks; asserts that apfloat agrees.
+    fn is_nan(&self, bits: u128) -> bool {
+        let apf_nan = F::RustcApFloat::from_bits(bits).is_nan();
+        let exp = bits & self.exp_mask;
+        let sig = bits & self.frac_mask;
+        let mut mask_nan = (exp == self.exp_mask) && sig != 0;
+
+        // x87 needs handling for pseudovalues, which apfloat considers NaNs but simple
+        // masks may not.
+        if F::KIND == FpKind::X87_F80 {
+            let top = bits & self.explicit_one_mask.unwrap();
+
+            // Proper x87 NaNs require the integer bit set
+            mask_nan &= top != 0;
+
+            // "pseudoinfinity", saturated exponent with the rest zeros
+            mask_nan |= exp == self.exp_mask && top == 0 && sig == 0;
+            // "pseudo-NaN", saturated exponent and nonzero fraction with zero integer bit
+            mask_nan |= exp == self.exp_mask && top == 0 && sig != 0;
+            // "unnormal", non-saturated non-zero exponent plus zero integer bit
+            mask_nan |= exp != 0 && exp != self.exp_mask && top == 0;
+        }
+
+        assert_eq!(apf_nan, mask_nan);
+        mask_nan
+    }
+
+    fn is_x87_pseudo_subnormal(&self, bits: u128) -> bool {
+        assert_eq!(F::KIND, FpKind::X87_F80);
+        let exp = bits & self.exp_mask;
+        let top = bits & self.explicit_one_mask.unwrap();
+        exp == 0 && top != 0
+    }
 }
 
 /// Helper for printing with color.
@@ -1266,15 +1330,19 @@ mod tests {
     }
 
     fn mask_assertions<F: FloatRepr>() {
-        let masks = Masks::for_float::<F>();
+        let masks = Masks::<F>::new();
         println!("{} masks: {masks:#?}", F::NAME);
         let Masks {
             sign_bit_mask,
             exp_mask,
             sig_mask,
+            frac_mask,
             qnan_bit_mask,
+            explicit_one_mask,
+            float: _,
         } = masks;
 
+        assert_eq!(frac_mask | explicit_one_mask.unwrap_or(0), sig_mask);
         // Sanity Checks
         assert_eq!(
             sign_bit_mask | exp_mask | sig_mask,
@@ -1292,7 +1360,10 @@ mod tests {
         } else {
             assert!(qnan_bit_mask.is_power_of_two());
         }
-        assert_eq!(exp_mask | qnan_bit_mask, F::RustcApFloat::NAN.to_bits());
+        assert_eq!(
+            exp_mask | explicit_one_mask.unwrap_or(0) | qnan_bit_mask,
+            F::RustcApFloat::NAN.to_bits()
+        );
     }
 
     /* Check that `ALL` actually contains all variants. */