nadavrot · ptomsich · Mar 29, 2026
diff --git a/src/arithmetic.rs b/src/arithmetic.rs
@@ -4,7 +4,7 @@ extern crate alloc;
 use crate::bigint::BigInt;
 
 use super::bigint::LossFraction;
-use super::float::{Category, Float, RoundingMode};
+use super::float::{Category, Float, RoundingMode, Status};
 use core::cmp::Ordering;
 use core::ops::{
     Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign,
@@ -94,14 +94,25 @@ impl Float {
 
     /// Computes a+b using the rounding mode `rm`.
     pub fn add_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
-        Self::add_sub(a, b, false, rm)
+        Self::add_with_status(a, b, rm).0
     }
     /// Computes a-b using the rounding mode `rm`.
     pub fn sub_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
-        Self::add_sub(a, b, true, rm)
+        Self::sub_with_status(a, b, rm).0
     }
 
-    fn add_sub(a: &Self, b: &Self, subtract: bool, rm: RoundingMode) -> Self {
+    /// Computes a+b using the rounding mode `rm`, returning the result
+    /// and IEEE 754 exception status flags.
+    pub fn add_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
+        Self::add_sub_with_status(a, b, false, rm)
+    }
+    /// Computes a-b using the rounding mode `rm`, returning the result
+    /// and IEEE 754 exception status flags.
+    pub fn sub_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
+        Self::add_sub_with_status(a, b, true, rm)
+    }
+
+    fn add_sub_with_status(a: &Self, b: &Self, subtract: bool, rm: RoundingMode) -> (Self, Status) {
         let sem = a.get_semantics();
         // Table 8.2: Specification of addition for positive floating-point
         // data. Pg 247.
@@ -112,35 +123,36 @@ impl Float {
             | (Category::NaN, Category::Zero)
             | (Category::Normal, Category::Zero)
             | (Category::Infinity, Category::Normal)
-            | (Category::Infinity, Category::Zero) => a.clone(),
+            | (Category::Infinity, Category::Zero) => (a.clone(), Status::default()),
 
             (Category::Zero, Category::NaN)
             | (Category::Normal, Category::NaN)
             | (Category::Infinity, Category::NaN) => {
-                Self::nan(sem, b.get_sign())
+                (Self::nan(sem, b.get_sign()), Status::default())
             }
 
             (Category::Normal, Category::Infinity)
             | (Category::Zero, Category::Infinity) => {
-                Self::inf(sem, b.get_sign() ^ subtract)
+                (Self::inf(sem, b.get_sign() ^ subtract), Status::default())
             }
 
-            (Category::Zero, Category::Normal) => Self::from_parts(
+            (Category::Zero, Category::Normal) => (Self::from_parts(
                 sem,
                 b.get_sign() ^ subtract,
                 b.get_exp(),
                 b.get_mantissa(),
-            ),
+            ), Status::default()),
 
             (Category::Zero, Category::Zero) => {
-                Self::zero(sem, a.get_sign() && b.get_sign())
+                (Self::zero(sem, a.get_sign() && b.get_sign()), Status::default())
             }
 
             (Category::Infinity, Category::Infinity) => {
                 if a.get_sign() ^ b.get_sign() ^ subtract {
-                    return Self::nan(sem, a.get_sign() ^ b.get_sign());
+                    return (Self::nan(sem, a.get_sign() ^ b.get_sign()),
+                            Status { invalid: true, ..Default::default() });
                 }
-                Self::inf(sem, a.get_sign())
+                (Self::inf(sem, a.get_sign()), Status::default())
             }
 
             (Category::Normal, Category::Normal) => {
@@ -151,12 +163,12 @@ impl Float {
                 let same_absolute_number = a.same_absolute_value(b);
                 if cancellation && same_absolute_number {
                     let is_negative = RoundingMode::Negative == rm;
-                    return Self::zero(sem, is_negative);
+                    return (Self::zero(sem, is_negative), Status::default());
                 }
 
                 let mut res = Self::add_or_sub_normals(a, b, subtract);
-                res.0.normalize(rm, res.1);
-                res.0
+                let status = res.0.normalize(rm, res.1);
+                (res.0, status)
             }
         }
     }
@@ -353,6 +365,12 @@ fn test_add_random_vals() {
 impl Float {
     /// Compute a*b using the rounding mode `rm`.
     pub fn mul_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
+        Self::mul_with_status(a, b, rm).0
+    }
+
+    /// Compute a*b using the rounding mode `rm`, returning the result
+    /// and IEEE 754 exception status flags.
+    pub fn mul_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
         let sem = a.get_semantics();
         let sign = a.get_sign() ^ b.get_sign();
 
@@ -362,26 +380,28 @@ impl Float {
             (Category::Zero, Category::NaN)
             | (Category::Normal, Category::NaN)
             | (Category::Infinity, Category::NaN) => {
-                Self::nan(sem, b.get_sign())
+                (Self::nan(sem, b.get_sign()), Status::default())
             }
             (Category::NaN, Category::Infinity)
             | (Category::NaN, Category::NaN)
             | (Category::NaN, Category::Normal)
-            | (Category::NaN, Category::Zero) => Self::nan(sem, a.get_sign()),
+            | (Category::NaN, Category::Zero) => (Self::nan(sem, a.get_sign()), Status::default()),
             (Category::Normal, Category::Infinity)
             | (Category::Infinity, Category::Normal)
-            | (Category::Infinity, Category::Infinity) => Self::inf(sem, sign),
+            | (Category::Infinity, Category::Infinity) => (Self::inf(sem, sign), Status::default()),
             (Category::Normal, Category::Zero)
             | (Category::Zero, Category::Normal)
-            | (Category::Zero, Category::Zero) => Self::zero(sem, sign),
+            | (Category::Zero, Category::Zero) => (Self::zero(sem, sign), Status::default()),
 
             (Category::Zero, Category::Infinity)
-            | (Category::Infinity, Category::Zero) => Self::nan(sem, sign),
+            | (Category::Infinity, Category::Zero) => {
+                (Self::nan(sem, sign), Status { invalid: true, ..Default::default() })
+            }
 
             (Category::Normal, Category::Normal) => {
                 let (mut res, loss) = Self::mul_normals(a, b, sign);
-                res.normalize(rm, loss);
-                res
+                let status = res.normalize(rm, loss);
+                (res, status)
             }
         }
     }
@@ -510,23 +530,34 @@ fn test_mul_random_vals() {
 impl Float {
     /// Compute a/b, with the rounding mode `rm`.
     pub fn div_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
+        Self::div_with_status(a, b, rm).0
+    }
+
+    /// Compute a/b with the rounding mode `rm`, returning the result
+    /// and IEEE 754 exception status flags.
+    pub fn div_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
         let sem = a.get_semantics();
         let sign = a.get_sign() ^ b.get_sign();
         // Table 8.5: Special values for x/y - Page 263.
         match (a.get_category(), b.get_category()) {
             (Category::NaN, _)
-            | (_, Category::NaN)
-            | (Category::Zero, Category::Zero)
-            | (Category::Infinity, Category::Infinity) => Self::nan(sem, sign),
-
-            (_, Category::Infinity) => Self::zero(sem, sign),
-            (Category::Zero, _) => Self::zero(sem, sign),
-            (_, Category::Zero) => Self::inf(sem, sign),
-            (Category::Infinity, _) => Self::inf(sem, sign),
+            | (_, Category::NaN) => (Self::nan(sem, sign), Status::default()),
+
+            (Category::Zero, Category::Zero)
+            | (Category::Infinity, Category::Infinity) => {
+                (Self::nan(sem, sign), Status { invalid: true, ..Default::default() })
+            }
+
+            (_, Category::Infinity) => (Self::zero(sem, sign), Status::default()),
+            (Category::Zero, _) => (Self::zero(sem, sign), Status::default()),
+            (_, Category::Zero) => {
+                (Self::inf(sem, sign), Status { divide_by_zero: true, ..Default::default() })
+            }
+            (Category::Infinity, _) => (Self::inf(sem, sign), Status::default()),
             (Category::Normal, Category::Normal) => {
                 let (mut res, loss) = Self::div_normals(a, b);
-                res.normalize(rm, loss);
-                res
+                let status = res.normalize(rm, loss);
+                (res, status)
             }
         }
     }
@@ -818,7 +849,7 @@ impl Float {
     ) -> Self {
         if a.is_normal() && b.is_normal() && c.is_normal() {
             let (mut res, loss) = Self::fused_mul_add_normals(a, b, c);
-            res.normalize(rm, loss); // Finally, round the result.
+            let _ = res.normalize(rm, loss); // Finally, round the result.
             res
         } else {
             // Perform two operations. First, handle non-normal values.
@@ -942,3 +973,129 @@ fn test_fma_random_vals() {
         assert!(r1.is_nan() || r0_bits == r1_bits);
     }
 }
+
+#[test]
+fn test_status_exact_ops() {
+    use super::float::Status;
+    let rm = RoundingMode::NearestTiesToEven;
+    // 1.0 + 1.0 = 2.0 exactly.
+    let one = Float::from_f64(1.0);
+    let (res, st) = Float::add_with_status(&one, &one, rm);
+    assert_eq!(res.as_f64(), 2.0);
+    assert_eq!(st, Status::default());
+    // 2.0 * 3.0 = 6.0 exactly.
+    let two = Float::from_f64(2.0);
+    let three = Float::from_f64(3.0);
+    let (res, st) = Float::mul_with_status(&two, &three, rm);
+    assert_eq!(res.as_f64(), 6.0);
+    assert_eq!(st, Status::default());
+    // 6.0 / 2.0 = 3.0 exactly.
+    let six = Float::from_f64(6.0);
+    let (res, st) = Float::div_with_status(&six, &two, rm);
+    assert_eq!(res.as_f64(), 3.0);
+    assert_eq!(st, Status::default());
+}
+
+#[test]
+fn test_status_inexact() {
+    let rm = RoundingMode::NearestTiesToEven;
+    let one = Float::from_f64(1.0);
+    let three = Float::from_f64(3.0);
+    let (_, st) = Float::div_with_status(&one, &three, rm);
+    assert!(st.inexact);
+    assert!(!st.overflow);
+    assert!(!st.invalid);
+    assert!(!st.divide_by_zero);
+}
+
+#[test]
+fn test_status_overflow() {
+    use super::float::FP16;
+    let rm = RoundingMode::NearestTiesToEven;
+    // FP16 max is 65504. Multiplying two large values should overflow.
+    let big = Float::from_u64(FP16, 60000);
+    let two = Float::from_u64(FP16, 2);
+    let (res, st) = Float::mul_with_status(&big, &two, rm);
+    assert!(res.is_inf());
+    assert!(st.overflow);
+    assert!(st.inexact);
+}
+
+#[test]
+fn test_status_invalid() {
+    use super::float::FP64;
+    let rm = RoundingMode::NearestTiesToEven;
+    let sem = FP64;
+
+    // inf + (-inf) = NaN, invalid.
+    let pos_inf = Float::inf(sem, false);
+    let neg_inf = Float::inf(sem, true);
+    let (res, st) = Float::add_with_status(&pos_inf, &neg_inf, rm);
+    assert!(res.is_nan());
+    assert!(st.invalid);
+
+    // 0 * inf = NaN, invalid.
+    let zero = Float::zero(sem, false);
+    let (res, st) = Float::mul_with_status(&zero, &pos_inf, rm);
+    assert!(res.is_nan());
+    assert!(st.invalid);
+
+    // 0 / 0 = NaN, invalid.
+    let zero2 = Float::zero(sem, false);
+    let (res, st) = Float::div_with_status(&zero, &zero2, rm);
+    assert!(res.is_nan());
+    assert!(st.invalid);
+
+    // inf / inf = NaN, invalid.
+    let (res, st) = Float::div_with_status(&pos_inf, &pos_inf, rm);
+    assert!(res.is_nan());
+    assert!(st.invalid);
+}
+
+#[test]
+fn test_status_divide_by_zero() {
+    use super::float::FP64;
+    let rm = RoundingMode::NearestTiesToEven;
+    let one = Float::from_f64(1.0);
+    let zero = Float::zero(FP64, false);
+    let (res, st) = Float::div_with_status(&one, &zero, rm);
+    assert!(res.is_inf());
+    assert!(st.divide_by_zero);
+    assert!(!st.inexact);
+    assert!(!st.invalid);
+}
+
+#[test]
+fn test_status_bitor() {
+    use super::float::Status;
+    let a = Status { invalid: true, ..Default::default() };
+    let b = Status { inexact: true, ..Default::default() };
+    let c = a | b;
+    assert!(c.invalid);
+    assert!(c.inexact);
+    assert!(!c.overflow);
+
+    let mut d = Status::default();
+    d |= a;
+    d |= b;
+    assert_eq!(c, d);
+}
+
+#[test]
+fn test_status_wrapper_equivalence() {
+    use super::float::FP64;
+    let rm = RoundingMode::NearestTiesToEven;
+    let a = Float::from_f64(1.234);
+    let b = Float::from_f64(5.678);
+
+    assert_eq!(Float::add_with_rm(&a, &b, rm).as_f64(),
+               Float::add_with_status(&a, &b, rm).0.as_f64());
+    assert_eq!(Float::sub_with_rm(&a, &b, rm).as_f64(),
+               Float::sub_with_status(&a, &b, rm).0.as_f64());
+    assert_eq!(Float::mul_with_rm(&a, &b, rm).as_f64(),
+               Float::mul_with_status(&a, &b, rm).0.as_f64());
+    assert_eq!(Float::div_with_rm(&a, &b, rm).as_f64(),
+               Float::div_with_status(&a, &b, rm).0.as_f64());
+    assert_eq!(a.cast_with_rm(FP64, rm).as_f64(),
+               a.cast_with_status(FP64, rm).0.as_f64());
+}
diff --git a/src/cast.rs b/src/cast.rs
@@ -6,7 +6,7 @@ use crate::FP128;
 use super::bigint::BigInt;
 use super::bigint::LossFraction;
 use super::float::{self, Category};
-use super::float::{Float, RoundingMode, FP32, FP64};
+use super::float::{Float, RoundingMode, Status, FP32, FP64};
 use super::utils;
 use super::utils::mask;
 
@@ -22,7 +22,7 @@ impl Float {
     pub fn from_bigint(sem: Semantics, val: BigInt) -> Self {
         let mut a =
             Self::from_parts(sem, false, sem.get_mantissa_len() as i64, val);
-        a.normalize(sem.get_rounding_mode(), LossFraction::ExactlyZero);
+        let _ = a.normalize(sem.get_rounding_mode(), LossFraction::ExactlyZero);
         a
     }
 
@@ -194,6 +194,12 @@ impl Float {
 
     /// Cast to another float using the non-default rounding mode `rm`.
     pub fn cast_with_rm(&self, to: Semantics, rm: RoundingMode) -> Float {
+        self.cast_with_status(to, rm).0
+    }
+
+    /// Cast to another float using the rounding mode `rm`, returning the
+    /// result and IEEE 754 exception status flags.
+    pub fn cast_with_status(&self, to: Semantics, rm: RoundingMode) -> (Float, Status) {
         let mut loss = LossFraction::ExactlyZero;
         let exp_delta =
             self.get_mantissa_len() as i64 - to.get_mantissa_len() as i64;
@@ -213,12 +219,14 @@ impl Float {
             temp.get_category(),
         );
         // Don't normalize if this is a nop conversion.
-        if to.get_exponent_len() != self.get_exponent_len()
+        let status = if to.get_exponent_len() != self.get_exponent_len()
             || to.get_mantissa_len() != self.get_mantissa_len()
         {
-            x.normalize(rm, loss);
-        }
-        x
+            x.normalize(rm, loss)
+        } else {
+            Status::default()
+        };
+        (x, status)
     }
     /// Convert from one float format to another.
     pub fn cast(&self, to: Semantics) -> Float {