Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 190 additions & 33 deletions src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ extern crate alloc;
use crate::bigint::BigInt;

use super::bigint::LossFraction;
use super::float::{Category, Float, RoundingMode};
use super::float::{Category, Float, RoundingMode, Status};
use core::cmp::Ordering;
use core::ops::{
Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign,
Expand Down Expand Up @@ -94,14 +94,25 @@ impl Float {

/// Computes a+b using the rounding mode `rm`.
pub fn add_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
Self::add_sub(a, b, false, rm)
Self::add_with_status(a, b, rm).0
}
/// Computes a-b using the rounding mode `rm`.
pub fn sub_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
Self::add_sub(a, b, true, rm)
Self::sub_with_status(a, b, rm).0
}

fn add_sub(a: &Self, b: &Self, subtract: bool, rm: RoundingMode) -> Self {
/// Adds `b` to `a` under the rounding mode `rm`.
///
/// Returns the sum together with the IEEE 754 exception status flags
/// produced by the shared add/sub routine.
pub fn add_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
    // Addition is the non-negated flavour of the shared routine.
    let subtract = false;
    Self::add_sub_with_status(a, b, subtract, rm)
}
/// Subtracts `b` from `a` under the rounding mode `rm`.
///
/// Returns the difference together with the IEEE 754 exception status
/// flags produced by the shared add/sub routine.
pub fn sub_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
    // Subtraction asks the shared routine to treat `b` as negated.
    let subtract = true;
    Self::add_sub_with_status(a, b, subtract, rm)
}

fn add_sub_with_status(a: &Self, b: &Self, subtract: bool, rm: RoundingMode) -> (Self, Status) {
let sem = a.get_semantics();
// Table 8.2: Specification of addition for positive floating-point
// data. Pg 247.
Expand All @@ -112,35 +123,36 @@ impl Float {
| (Category::NaN, Category::Zero)
| (Category::Normal, Category::Zero)
| (Category::Infinity, Category::Normal)
| (Category::Infinity, Category::Zero) => a.clone(),
| (Category::Infinity, Category::Zero) => (a.clone(), Status::default()),

(Category::Zero, Category::NaN)
| (Category::Normal, Category::NaN)
| (Category::Infinity, Category::NaN) => {
Self::nan(sem, b.get_sign())
(Self::nan(sem, b.get_sign()), Status::default())
}

(Category::Normal, Category::Infinity)
| (Category::Zero, Category::Infinity) => {
Self::inf(sem, b.get_sign() ^ subtract)
(Self::inf(sem, b.get_sign() ^ subtract), Status::default())
}

(Category::Zero, Category::Normal) => Self::from_parts(
(Category::Zero, Category::Normal) => (Self::from_parts(
sem,
b.get_sign() ^ subtract,
b.get_exp(),
b.get_mantissa(),
),
), Status::default()),

(Category::Zero, Category::Zero) => {
Self::zero(sem, a.get_sign() && b.get_sign())
(Self::zero(sem, a.get_sign() && b.get_sign()), Status::default())
}

(Category::Infinity, Category::Infinity) => {
if a.get_sign() ^ b.get_sign() ^ subtract {
return Self::nan(sem, a.get_sign() ^ b.get_sign());
return (Self::nan(sem, a.get_sign() ^ b.get_sign()),
Status { invalid: true, ..Default::default() });
}
Self::inf(sem, a.get_sign())
(Self::inf(sem, a.get_sign()), Status::default())
}

(Category::Normal, Category::Normal) => {
Expand All @@ -151,12 +163,12 @@ impl Float {
let same_absolute_number = a.same_absolute_value(b);
if cancellation && same_absolute_number {
let is_negative = RoundingMode::Negative == rm;
return Self::zero(sem, is_negative);
return (Self::zero(sem, is_negative), Status::default());
}

let mut res = Self::add_or_sub_normals(a, b, subtract);
res.0.normalize(rm, res.1);
res.0
let status = res.0.normalize(rm, res.1);
(res.0, status)
}
}
}
Expand Down Expand Up @@ -353,6 +365,12 @@ fn test_add_random_vals() {
impl Float {
/// Multiplies `a` by `b` under the rounding mode `rm`.
///
/// Delegates to `mul_with_status` and drops the status flags it reports.
pub fn mul_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
    let (product, _flags) = Self::mul_with_status(a, b, rm);
    product
}

/// Compute a*b using the rounding mode `rm`, returning the result
/// and IEEE 754 exception status flags.
pub fn mul_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
let sem = a.get_semantics();
let sign = a.get_sign() ^ b.get_sign();

Expand All @@ -362,26 +380,28 @@ impl Float {
(Category::Zero, Category::NaN)
| (Category::Normal, Category::NaN)
| (Category::Infinity, Category::NaN) => {
Self::nan(sem, b.get_sign())
(Self::nan(sem, b.get_sign()), Status::default())
}
(Category::NaN, Category::Infinity)
| (Category::NaN, Category::NaN)
| (Category::NaN, Category::Normal)
| (Category::NaN, Category::Zero) => Self::nan(sem, a.get_sign()),
| (Category::NaN, Category::Zero) => (Self::nan(sem, a.get_sign()), Status::default()),
(Category::Normal, Category::Infinity)
| (Category::Infinity, Category::Normal)
| (Category::Infinity, Category::Infinity) => Self::inf(sem, sign),
| (Category::Infinity, Category::Infinity) => (Self::inf(sem, sign), Status::default()),
(Category::Normal, Category::Zero)
| (Category::Zero, Category::Normal)
| (Category::Zero, Category::Zero) => Self::zero(sem, sign),
| (Category::Zero, Category::Zero) => (Self::zero(sem, sign), Status::default()),

(Category::Zero, Category::Infinity)
| (Category::Infinity, Category::Zero) => Self::nan(sem, sign),
| (Category::Infinity, Category::Zero) => {
(Self::nan(sem, sign), Status { invalid: true, ..Default::default() })
}

(Category::Normal, Category::Normal) => {
let (mut res, loss) = Self::mul_normals(a, b, sign);
res.normalize(rm, loss);
res
let status = res.normalize(rm, loss);
(res, status)
}
}
}
Expand Down Expand Up @@ -510,23 +530,34 @@ fn test_mul_random_vals() {
impl Float {
/// Divides `a` by `b` under the rounding mode `rm`.
///
/// Delegates to `div_with_status` and drops the status flags it reports.
pub fn div_with_rm(a: &Self, b: &Self, rm: RoundingMode) -> Self {
    let (quotient, _flags) = Self::div_with_status(a, b, rm);
    quotient
}

/// Compute a/b with the rounding mode `rm`, returning the result
/// and IEEE 754 exception status flags.
pub fn div_with_status(a: &Self, b: &Self, rm: RoundingMode) -> (Self, Status) {
let sem = a.get_semantics();
let sign = a.get_sign() ^ b.get_sign();
// Table 8.5: Special values for x/y - Page 263.
match (a.get_category(), b.get_category()) {
(Category::NaN, _)
| (_, Category::NaN)
| (Category::Zero, Category::Zero)
| (Category::Infinity, Category::Infinity) => Self::nan(sem, sign),

(_, Category::Infinity) => Self::zero(sem, sign),
(Category::Zero, _) => Self::zero(sem, sign),
(_, Category::Zero) => Self::inf(sem, sign),
(Category::Infinity, _) => Self::inf(sem, sign),
| (_, Category::NaN) => (Self::nan(sem, sign), Status::default()),

(Category::Zero, Category::Zero)
| (Category::Infinity, Category::Infinity) => {
(Self::nan(sem, sign), Status { invalid: true, ..Default::default() })
}

(_, Category::Infinity) => (Self::zero(sem, sign), Status::default()),
(Category::Zero, _) => (Self::zero(sem, sign), Status::default()),
(_, Category::Zero) => {
(Self::inf(sem, sign), Status { divide_by_zero: true, ..Default::default() })
}
(Category::Infinity, _) => (Self::inf(sem, sign), Status::default()),
(Category::Normal, Category::Normal) => {
let (mut res, loss) = Self::div_normals(a, b);
res.normalize(rm, loss);
res
let status = res.normalize(rm, loss);
(res, status)
}
}
}
Expand Down Expand Up @@ -818,7 +849,7 @@ impl Float {
) -> Self {
if a.is_normal() && b.is_normal() && c.is_normal() {
let (mut res, loss) = Self::fused_mul_add_normals(a, b, c);
res.normalize(rm, loss); // Finally, round the result.
let _ = res.normalize(rm, loss); // Finally, round the result.
res
} else {
// Perform two operations. First, handle non-normal values.
Expand Down Expand Up @@ -942,3 +973,129 @@ fn test_fma_random_vals() {
assert!(r1.is_nan() || r0_bits == r1_bits);
}
}

/// Exactly representable results must come back with no status flag set.
#[test]
fn test_status_exact_ops() {
    use super::float::Status;
    let mode = RoundingMode::NearestTiesToEven;

    // Sum: 1.0 + 1.0 is exactly 2.0.
    let unit = Float::from_f64(1.0);
    let (sum, sum_flags) = Float::add_with_status(&unit, &unit, mode);
    assert_eq!(sum.as_f64(), 2.0);
    assert_eq!(sum_flags, Status::default());

    // Product: 2.0 * 3.0 is exactly 6.0.
    let lhs = Float::from_f64(2.0);
    let rhs = Float::from_f64(3.0);
    let (product, product_flags) = Float::mul_with_status(&lhs, &rhs, mode);
    assert_eq!(product.as_f64(), 6.0);
    assert_eq!(product_flags, Status::default());

    // Quotient: 6.0 / 2.0 is exactly 3.0.
    let dividend = Float::from_f64(6.0);
    let (quotient, quotient_flags) = Float::div_with_status(&dividend, &lhs, mode);
    assert_eq!(quotient.as_f64(), 3.0);
    assert_eq!(quotient_flags, Status::default());
}

/// 1/3 cannot be represented exactly, so only `inexact` may be raised.
#[test]
fn test_status_inexact() {
    let mode = RoundingMode::NearestTiesToEven;
    let numerator = Float::from_f64(1.0);
    let denominator = Float::from_f64(3.0);

    let flags = Float::div_with_status(&numerator, &denominator, mode).1;

    assert!(flags.inexact);
    assert!(!flags.overflow);
    assert!(!flags.invalid);
    assert!(!flags.divide_by_zero);
}

/// A product beyond the FP16 range must round to infinity and raise both
/// `overflow` and `inexact`.
#[test]
fn test_status_overflow() {
    use super::float::FP16;
    let mode = RoundingMode::NearestTiesToEven;

    // The largest finite FP16 value is 65504, so 60000 * 2 is out of range.
    let large = Float::from_u64(FP16, 60000);
    let factor = Float::from_u64(FP16, 2);

    let (product, flags) = Float::mul_with_status(&large, &factor, mode);

    assert!(product.is_inf());
    assert!(flags.overflow);
    assert!(flags.inexact);
}

/// Each operation without a defined value must yield NaN and raise `invalid`.
#[test]
fn test_status_invalid() {
    use super::float::FP64;
    let mode = RoundingMode::NearestTiesToEven;
    let format = FP64;

    let plus_inf = Float::inf(format, false);
    let minus_inf = Float::inf(format, true);

    // (+inf) + (-inf)
    let (sum, sum_flags) = Float::add_with_status(&plus_inf, &minus_inf, mode);
    assert!(sum.is_nan());
    assert!(sum_flags.invalid);

    // 0 * inf
    let zero_a = Float::zero(format, false);
    let (product, product_flags) = Float::mul_with_status(&zero_a, &plus_inf, mode);
    assert!(product.is_nan());
    assert!(product_flags.invalid);

    // 0 / 0
    let zero_b = Float::zero(format, false);
    let (zero_quotient, zero_flags) = Float::div_with_status(&zero_a, &zero_b, mode);
    assert!(zero_quotient.is_nan());
    assert!(zero_flags.invalid);

    // inf / inf
    let (inf_quotient, inf_flags) = Float::div_with_status(&plus_inf, &plus_inf, mode);
    assert!(inf_quotient.is_nan());
    assert!(inf_flags.invalid);
}

/// Dividing a finite non-zero value by zero gives infinity and raises
/// `divide_by_zero` only.
#[test]
fn test_status_divide_by_zero() {
    use super::float::FP64;
    let mode = RoundingMode::NearestTiesToEven;

    let dividend = Float::from_f64(1.0);
    let divisor = Float::zero(FP64, false);

    let (quotient, flags) = Float::div_with_status(&dividend, &divisor, mode);

    assert!(quotient.is_inf());
    assert!(flags.divide_by_zero);
    assert!(!flags.inexact);
    assert!(!flags.invalid);
}

/// `|` and `|=` on `Status` must both produce the union of the raised flags.
#[test]
fn test_status_bitor() {
    use super::float::Status;
    let invalid_only = Status { invalid: true, ..Default::default() };
    let inexact_only = Status { inexact: true, ..Default::default() };

    // Binary operator form.
    let merged = invalid_only | inexact_only;
    assert!(merged.invalid);
    assert!(merged.inexact);
    assert!(!merged.overflow);

    // Compound-assignment form, accumulated from an empty status.
    let mut accumulated = Status::default();
    accumulated |= invalid_only;
    accumulated |= inexact_only;
    assert_eq!(merged, accumulated);
}

/// Every `*_with_rm` wrapper must return the same value as the first
/// element of its `*_with_status` counterpart.
#[test]
fn test_status_wrapper_equivalence() {
    use super::float::FP64;
    let rm = RoundingMode::NearestTiesToEven;
    let a = Float::from_f64(1.234);
    let b = Float::from_f64(5.678);

    let plain_sum = Float::add_with_rm(&a, &b, rm).as_f64();
    let status_sum = Float::add_with_status(&a, &b, rm).0.as_f64();
    assert_eq!(plain_sum, status_sum);

    let plain_diff = Float::sub_with_rm(&a, &b, rm).as_f64();
    let status_diff = Float::sub_with_status(&a, &b, rm).0.as_f64();
    assert_eq!(plain_diff, status_diff);

    let plain_product = Float::mul_with_rm(&a, &b, rm).as_f64();
    let status_product = Float::mul_with_status(&a, &b, rm).0.as_f64();
    assert_eq!(plain_product, status_product);

    let plain_quotient = Float::div_with_rm(&a, &b, rm).as_f64();
    let status_quotient = Float::div_with_status(&a, &b, rm).0.as_f64();
    assert_eq!(plain_quotient, status_quotient);

    let plain_cast = a.cast_with_rm(FP64, rm).as_f64();
    let status_cast = a.cast_with_status(FP64, rm).0.as_f64();
    assert_eq!(plain_cast, status_cast);
}
20 changes: 14 additions & 6 deletions src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::FP128;
use super::bigint::BigInt;
use super::bigint::LossFraction;
use super::float::{self, Category};
use super::float::{Float, RoundingMode, FP32, FP64};
use super::float::{Float, RoundingMode, Status, FP32, FP64};
use super::utils;
use super::utils::mask;

Expand All @@ -22,7 +22,7 @@ impl Float {
pub fn from_bigint(sem: Semantics, val: BigInt) -> Self {
let mut a =
Self::from_parts(sem, false, sem.get_mantissa_len() as i64, val);
a.normalize(sem.get_rounding_mode(), LossFraction::ExactlyZero);
let _ = a.normalize(sem.get_rounding_mode(), LossFraction::ExactlyZero);
a
}

Expand Down Expand Up @@ -194,6 +194,12 @@ impl Float {

/// Converts `self` to the format `to`, rounding with the explicitly
/// supplied mode `rm`.
///
/// Delegates to `cast_with_status` and drops the status flags it reports.
pub fn cast_with_rm(&self, to: Semantics, rm: RoundingMode) -> Float {
    let (converted, _flags) = self.cast_with_status(to, rm);
    converted
}

/// Cast to another float using the rounding mode `rm`, returning the
/// result and IEEE 754 exception status flags.
pub fn cast_with_status(&self, to: Semantics, rm: RoundingMode) -> (Float, Status) {
let mut loss = LossFraction::ExactlyZero;
let exp_delta =
self.get_mantissa_len() as i64 - to.get_mantissa_len() as i64;
Expand All @@ -213,12 +219,14 @@ impl Float {
temp.get_category(),
);
// Don't normalize if this is a nop conversion.
if to.get_exponent_len() != self.get_exponent_len()
let status = if to.get_exponent_len() != self.get_exponent_len()
|| to.get_mantissa_len() != self.get_mantissa_len()
{
x.normalize(rm, loss);
}
x
x.normalize(rm, loss)
} else {
Status::default()
};
(x, status)
}
/// Convert from one float format to another.
pub fn cast(&self, to: Semantics) -> Float {
Expand Down
Loading