diff --git a/docs/B14.adoc b/docs/B14.adoc new file mode 100644 index 0000000..84ea798 --- /dev/null +++ b/docs/B14.adoc @@ -0,0 +1,151 @@ += Floating Point Model Documentation +:toc: +:toclevels: 3 +:sectnums: +:stem: latexmath + + +== B14 Multiply-Add: Shift + +Aharoni et al. + +=== Description + +This model tests every possible value for a shift between the addends of the +multiply-add (FMA) operation. For the difference between the unbiased +exponent of the addend and the unbiased exponent of the result of the +multiplication, test the following values: + +1. A value smaller than `-(2p + 1)` +2. All the values in the range `[-(2p + 1), (p + 1)]` +3. A value larger than `(p + 1)` + +*Precisions Supported:* `BF_16`, `FP_16`, `FP_32`, `FP_64`, `FP_128` + +*Operations Supported:* FMADD, FMSUB, FNMADD, FNMSUB + +*Rounding Mode:* Round-to-Nearest-Even (RNE) + +== Implementation + +=== General Implementation + +An FMA (fused multiply-add) unit computes `X = A * B + C`, where `X` is the +result, `A` and `B` are the multiplicands, and `C` is the addend. + +The shift `S` is defined as the unbiased exponent of the multiplication product +minus the unbiased exponent of the addend `C` (product first, whereas the Description names the addend first), with the product exponent taken as the sum of the operand exponents: + +[source] +---- +S = unbiased_exp(product) - unbiased_exp(C) + = (unbiased_exp(A) + unbiased_exp(B)) - unbiased_exp(C) +---- + +In order for a processor to properly execute a fused multiply-add operation, +it must first compute the product and then align the significand of the +addend with the significand of the multiplication result (or the other way +around) so that both quantities share the same exponent before addition or +subtraction is performed. 
+ +=== Definitions + +`p`:: `mantissa_bits + 1`, the precision +`min_u`:: Minimum unbiased exponent for the format +`max_u`:: Maximum unbiased exponent for the format +`bias`:: `(2 ^ (E - 1)) - 1`, exponent bias for the format, where `E` is the number of exponent bits +`A`, `B`, `C`:: The three FMA operands +`P = A * B`:: The intermediate product +`S`:: The target shift, measured as `unbiased_exp(P) - unbiased_exp(C)` + +=== General Procedure + +The generator produces three groups of test vectors per format, +corresponding directly to Aharoni's three shift regions: + +[cols="2,1,2,2",options="header"] +|=== +| Group | Aharoni Case | Shift Constraint | Vectors per format + +| Group 1 (small_diff) | 1 | `S <= -(2p + 2)` | 4 +| Group 2 (mid_diff sweep) | 2 | `S ∈ [-(2p + 1), (p + 1)]` | `(3p + 3) * 4` +| Group 3 (large_diff) | 3 | `S >= (p + 2)` | 4 +|=== + +Where `S = unbiased_exp(product) - unbiased_exp(C)` and +`p = mantissa_bits + 1`. + +For every group, the generator loops through the four FMA operations +{FMADD, FMSUB, FNMADD, FNMSUB}, and the rounding mode is fixed at RNE. The +sign, mantissa, and exponent-split choices are randomized within the +constraints of the target shift. + +== Test Implementation + +=== Group 1 — The Deep-Negative Shift (small_diff) + +*Goal:* Drive the alignment shift into the region `S <= -(2p + 2)`. This +region corresponds to the case where the addend `C` is so much larger than +the product `P` that `P` effectively becomes a sticky bit relative to `C`. + +*Intermediate Constraint:* `S <= -(2p + 2)`, achieved by pairing a +small-exponent product with a large-exponent addend. + +*Construction Method:* + +1. Given the fixed target shift `S = -(2p + 2)`, pick an unbiased addend + exponent `C_u` from the reachable range + `[max(min_u, 2*min_u - S), min(max_u, 2*max_u - S)] ∩ [min_u, max_u]`. +2. Compute the required unbiased product exponent `P_u = S + C_u`. +3. 
Split `P_u = A_u + B_u` with both in `[min_u, max_u]` by randomly picking + `A_u ∈ [max(min_u, P_u - max_u), min(max_u, P_u - min_u)]` and setting + `B_u = P_u - A_u`. +4. Emit the test vector with random signs and random mantissas for `A`, + `B`, `C`. + +*Total Test Cases for Group 1:* `1 shift x 4 operations = 4 vectors per format.` + +=== Group 2 — The Mid Range Sweep (mid_diff) + +*Goal:* Exercise every integer alignment shift in the range where the +addend and product have comparable magnitudes. Every integer shift +`S ∈ [-(2p + 1), (p + 1)]` is visited. + +*Intermediate Constraint:* `S` takes every value in `[-(2p + 1), (p + 1)]`. + +*Construction Method:* For each target shift `S` in the sweep and for each +of the four FMA operations, the generator applies the same construction as +Group 1, using the current sweep value in place of the fixed `S`. + +*Total Test Cases for Group 2:* `(3p + 3) shifts x 4 operations = (12p + 12) vectors per format.` + +=== Group 3 — The Deep-Positive Shift (large_diff) + +*Goal:* Drive the alignment shift into the region `S >= (p + 2)`. This +region corresponds to the case where the product `P` is so much larger than +the addend `C` that `C` effectively becomes a sticky bit relative to `P`. + +*Intermediate Constraint:* `S >= (p + 2)`, achieved by pairing a +large-exponent product with a small-exponent addend. + +*Construction Method:* Same as Group 1, with the fixed target shift `S = (p + 2)`. + +*Total Test Cases for Group 3:* `1 shift x 4 operations = 4 vectors per format.` + +=== Vector Count Summary + +NOTE: There is no redundancy between groups, and no vector contributes to +more than one shift. 
+ +[cols="1,1,1,1,1,1",options="header"] +|=== +| Precision | p | Group 1 | Group 2 | Group 3 | Total per format + +| BF16 | 8 | 4 | 108 | 4 | 116 +| F16 | 11 | 4 | 144 | 4 | 152 +| F32 | 24 | 4 | 300 | 4 | 308 +| F64 | 53 | 4 | 648 | 4 | 656 +| F128 | 113 | 4 | 1,368 | 4 | 1,376 +|=== + +*Total across 5 formats:* 2,608 vectors diff --git a/src/cover_float/testgen/B14.py b/src/cover_float/testgen/B14.py index b320a5d..cd20547 100644 --- a/src/cover_float/testgen/B14.py +++ b/src/cover_float/testgen/B14.py @@ -15,21 +15,15 @@ # By: Sisi Wang # B14.py -# Fuse Multiply Add(FMA) +# Fused Multiply-Add (FMA) # B14 -> Multiply-Add: Shift - - -# This model tests every possible value for a shift between the addends of the multiply-add operation. -# For the difference between the unbiased exponent of the addend and the unbiased exponent of the result of the -# multiplication, test the following values: -# 1.A value smaller than -(2* p + 1) -# 2.All the values in the range:[-(2*p +1), (p +1) ] -# 3.A value larger than (p + 1) - +# +# Tests every possible value of the alignment shift between the multiplication +# result and the addend in a fused multiply-add (FMA) operation. The shift is +# defined as S = unbiased_exp(A*B) - unbiased_exp(C). 
import logging import random -from random import seed from typing import TextIO, cast import cover_float.common.constants as const @@ -53,79 +47,72 @@ def decimalComponentsToHex(fmt: str, sign: int, biased_exp: int, mantissa: int) def generate_b14_tests(test_f: TextIO, cover_f: TextIO, fmt: str) -> None: - _p = const.MANTISSA_BITS[fmt] + 1 # defines the precision we are working with - bias = (1 << (const.EXPONENT_BITS[fmt] - 1)) - 1 # calculates the correct bias depending on fmt - min_exp = const.BIASED_EXP[fmt][0] - max_exp = const.BIASED_EXP[fmt][1] - - # Define Format-Specific Shift Limits - # This defines the sweep range [ -limit, +limit ] - # We want to cover the full range of (ExpA + ExpB) - ExpC - SHIFT_LIMITS = { - const.FMT_HALF: 31, # Max exp diff is ~30. We use 31 - const.FMT_BF16: 256, # Max exp diff is ~255. We use 256 - const.FMT_SINGLE: 256, # Max exp diff is ~255. We use 256 - const.FMT_DOUBLE: 2050, # Max exp diff is ~2047. We use 2050 - const.FMT_QUAD: 32001, # Max exp diff is ~32001 - } - - # Get the limit (default to 500 if format missing) - limit = SHIFT_LIMITS.get(fmt, 500) - - start_shift = -limit - end_shift = limit - - with logger.progress_bar(f"{fmt} Shifts", show_m_of_n=True) as pbar: - for target_shift in pbar.track(range(start_shift, end_shift + 1)): - for op in OPS: - hashval = reproducible_hash(op + fmt + "b14") # Unique hash for (op, fmt) seed - seed(hashval) # Seed the random generator for reproducibility - # Randomize & generate 15 variations per shift - for _ in range(15): - ##Part 1: Randomize a and b exponents (and make sure their product is valid) - # a: - # safe margin defined to keep 'a' somewhat central to avoid immediate overflows - safe_margin = int(max_exp / 4) - exp_a = random.randint(min_exp + safe_margin, max_exp - safe_margin) - # b: - low_bound = max(min_exp, bias - 50) - high_bound = min(max_exp, bias + 50) - # Pick 'b' near the bias (so product exponent is close to exp_a) or random. 
This is simplified; - # we might want more range here. - exp_b = random.randint(low_bound, high_bound) - - ##Part 2: Calculate the addends - # Calculate product exponent:Exp_Prod = Exp_A + Exp_B - Bias - exp_prod = exp_a + exp_b - bias - - # Calculate required Exp_C to hit the Target Shift: target_shift = Exp_C - Exp_Prod - exp_c = target_shift + exp_prod - - # Quick validity check -> If the calculated exp_c is invalid (too big/small), skip this variation - if exp_c < min_exp or exp_c > max_exp: - continue - - ##Part 3: Generate mantissa componenets + Assemble >:3 - # Create random signs (0 or 1) - sign_a = random.randint(0, 1) - sign_b = random.randint(0, 1) - sign_c = random.randint(0, 1) - - # Randomize mantissas -> Uses random bits to trigger different carry/rounding paths. - mant_a = random.getrandbits(const.MANTISSA_BITS[fmt]) - mant_b = random.getrandbits(const.MANTISSA_BITS[fmt]) - mant_c = random.getrandbits(const.MANTISSA_BITS[fmt]) - - # Converts the components created above to hex - hex_a = decimalComponentsToHex(fmt, sign_a, exp_a, mant_a) - hex_b = decimalComponentsToHex(fmt, sign_b, exp_b, mant_b) - hex_c = decimalComponentsToHex(fmt, sign_c, exp_c, mant_c) - - run_and_store_test_vector( - f"{op}_{const.ROUND_NEAR_EVEN}_{hex_a}_{hex_b}_{hex_c}_{fmt}_{32 * '0'}_{fmt}_00", - test_f, - cover_f, - ) + p = const.MANTISSA_BITS[fmt] + 1 + min_u, max_u = const.UNBIASED_EXP[fmt] + bias = const.BIAS[fmt] + + # Shifts to cover (S = product exponent minus addend exponent): low anchor, full sweep, high anchor + small_anchor = -(2 * p + 2) + mid_range = list(range(-(2 * p + 1), (p + 1) + 1)) + large_anchor = p + 2 + shift_list = [small_anchor, *mid_range, large_anchor] + + for op in OPS: + # Seed once per (fmt, op) pair, outside the shift loop + rng = random.Random(reproducible_hash(f"B14 {fmt} {op}")) + + for target_shift in shift_list: + # Exponent construction. NOTE(review): confirm the sign of S against Aharoni; infeasible ranges skip silently. + + # 1. 
Pick unbiased C exponent + c_lo = max(min_u, 2 * min_u - target_shift) + c_hi = min(max_u, 2 * max_u - target_shift) + + if c_lo > c_hi: + continue + + c_u = rng.randint(c_lo, c_hi) + + # 2. Compute nominal unbiased product exponent (sum of operand exponents, mantissa carry ignored) + p_u = target_shift + c_u + + # 3. Split p_u into a_u + b_u + a_lo = max(min_u, p_u - max_u) + a_hi = min(max_u, p_u - min_u) + + if a_lo > a_hi: + continue # defensive only: cannot trigger once c_u lies in [c_lo, c_hi] + + a_u = rng.randint(a_lo, a_hi) + b_u = p_u - a_u + + # 4. Convert to biased exponents + exp_a = a_u + bias + exp_b = b_u + bias + exp_c = c_u + bias + + # Test vector assembly + + # Draw random signs and mantissas + sign_a = rng.randint(0, 1) + sign_b = rng.randint(0, 1) + sign_c = rng.randint(0, 1) + + mant_a = rng.getrandbits(const.MANTISSA_BITS[fmt]) + mant_b = rng.getrandbits(const.MANTISSA_BITS[fmt]) + mant_c = rng.getrandbits(const.MANTISSA_BITS[fmt]) + + # Encode operands to hex + hex_a = decimalComponentsToHex(fmt, sign_a, exp_a, mant_a) + hex_b = decimalComponentsToHex(fmt, sign_b, exp_b, mant_b) + hex_c = decimalComponentsToHex(fmt, sign_c, exp_c, mant_c) + + # Emit test vector + run_and_store_test_vector( + f"{op}_{const.ROUND_NEAR_EVEN}_{hex_a}_{hex_b}_{hex_c}_{fmt}_{32 * '0'}_{fmt}_00", + test_f, + cover_f, + ) @register_model("B14")