From c4750b57e6a02da2812e94a17880a353bdc16240 Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Mon, 6 Apr 2026 14:42:53 -0700
Subject: [PATCH 1/7] B16 initial commit

---
 Makefile                            |   3 +
 src/cover_float/cli.py              |   5 +
 src/cover_float/testgen/B16.py      | 180 ++++++++++++++++++++++++++++
 src/cover_float/testgen/__init__.py |   2 +
 4 files changed, 190 insertions(+)
 create mode 100644 src/cover_float/testgen/B16.py

diff --git a/Makefile b/Makefile
index b57523d..51fa897 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,9 @@ B14:
 B15:
 	uv run --managed-python cover-float-testgen --model B15
 
+B16:
+	uv run --managed-python cover-float-testgen --model B16
+
 B21:
 	uv run --managed-python cover-float-testgen --model B21
 
diff --git a/src/cover_float/cli.py b/src/cover_float/cli.py
index 9f21c64..c1093ce 100644
--- a/src/cover_float/cli.py
+++ b/src/cover_float/cli.py
@@ -82,6 +82,8 @@ def testgen() -> None:
         auto_parse("B14", args.output_dir)
         tg.B15.main()
         auto_parse("B15", args.output_dir)
+        tg.B16.main()
+        auto_parse("B16", args.output_dir)
         tg.B21.main()
         auto_parse("B21", args.output_dir)
     else:
@@ -124,6 +126,9 @@ def testgen() -> None:
         if "B15" in args.models:
             tg.B15.main()
             auto_parse("B15", args.output_dir)
+        if "B16" in args.models:
+            tg.B16.main()
+            auto_parse("B16", args.output_dir)
         if "B21" in args.models:
             tg.B21.main()
             auto_parse("B21", args.output_dir)
diff --git a/src/cover_float/testgen/B16.py b/src/cover_float/testgen/B16.py
new file mode 100644
index 0000000..2d4ae8f
--- /dev/null
+++ b/src/cover_float/testgen/B16.py
@@ -0,0 +1,180 @@
+"""
+Angela Zheng (angela20061015@gmail.com)
+
+Created:        February 10, 2026
+Last Edited:    March 4, 2026
+"""
+
+# TODO: For future: implement logic to get different a and b exponents in regular cases
+import random
+from pathlib import Path
+from random import seed
+from typing import TextIO
+
+from cover_float.common.constants import (
+    BIASED_EXP,
+    EXPONENT_BITS,
+    FLOAT_FMTS,
+    MANTISSA_BITS,
+    OP_ADD,
+    OP_SUB,
+    ROUND_NEAR_EVEN,
+)
+from cover_float.common.util import reproducible_hash
+from cover_float.reference import run_and_store_test_vector
+
+
+def decimalComponentsToHex(fmt: str, sign: int, biased_exp: int, mantissa: int) -> str:
+    """Converts binary fp components into a 32-character padded hex string."""
+    b_sign = f"{sign:01b}"
+    b_exp = f"{biased_exp:0{EXPONENT_BITS[fmt]}b}"
+    b_man = f"{mantissa:0{MANTISSA_BITS[fmt]}b}"
+    bits = b_sign + b_exp + b_man
+    return f"{int(bits, 2):032X}"
+
+
+def writeAdd(fmt: str, a_hex: str, b_hex: str, test_f: TextIO, cover_f: TextIO) -> None:
+    run_and_store_test_vector(
+        f"{OP_ADD}_{ROUND_NEAR_EVEN}_{a_hex}_{b_hex}_{32 * '0'}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+    )
+
+
+def writeSub(fmt: str, a_hex: str, b_hex: str, test_f: TextIO, cover_f: TextIO) -> None:
+    run_and_store_test_vector(
+        f"{OP_SUB}_{ROUND_NEAR_EVEN}_{a_hex}_{b_hex}_{32 * '0'}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+    )
+
+
+def makeNegPMantissas(fmt: str) -> tuple[int, int]:
+    """Create mantissas for the most extreme cancellation (d = -p)"""
+    m = MANTISSA_BITS[fmt]
+
+    a_m = 0  # A = 1.00...0 (Mantissa 0)
+    b_m = (1 << m) - 1  # B = 1.11...1 (Mantissa all 1s)
+
+    return a_m, b_m
+
+
+def makeCancellationMantissas(fmt: str, d: int) -> tuple[int, int]:
+    """Generate identical -d bits for both mantissas such that exactly -d bits cancel."""
+    m = MANTISSA_BITS[fmt]
+    k = -d
+
+    # generate identical prefixes for both operands
+    if k > 1:
+        a_prefix = random.getrandbits(k - 1) << (m - k + 1)
+        b_prefix = a_prefix
+    else:
+        a_prefix = 0
+        b_prefix = 0
+
+    diff_bit = 1 << (m - k)  # differing bit
+
+    # randomly generate tails for both operands
+    if k < (m - 1):
+        a_tail = 1 << (m - k - 2) | random.getrandbits(m - k - 2)
+        b_tail = random.getrandbits(m - k - 2)
+    else:
+        a_tail = 0
+        b_tail = 0
+
+    a_m = a_prefix | diff_bit | a_tail
+    b_m = b_prefix | b_tail
+
+    return a_m, b_m
+
+
+def makeNoCancelMantissas(fmt: str) -> tuple[int, int]:
+    """Generate mantissas that result in no bit cancellation (d = 0)"""
+    m = MANTISSA_BITS[fmt]
+
+    a_m = (1 << m) - 1
+    b_m = ((1 << (m - 1)) - 1) << 1
+
+    return a_m, b_m
+
+
+def makeCarryMantissas(fmt: str) -> tuple[int, int]:
+    """Generate mantissas that will cause a carry (d = +1)"""
+    m = MANTISSA_BITS[fmt]
+
+    a_m = (1 << m) - 1  # 1.111...111
+    b_m = a_m  # 1.111...111
+
+    return a_m, b_m
+
+
+def makeTestVectors(fmt: str, d: int, operation: str, test_f: TextIO, cover_f: TextIO) -> None:
+    m = MANTISSA_BITS[fmt]
+    p = m + 1
+    min_exp, max_exp = BIASED_EXP[fmt]
+
+    is_carry = False
+    is_add = operation == "add"
+    write_fn = writeAdd if is_add else writeSub
+
+    # Randomly generate exponents
+    a_exp = random.randint(min_exp - d + 1, max_exp)
+    b_exp = a_exp
+
+    # Generate mantissas based on d
+    if d == 1:
+        is_carry = True
+        a_m, b_m = makeCarryMantissas(fmt)
+    elif d == 0:
+        a_m, b_m = makeNoCancelMantissas(fmt)
+        b_exp -= 1
+    elif d == -p:
+        a_m, b_m = makeNegPMantissas(fmt)
+        b_exp -= 1
+    else:
+        a_m, b_m = makeCancellationMantissas(fmt, d)
+
+    # Sign assignments based on whether d is 1
+    if is_add:
+        if is_carry:
+            a_sign = 0
+            b_sign = 0
+        else:
+            a_sign = 0
+            b_sign = 1
+    else:
+        if is_carry:
+            a_sign = 0
+            b_sign = 1
+        else:
+            a_sign = 0
+            b_sign = 0
+
+    a_hex = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
+    b_hex = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
+
+    write_fn(fmt, a_hex, b_hex, test_f, cover_f)
+
+
+def CancellationTests(test_f: TextIO, cover_f: TextIO, fmt: str) -> None:
+    p = MANTISSA_BITS[fmt] + 1
+
+    for d in range(-p, 2):  # [-p, +1]
+        hashval = reproducible_hash(OP_ADD + fmt + "b12")
+        seed(hashval)
+        makeTestVectors(fmt, d, "add", test_f, cover_f)
+        hashval = reproducible_hash(OP_SUB + fmt + "b12")
+        seed(hashval)
+        makeTestVectors(fmt, d, "sub", test_f, cover_f)
+
+
+def main() -> None:
+    with (
+        Path("./tests/testvectors/B12_tv.txt").open("w") as test_f,
+        Path("./tests/covervectors/B12_cv.txt").open("w") as cover_f,
+    ):
+        test_f.write("// Cancellation tests\n")
+        test_f.write("// Operations: ADD, SUB\n")
+
+        for fmt in FLOAT_FMTS:
+            CancellationTests(test_f, cover_f, fmt)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cover_float/testgen/__init__.py b/src/cover_float/testgen/__init__.py
index 5266b80..a842e54 100644
--- a/src/cover_float/testgen/__init__.py
+++ b/src/cover_float/testgen/__init__.py
@@ -11,6 +11,7 @@
 import cover_float.testgen.B13 as B13
 import cover_float.testgen.B14 as B14
 import cover_float.testgen.B15 as B15
+import cover_float.testgen.B16 as B16
 import cover_float.testgen.B21 as B21
 
 __all__ = [
@@ -27,5 +28,6 @@
     "B13",
     "B14",
     "B15",
+    "B16",
     "B21",
 ]

From 141874d3711daeae57104efc480cb2a1a56a51a4 Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Thu, 9 Apr 2026 13:06:16 -0700
Subject: [PATCH 2/7] B16 written

---
 Makefile                       |   2 +-
 docs/B16.adoc                  | 113 +++++++++++++
 src/cover_float/testgen/B16.py | 279 +++++++++++++++++++--------------
 3 files changed, 271 insertions(+), 123 deletions(-)
 create mode 100644 docs/B16.adoc

diff --git a/Makefile b/Makefile
index 51fa897..5b8bfa1 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
 RM_CMD ?= rm -rf
 
-.PHONY: build clean sim all B1 B2 B3 B9 B10 B12 B13 B14 B15 B21
+.PHONY: build clean sim all B1 B2 B3 B9 B10 B12 B13 B14 B15 B16 B21
 
 # Notice that we pass --managed-python, we do this so that uv (scikit-build-core)
 # will have a python enviornment with Python.h to build with.
diff --git a/docs/B16.adoc b/docs/B16.adoc
new file mode 100644
index 0000000..8463c1f
--- /dev/null
+++ b/docs/B16.adoc
@@ -0,0 +1,113 @@
+= Floating Point Model Documentation
+:toc:
+:toclevels: 3
+:sectnums:
+
+== B16 Multiply-Add: Cancellation
+
+Aharoni et al.
+
+=== Description
+
+This model tests every possible value for cancellation. For the difference between the exponent of the intermediate result and the maximum between the exponents of the inputs, test all values in the range: [-p, +1].
+
+That is, for two operands `a` and `b` with exponents `a_exp` and `b_exp` and an intermediate result exponent `exp`, we test all cases where `exp` - max(`a_exp`, `b_exp`) takes each value in [-p, +1].
+
+*Number of tests generated:* 438 (4E2)
+
+*Precisions Supported:* `BF_16`, `FP_16`, `FP_32`, `FP_64`, `FP_128`
+
+*Operations Supported:* Fmadd, fmsub, fnmadd, fnmsub
+
+== Definitions
+
+`a`:: Operand 1
+`b`:: Operand 2
+`m`:: Mantissa. Number of bits in mantissa, excluding the hidden 1
+`p`:: Precision. Number of significant bits, including the hidden 1. p = m + 1
+`d`:: The difference between max(a_exp, b_exp) and exponent of the intermediate result. This is the interest of this model, where we test all d's in [-p, +1]
+`k`:: Magnitude of d. Defined as -d in the code since we deal with d = 1 separately
+`a_m`:: Mantissa of `a`
+`b_m`:: Mantissa of `b`
+`a_exp`:: Exponent of operand `a`, generally in `[min_exp, max_exp]`
+`b_exp`:: Exponent of operand `b`, generally in `[min_exp, max_exp]`
+`exp`:: Exponent of the result, generally in `[min_exp, max_exp]`
+`max_exp`:: Maximum exponent value based on precision
+`min_exp`:: Minimum exponent value based on precision
+
+== Background
+
+Cancellation occurs when the most significant digits of two operands subtract to zero, triggering a massive normalization effort as the hardware shifts out the many leading zeros. In high-performing floating point processors, a Leading Zero Anticipator (LZA) is used to predict the location of the most significant bit of the result to perform normalization. Generating operands that force specific cancellation depths thus tests the accuracy of the LZA, especially for the most difficult edge cases where the depth of cancellation is close to the precision.
+
+=== Design Choices
+
+. Even though Aharoni implied that `a_exp` and `b_exp` can be different, we defined `a_exp` and `b_exp` to be the same for most cases to make controlling cancellation easier. Since we're only interested in the difference between the exponent of the operands and the intermediate result, the difference between `a_exp` and `b_exp` should not be of main concern.
+. Mantissas for `d = -p, 0, 1` are not randomly generated but are selected based on obvious cases that produce that specific `d`. Again, since we're only interested in the exponents, we should not be concerned with generating all mantissas randomly.
+
+=== Notes
+
+All explanation in this document will be done with one instead of both operations because the only difference will be sign assignments. All exponents used in the examples are unbiased exponents.
+
+== General Procedure
+
+d = -2(p+1)
+
+== Test Count Breakdown
+
+[cols="1,1,1,1,1,1,1",options="header"]
+|===
+| Precision | p | Case 1 (`d = -p`) | Case 2 (all other `d`'s) | Case 3 (`d = 0`) | Case 4 (`d = 1`) | Total
+
+| BF_16
+| 8
+| 1
+| 7
+| 1
+| 1
+| 10
+
+| FP_16
+| 11
+| 1
+| 10
+| 1
+| 1
+| 13
+
+| FP_32
+| 24
+| 1
+| 23
+| 1
+| 1
+| 26
+
+| FP_64
+| 53
+| 1
+| 52
+| 1
+| 1
+| 55
+
+| FP_128
+| 113
+| 1
+| 112
+| 1
+| 1
+| 115
+|===
+
+=== Overall Test Count
+
+[cols="1,1",options="header"]
+|===
+| Description | Value
+
+| Total cancellation tests
+| 219
+
+| Accounting for Add and Subtract (×2)
+| 438
+|===
diff --git a/src/cover_float/testgen/B16.py b/src/cover_float/testgen/B16.py
index 2d4ae8f..d80b713 100644
--- a/src/cover_float/testgen/B16.py
+++ b/src/cover_float/testgen/B16.py
@@ -1,23 +1,22 @@
 """
 Angela Zheng (angela20061015@gmail.com)
-
-Created:        February 10, 2026
-Last Edited:    March 4, 2026
 """
 
-# TODO: For future: implement logic to get different a and b exponents in regular cases
 import random
 from pathlib import Path
 from random import seed
 from typing import TextIO
 
 from cover_float.common.constants import (
+    BIAS,
     BIASED_EXP,
     EXPONENT_BITS,
     FLOAT_FMTS,
     MANTISSA_BITS,
-    OP_ADD,
-    OP_SUB,
+    OP_FMADD,
+    OP_FMSUB,
+    OP_FNMADD,
+    OP_FNMSUB,
     ROUND_NEAR_EVEN,
 )
 from cover_float.common.util import reproducible_hash
@@ -33,147 +32,183 @@ def decimalComponentsToHex(fmt: str, sign: int, biased_exp: int, mantissa: int)
     return f"{int(bits, 2):032X}"
 
 
-def writeAdd(fmt: str, a_hex: str, b_hex: str, test_f: TextIO, cover_f: TextIO) -> None:
-    run_and_store_test_vector(
-        f"{OP_ADD}_{ROUND_NEAR_EVEN}_{a_hex}_{b_hex}_{32 * '0'}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-    )
-
-
-def writeSub(fmt: str, a_hex: str, b_hex: str, test_f: TextIO, cover_f: TextIO) -> None:
-    run_and_store_test_vector(
-        f"{OP_SUB}_{ROUND_NEAR_EVEN}_{a_hex}_{b_hex}_{32 * '0'}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-    )
-
-
-def makeNegPMantissas(fmt: str) -> tuple[int, int]:
-    """Create mantissas for the most extreme cancellation (d = -p)"""
-    m = MANTISSA_BITS[fmt]
-
-    a_m = 0  # A = 1.00...0 (Mantissa 0)
-    b_m = (1 << m) - 1  # B = 1.11...1 (Mantissa all 1s)
-
-    return a_m, b_m
-
-
-def makeCancellationMantissas(fmt: str, d: int) -> tuple[int, int]:
-    """Generate identical -d bits for both mantissas such that exactly -d bits cancel."""
-    m = MANTISSA_BITS[fmt]
-    k = -d
-
-    # generate identical prefixes for both operands
-    if k > 1:
-        a_prefix = random.getrandbits(k - 1) << (m - k + 1)
-        b_prefix = a_prefix
-    else:
-        a_prefix = 0
-        b_prefix = 0
-
-    diff_bit = 1 << (m - k)  # differing bit
-
-    # randomly generate tails for both operands
-    if k < (m - 1):
-        a_tail = 1 << (m - k - 2) | random.getrandbits(m - k - 2)
-        b_tail = random.getrandbits(m - k - 2)
-    else:
-        a_tail = 0
-        b_tail = 0
-
-    a_m = a_prefix | diff_bit | a_tail
-    b_m = b_prefix | b_tail
-
-    return a_m, b_m
-
-
-def makeNoCancelMantissas(fmt: str) -> tuple[int, int]:
-    """Generate mantissas that result in no bit cancellation (d = 0)"""
+# def makeFMATestVectors(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> None:
+#     m = MANTISSA_BITS[fmt]
+#     p = m + 1
+#     bias = BIAS[fmt]
+#     min_exp, max_exp = BIASED_EXP[fmt]
+
+#     # Align product exponent with C
+#     a_exp = random.randint(bias, max_exp - 2)
+#     test = max_exp - a_exp
+#     print(test)
+#     b_exp = random.randint(0, test)
+#     prod_exp = a_exp + b_exp - bias
+
+#     # Determine signs for subtractive interaction
+#     a_sign = 0
+#     b_sign = 0
+#     prod_sign = a_sign ^ b_sign
+#     if op in [OP_FMADD, OP_FNMSUB]:
+#         c_sign = 1 - prod_sign
+#     else:
+#         c_sign = prod_sign
+
+#     # d = -(2p + 1) (Maximum Cancellation) ---
+#     if d == -(2 * p + 1):
+#         a_m, b_m = 0, 0
+#         c_m = (1 << m) - 1
+#         c_exp = prod_exp - 1
+
+#     # d = 1 (Carry) ---
+#     elif d == 1:
+#         c_sign = prod_sign if op in [OP_FMADD, OP_FNMSUB] else 1 - prod_sign
+#         a_m, b_m = (1 << m) - 1, (1 << m) - 1
+#         c_m = (1 << m) - 1
+#         c_exp = prod_exp
+
+#     # d = 0 (No Cancellation) ---
+#     elif d == 0:
+#         a_m, b_m = (1 << m) - 1, 0
+#         c_m = ((1 << m) - 1) & ~1
+#         c_exp = prod_exp - 1
+
+#     # other cases
+#     else:
+#         k = -d
+#         c_exp = prod_exp
+#         a_m = random.getrandbits(m)
+#         b_m = 0
+
+#         # Use max(0, ...) to prevent negative shift counts
+#         shift_amt = max(0, m - k + 1)
+#         prefix = (a_m >> shift_amt) << shift_amt
+#         c_m_prefix = prefix
+
+#         # only place bit if it's within the m-bit range
+#         bit_pos = m - k
+#         if bit_pos >= 0:
+#             diff_bit = 1 << bit_pos
+#         else:
+#             # If k > m, the difference happens in the internal "lower"
+#             # product bits. For the m-bit C, we just set a small value.
+#             diff_bit = 0
+
+#         # prevent negative shift in getrandbits
+#         tail_len = max(0, m - k - 2)
+#         if tail_len > 0:
+#             a_tail = (1 << (tail_len)) | random.getrandbits(tail_len)
+#             c_tail = random.getrandbits(tail_len)
+#         else:
+#             a_tail = 0
+#             c_tail = 0
+
+#         mask = (1 << shift_amt) - 1
+#         a_m = (a_m & ~mask) | diff_bit | a_tail
+#         c_m = c_m_prefix | c_tail
+
+#     a = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
+#     b = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
+#     c = decimalComponentsToHex(fmt, c_sign, c_exp, c_m)
+
+#     run_and_store_test_vector(
+#         f"{OP_FMADD}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+#     )
+#     run_and_store_test_vector(
+#         f"{OP_FMSUB}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+#     )
+#     run_and_store_test_vector(
+#         f"{OP_FNMADD}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+#     )
+#     run_and_store_test_vector(
+#         f"{OP_FNMSUB}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
+#     )
+
+
+def makeFMATestVectors(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> None:
     m = MANTISSA_BITS[fmt]
+    p = m + 1
+    bias = BIAS[fmt]
+    min_exp, max_exp = BIASED_EXP[fmt]
 
-    a_m = (1 << m) - 1
-    b_m = ((1 << (m - 1)) - 1) << 1
+    a_exp = random.randint(min_exp, max_exp - 2)
+    b_exp = random.randint(min_exp, max_exp - 2)
 
-    return a_m, b_m
+    a_m = random.getrandbits(m)
+    b_m = random.getrandbits(m)
 
+    # Approximate product exponent
+    prod_exp = a_exp + b_exp - bias
 
-def makeCarryMantissas(fmt: str) -> tuple[int, int]:
-    """Generate mantissas that will cause a carry (d = +1)"""
-    m = MANTISSA_BITS[fmt]
+    prod_m = random.getrandbits(m)
 
-    a_m = (1 << m) - 1  # 1.111...111
-    b_m = a_m  # 1.111...111
+    a_sign = random.getrandbits(1)
+    b_sign = random.getrandbits(1)
+    prod_sign = a_sign ^ b_sign
 
-    return a_m, b_m
+    c_sign = 1 - prod_sign if op in [OP_FMADD, OP_FNMSUB] else prod_sign
 
+    # control cancellation depth:
+    k = -d  # number of bits that cancel
 
-def makeTestVectors(fmt: str, d: int, operation: str, test_f: TextIO, cover_f: TextIO) -> None:
-    m = MANTISSA_BITS[fmt]
-    p = m + 1
-    min_exp, max_exp = BIASED_EXP[fmt]
+    c_exp = prod_exp
 
-    is_carry = False
-    is_add = operation == "add"
-    write_fn = writeAdd if is_add else writeSub
+    if d == 1:
+        # Carry case
+        c_sign = prod_sign
+        c_m = (1 << m) - 1
 
-    # Randomly generate exponents
-    a_exp = random.randint(min_exp - d + 1, max_exp)
-    b_exp = a_exp
+    elif d == -(2 * p + 1):
+        # Maximum cancellation → almost equal
+        c_exp = prod_exp - 1
+        c_m = prod_m ^ 1  # tiny diff
 
-    # Generate mantissas based on d
-    if d == 1:
-        is_carry = True
-        a_m, b_m = makeCarryMantissas(fmt)
-    elif d == 0:
-        a_m, b_m = makeNoCancelMantissas(fmt)
-        b_exp -= 1
-    elif d == -p:
-        a_m, b_m = makeNegPMantissas(fmt)
-        b_exp -= 1
     else:
-        a_m, b_m = makeCancellationMantissas(fmt, d)
+        # General case
+        if k <= m:
+            # Match top k bits
+            prefix_mask = ((1 << k) - 1) << (m - k)
+            prefix = prod_m & prefix_mask
 
-    # Sign assignments based on whether d is 1
-    if is_add:
-        if is_carry:
-            a_sign = 0
-            b_sign = 0
-        else:
-            a_sign = 0
-            b_sign = 1
-    else:
-        if is_carry:
-            a_sign = 0
-            b_sign = 1
-        else:
-            a_sign = 0
-            b_sign = 0
+            # Flip next bit
+            flip_pos = m - k - 1
+            flip_bit = 1 << flip_pos if flip_pos >= 0 else 0
 
-    a_hex = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
-    b_hex = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
+            # Random tail
+            tail_len = max(0, flip_pos)
+            tail = random.getrandbits(tail_len) if tail_len > 0 else 0
 
-    write_fn(fmt, a_hex, b_hex, test_f, cover_f)
+            c_m = prefix | flip_bit | tail
 
+        else:
+            # Deep cancellation beyond mantissa
+            c_m = prod_m
+            c_exp = prod_exp - (k - m)
 
-def CancellationTests(test_f: TextIO, cover_f: TextIO, fmt: str) -> None:
-    p = MANTISSA_BITS[fmt] + 1
+    a = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
+    b = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
+    c = decimalComponentsToHex(fmt, c_sign, c_exp, c_m)
 
-    for d in range(-p, 2):  # [-p, +1]
-        hashval = reproducible_hash(OP_ADD + fmt + "b12")
-        seed(hashval)
-        makeTestVectors(fmt, d, "add", test_f, cover_f)
-        hashval = reproducible_hash(OP_SUB + fmt + "b12")
-        seed(hashval)
-        makeTestVectors(fmt, d, "sub", test_f, cover_f)
+    for opcode in [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]:
+        run_and_store_test_vector(
+            f"{opcode}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00",
+            test_f,
+            cover_f,
+        )
 
 
 def main() -> None:
     with (
-        Path("./tests/testvectors/B12_tv.txt").open("w") as test_f,
-        Path("./tests/covervectors/B12_cv.txt").open("w") as cover_f,
+        Path("./tests/testvectors/B16_tv.txt").open("w") as test_f,
+        Path("./tests/covervectors/B16_cv.txt").open("w") as cover_f,
     ):
-        test_f.write("// Cancellation tests\n")
-        test_f.write("// Operations: ADD, SUB\n")
-
         for fmt in FLOAT_FMTS:
-            CancellationTests(test_f, cover_f, fmt)
+            p = MANTISSA_BITS[fmt] + 1
+            # Range adjusted for FMA: [-(2*p + 1), 1]
+            for d in range(-(2 * p + 1), 2):
+                for op in [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]:
+                    seed(reproducible_hash(f"{fmt}_b16_{d}_{op}"))
+                    makeFMATestVectors(fmt, d, op, test_f, cover_f)
 
 
 if __name__ == "__main__":

From d85fd9ad7193da84111d2613c2a4b8444da87faf Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Mon, 27 Apr 2026 19:35:41 -0700
Subject: [PATCH 3/7] B16 98% coverage

---
 docs/B16.adoc                  |  17 +-
 src/cover_float/testgen/B16.py | 362 +++++++++++++++++++--------------
 2 files changed, 220 insertions(+), 159 deletions(-)

diff --git a/docs/B16.adoc b/docs/B16.adoc
index 8463c1f..ed1320c 100644
--- a/docs/B16.adoc
+++ b/docs/B16.adoc
@@ -41,6 +41,8 @@ Cancellation occurs when the most significant digits of two operands subtract to
 
 === Design Choices
 
+. If `a_exp` and `b_exp` are both negative, their sum must not fall below the lower exponent bound. This means that `a_exp + b_exp > min_exp`.
+. If `a_exp` and `b_exp` are both positive, their sum must not exceed the upper exponent bound. This means that `a_exp + b_exp < max_exp`.
 . Even though Aharoni implied that `a_exp` and `b_exp` can be different, we defined `a_exp` and `b_exp` to be the same for most cases to make controlling cancellation easier. Since we're only interested in the difference between the exponent of the operands and the intermediate result, the difference between `a_exp` and `b_exp` should not be of main concern.
 . Mantissas for `d = -p, 0, 1` are not randomly generated but are selected based on obvious cases that produce that specific `d`. Again, since we're only interested in the exponents, we should not be concerned with generating all mantissas randomly.
 
@@ -49,9 +51,22 @@ Cancellation occurs when the most significant digits of two operands subtract to
 All explanation in this document will be done with one instead of both operations because the only difference will be sign assignments. All exponents used in the examples are unbiased exponents.
 
 == General Procedure
-
+We need to ensure that the product exponent $(a_{raw} + b_{raw})$ is high enough that even at the most extreme cancellation $(d = -(2p+1))$, the result stays above the subnormal floor ($min\_raw$). Conversely, it must be low enough that at $d=1$, we don't overflow $max\_raw$.
 d = -2(p+1)
 
+Two situations: ab_exp is max, or c_exp is max. When we multiply a, b, the intermediate product is 2p bits wide
+
+product of a and b cannot be too different from desired, otherwise c would dominate and cancellation would be 0
+
+Unlike B2, we cannot use softfloat to generate the desired result, because the product ab has 2p bits of precision and softfloat would round it to p bits.
+
+For single precision, the `c` only has `m` bits of precision (23 bits for f32). It can only cancel the top $m$ bits of the product $A \times B$. If you want a cancellation depth of 40, bits 24 through 39 of $A \times B$ must be exactly zero. If we use purely random mantissas for $A$ and $B$, their cross-multiplication will fill that gap with random noise, $C$ won't be able to reach deep enough to cancel it, and your depth will permanently collapse back to $\approx -24$.
+
+* Case 1: d < 2m, then we have to generate subnormal numbers, so a_raw and b_raw both need to be the smallest exponent possible
+* Case 2: other d, current random generation we have
+* Case 3: d = 0: We make the exponent of c larger than ab_exp, so the result can be the same exp as c
+* Case 4: d = 1: Again we make the exponent of c larger than ab_exp, and force c to be the largest mantissa so the result definitely carries
+
 == Test Count Breakdown
 
 [cols="1,1,1,1,1,1,1",options="header"]
diff --git a/src/cover_float/testgen/B16.py b/src/cover_float/testgen/B16.py
index d80b713..8b757df 100644
--- a/src/cover_float/testgen/B16.py
+++ b/src/cover_float/testgen/B16.py
@@ -1,5 +1,21 @@
 """
 Angela Zheng (angela20061015@gmail.com)
+
+B16. Multiply-Add: Cancellation
+This model tests every possible value for cancellation.
+For the difference between the exponent of the intermediate result and the
+maximum between the exponents of the addend and the multiplication result,
+test all values in the range:
+ [-(2 * p + 1), 1].
+
+My plan:
+For each of the fmadd, fmsub, fnmadd, fnmsub operations:
+
+We must ensure that a_exp is the largest exponent of the three operands,
+because adding c alone can only cancel down to d = -p. So, randomly
+generate a_exp, then generate b_exp (probably negative) so that b_exp = d,
+set c_exp = a_exp + b_exp, and generate a_m, b_m, and c_m so that they
+cause neither a carry nor additional cancellation.
 """
 
 import random
@@ -9,7 +25,6 @@
 
 from cover_float.common.constants import (
     BIAS,
-    BIASED_EXP,
     EXPONENT_BITS,
     FLOAT_FMTS,
     MANTISSA_BITS,
@@ -17,184 +32,202 @@
     OP_FMSUB,
     OP_FNMADD,
     OP_FNMSUB,
-    ROUND_NEAR_EVEN,
+    OP_MUL,
+    UNBIASED_EXP,
+)
+from cover_float.common.util import (
+    decimal_components_to_hex,
+    generate_test_vector,
+    get_result_from_ref,
+    reproducible_hash,
 )
-from cover_float.common.util import reproducible_hash
 from cover_float.reference import run_and_store_test_vector
 
+OPS = [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]
+SOLVER_OPS = {
+    OP_FMADD: OP_FNMSUB,
+    OP_FMSUB: OP_FMSUB,
+    OP_FNMADD: OP_FNMADD,
+    OP_FNMSUB: OP_FMADD,
+}
+
+
+def extract_unbiased_exp(fp_hex: str, fmt: str) -> int:
+    bits = int(fp_hex, 16)
+    exp_bits = EXPONENT_BITS[fmt]
+    mant_bits = MANTISSA_BITS[fmt]
+    bias = BIAS[fmt]
+    exp_mask = (1 << exp_bits) - 1
+    exp = (bits >> mant_bits) & exp_mask
+    return exp - bias
+
+
+def generate_deep_cancel(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
+    bias = BIAS[fmt]
+    min_raw, max_raw = UNBIASED_EXP[fmt]
+
+    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
+
+    # Force Result to EXACTLY 0. Zero has an exponent of (min_raw - 1)
+    res_raw = min_raw - 1
+    res_m = 0
+    res_sign = 0
+    res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
+
+    # Calculate required product exponent to achieve depth d relative to 0
+    target_prod_exp = res_raw - d
+
+    a_raw_min = max(min_raw, target_prod_exp - max_raw)
+    a_raw_max = min(max_raw, target_prod_exp - min_raw)
+
+    if a_raw_min > a_raw_max:
+        return False
+
+    a_raw = random.randint(a_raw_min, a_raw_max)
+    b_raw = target_prod_exp - a_raw
+
+    # Keep mantissas 0 so A*B is exactly representable, guaranteeing the solver succeeds
+    a_m, b_m = 0, 0
+
+    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
+    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
+
+    try:
+        c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
+        vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
+        run_and_store_test_vector(vector, test_f, cover_f)
+        return True
+    except Exception:
+        return False
+
+
+# Maybe see whether ab_exp is greater than c_exp
+# force c_exp to be greater than ab_exp
+def generate_same_exp(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
+    m = MANTISSA_BITS[fmt]
+    bias = BIAS[fmt]
+    max_raw = UNBIASED_EXP[fmt][1]
+
+    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
+    # res_sign = a_sign ^ b_sign
+    c_sign = (a_sign ^ b_sign) if op in [OP_FMADD, OP_FNMADD] else not (a_sign ^ b_sign)
+
+    # We have to make sure c_exp is the greatest, so a_exp and b_exp must both be positive
+    target_prod_exp = random.randint(0, max_raw)
+    a_raw = random.randint(0, target_prod_exp)
+    b_raw = target_prod_exp - a_raw
+    c_raw = a_raw + b_raw + 3
+
+    a_m, b_m, c_m = random.getrandbits(m), random.getrandbits(m), random.getrandbits(m)
+
+    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
+    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
+
+    # prod_hex = get_result_from_ref(OP_MUL, a_hex, b_hex, "0", fmt)
+    # ab_exp = extract_unbiased_exp(prod_hex, fmt)
+    # c_raw = ab_exp + 2
+
+    # if not (min_raw <= res_raw <= max_raw):
+    #     return False
+
+    # res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
+
+    # c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
+    c_hex = decimal_components_to_hex(fmt, c_sign, c_raw + bias, c_m)
 
-def decimalComponentsToHex(fmt: str, sign: int, biased_exp: int, mantissa: int) -> str:
-    """Converts binary fp components into a 32-character padded hex string."""
-    b_sign = f"{sign:01b}"
-    b_exp = f"{biased_exp:0{EXPONENT_BITS[fmt]}b}"
-    b_man = f"{mantissa:0{MANTISSA_BITS[fmt]}b}"
-    bits = b_sign + b_exp + b_man
-    return f"{int(bits, 2):032X}"
-
-
-# def makeFMATestVectors(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> None:
-#     m = MANTISSA_BITS[fmt]
-#     p = m + 1
-#     bias = BIAS[fmt]
-#     min_exp, max_exp = BIASED_EXP[fmt]
-
-#     # Align product exponent with C
-#     a_exp = random.randint(bias, max_exp - 2)
-#     test = max_exp - a_exp
-#     print(test)
-#     b_exp = random.randint(0, test)
-#     prod_exp = a_exp + b_exp - bias
-
-#     # Determine signs for subtractive interaction
-#     a_sign = 0
-#     b_sign = 0
-#     prod_sign = a_sign ^ b_sign
-#     if op in [OP_FMADD, OP_FNMSUB]:
-#         c_sign = 1 - prod_sign
-#     else:
-#         c_sign = prod_sign
-
-#     # d = -(2p + 1) (Maximum Cancellation) ---
-#     if d == -(2 * p + 1):
-#         a_m, b_m = 0, 0
-#         c_m = (1 << m) - 1
-#         c_exp = prod_exp - 1
-
-#     # d = 1 (Carry) ---
-#     elif d == 1:
-#         c_sign = prod_sign if op in [OP_FMADD, OP_FNMSUB] else 1 - prod_sign
-#         a_m, b_m = (1 << m) - 1, (1 << m) - 1
-#         c_m = (1 << m) - 1
-#         c_exp = prod_exp
-
-#     # d = 0 (No Cancellation) ---
-#     elif d == 0:
-#         a_m, b_m = (1 << m) - 1, 0
-#         c_m = ((1 << m) - 1) & ~1
-#         c_exp = prod_exp - 1
-
-#     # other cases
-#     else:
-#         k = -d
-#         c_exp = prod_exp
-#         a_m = random.getrandbits(m)
-#         b_m = 0
-
-#         # Use max(0, ...) to prevent negative shift counts
-#         shift_amt = max(0, m - k + 1)
-#         prefix = (a_m >> shift_amt) << shift_amt
-#         c_m_prefix = prefix
-
-#         # only place bit if it's within the m-bit range
-#         bit_pos = m - k
-#         if bit_pos >= 0:
-#             diff_bit = 1 << bit_pos
-#         else:
-#             # If k > m, the difference happens in the internal "lower"
-#             # product bits. For the m-bit C, we just set a small value.
-#             diff_bit = 0
-
-#         # prevent negative shift in getrandbits
-#         tail_len = max(0, m - k - 2)
-#         if tail_len > 0:
-#             a_tail = (1 << (tail_len)) | random.getrandbits(tail_len)
-#             c_tail = random.getrandbits(tail_len)
-#         else:
-#             a_tail = 0
-#             c_tail = 0
-
-#         mask = (1 << shift_amt) - 1
-#         a_m = (a_m & ~mask) | diff_bit | a_tail
-#         c_m = c_m_prefix | c_tail
-
-#     a = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
-#     b = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
-#     c = decimalComponentsToHex(fmt, c_sign, c_exp, c_m)
-
-#     run_and_store_test_vector(
-#         f"{OP_FMADD}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-#     )
-#     run_and_store_test_vector(
-#         f"{OP_FMSUB}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-#     )
-#     run_and_store_test_vector(
-#         f"{OP_FNMADD}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-#     )
-#     run_and_store_test_vector(
-#         f"{OP_FNMSUB}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00", test_f, cover_f
-#     )
-
-
-def makeFMATestVectors(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> None:
+    prod_hex = get_result_from_ref(op, a_hex, b_hex, c_hex, fmt)
+    prod_exp = extract_unbiased_exp(prod_hex, fmt)
+    if prod_exp != c_raw:
+        return False
+    vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
+    run_and_store_test_vector(vector, test_f, cover_f)
+    return True
+
+
+def generate_carry(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
     m = MANTISSA_BITS[fmt]
-    p = m + 1
     bias = BIAS[fmt]
-    min_exp, max_exp = BIASED_EXP[fmt]
+    max_raw = UNBIASED_EXP[fmt][1]
+
+    a_m, b_m = random.getrandbits(m), random.getrandbits(m)
+    c_m = (1 << m) - 1
 
-    a_exp = random.randint(min_exp, max_exp - 2)
-    b_exp = random.randint(min_exp, max_exp - 2)
+    # Exponents are guarded against overflow by dividing max exponent by two to
+    # account for that the intermediate product exponent is a_raw + b_raw. But as d = 0, both
+    # a_exp and b_exp need to be positive to make C greatest
+    a_raw = random.randint(0, (max_raw - 1) // 2)
+    b_raw = random.randint(0, (max_raw - 1) // 2)
+    c_raw = a_raw + b_raw + 1
 
-    a_m = random.getrandbits(m)
-    b_m = random.getrandbits(m)
+    a_sign = random.randint(0, 1)
+    b_sign = a_sign
+    c_sign = 0 if op in [OP_FMADD, OP_FNMADD] else 1
 
-    # Approximate product exponent
-    prod_exp = a_exp + b_exp - bias
+    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
+    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
+    c_hex = decimal_components_to_hex(fmt, c_sign, c_raw + bias, c_m)
 
-    prod_m = random.getrandbits(m)
+    vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
+    run_and_store_test_vector(vector, test_f, cover_f)
+    return True
 
-    a_sign = random.getrandbits(1)
-    b_sign = random.getrandbits(1)
-    prod_sign = a_sign ^ b_sign
 
-    c_sign = 1 - prod_sign if op in [OP_FMADD, OP_FNMSUB] else prod_sign
+def generate_standard(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
+    m = MANTISSA_BITS[fmt]
+    bias = BIAS[fmt]
+    min_raw, max_raw = UNBIASED_EXP[fmt]
 
-    # control cancellation depth:
-    k = -d  # number of bits that cancel
+    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
+    res_sign = random.randint(0, 1)
 
-    c_exp = prod_exp
+    valid_min_prod = max(min_raw, (min_raw - 1) - d)
+    valid_max_prod = min(max_raw, max_raw - d)
 
-    if d == 1:
-        # Carry case
-        c_sign = prod_sign
-        c_m = (1 << m) - 1
+    if valid_min_prod > valid_max_prod:
+        return False
 
-    elif d == -(2 * p + 1):
-        # Maximum cancellation → almost equal
-        c_exp = prod_exp - 1
-        c_m = prod_m ^ 1  # tiny diff
+    target_prod_exp = random.randint(valid_min_prod, valid_max_prod)
 
+    a_raw_min = max(min_raw, target_prod_exp - max_raw)
+    a_raw_max = min(max_raw, target_prod_exp - min_raw)
+
+    if a_raw_min > a_raw_max:
+        return False
+
+    a_raw = random.randint(a_raw_min, a_raw_max)
+    b_raw = target_prod_exp - a_raw
+
+    if d < -m:
+        target_depth = abs(d)
+        sum_kj = max(0, 2 * m - target_depth)
+        k = sum_kj // 2
+        j = sum_kj - k
+        a_m = 1 << k
+        b_m = 1 << j
+        res_m = 0
     else:
-        # General case
-        if k <= m:
-            # Match top k bits
-            prefix_mask = ((1 << k) - 1) << (m - k)
-            prefix = prod_m & prefix_mask
+        a_m, b_m, res_m = random.getrandbits(m), random.getrandbits(m), random.getrandbits(m)
 
-            # Flip next bit
-            flip_pos = m - k - 1
-            flip_bit = 1 << flip_pos if flip_pos >= 0 else 0
+    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
+    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
 
-            # Random tail
-            tail_len = max(0, flip_pos)
-            tail = random.getrandbits(tail_len) if tail_len > 0 else 0
+    prod_hex = get_result_from_ref(OP_MUL, a_hex, b_hex, "0", fmt)
+    ab_exp = extract_unbiased_exp(prod_hex, fmt)
 
-            c_m = prefix | flip_bit | tail
+    res_raw = ab_exp + d
 
-        else:
-            # Deep cancellation beyond mantissa
-            c_m = prod_m
-            c_exp = prod_exp - (k - m)
+    if not (min_raw - 1 <= res_raw <= max_raw):
+        return False
 
-    a = decimalComponentsToHex(fmt, a_sign, a_exp, a_m)
-    b = decimalComponentsToHex(fmt, b_sign, b_exp, b_m)
-    c = decimalComponentsToHex(fmt, c_sign, c_exp, c_m)
+    res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
 
-    for opcode in [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]:
-        run_and_store_test_vector(
-            f"{opcode}_{ROUND_NEAR_EVEN}_{a}_{b}_{c}_{fmt}_{32 * '0'}_{fmt}_00",
-            test_f,
-            cover_f,
-        )
+    try:
+        c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
+        vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
+        run_and_store_test_vector(vector, test_f, cover_f)
+        return True
+    except Exception:
+        return False
 
 
 def main() -> None:
@@ -204,11 +237,24 @@ def main() -> None:
     ):
         for fmt in FLOAT_FMTS:
             p = MANTISSA_BITS[fmt] + 1
-            # Range adjusted for FMA: [-(2*p + 1), 1]
             for d in range(-(2 * p + 1), 2):
-                for op in [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]:
+                for op in OPS:
                     seed(reproducible_hash(f"{fmt}_b16_{d}_{op}"))
-                    makeFMATestVectors(fmt, d, op, test_f, cover_f)
+
+                    max_retries = 5
+                    for _ in range(max_retries):
+                        success = False
+
+                        if d <= -(2 * p - 1):
+                            success = generate_deep_cancel(fmt, d, op, test_f, cover_f)
+                        elif d == 0:
+                            success = generate_same_exp(fmt, d, op, test_f, cover_f)
+                        elif d == 1:
+                            success = generate_carry(fmt, d, op, test_f, cover_f)
+                        else:
+                            success = generate_standard(fmt, d, op, test_f, cover_f)
+                        if success:
+                            break
 
 
 if __name__ == "__main__":

From 93ca7ae25783a9c80c1ad97daed8cf6447f95cbc Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Mon, 27 Apr 2026 19:35:58 -0700
Subject: [PATCH 4/7] Makefile add B16

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3eca004..89349a1 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ ifeq ($(AGGRESSIVENESS), 0)
 	COVER_FLOAT_FLAGS += --partial-output
 endif
 
-MODELS := B1 B2 B3 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15 B20 B21 B25 B26 B27 B29
+MODELS := B1 B2 B3 B6 B7 B8 B9 B10 B11 B12 B13 B14 B15 B16 B20 B21 B25 B26 B27 B29
 
 .PHONY: build clean sim all $(MODELS)
 

From cb87ab07791de3af7a8f82f79342100e9998d172 Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Tue, 28 Apr 2026 21:09:35 -0700
Subject: [PATCH 5/7] B16 100% coverage

---
 docs/B16.adoc                  |  82 +++----
 src/cover_float/testgen/B16.py | 393 +++++++++++++++------------------
 2 files changed, 224 insertions(+), 251 deletions(-)

diff --git a/docs/B16.adoc b/docs/B16.adoc
index ed1320c..3184315 100644
--- a/docs/B16.adoc
+++ b/docs/B16.adoc
@@ -9,15 +9,18 @@ Aharoni et al.
 
 === Description
 
-This model tests every possible value for cancellation. For the difference between the exponent of the intermediate result and the maximum between the exponent of the intermediate result and the maximum between the exponents of the inputs, test all values in the range: [-p, +1].
+B16. Multiply-Add: Cancellation
+This model tests every possible value for cancellation.
+For the difference between the exponent of the intermediate result and the
+maximum between the exponents of the addend and the multiplication result,
+test all values in the range:
+ [-(2 * p + 1), 1].
 
-That is, for two operands `a` and `b` with exponents `a_exp` and `b_exp` and a intermediate result exponent `exp`, we test all cases where `exp` - max(`a_exp`, `b_exp`) takes each value in [-p, +1].
-
-*Number of tests generated:* 438 (4E2)
+*Number of tests generated:* 1732 (2E3)
 
 *Precisions Supported:* `BF_16`, `FP_16`, `FP_32`, `FP_64`, `FP_128`
 
-*Operations Supported:* Fmadd, fmsub, fnmadd, fnmsub
+*Operations Supported:* fmadd, fmsub, fnmadd, fnmsub
 
 == Definitions
 
@@ -60,58 +63,55 @@ product of a and b cannot be too different from desired, otherwise c would domin
 
 Unlike B2, we cannot use softfloat to generate desired result because ab has 2p precision and softfloat will remove that and round it to p.
 
-For single precision, the `c` only has `m` bits of precision (23 bits for f32). It can only cancel the top $m$ bits of the product $A \times B$. If you want a cancellation depth of 40, bits 24 through 39 of $A \times B$ must be exactly zero. If we use purely random mantissas for $A$ and $B$, their cross-multiplication will fill that gap with random noise, $C$ won't be able to reach deep enough to cancel it, and your depth will permanently collapse back to $\approx -24$.
-
-Case 1: d < 2m, then we have to generate subnormal numbers, so a_raw and b_raw both need to be the smallest exponent possible
-Case 2: other d, current random generation we have
-Case 3: d = 0: We make the exponent of c larger than ab_exp, so the result can be the same exp as c
-Case 4: d = 1: Again we make the exponent of c larger than ab_exp, and force c to be the largest mantissa so the result definitely carries
+For single precision, the addend `c` has only `m` bits of precision (23 bits for f32). It can only cancel the top `m` bits of the product `a*b`. If you want a cancellation depth of 40, bits 24 through 39 of `a*b` must be exactly zero. If we use purely random mantissas for `a` and `b`, their cross-multiplication will fill that gap with random noise, `c` won't be able to reach deep enough to cancel it, and the cancellation depth will permanently collapse back to around -24.
+
+Case 1: d < -2m, then we have to generate subnormal numbers, so a_raw and b_raw both need to be the smallest exponent possible
+Case 2: d < -m, meaning the cancellation is deeper than the precision. Setting all operands to powers of 2 is an easy way to achieve this
+Case 3: shallow cancellation, d = -6, -5, -4, -3, -2, -1 (values are experimentally determined). These bins are handled separately because shallow bins are very sensitive to
+1. whether c_exp becomes larger than ab_exp
+2. whether the final rounded result carries up one exponent
+3. whether cancellation becomes slightly deeper than intended
+4. whether product rounding changes ab_exp
+Case 4: Other d, random generation
+Case 5: d = 0. We make the exponent of c larger than ab_exp, so the result can be the same exp as c
+Case 6: d = 1. Again we make the exponent of c larger than ab_exp, and force c to have the largest mantissa so the result definitely carries
+
+== Specific Test Procedure
+Case 3: for d = -6, -5, -4, -3, -2, -1, the process looks like
+1. Generate a and b
+2. Compute the real rounded product exponent ab_exp.
+3. Choose a desired result with exponent ab_exp + d
+4. Solve for c
+5. Reject unless c_exp == ab_exp
+6. Reject unless actual_b16_d(...) == d
 
 == Test Count Breakdown
 
-[cols="1,1,1,1,1,1,1",options="header"]
+Since each precision has tests for every d from -(2*p+1) to 1 inclusive, the number of test cases per precision per operation is 1 - (-(2*p+1)) + 1 = 2*p+3
+
+[cols="1,1,1",options="header"]
 |===
-| Precision | p | Case 1 (`d = -p`) | Case 2 (all other `d`'s) | Case 3 (`d = 0`) | Case 4 (`d = 1`) | Total
+| Precision | p | Total (2*p+3)
 
 | BF_16
 | 8
-| 1
-| 7
-| 1
-| 1
-| 10
+| 19
 
 | FP_16
 | 11
-| 1
-| 10
-| 1
-| 1
-| 13
+| 25
 
 | FP_32
 | 24
-| 1
-| 23
-| 1
-| 1
-| 26
+| 51
 
 | FP_64
 | 53
-| 1
-| 52
-| 1
-| 1
-| 55
+| 109
 
 | FP_128
 | 113
-| 1
-| 112
-| 1
-| 1
-| 115
+| 229
 |===
 
 === Overall Test Count
@@ -121,8 +121,8 @@ Case 4: d = 1: Again we make the exponent of c larger than ab_exp, and force c t
 | Description | Value
 
 | Total cancellation tests
-| 219
+| 433
 
-| Accounting for Add and Subtract (×2)
-| 438
+| Accounting for FMADD, FMSUB, FNMADD, and FNMSUB (×4)
+| 1732
 |===
diff --git a/src/cover_float/testgen/B16.py b/src/cover_float/testgen/B16.py
index 8b757df..6cb2ac4 100644
--- a/src/cover_float/testgen/B16.py
+++ b/src/cover_float/testgen/B16.py
@@ -1,28 +1,18 @@
 """
 Angela Zheng (angela20061015@gmail.com)
 
-B16. Multiply-Add: Cancellation
-This model tests every possible value for cancellation.
-For the difference between the exponent of the intermediate result and the
-maximum between the exponents of the addend and the multiplication result,
-test all values in the range:
- [-(2 * p + 1), 1].
-
-My plan:
-For each of the fmadd, fmsub, fnmadd, fnmsub operations:
-
-We must ensure that a_exp is the largest exp out of the
-three operands because with +c alone would only be able to cancel -p. So,
-randomly generate a_exp, and generate b_exp (would probably be negative) so that b_exp = d
-and make c_exp = a_exp + b_exp and generate a_m, b_m, and c_m so that they don't result in carry or
-more cancellation.
+Created: 4/28/2026
+Last Modified: 4/28/2026
 """
 
+import logging
 import random
+from dataclasses import dataclass
 from pathlib import Path
 from random import seed
-from typing import TextIO
+from typing import TextIO, cast
 
+import cover_float.common.log as log
 from cover_float.common.constants import (
     BIAS,
     EXPONENT_BITS,
@@ -42,6 +32,9 @@
     reproducible_hash,
 )
 from cover_float.reference import run_and_store_test_vector
+from cover_float.testgen.model import register_model
+
+logger: log.ModelLogger = cast(log.ModelLogger, logging.getLogger("B16"))
 
 OPS = [OP_FMADD, OP_FMSUB, OP_FNMADD, OP_FNMSUB]
 SOLVER_OPS = {
@@ -52,210 +45,190 @@
 }
 
 
-def extract_unbiased_exp(fp_hex: str, fmt: str) -> int:
-    bits = int(fp_hex, 16)
-    exp_bits = EXPONENT_BITS[fmt]
-    mant_bits = MANTISSA_BITS[fmt]
-    bias = BIAS[fmt]
-    exp_mask = (1 << exp_bits) - 1
-    exp = (bits >> mant_bits) & exp_mask
-    return exp - bias
-
-
-def generate_deep_cancel(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
-    bias = BIAS[fmt]
-    min_raw, max_raw = UNBIASED_EXP[fmt]
-
-    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
-
-    # Force Result to EXACTLY 0. Zero has an exponent of (min_raw - 1)
-    res_raw = min_raw - 1
-    res_m = 0
-    res_sign = 0
-    res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
-
-    # Calculate required product exponent to achieve depth d relative to 0
-    target_prod_exp = res_raw - d
-
-    a_raw_min = max(min_raw, target_prod_exp - max_raw)
-    a_raw_max = min(max_raw, target_prod_exp - min_raw)
-
-    if a_raw_min > a_raw_max:
-        return False
-
-    a_raw = random.randint(a_raw_min, a_raw_max)
-    b_raw = target_prod_exp - a_raw
-
-    # Keep mantissas 0 so A*B is exactly representable, guaranteeing the solver succeeds
-    a_m, b_m = 0, 0
-
-    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
-    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
-
-    try:
-        c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
-        vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
-        run_and_store_test_vector(vector, test_f, cover_f)
+@dataclass(frozen=True)
+class FloatFormat:
+    name: str
+    m_bits: int
+    e_bits: int
+    bias: int
+    min_exp: int
+    max_exp: int
+
+    @property
+    def p(self) -> int:
+        return self.m_bits + 1
+
+    @classmethod
+    def from_name(cls, name: str) -> "FloatFormat":
+        min_e, max_e = UNBIASED_EXP[name]
+        return cls(name, MANTISSA_BITS[name], EXPONENT_BITS[name], BIAS[name], min_e, max_e)
+
+    def to_hex(self, sign: int, exp: int, mant: int) -> str:
+        return decimal_components_to_hex(self.name, sign, exp + self.bias, mant)
+
+    def get_exp(self, fp_hex: str) -> int:
+        bits = int(fp_hex, 16)
+        return ((bits >> self.m_bits) & ((1 << self.e_bits) - 1)) - self.bias
+
+
+class B16Generator:
+    def __init__(self, fmt: str, test_f: TextIO, cover_f: TextIO) -> None:
+        self.f = FloatFormat.from_name(fmt)
+        self.test_f, self.cover_f = test_f, cover_f
+
+    def get_op_details(self, op: str, a: str, b: str, c: str) -> tuple[int, int]:
+        """Helper to get actual product exp and final result exp."""
+        p_hex = get_result_from_ref(OP_MUL, a, b, "0", self.f.name)
+        r_hex = get_result_from_ref(op, a, b, c, self.f.name)
+        return self.f.get_exp(p_hex), self.f.get_exp(r_hex)
+
+    def store(self, op: str, a: str, b: str, c: str) -> None:
+        v = generate_test_vector(op, int(a, 16), int(b, 16), int(c, 16), self.f.name, self.f.name)
+        run_and_store_test_vector(v, self.test_f, self.cover_f)
+
+    def get_random_split(self, target_exp: int) -> tuple[int, int]:
+        """Splits a target product exponent into two valid operand exponents."""
+        lo, hi = self.f.min_exp, self.f.max_exp
+        a_min, a_max = max(lo, target_exp - hi), min(hi, target_exp - lo)
+        a = random.randint(a_min, a_max)
+        return a, target_exp - a
+
+    def generate_same_exp(self, d: int, op: str) -> bool:
+        f, m = self.f, self.f.m_bits
+        a_s, b_s = random.randint(0, 1), random.randint(0, 1)
+        c_s = (a_s ^ b_s) if op in [OP_FMADD, OP_FNMADD] else (a_s ^ b_s) ^ 1
+        target_p_exp = random.randint(0, f.max_exp)
+        a_r = random.randint(0, target_p_exp)
+        b_r = target_p_exp - a_r
+        c_r = a_r + b_r + 3
+        a_m, b_m, c_m = random.getrandbits(m), random.getrandbits(m), random.getrandbits(m)
+        a_h = f.to_hex(a_s, a_r, a_m)
+        b_h = f.to_hex(b_s, b_r, b_m)
+        c_h = f.to_hex(c_s, c_r, c_m)
+        r_hex = get_result_from_ref(op, a_h, b_h, c_h, f.name)
+        r_exp = f.get_exp(r_hex)
+        if r_exp != c_r:
+            return False
+        self.store(op, a_h, b_h, c_h)
         return True
-    except Exception:
-        return False
-
-
-# Maybe see whether ab_exp is greater than c_exp
-# force c_exp to be greater than ab_exp
-def generate_same_exp(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
-    m = MANTISSA_BITS[fmt]
-    bias = BIAS[fmt]
-    max_raw = UNBIASED_EXP[fmt][1]
-
-    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
-    # res_sign = a_sign ^ b_sign
-    c_sign = (a_sign ^ b_sign) if op in [OP_FMADD, OP_FNMADD] else not (a_sign ^ b_sign)
-
-    # We have to make sure c_exp is the greatest, so a_exp and b_exp must both be positive
-    target_prod_exp = random.randint(0, max_raw)
-    a_raw = random.randint(0, target_prod_exp)
-    b_raw = target_prod_exp - a_raw
-    c_raw = a_raw + b_raw + 3
-
-    a_m, b_m, c_m = random.getrandbits(m), random.getrandbits(m), random.getrandbits(m)
-
-    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
-    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
-
-    # prod_hex = get_result_from_ref(OP_MUL, a_hex, b_hex, "0", fmt)
-    # ab_exp = extract_unbiased_exp(prod_hex, fmt)
-    # c_raw = ab_exp + 2
-
-    # if not (min_raw <= res_raw <= max_raw):
-    #     return False
-
-    # res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
-
-    # c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
-    c_hex = decimal_components_to_hex(fmt, c_sign, c_raw + bias, c_m)
-
-    prod_hex = get_result_from_ref(op, a_hex, b_hex, c_hex, fmt)
-    prod_exp = extract_unbiased_exp(prod_hex, fmt)
-    if prod_exp != c_raw:
-        return False
-    vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
-    run_and_store_test_vector(vector, test_f, cover_f)
-    return True
-
-
-def generate_carry(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
-    m = MANTISSA_BITS[fmt]
-    bias = BIAS[fmt]
-    max_raw = UNBIASED_EXP[fmt][1]
-
-    a_m, b_m = random.getrandbits(m), random.getrandbits(m)
-    c_m = (1 << m) - 1
-
-    # Exponents are guarded against overflow by dividing max exponent by two to
-    # account for that the intermediate product exponent is a_raw + b_raw. But as d = 0, both
-    # a_exp and b_exp need to be positive to make C greatest
-    a_raw = random.randint(0, (max_raw - 1) // 2)
-    b_raw = random.randint(0, (max_raw - 1) // 2)
-    c_raw = a_raw + b_raw + 1
-
-    a_sign = random.randint(0, 1)
-    b_sign = a_sign
-    c_sign = 0 if op in [OP_FMADD, OP_FNMADD] else 1
-
-    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
-    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
-    c_hex = decimal_components_to_hex(fmt, c_sign, c_raw + bias, c_m)
 
-    vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
-    run_and_store_test_vector(vector, test_f, cover_f)
-    return True
-
-
-def generate_standard(fmt: str, d: int, op: str, test_f: TextIO, cover_f: TextIO) -> bool:
-    m = MANTISSA_BITS[fmt]
-    bias = BIAS[fmt]
-    min_raw, max_raw = UNBIASED_EXP[fmt]
-
-    a_sign, b_sign = random.randint(0, 1), random.randint(0, 1)
-    res_sign = random.randint(0, 1)
-
-    valid_min_prod = max(min_raw, (min_raw - 1) - d)
-    valid_max_prod = min(max_raw, max_raw - d)
-
-    if valid_min_prod > valid_max_prod:
+    def generate_shallow_cancel(self, d: int, op: str) -> bool:
+        f, m = self.f, self.f.m_bits
+        a_s, b_s = random.randint(0, 1), random.randint(0, 1)
+
+        # pick a safe product exponent away from underflow/overflow
+        target_p_exp = random.randint(f.min_exp + 10, f.max_exp - 10)
+        a_min = max(f.min_exp + 5, target_p_exp - (f.max_exp - 5))
+        a_max = min(f.max_exp - 5, target_p_exp - (f.min_exp + 5))
+        a_r = random.randint(a_min, a_max)
+        b_r = target_p_exp - a_r
+
+        # use non-extreme mantissas to avoid accidental exponent carry in a*b
+        a_m = random.randint(1 << (m - 2), (1 << m) - 1)
+        b_m = random.randint(1 << (m - 2), (1 << m) - 1)
+        a_h = f.to_hex(a_s, a_r, a_m)
+        b_h = f.to_hex(b_s, b_r, b_m)
+        p_exp = self.f.get_exp(get_result_from_ref(OP_MUL, a_h, b_h, "0", f.name))
+        res_raw = p_exp + d
+
+        # pick a mid-range result mantissa so rounding is less likely to change exponent
+        r_s = random.randint(0, 1)
+        res_m = random.randint(1 << (m - 2), (1 << (m - 1)) - 1)
+        res_h = f.to_hex(r_s, res_raw, res_m)
+        c_h = get_result_from_ref(SOLVER_OPS[op], a_h, b_h, res_h, f.name)
+        c_exp = f.get_exp(c_h)
+
+        # for shallow cancellation, c should be aligned with the product
+        if c_exp != p_exp:
+            return False
+        # Final validation
+        p_exp2, r_exp = self.get_op_details(op, a_h, b_h, c_h)
+        if (r_exp - max(p_exp2, c_exp)) == d:
+            self.store(op, a_h, b_h, c_h)
+            return True
         return False
 
-    target_prod_exp = random.randint(valid_min_prod, valid_max_prod)
-
-    a_raw_min = max(min_raw, target_prod_exp - max_raw)
-    a_raw_max = min(max_raw, target_prod_exp - min_raw)
-
-    if a_raw_min > a_raw_max:
-        return False
-
-    a_raw = random.randint(a_raw_min, a_raw_max)
-    b_raw = target_prod_exp - a_raw
-
-    if d < -m:
-        target_depth = abs(d)
-        sum_kj = max(0, 2 * m - target_depth)
-        k = sum_kj // 2
-        j = sum_kj - k
-        a_m = 1 << k
-        b_m = 1 << j
-        res_m = 0
-    else:
-        a_m, b_m, res_m = random.getrandbits(m), random.getrandbits(m), random.getrandbits(m)
-
-    a_hex = decimal_components_to_hex(fmt, a_sign, a_raw + bias, a_m)
-    b_hex = decimal_components_to_hex(fmt, b_sign, b_raw + bias, b_m)
-
-    prod_hex = get_result_from_ref(OP_MUL, a_hex, b_hex, "0", fmt)
-    ab_exp = extract_unbiased_exp(prod_hex, fmt)
-
-    res_raw = ab_exp + d
-
-    if not (min_raw - 1 <= res_raw <= max_raw):
+    def generate_deep_cancel(self, d: int, op: str) -> bool:  # one attempt for difference d; True only if a vector was stored
+        f = self.f
+        a_s, b_s, r_s = random.randint(0, 1), random.randint(0, 1), random.randint(0, 1)  # random 0/1 sign values
+        res_raw, res_m, a_m, b_m = f.min_exp - 1, 0, 0, 0  # every mantissa argument is 0 on this path
+        target_p_exp = res_raw - d  # res_raw is only used here, to pick the product exponent to aim for
+        split = self.get_random_split(target_p_exp)
+        a_r, b_r = split  # presumably a/b exponents that combine to target_p_exp -- confirm in get_random_split
+        a_h, b_h = f.to_hex(a_s, a_r, a_m), f.to_hex(b_s, b_r, b_m)
+        p_exp = self.f.get_exp(get_result_from_ref(OP_MUL, a_h, b_h, "0", f.name))  # exponent of the reference OP_MUL result
+        res_h = f.to_hex(r_s, p_exp + d, res_m)  # requested result: exponent p_exp + d, mantissa 0
+        c_h = get_result_from_ref(SOLVER_OPS[op], a_h, b_h, res_h, f.name)  # solve for c via the SOLVER_OPS entry for op
+        p_exp, r_exp = self.get_op_details(op, a_h, b_h, c_h)  # overwrites the p_exp taken from the multiply above
+        c_exp = f.get_exp(c_h)
+        if (r_exp - max(p_exp, c_exp)) == d:  # keep the vector only when it reproduces the requested d
+            self.store(op, a_h, b_h, c_h)
+            return True
         return False
 
-    res_hex = decimal_components_to_hex(fmt, res_sign, res_raw + bias, res_m)
-
-    try:
-        c_hex = get_result_from_ref(SOLVER_OPS[op], a_hex, b_hex, res_hex, fmt)
-        vector = generate_test_vector(op, int(a_hex, 16), int(b_hex, 16), int(c_hex, 16), fmt, fmt)
-        run_and_store_test_vector(vector, test_f, cover_f)
-        return True
-    except Exception:
+    def generate(self, d: int, op: str) -> bool:  # dispatch on d; True only if a vector was stored
+        f, m = self.f, self.f.m_bits
+        a_s, b_s, r_s = random.randint(0, 1), random.randint(0, 1), random.randint(0, 1)  # random 0/1 sign values
+
+        if d <= -(2 * f.p - 1):
+            return self.generate_deep_cancel(d, op)
+        elif d in [-6, -5, -4, -3, -2, -1]:
+            return self.generate_shallow_cancel(d, op)
+        elif d == 0:
+            return self.generate_same_exp(d, op)
+        elif d == 1:  # need result > operands
+            a_raw, b_raw = random.randint(0, f.max_exp // 2), random.randint(0, f.max_exp // 2)
+            c_s = 0 if op in [OP_FMADD, OP_FNMADD] else 1
+            a_h = f.to_hex(a_s, a_raw, random.getrandbits(m))
+            b_h = f.to_hex(a_s, b_raw, random.getrandbits(m))  # NOTE(review): passes a_s, not b_s -- presumably so a*b is positive; confirm
+            c_h = f.to_hex(c_s, a_raw + b_raw + 1, (1 << m) - 1)  # exponent a_raw + b_raw + 1; third argument has all m bits set
+            self.store(op, a_h, b_h, c_h)  # NOTE(review): stored without the r_exp/d check the fall-through path does
+            return True
+        else:
+            valid_lo = max(f.min_exp, (f.min_exp - 1) - d)
+            valid_hi = min(f.max_exp, f.max_exp - d)
+            target_p_exp = random.randint(valid_lo, valid_hi)  # keeps target_p_exp + d within [min_exp - 1, max_exp]
+
+        # generate a and b
+        split = self.get_random_split(target_p_exp)
+        a_r, b_r = split
+
+        # special mantissas for deep cancellation
+        if d < -m:
+            target_depth = abs(d)
+            k = max(0, 2 * m - target_depth) // 2
+            a_m, b_m, res_m = 1 << k, 1 << (max(0, 2 * m - target_depth) - k), 0  # one set bit each; shifts sum to max(0, 2m - |d|)
+        else:
+            a_m, b_m, res_m = [random.getrandbits(m) for _ in range(3)]
+
+        a_h, b_h = f.to_hex(a_s, a_r, a_m), f.to_hex(b_s, b_r, b_m)
+
+        # solve for c
+        p_exp = self.f.get_exp(get_result_from_ref(OP_MUL, a_h, b_h, "0", f.name))
+        res_h = f.to_hex(r_s, p_exp + d, res_m)
+        c_h = get_result_from_ref(SOLVER_OPS[op], a_h, b_h, res_h, f.name)
+
+        p_exp, r_exp = self.get_op_details(op, a_h, b_h, c_h)  # overwrites the p_exp taken from the multiply above
+        c_exp = f.get_exp(c_h)
+
+        if (r_exp - max(p_exp, c_exp)) == d:  # keep the vector only when it reproduces the requested d
+            self.store(op, a_h, b_h, c_h)
+            return True
         return False
 
 
-def main() -> None:
+@register_model("B16")
+def main(test_f: TextIO, cover_f: TextIO) -> None:  # NOTE(review): test_f/cover_f are never used below; tf/cf are opened instead
     with (
-        Path("./tests/testvectors/B16_tv.txt").open("w") as test_f,
-        Path("./tests/covervectors/B16_cv.txt").open("w") as cover_f,
+        Path("./tests/testvectors/B16_tv.txt").open("w") as tf,
+        Path("./tests/covervectors/B16_cv.txt").open("w") as cf,
     ):
-        for fmt in FLOAT_FMTS:
-            p = MANTISSA_BITS[fmt] + 1
-            for d in range(-(2 * p + 1), 2):
+        for fmt_name in FLOAT_FMTS:
+            gen = B16Generator(fmt_name, tf, cf)  # one generator per format, writing to the two files opened above
+            for d in range(-(2 * gen.f.p + 1), 2):  # d runs from -(2p + 1) up to and including 1
+                retries = 15  # max attempts per (d, op); nothing is raised or logged if all of them fail
                 for op in OPS:
-                    seed(reproducible_hash(f"{fmt}_b16_{d}_{op}"))
-
-                    max_retries = 5
-                    for _ in range(max_retries):
-                        success = False
-
-                        if d <= -(2 * p - 1):
-                            success = generate_deep_cancel(fmt, d, op, test_f, cover_f)
-                        elif d == 0:
-                            success = generate_same_exp(fmt, d, op, test_f, cover_f)
-                        elif d == 1:
-                            success = generate_carry(fmt, d, op, test_f, cover_f)
-                        else:
-                            success = generate_standard(fmt, d, op, test_f, cover_f)
-                        if success:
+                    seed(reproducible_hash(f"{fmt_name}_b16_{d}_{op}"))  # seed derived from format name, d and op
+                    for _ in range(retries):
+                        if gen.generate(d, op):  # stop retrying at the first attempt that reports success
                             break
-
-
-if __name__ == "__main__":
-    main()

From 1030997f38a4c6df893805b2aebb7c4cfa4b08d1 Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Tue, 28 Apr 2026 21:14:36 -0700
Subject: [PATCH 6/7] todos

---
 docs/B16.adoc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/B16.adoc b/docs/B16.adoc
index 3184315..cdd5267 100644
--- a/docs/B16.adoc
+++ b/docs/B16.adoc
@@ -22,7 +22,7 @@ test all values in the range:
 
 *Operations Supported:* fmadd, fmsub, fnmadd, fnmsub
 
-== Definitions
+== Definitions (TODO: CHANGE UP DEFINITIONS)
 
 `a`:: Operand 1
 `b`:: Operand 2
@@ -38,18 +38,18 @@ test all values in the range:
 `max_exp`:: Maximum exponent value based on precision
 `min_exp`:: Minimum exponent value based on precision
 
-== Background
+== Background (TODO: NEED NEW BACKGROUND FOR FMAS)
 
 Cancellation occurs when the most significant digits of two operands subtract to zero, triggering a massive normalization effort as the hardware shifts out the many leading zeros. In high-performing floating point processors, a Leading Zero Anticipator (LZA) is used to predict the location of the most significant bit of the result to perform normalization. Generating operands that force specific cancellation depths thus tests the accuracy of the LZA, especially for the most difficult edge cases where the depth of cancellation is close to the precision.
 
-=== Design Choices
+=== Design Choices (TODO: UPDATE DESIGN CHOICES)
 
 . If a_exp and b_exp are both negative, their sum must not exceed the lower exponent bound. This means that a_exp + b_exp > min_exp
 . If a_exp and b_exp are both positive, their sum must not exceed the upper exponent bound. This means that a_exp + b_exp < max_exp
 . Even though Aharoni implied that `a_exp` and `b_exp` can be different, we defined `a_exp` and `b_exp` to be the same for most cases to make controlling cancellation easier. Since we're only interested in the difference between the exponent of the operands and the intermediate result, the difference between `a_exp` and `b_exp` should not be of main concern.
 . Mantissas for `d = -p, 0, 1` are not randomly generated but are selected based on obvious cases that produce that specific `d`. Again, since we're only interested in the exponents, we should not be concerned with generating all mantissas randomly.
 
-=== Notes
+=== Notes (TODO: CHANGE NOTES)
 
 All explanation in this document will be done with one instead of both operations because the only difference will be sign assignments. All exponents used in the examples are unbiased exponents.
 

From cf010692f670fcfe7ec9db8ae9b784797c8f087b Mon Sep 17 00:00:00 2001
From: Angela Zheng <angela20061015@gmail.com>
Date: Tue, 28 Apr 2026 21:18:56 -0700
Subject: [PATCH 7/7] Added some design choices

---
 docs/B16.adoc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/B16.adoc b/docs/B16.adoc
index cdd5267..a22ec48 100644
--- a/docs/B16.adoc
+++ b/docs/B16.adoc
@@ -46,16 +46,15 @@ Cancellation occurs when the most significant digits of two operands subtract to
 
 . If a_exp and b_exp are both negative, their sum must not exceed the lower exponent bound. This means that a_exp + b_exp > min_exp
 . If a_exp and b_exp are both positive, their sum must not exceed the upper exponent bound. This means that a_exp + b_exp < max_exp
-. Even though Aharoni implied that `a_exp` and `b_exp` can be different, we defined `a_exp` and `b_exp` to be the same for most cases to make controlling cancellation easier. Since we're only interested in the difference between the exponent of the operands and the intermediate result, the difference between `a_exp` and `b_exp` should not be of main concern.
-. Mantissas for `d = -p, 0, 1` are not randomly generated but are selected based on obvious cases that produce that specific `d`. Again, since we're only interested in the exponents, we should not be concerned with generating all mantissas randomly.
+. In principle, whether the product exponent or the addend exponent is the larger one should be random; in most cases we make the addend exponent the larger one, because that makes cancellation easier to control. Since we're only interested in the difference between the exponent of the operands and the intermediate result, this restriction should not be of main concern, but randomizing it could be a future improvement.
+. For extremely deep cancellation cases, the only possible result is zero.
 
 === Notes (TODO: CHANGE NOTES)
 
 All explanation in this document will be done with one instead of both operations because the only difference will be sign assignments. All exponents used in the examples are unbiased exponents.
 
 == General Procedure
-We need to ensure that the product exponent $(a_{raw} + b_{raw})$ is high enough that even at the most extreme cancellation $(d = -(2p+1))$, the result stays above the subnormal floor ($min\_raw$). Conversely, it must be low enough that at $d=1$, we don't overflow $max\_raw$.
-d = -2(p+1)
+We need to ensure that the product exponent `a_raw + b_raw` is high enough that even at the most extreme cancellation, `d = -(2p+1)`, the result stays above the subnormal floor `min_raw`. Conversely, it must be low enough that at `d=1` we don't overflow `max_raw`.
 
 Two situations: ab_exp is max, or c_exp is max. When we multiply a, b, the intermediate product is 2p bits wide