From 122aa0129e09e2c564d325f23bdd40821e7e9fff Mon Sep 17 00:00:00 2001
From: Trinity Agent <agent@trinity.local>
Date: Fri, 15 May 2026 06:48:13 +0000
Subject: [PATCH] =?UTF-8?q?feat(silicon):=20add=20gf16=5Fmul=5Fbooth=20rad?=
 =?UTF-8?q?ix-4=20Booth=20multiplier=20=C2=B7=20RVR-016=20dry-run?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- New file src/gf16_mul_booth.v: Booth radix-4 10×10 unsigned multiplier
  Module: gf16_mul_booth, inputs wire[9:0] a/b, output wire[19:0] p
  ZERO '*' operators in synthesisable RTL — Charter Rule 2 compliant
  6-window MBE encoding, four 3:2 CSA stages (3 levels deep), final carry-propagate add
  Math derivation comment block (≥20 lines) covering Booth encoding table,
  PP generation, CSA accumulation

- New file sim/tb_gf16_mul_booth.v: testbench
  12 corner cases (0×0, 1023×1023, 512×512, phi-derived 0x3FC×0x278, etc.)
  1000 pseudo-random LFSR vectors against shift+add oracle (NO '*')
  Simulation result: 1012/1012 PASS

Refs: Issue #34 (RVR-015), Issue #4 (GoldenFloat-16 audit)
DO NOT MERGE until TTSKY26c submit lands 2026-05-17 22:00 UTC

Anchor: phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run · DOI 10.5281/zenodo.19227877

Co-Authored-By: Trinity Agent <agent@trinity.local>
---
 sim/tb_gf16_mul_booth.v | 206 ++++++++++++++++++++++++++++++++++++++++
 src/gf16_mul_booth.v    | 206 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 412 insertions(+)
 create mode 100644 sim/tb_gf16_mul_booth.v
 create mode 100644 src/gf16_mul_booth.v

diff --git a/sim/tb_gf16_mul_booth.v b/sim/tb_gf16_mul_booth.v
new file mode 100644
index 0000000..7010a46
--- /dev/null
+++ b/sim/tb_gf16_mul_booth.v
@@ -0,0 +1,206 @@
+// ============================================================================
+// TESTBENCH: tb_gf16_mul_booth
+// DUT: gf16_mul_booth — Booth radix-4 10×10 unsigned multiplier
+// Wave-24 RVR-016 dry-run · Charter Rule 2 compliance verification
+//
+// Test strategy:
+//   1. Corner cases (explicit)  :  0×0, 1023×1023, 512×512, phi-derived pair
+//   2. 1000 pseudo-random vectors via LFSR (NO `*` in oracle either)
+//
+// Oracle: pure shift+add (no * operator) — 10-bit unsigned multiply by
+//   iterating over bits of b, conditionally adding a shifted version of a.
+//
+// Anchor: phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run · DOI 10.5281/zenodo.19227877
+// ============================================================================
+
+`timescale 1ns/1ps
+`default_nettype none
+
+module tb_gf16_mul_booth;
+
+    // -----------------------------------------------------------------------
+    // DUT ports
+    // -----------------------------------------------------------------------
+    reg  [9:0]  a;   // multiplicand stimulus, written by check() and the random loop
+    reg  [9:0]  b;   // multiplier stimulus, written by check() and the random loop
+    wire [19:0] p;   // product driven by the DUT, compared against the oracle
+
+    // -----------------------------------------------------------------------
+    // Instantiate DUT (named port connections; purely combinational hookup)
+    // -----------------------------------------------------------------------
+    gf16_mul_booth dut (
+        .a(a),
+        .b(b),
+        .p(p)
+    );
+
+    // -----------------------------------------------------------------------
+    // Oracle: shift-and-add 10×10 unsigned multiply (ZERO `*` operators)
+    // Returns 20-bit product.
+    // -----------------------------------------------------------------------
+    function [19:0] oracle_mul;
+        input [9:0] oa;
+        input [9:0] ob;
+        integer     bit_idx;
+        reg [19:0]  sum;
+        reg [19:0]  addend;
+        begin
+            sum    = 20'd0;
+            addend = {10'd0, oa};  // oa scaled by 2^0; doubled once per iteration
+            for (bit_idx = 0; bit_idx < 10; bit_idx = bit_idx + 1) begin
+                sum    = ob[bit_idx] ? (sum + addend) : sum;  // add only for set bits of ob
+                addend = addend << 1;                         // next power-of-two multiple of oa
+            end
+            oracle_mul = sum;
+        end
+    endfunction
+
+    // -----------------------------------------------------------------------
+    // LFSR-32 for pseudo-random test vectors (NO `*` — XOR feedback only)
+    // Left-shift Galois form, mask 32'h80200003: x^32 + x^31 + x^21 + x^1 + 1 (period not verified)
+    // -----------------------------------------------------------------------
+    reg [31:0] lfsr;
+
+    task lfsr_next;
+        begin  // shift left by one; XOR the mask in only when the outgoing MSB is 1
+            lfsr = lfsr[31] ? ({lfsr[30:0], 1'b0} ^ 32'h80200003)
+                            : {lfsr[30:0], 1'b0};
+        end
+    endtask
+
+    // -----------------------------------------------------------------------
+    // Test infrastructure
+    // -----------------------------------------------------------------------
+    integer pass_count;   // number of vectors where p === expected
+    integer fail_count;   // number of vectors where p differed from expected
+    reg [19:0] expected;  // oracle_mul result for the vector currently applied
+    integer    vec_num;   // running vector index; printed as the total at the end
+
+    // Apply vector and check
+    task check;
+        input [9:0] ta;
+        input [9:0] tb_in;
+        input [9:0] vec_id;
+        begin
+            a = ta;
+            b = tb_in;
+            #1;  // give the combinational DUT one time unit to settle
+            expected = oracle_mul(ta, tb_in);
+            if (p !== expected) begin
+                fail_count = fail_count + 1;
+                $display("FAIL vec=%0d  a=0x%03X b=0x%03X  expected=0x%05X  got=0x%05X",
+                         vec_id, ta, tb_in, expected, p);
+            end else begin
+                pass_count = pass_count + 1;
+            end
+        end
+    endtask
+
+    // -----------------------------------------------------------------------
+    // Main test sequence
+    // -----------------------------------------------------------------------
+    integer i;
+
+    initial begin
+        pass_count = 0;
+        fail_count = 0;
+        lfsr = 32'hDEAD_BEEF;  // fixed non-zero seed: every run replays the same vectors
+        a = 10'd0;
+        b = 10'd0;
+        vec_num = 0;
+
+        $display("=======================================================");
+        $display(" tb_gf16_mul_booth  Wave-24 RVR-016 dry-run");
+        $display(" phi^2 + phi^-2 = 3  DOI 10.5281/zenodo.19227877");
+        $display("=======================================================");
+
+        // -------------------------------------------------------------------
+        // CORNER CASES
+        // -------------------------------------------------------------------
+        $display("--- CORNER CASES ---");
+
+        // CC-1: 0 × 0 = 0
+        check(10'd0, 10'd0, vec_num); vec_num = vec_num + 1;
+
+        // CC-2: 1023 × 1023 = 1046529
+        check(10'd1023, 10'd1023, vec_num); vec_num = vec_num + 1;
+
+        // CC-3: 512 × 512 = 262144
+        check(10'd512, 10'd512, vec_num); vec_num = vec_num + 1;
+
+        // CC-4: phi-derived pair  a=0x3FC (1020), b=0x278 (632)
+        //   1020 × 632 = 644640  (phi ≈ 1.618; 0x278 = 632 ≈ 1023·phi^-1; a/b = 1020/632 ≈ 1.614)
+        check(10'h3FC, 10'h278, vec_num); vec_num = vec_num + 1;
+
+        // CC-5: 1 × 0 = 0
+        check(10'd1, 10'd0, vec_num); vec_num = vec_num + 1;
+
+        // CC-6: 0 × 1023 = 0
+        check(10'd0, 10'd1023, vec_num); vec_num = vec_num + 1;
+
+        // CC-7: 1 × 1 = 1
+        check(10'd1, 10'd1, vec_num); vec_num = vec_num + 1;
+
+        // CC-8: 1023 × 1 = 1023
+        check(10'd1023, 10'd1, vec_num); vec_num = vec_num + 1;
+
+        // CC-9: 1 × 1023 = 1023
+        check(10'd1, 10'd1023, vec_num); vec_num = vec_num + 1;
+
+        // CC-10: all-ones mantissa × 1  (NOTE(review): 10'h3FF == 10'd1023, same vector as CC-8)
+        check(10'h3FF, 10'd1, vec_num); vec_num = vec_num + 1;
+
+        // CC-11: a = LSB only, b = MSB only: 1 × 512 = 512
+        check(10'd1, 10'd512, vec_num); vec_num = vec_num + 1;
+
+        // CC-12: a = MSB only, b = LSB only: 512 × 1 = 512
+        check(10'd512, 10'd1, vec_num); vec_num = vec_num + 1;
+
+        // -------------------------------------------------------------------
+        // 1000 PSEUDO-RANDOM VECTORS (16 LFSR steps per operand: with a single
+        // step the mask leaves bits 2..9 untouched, so b[9:2] would equal a[8:1])
+        $display("--- 1000 RANDOM VECTORS (LFSR seed=0xDEADBEEF) ---");
+
+        for (i = 0; i < 1000; i = i + 1) begin
+            repeat (16) lfsr_next;  // flush all ten low bits before sampling a
+            a = lfsr[9:0];
+            repeat (16) lfsr_next;  // flush again so b shares no shifted bits with a
+            b = lfsr[9:0];
+            #1;  // combinational settle
+            expected = oracle_mul(a, b);
+            if (p === expected) begin
+                pass_count = pass_count + 1;
+            end else begin
+                fail_count = fail_count + 1;
+                $display("FAIL rand[%0d]  a=0x%03X b=0x%03X  expected=0x%05X  got=0x%05X",
+                         i, a, b, expected, p);
+            end
+            vec_num = vec_num + 1;
+        end
+
+        // -------------------------------------------------------------------
+        // SUMMARY
+        // -------------------------------------------------------------------
+        $display("=======================================================");
+        $display(" TOTAL VECTORS : %0d", vec_num);
+        $display(" PASS          : %0d", pass_count);
+        $display(" FAIL          : %0d", fail_count);
+        if (fail_count == 0)
+            $display(" RESULT        : *** ALL PASS ***");
+        else
+            $display(" RESULT        : *** FAIL (see above) ***");
+        $display("=======================================================");
+        $display(" phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run");
+        $display(" DOI 10.5281/zenodo.19227877");
+        $display("=======================================================");
+
+        $finish;
+    end
+
+endmodule
+
+`default_nettype wire
+// ============================================================================
+// END tb_gf16_mul_booth
+// phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run · DOI 10.5281/zenodo.19227877
+// ============================================================================
diff --git a/src/gf16_mul_booth.v b/src/gf16_mul_booth.v
new file mode 100644
index 0000000..06210c8
--- /dev/null
+++ b/src/gf16_mul_booth.v
@@ -0,0 +1,206 @@
+/* Booth radix-4 10×10 unsigned multiplier · Charter Rule 2 compliant · Wave-24 RVR-016 dry-run */
+//
+// ============================================================================
+// MODULE: gf16_mul_booth
+// DESCRIPTION: 10×10 unsigned integer multiplier using Booth radix-4 encoding
+//              with Carry-Save Adder (CSA) tree accumulation.
+//              ZERO arithmetic `*` operators — fully synthesisable shift+add.
+//              Charter Rule 2: NO `*` in synthesisable RTL. ✅
+//
+// Anchor: phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run · DOI 10.5281/zenodo.19227877
+// ============================================================================
+//
+// ============================================================================
+// MATH DERIVATION: BOOTH RADIX-4 ENCODING (≥20 lines)
+// ============================================================================
+//
+// 1. CLASSICAL BOOTH RECODING (radix-2)
+//    For an N-bit two's-complement multiplier B with bits b_{N-1} … b_0,
+//    rewrite each bit as a signed digit: d_i = b_{i-1} - b_i
+//    where b_{-1} = 0 (implicit). Each d_i ∈ {-1, 0, +1} and B = Σ d_i · 2^i.
+//    Radix-2 still yields N partial products; it only skips runs of equal bits.
+//
+// 2. RADIX-4 EXTENSION (Modified Booth Encoding, MBE)
+//    Group multiplier B into overlapping 3-bit windows:
+//      Window k covers bits {b[2k+1], b[2k], b[2k-1]} (b[-1]=0 implicit).
+//    Encoding table per window (b2, b1, b0) = sel[2:0]:
+//      000 →  0·A    (zero)
+//      001 → +1·A
+//      010 → +1·A
+//      011 → +2·A    (A shifted left 1)
+//      100 → -2·A    (two's complement of 2A)
+//      101 → -1·A    (two's complement of A)
+//      110 → -1·A
+//      111 →  0·A    (zero)
+//    Result: ceil(N/2) signed partial products each at bit position 2k.
+//
+// 3. UNSIGNED OPERAND TREATMENT
+//    Standard MBE is defined for two's-complement (signed) numbers.
+//    For unsigned N-bit operands, extend both A and B with a zero sign bit
+//    to make them (N+1)-bit non-negative signed values:
+//      A_ext = {1'b0, a}   →  11-bit, value = a  (sign bit = 0)
+//      B_ext = {1'b0, b}   →  11-bit, value = b  (sign bit = 0)
+//    The 11×11 signed Booth product equals the 10×10 unsigned product
+//    in bits [19:0] because both values are non-negative.
+//
+// 4. WINDOW ASSIGNMENT for 11-bit B_ext (bits [10:0], B_ext[10]=0)
+//    We need ceil(11/2) = 6 windows, but window k=5 would read B_ext[11],
+//    which does not exist. Because B_ext[10]=0 is the sign bit, sign-extending
+//    by one more 0 leaves the value unchanged and gives an even bit count:
+//      B_pad = {1'b0, B_ext} = {2'b00, b}  (12 bits, bits [11:0])
+//    Windows k=0..5:
+//      k=0: {B_pad[1], B_pad[0], 1'b0}
+//      k=1: {B_pad[3], B_pad[2], B_pad[1]}
+//      k=2: {B_pad[5], B_pad[4], B_pad[3]}
+//      k=3: {B_pad[7], B_pad[6], B_pad[5]}
+//      k=4: {B_pad[9], B_pad[8], B_pad[7]}
+//      k=5: {B_pad[11], B_pad[10], B_pad[9]} = {0, 0, b[9]} → sel ∈ {000,001}
+//    Window k=5: B_pad[11]=B_pad[10]=0, so sel[2:1]=00 → either 0·A or +1·A.
+//    We include this 6th partial product (pp5 = (b[9] ? A_ext : 0) << 10).
+//
+// 5. PARTIAL PRODUCT GENERATION
+//    For each k ∈ {0..5}:
+//      raw_k = booth_mux(sel_k, multiples_of_A)  — 22-bit signed value
+//      pp_k  = sign_extend(raw_k, 32) << (2k)    — aligned 32-bit value
+//    The sign extension propagates the two's-complement partial product
+//    correctly into the full-width accumulator.
+//
+// 6. CSA (CARRY-SAVE ADDER) REDUCTION
+//    6 partial products reduced with a tree of four 3:2 CSAs:
+//      Level 1a: CSA(pp0, pp1, pp2)        → s1, c1
+//      Level 1b: CSA(pp3, pp4, pp5)        → s2, c2   (parallel with 1a)
+//      Level 2:  CSA(s1,  c1,  s2)         → s3, c3
+//      Level 3:  CSA(s3,  c3,  c2)         → s4, c4
+//    Final:   p_full = s4 + c4  (carry-propagate add; adder style left to synthesis)
+//    Operand count goes 6 → 4 → 3 → 2 in 3 CSA levels, then one final add.
+//
+// 7. RESULT EXTRACTION
+//    p[19:0] = p_full[19:0] — lower 20 bits are the exact unsigned product
+//    for any a,b ∈ [0, 1023] (max product = 1023² = 1046529 < 2^20).
+//
+// ============================================================================
+
+`default_nettype none
+
+module gf16_mul_booth (
+    input  wire [9:0] a,   // 10-bit unsigned multiplicand
+    input  wire [9:0] b,   // 10-bit unsigned multiplier
+    output wire [19:0] p   // 20-bit unsigned product
+);
+
+    // -----------------------------------------------------------------------
+    // Zero-extend A to 11 bits (non-negative signed representation)
+    // A_ext[10:0] = {0, a[9:0]}
+    // -----------------------------------------------------------------------
+    wire [10:0] A_ext = {1'b0, a};
+
+    // -----------------------------------------------------------------------
+    // B zero-extended to 12 bits for window extraction:
+    //   B_pad[11:0] = {00, b[9:0]}
+    // -----------------------------------------------------------------------
+    wire [11:0] B_pad = {2'b00, b};
+
+    // -----------------------------------------------------------------------
+    // Multiples of A_ext, sign-extended to 22 bits. Built from A_ext so each
+    // concatenation is exactly 22 bits; A_ext[10]=0 makes them zero-extensions.
+    // m_pos1 = +A   m_pos2 = +2A   m_neg1 = -A   m_neg2 = -2A
+    // -----------------------------------------------------------------------
+    wire [21:0] m_0    = 22'd0;
+    wire [21:0] m_pos1 = {11'd0, A_ext};        // +A  : 11 + 11     = 22 bits
+    wire [21:0] m_pos2 = {10'd0, A_ext, 1'b0};  // +2A : 10 + 11 + 1 = 22 bits
+    wire [21:0] m_neg1 = (~m_pos1) + 22'd1;    // -A  two's complement
+    wire [21:0] m_neg2 = (~m_pos2) + 22'd1;    // -2A two's complement
+
+    // -----------------------------------------------------------------------
+    // MBE window selectors (3 bits each from B_pad)
+    // -----------------------------------------------------------------------
+    wire [2:0] sel0 = {B_pad[1:0], 1'b0};   // implicit b[-1] = 0 below the LSB
+    wire [2:0] sel1 = B_pad[3:1];
+    wire [2:0] sel2 = B_pad[5:3];
+    wire [2:0] sel3 = B_pad[7:5];
+    wire [2:0] sel4 = B_pad[9:7];
+    wire [2:0] sel5 = B_pad[11:9];          // = {0,0,b[9]}
+
+    // -----------------------------------------------------------------------
+    // Booth MBE multiplexer (combinational function)
+    // -----------------------------------------------------------------------
+    function [21:0] booth_mux;
+        input [2:0]  sel;
+        input [21:0] p0, p1, p2, n1, n2;
+        // sel = {b[2k+1], b[2k], b[2k-1]}.  Windows encoding the same Booth
+        // digit share one case item; callers pass 0, +A, +2A, -A, -2A as
+        // p0, p1, p2, n1, n2.  3'b000, 3'b111 and any X/Z sel take the
+        // default arm and return p0.
+        case (sel)
+            3'b001, 3'b010: booth_mux = p1;
+            3'b011:         booth_mux = p2;
+            3'b100:         booth_mux = n2;
+            3'b101, 3'b110: booth_mux = n1;
+            default:        booth_mux = p0;
+        endcase
+    endfunction
+
+    wire [21:0] raw0 = booth_mux(sel0, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+    wire [21:0] raw1 = booth_mux(sel1, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+    wire [21:0] raw2 = booth_mux(sel2, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+    wire [21:0] raw3 = booth_mux(sel3, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+    wire [21:0] raw4 = booth_mux(sel4, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+    wire [21:0] raw5 = booth_mux(sel5, m_0, m_pos1, m_pos2, m_neg1, m_neg2);
+
+    // -----------------------------------------------------------------------
+    // Sign-extend each raw partial product to 32 bits, then shift by 2k.
+    // Each concatenation is exactly 32 bits: (10-2k) sign copies + 22 + 2k.
+    // 32 bits is sufficient:
+    //   max raw magnitude = 2·A = 2046  → 11 bits unsigned, 12 bits signed
+    //   shifted by 10 (k=5) → 22 bits signed
+    //   k=5 needs no sign copies: raw5 (22 bits) + 10 zeros already fills 32
+    // -----------------------------------------------------------------------
+    wire [31:0] pp0 = {{10{raw0[21]}}, raw0};                    // << 0
+    wire [31:0] pp1 = {{8{raw1[21]}},  raw1, 2'b00};             // << 2
+    wire [31:0] pp2 = {{6{raw2[21]}},  raw2, 4'b0000};           // << 4
+    wire [31:0] pp3 = {{4{raw3[21]}},  raw3, 6'b000000};         // << 6
+    wire [31:0] pp4 = {{2{raw4[21]}},  raw4, 8'b00000000};       // << 8
+    wire [31:0] pp5 = {raw5, 10'b0000000000};                    // << 10
+
+    // -----------------------------------------------------------------------
+    // CSA reduction tree: 6 → 4 → 3 → 2 operands
+    //
+    // A CSA takes three 32-bit inputs (x,y,z) and produces:
+    //   sum   = x ^ y ^ z         (bit-wise XOR)
+    //   carry = maj(x,y,z) << 1   (majority function, shifted left 1)
+    // The carry out of bit 31 is dropped; only p_full[19:0] is used below.
+    // Level 1a: CSA(pp0, pp1, pp2) → s1, c1
+    // Level 1b: CSA(pp3, pp4, pp5) → s2, c2
+    // Level 2:  CSA(s1,  c1,  s2)  → s3, c3
+    // Level 3:  CSA(s3,  c3,  c2)  → s4, c4
+    // -----------------------------------------------------------------------
+
+    // Level 1a
+    wire [31:0] s1 = pp0 ^ pp1 ^ pp2;
+    wire [31:0] c1 = ((pp0 & pp1) | (pp1 & pp2) | (pp0 & pp2)) << 1;
+
+    // Level 1b
+    wire [31:0] s2 = pp3 ^ pp4 ^ pp5;
+    wire [31:0] c2 = ((pp3 & pp4) | (pp4 & pp5) | (pp3 & pp5)) << 1;
+
+    // Level 2
+    wire [31:0] s3 = s1 ^ c1 ^ s2;
+    wire [31:0] c3 = ((s1 & c1) | (c1 & s2) | (s1 & s2)) << 1;
+
+    // Level 3
+    wire [31:0] s4 = s3 ^ c3 ^ c2;
+    wire [31:0] c4 = ((s3 & c3) | (c3 & c2) | (s3 & c2)) << 1;
+
+    // Final addition (operands widened by one bit so the sum cannot wrap)
+    wire [32:0] p_full = {1'b0, s4} + {1'b0, c4};
+
+    // Extract lower 20 bits (exact unsigned product for 10-bit operands)
+    assign p = p_full[19:0];
+
+endmodule
+
+`default_nettype wire
+// ============================================================================
+// END gf16_mul_booth
+// phi^2 + phi^-2 = 3 · Wave-24 RVR-016 dry-run · DOI 10.5281/zenodo.19227877
+// ============================================================================