Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
d748af2
remove FFT + Merkle padding optimization
TomWambsgans Apr 22, 2026
60b04f4
reverse variable ordering and apply split-eq trick
TomWambsgans Apr 23, 2026
fcda679
w
TomWambsgans Apr 23, 2026
1779e72
wip
TomWambsgans Apr 23, 2026
74ec0c9
Merge branch 'main' into whir-split-eq
TomWambsgans Apr 24, 2026
6b5d7f4
w
TomWambsgans Apr 24, 2026
f310d3f
simplify
TomWambsgans Apr 24, 2026
685269c
packing on build_all_compressed_groups (only improves avx)
TomWambsgans Apr 24, 2026
92b2d16
wip
TomWambsgans Apr 24, 2026
b691be7
w
TomWambsgans Apr 24, 2026
bef2f44
faster prepare_evals_for_fft_helper
TomWambsgans Apr 24, 2026
b9f0989
w
TomWambsgans Apr 24, 2026
6ba37ce
simplify
TomWambsgans Apr 24, 2026
8fd20ec
wip
TomWambsgans Apr 24, 2026
ad975ff
Merge branch 'main' into whir-split-eq
TomWambsgans Apr 24, 2026
fb31a0f
padding aware FFT
TomWambsgans Apr 24, 2026
fc62b98
even faster fft
TomWambsgans Apr 24, 2026
243b840
simplify
TomWambsgans Apr 24, 2026
6280964
faster merkle
TomWambsgans Apr 25, 2026
54b849b
avoid unnecessary allocation in initial Merkle tree
TomWambsgans Apr 25, 2026
5e341dd
Merge branch 'main' into whir-split-eq
TomWambsgans Apr 25, 2026
3bf8d69
Merge branch 'main' into whir-split-eq
TomWambsgans Apr 25, 2026
f346b1e
Merge branch 'main' into whir-split-eq
TomWambsgans Apr 25, 2026
e6c2329
add TODO comment for a potential opti in `compute_eval_eq_base_packed…
TomWambsgans Apr 25, 2026
681d173
Merge remote-tracking branch 'origin/main' into whir-split-eq
TomWambsgans Apr 25, 2026
8f1619e
fmt
TomWambsgans Apr 25, 2026
4062fa6
Perf simd compress hi dot (mirror of https://github.com/Plonky3/Plonk…
TomWambsgans Apr 27, 2026
fba3779
merge branch main
TomWambsgans Apr 29, 2026
64046ef
Merge remote-tracking branch 'origin/main' into whir-split-eq
TomWambsgans May 18, 2026
71cead0
w
TomWambsgans May 18, 2026
974ee3d
Merge branch 'main' into whir-split-eq
TomWambsgans May 21, 2026
1107e7d
Merge remote-tracking branch 'origin/main' into whir-split-eq
TomWambsgans May 24, 2026
c685737
Merge branch 'main' into whir-split-eq
TomWambsgans May 24, 2026
7ba1c84
merge main
TomWambsgans Jun 4, 2026
b58856b
wip
TomWambsgans Jun 4, 2026
0a17e91
Merge remote-tracking branch 'origin/main' into whir-split-eq
TomWambsgans Jun 5, 2026
a2d1bd0
Merge branch 'main' into whir-split-eq
TomWambsgans Jun 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 10 additions & 19 deletions crates/backend/poly/src/eq_mle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,11 +307,8 @@ pub fn compute_eval_eq_base_packed<F, EF, const INITIALIZED: bool>(
}

#[inline]
pub fn compute_eval_eq_base_packed_batched<F, EF>(
evals: &[MultilinearPoint<F>],
out: &mut [EF::ExtensionPacking],
scalars: &[EF],
) where
pub fn compute_eval_eq_base_batched<F, EF>(evals: &[MultilinearPoint<F>], out: &mut [EF], scalars: &[EF])
where
F: Field,
EF: ExtensionField<F>,
{
Expand All @@ -321,22 +318,21 @@ pub fn compute_eval_eq_base_packed_batched<F, EF>(
}

let n = evals[0].len();
let packing_width = F::Packing::WIDTH;
let log_packing_width = log2_strict_usize(packing_width);
let log_packing_width = log2_strict_usize(F::Packing::WIDTH);
assert!(log_packing_width <= n);
assert_eq!(out.len(), 1 << (n - log_packing_width));
assert_eq!(out.len(), 1 << n);

let k = n.min(LOG_BATCHED_TILE_SIZE);

if k <= log_packing_width || k >= n {
for (eval, &scalar) in evals.iter().zip(scalars) {
compute_eval_eq_base_packed::<F, EF, true>(eval, out, scalar);
compute_eval_eq_base::<F, EF, true>(eval, out, scalar);
}
return;
}

let n_prefix_levels = n - k;
let tile_packed_size = 1 << (k - log_packing_width);
let tile_size = 1 << k;

let per_query: Vec<_> = evals
.iter()
Expand All @@ -350,19 +346,14 @@ pub fn compute_eval_eq_base_packed_batched<F, EF>(
})
.collect();

// `out` already splits into `2^n_prefix_levels` tiles — many more than there are
// workers — so the pool's task counter load-balances these directly.
parallel::par_chunks_mut(out, tile_packed_size, |tile_idx, out_tile| {
// `out` splits into `2^n_prefix_levels` tiles — many more than there are workers —
// so the pool's task counter load-balances these directly.
parallel::par_chunks_mut(out, tile_size, |tile_idx, out_tile| {
for (eq_prefix, middle, eq_suffix) in &per_query {
// Here we could precompute the eq poly, trading some memory for less computation
// (2x faster on M4 Max, but 2x slower on machines with smaller caches).
// TODO: implement both and choose based on cache size?
base_eval_eq_packed_with_packed_output::<F, EF, true>(
middle,
out_tile,
*eq_suffix,
EF::ExtensionPacking::from(eq_prefix[tile_idx]),
);
base_eval_eq_packed::<F, EF, true>(middle, out_tile, *eq_suffix, eq_prefix[tile_idx]);
}
});
}
Expand Down
9 changes: 9 additions & 0 deletions crates/backend/poly/src/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,15 @@ where
}
}

impl<F: Clone> MultilinearPoint<F> {
    /// Returns a copy of this point with its coordinates in reverse order.
    ///
    /// The receiver is left untouched; every coordinate is cloned into the
    /// newly built point.
    #[must_use]
    pub fn reversed(&self) -> Self {
        let flipped = self.0.iter().rev().cloned().collect();
        Self(flipped)
    }
}

impl<F> From<Vec<F>> for MultilinearPoint<F> {
fn from(v: Vec<F>) -> Self {
Self(v)
Expand Down
49 changes: 34 additions & 15 deletions crates/rec_aggregation/zkdsl_implem/whir.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,17 +126,23 @@ def whir_open(

folding_randomness_global = Array(n_vars * DIM)

start_buf = Array(n_rounds + 2)
start_buf[0] = folding_randomness_global
# WHIR sumcheck folds LSB-first, so chronological challenges are in reverse polynomial-var
# order: chronological challenge #c is written to global position (n_vars - 1 - c), so the
# cumulative reads as [x_0, x_1, ..., x_{n_vars-1}]. `chrono_buf` carries the running
# chronological index across the `range` loop (range loops may not mutate outer-scope vars).
chrono_buf = Array(n_rounds + 2)
chrono_buf[0] = 0
for i in range(0, n_rounds + 1):
start: Mut = start_buf[i]
chrono: Mut = chrono_buf[i]
for j in range(0, folding_factors[i]):
copy_5(all_folding_randomness[i] + j * DIM, start + j * DIM)
start += folding_factors[i] * DIM
start_buf[i + 1] = start
start = start_buf[n_rounds + 1]
target_pos = n_vars - 1 - (chrono + j)
copy_5(all_folding_randomness[i] + j * DIM, folding_randomness_global + target_pos * DIM)
chrono += folding_factors[i]
chrono_buf[i + 1] = chrono
chrono = chrono_buf[n_rounds + 1]
for j in range(0, n_final_vars):
copy_5(all_folding_randomness[n_rounds + 1] + j * DIM, start + j * DIM)
target_pos = n_vars - 1 - (chrono + j)
copy_5(all_folding_randomness[n_rounds + 1] + j * DIM, folding_randomness_global + target_pos * DIM)

all_ood_recovered_evals = Array(num_oods[0] * DIM)
for i in range(0, num_oods[0]):
Expand All @@ -152,6 +158,9 @@ def whir_open(
num_oods[0],
)

# LSB-fold: at round i the polynomial's remaining vars are [x_0, ..., x_{n_vars_remaining-1}],
# i.e. the FIRST n_vars_remaining entries of folding_randomness_global (no pointer advance).
# eval_carry carries (n_vars_remaining, folding_randomness ptr, running sum) across the loop.
eval_carry = Array((n_rounds + 1) * 3)
eval_carry[0] = n_vars
eval_carry[1] = folding_randomness_global
Expand All @@ -164,12 +173,9 @@ def whir_open(
n_vars_remaining -= folding_factors[i]
my_ood_recovered_evals = Array(num_oods[i + 1] * DIM)
combination_randomness_powers = all_combination_randomness_powers[i]
my_folding_randomness += folding_factors[i] * DIM
for j in range(0, num_oods[i + 1]):
expanded_from_univariate = expand_from_univariate_ext(all_ood_points[i] + j * DIM, n_vars_remaining)
poly_eq_extension_dynamic_to(
expanded_from_univariate, my_folding_randomness, my_ood_recovered_evals + j * DIM, n_vars_remaining
)
poly_eq_extension_dynamic_to(expanded_from_univariate, folding_randomness_global, my_ood_recovered_evals + j * DIM, n_vars_remaining)
summed_ood = Array(DIM)
dot_product_ee_dynamic(
my_ood_recovered_evals,
Expand All @@ -182,7 +188,7 @@ def whir_open(
circle_value_i = all_circle_values[i]
for j in range(0, num_queries[i]): # unroll ?
expanded_from_univariate = expand_from_univariate_base(circle_value_i[j], n_vars_remaining)
poly_eq_base_extension_to(expanded_from_univariate, my_folding_randomness, s6s + j * DIM, n_vars_remaining)
poly_eq_base_extension_to(expanded_from_univariate, folding_randomness_global, s6s + j * DIM, n_vars_remaining)
s7 = Array(DIM)
dot_product_ee_dynamic(
s6s,
Expand All @@ -196,10 +202,18 @@ def whir_open(
eval_carry[base + 4] = my_folding_randomness
eval_carry[base + 5] = s
s = eval_carry[n_rounds * 3 + 2]

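# WHIR sumcheck folds LSB-first, so the final_sumcheck challenges arrive in chronological
# order [r_1, ..., r_m] with r_k bound to x_{m-k} (r_1 = x_{m-1}, ..., r_m = x_0).
# eval_multilinear_coeffs_rev computes f(x_j = point[j]), so it needs point[j] = r_{m-j}.
# Passing the challenges unreversed would give point[j] = r_{j+1}, i.e. the value meant
# for x_{m-j-1}, which is wrong -- so reverse them first.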
final_sumcheck_chals_rev = Array(n_final_vars * DIM)
final_sumcheck_chals = all_folding_randomness[n_rounds + 1]
for j in range(0, n_final_vars):
copy_5(final_sumcheck_chals + (n_final_vars - 1 - j) * DIM, final_sumcheck_chals_rev + j * DIM)
final_value = match_range(
n_final_vars,
range(MAX_NUM_VARIABLES_TO_SEND_COEFFS - WHIR_SUBSEQUENT_FOLDING_FACTOR, MAX_NUM_VARIABLES_TO_SEND_COEFFS + 1),
lambda n: eval_multilinear_coeffs_rev(final_coeffcients, all_folding_randomness[n_rounds + 1], n),
lambda n: eval_multilinear_coeffs_rev(final_coeffcients, final_sumcheck_chals_rev, n),
)
# copy_5(mul_extension_ret(s, final_value), end_sum);

Expand Down Expand Up @@ -376,7 +390,12 @@ def sample_stir_indexes_and_fold(

folds = Array(num_queries * DIM)

poly_eq = compute_eq_mle_extension_dynamic(folding_randomness, folding_factor)
# WHIR sumcheck folds LSB-first; the leaf is laid out so its first var is the polynomial's
# last LSB-folded var. evaluate (poly_eq) is MSB-first, so reverse the per-round challenges.
folding_randomness_reversed = Array(folding_factor * DIM)
for j in range(0, folding_factor):
copy_5(folding_randomness + (folding_factor - 1 - j) * DIM, folding_randomness_reversed + j * DIM)
poly_eq = compute_eq_mle_extension_dynamic(folding_randomness_reversed, folding_factor)

if merkle_leaves_in_basefield == 1:
for i in range(0, num_queries):
Expand Down
2 changes: 1 addition & 1 deletion crates/sub_protocols/src/quotient_gkr/layers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl<'a, EF: ExtensionField<PF<EF>>> LayerStorage<'a, EF> {
}
}

pub fn materialise_in_full(self) -> (Vec<EF>, Vec<EF>) {
pub(super) fn materialise_in_full(self) -> (Vec<EF>, Vec<EF>) {
let natural = match self {
Self::Natural { .. } => self,
other => other.convert_to_natural(),
Expand Down
15 changes: 8 additions & 7 deletions crates/whir/src/commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,25 @@ where
&self,
prover_state: &mut impl FSProver<EF>,
polynomial: &MleOwned<EF>,
actual_data_len: usize, // polynomial[actual_data_len..] is zero
_actual_data_len: usize, // polynomial[_actual_data_len..] is zero
) -> Witness<EF> {
let n_blocks = 1usize << self.folding_factor.at_round(0);
let evals_len = 1usize << self.num_variables;
let effective_n_cols = actual_data_len.div_ceil(evals_len / n_blocks);
// DFT matrix width: skip as many zero columns as possible, aligned to packing (SIMD)
let dft_n_cols = effective_n_cols.next_multiple_of(packing_width::<EF>()).min(n_blocks);

// NOTE: main's zero-COLUMN skip optimization (dft_n_cols / effective_n_cols < n_blocks)
// assumed an MSB-cols matrix layout, where the polynomial's zero suffix lands in trailing
// columns. The split-eq LSB-cols layout puts the zero suffix in trailing ROWS instead, so
// skipping columns would drop live data. We commit all columns (no skip): same root, just
// without the prover-side speedup. (The branch optimized this via row-skip in the DFT.)
let folded_matrix = info_span!("FFT").in_scope(|| {
reorder_and_dft(
&polynomial.by_ref(),
self.folding_factor.at_round(0),
self.starting_log_inv_rate,
dft_n_cols,
n_blocks,
)
});

let (prover_data, root) = MerkleData::build(folded_matrix, n_blocks, effective_n_cols);
let (prover_data, root) = MerkleData::build(folded_matrix, n_blocks, n_blocks);

prover_state.add_base_scalars(&root);

Expand Down
3 changes: 3 additions & 0 deletions crates/whir/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ pub(crate) use utils::*;
mod matrix;
pub(crate) use matrix::*;

mod svo;
pub(crate) use svo::*;

#[derive(Clone, Debug)]
pub struct SparseStatement<EF> {
pub total_num_variables: usize,
Expand Down
Loading
Loading