From 7672e5e699d3791d117548fdf5696451d580560e Mon Sep 17 00:00:00 2001 From: Ameer Ghani Date: Fri, 29 May 2026 02:50:06 +0000 Subject: [PATCH 1/2] crc32: add riscv64 implementation --- .github/workflows/checks.yaml | 9 +++ zlib-rs/src/cpu_features.rs | 8 +++ zlib-rs/src/crc32.rs | 8 +++ zlib-rs/src/crc32/zbc.rs | 107 ++++++++++++++++++++++++++++++++++ 4 files changed, 132 insertions(+) create mode 100644 zlib-rs/src/crc32/zbc.rs diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 0ddeac8f..4ea2759d 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -31,6 +31,10 @@ jobs: # powerpc64le-unknown-linux-gnu CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER: powerpc64le-linux-gnu-gcc CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER: qemu-ppc64le -L /usr/powerpc64le-linux-gnu + # riscv64gc-unknown-linux-gnu + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER: riscv64-linux-gnu-gcc + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER: qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zbc=true + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-Ctarget-feature=+zbc" # wasm32-wasip1 (std for wasip2 is unstable) WASI_SDK_PATH: /tmp/wasi-sdk-24.0-x86_64-linux CC_wasm32_wasip1: /tmp/wasi-sdk-24.0-x86_64-linux/bin/clang @@ -82,6 +86,11 @@ jobs: codecov: false packages: gcc-powerpc64le-linux-gnu g++-powerpc64le-linux-gnu qemu-user qemu-user-static + - target: "riscv64gc-unknown-linux-gnu" + os: ubuntu-latest + codecov: false + packages: gcc-riscv64-linux-gnu g++-riscv64-linux-gnu qemu-user qemu-user-static + - target: "wasm32-wasip1" os: ubuntu-latest flags: "-p zlib-rs -p libz-rs-sys -p test-libz-rs-sys" diff --git a/zlib-rs/src/cpu_features.rs b/zlib-rs/src/cpu_features.rs index f442d49c..5c8ba93d 100644 --- a/zlib-rs/src/cpu_features.rs +++ b/zlib-rs/src/cpu_features.rs @@ -108,3 +108,11 @@ pub fn is_enabled_simd128() -> bool { false } + +#[inline(always)] +pub fn is_enabled_zbc() -> bool { + #[cfg(target_arch = 
"riscv64")] + return cfg!(target_feature = "zbc"); + + false +} diff --git a/zlib-rs/src/crc32.rs b/zlib-rs/src/crc32.rs index cf4acfc7..d99e0db7 100644 --- a/zlib-rs/src/crc32.rs +++ b/zlib-rs/src/crc32.rs @@ -13,6 +13,8 @@ mod pclmulqdq; #[cfg(target_arch = "x86_64")] #[cfg(feature = "vpclmulqdq")] mod vpclmulqdq; +#[cfg(target_arch = "riscv64")] +mod zbc; pub use combine::{crc32_combine, crc32_combine_gen, crc32_combine_op}; @@ -83,6 +85,12 @@ impl Crc32Fold { return; } + #[cfg(target_arch = "riscv64")] + if crate::cpu_features::is_enabled_zbc() { + self.value = unsafe { self::zbc::crc32_zbc_riscv64(self.value, src) }; + return; + } + #[cfg(target_arch = "loongarch64")] { self.value = self::loongarch::crc32_loongarch64(self.value, src); diff --git a/zlib-rs/src/crc32/zbc.rs b/zlib-rs/src/crc32/zbc.rs new file mode 100644 index 00000000..95a5968c --- /dev/null +++ b/zlib-rs/src/crc32/zbc.rs @@ -0,0 +1,107 @@ +//! crc32 implementation using the riscv64 zbc ISA extension. Derived from +//! zlib-ng's implementation, see +//! https://github.com/zlib-ng/zlib-ng/blob/da22434b657578c41af1bdf06b27304e4aceb00f/arch/riscv/crc32_zbc.c + +use crate::crc32::zbc::asm::{clmul, clmulh}; + +use super::crc32_braid; + +const CLMUL_MIN_LEN: usize = 16; +const CLMUL_CHUNK_LEN: usize = 16; + +const CONSTANT_R3: u64 = 0x1751997D0; +const CONSTANT_R4: u64 = 0x0CCAA009E; +const CONSTANT_R5: u64 = 0x163CD6124; +const MASK32: u64 = 0xFFFFFFFF; +const CRCPOLY_TRUE_LE_FULL: u64 = 0x1DB710641; +const CONSTANT_RU: u64 = 0x1F7011641; + +/// # Safety +/// +/// This function must only be called on riscv64 with the zbc (carryless +/// multiplication) feature. 
+pub unsafe fn crc32_zbc_riscv64(mut crc: u32, buf: &[u8]) -> u32 { + if buf.len() < CLMUL_MIN_LEN { + return crc32_braid(crc, buf); + } + + let unaligned_len = buf.len() % CLMUL_CHUNK_LEN; + if unaligned_len > 0 { + crc = crc32_braid(crc, &buf[..unaligned_len]); + } + + !crc32_zbc_riscv64_impl(!crc, &buf[unaligned_len..]) +} + +fn crc32_zbc_riscv64_impl(crc: u32, buf: &[u8]) -> u32 { + // This unwrap is legal because crc32_zbc_riscv64 guarantees the input is at + // least 16 bytes. + let mut low = u64::from_le_bytes(buf[..8].try_into().unwrap()) ^ crc as u64; + let mut high = u64::from_le_bytes(buf[8..16].try_into().unwrap()); + + buf.chunks_exact(16).skip(1).for_each(|chunk| { + let t2 = clmul(CONSTANT_R4, high); + let t3 = clmulh(CONSTANT_R4, high); + let t0_new = clmul(CONSTANT_R3, low); + let t1_new = clmulh(CONSTANT_R3, low); + low = t0_new ^ t2; + high = t1_new ^ t3; + low ^= u64::from_le_bytes(chunk[..8].try_into().unwrap()); + high ^= u64::from_le_bytes(chunk[8..].try_into().unwrap()); + }); + + // Fold the 128-bit result into 64 bits + let fold_t3 = clmulh(low, CONSTANT_R4); + let fold_t2 = clmul(low, CONSTANT_R4); + low = high ^ fold_t2; + high = fold_t3; + + // Combine the low and high parts and perform polynomial reduction + let combined = (low >> 32) | ((high & MASK32) << 32); + let reduced_low = { clmul(low & MASK32, CONSTANT_R5) } ^ combined; + + // Barrett reduction step + let mut barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32; + barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL); + let ret = barrett ^ reduced_low; + + (ret >> 32) as u32 +} + +/// Inline assembly for required instructions, since the intrinsics are nightly +/// compiler only. +mod asm { + // Returns the lower half of carryless multiplication of rs1 and rs2. 
+ // See https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmul + #[inline(always)] + pub fn clmul(rs1: u64, rs2: u64) -> u64 { + let rd; + unsafe { + core::arch::asm!( + "clmul {rd}, {rs1}, {rs2}", + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + rd = out(reg) rd, + options(pure, nomem, nostack) + ); + } + rd + } + + // Returns the upper half of carryless multiplication of rs1 and rs2. + // See https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmulh + #[inline(always)] + pub fn clmulh(rs1: u64, rs2: u64) -> u64 { + let rd; + unsafe { + core::arch::asm!( + "clmulh {rd}, {rs1}, {rs2}", + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + rd = out(reg) rd, + options(pure, nomem, nostack) + ); + } + rd + } +} From 1e431ba201521d9870f6d81b2ea4e4f8fff867fa Mon Sep 17 00:00:00 2001 From: Ameer Ghani Date: Tue, 2 Jun 2026 20:11:39 -0500 Subject: [PATCH 2/2] Review pass --- .github/workflows/checks.yaml | 4 +-- zlib-rs/src/cpu_features.rs | 6 ++-- zlib-rs/src/crc32.rs | 6 ++-- zlib-rs/src/crc32/{zbc.rs => zbkc.rs} | 51 ++++++++++++++------------- 4 files changed, 35 insertions(+), 32 deletions(-) rename zlib-rs/src/crc32/{zbc.rs => zbkc.rs} (63%) diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 4ea2759d..4feb029a 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -33,8 +33,8 @@ jobs: CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER: qemu-ppc64le -L /usr/powerpc64le-linux-gnu # riscv64gc-unknown-linux-gnu CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER: riscv64-linux-gnu-gcc - CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER: qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zbc=true - CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-Ctarget-feature=+zbc" + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER: qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zbkc=true + CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-Ctarget-feature=+zbkc" # wasm32-wasip1 (std for wasip2 is unstable) 
WASI_SDK_PATH: /tmp/wasi-sdk-24.0-x86_64-linux CC_wasm32_wasip1: /tmp/wasi-sdk-24.0-x86_64-linux/bin/clang diff --git a/zlib-rs/src/cpu_features.rs b/zlib-rs/src/cpu_features.rs index 5c8ba93d..8eca4e9d 100644 --- a/zlib-rs/src/cpu_features.rs +++ b/zlib-rs/src/cpu_features.rs @@ -110,9 +110,11 @@ pub fn is_enabled_simd128() -> bool { } #[inline(always)] -pub fn is_enabled_zbc() -> bool { +pub fn is_enabled_zbkc() -> bool { + // FIXME: std::arch::is_riscv_feature_detected! is stabilized in 1.78. Switch to runtime + // feature detection once MSRV is bumped. Until then, zbkc support is compile-time only. #[cfg(target_arch = "riscv64")] - return cfg!(target_feature = "zbc"); + return cfg!(target_feature = "zbkc"); false } diff --git a/zlib-rs/src/crc32.rs b/zlib-rs/src/crc32.rs index d99e0db7..b570a56d 100644 --- a/zlib-rs/src/crc32.rs +++ b/zlib-rs/src/crc32.rs @@ -14,7 +14,7 @@ mod pclmulqdq; #[cfg(feature = "vpclmulqdq")] mod vpclmulqdq; #[cfg(target_arch = "riscv64")] -mod zbc; +mod zbkc; pub use combine::{crc32_combine, crc32_combine_gen, crc32_combine_op}; @@ -86,8 +86,8 @@ impl Crc32Fold { } #[cfg(target_arch = "riscv64")] - if crate::cpu_features::is_enabled_zbc() { - self.value = unsafe { self::zbc::crc32_zbc_riscv64(self.value, src) }; + if crate::cpu_features::is_enabled_zbkc() { + self.value = unsafe { self::zbkc::crc32_zbkc_riscv64(self.value, src) }; return; } diff --git a/zlib-rs/src/crc32/zbc.rs b/zlib-rs/src/crc32/zbkc.rs similarity index 63% rename from zlib-rs/src/crc32/zbc.rs rename to zlib-rs/src/crc32/zbkc.rs index 95a5968c..9f4948f9 100644 --- a/zlib-rs/src/crc32/zbc.rs +++ b/zlib-rs/src/crc32/zbkc.rs @@ -1,8 +1,13 @@ -//! crc32 implementation using the riscv64 zbc ISA extension. Derived from +//! crc32 implementation using the riscv64 zbkc ISA extension. Derived from //! zlib-ng's implementation, see //! https://github.com/zlib-ng/zlib-ng/blob/da22434b657578c41af1bdf06b27304e4aceb00f/arch/riscv/crc32_zbc.c +//! +//! # Safety +//! +//!
The functions in this module must only be executed on a riscv64 system with +//! the zbkc feature. -use crate::crc32::zbc::asm::{clmul, clmulh}; +use crate::crc32::zbkc::asm::{clmul, clmulh}; use super::crc32_braid; @@ -16,11 +21,7 @@ const MASK32: u64 = 0xFFFFFFFF; const CRCPOLY_TRUE_LE_FULL: u64 = 0x1DB710641; const CONSTANT_RU: u64 = 0x1F7011641; -/// # Safety -/// -/// This function must only be called on riscv64 with the zbc (carryless -/// multiplication) feature. -pub unsafe fn crc32_zbc_riscv64(mut crc: u32, buf: &[u8]) -> u32 { +pub unsafe fn crc32_zbkc_riscv64(mut crc: u32, buf: &[u8]) -> u32 { if buf.len() < CLMUL_MIN_LEN { return crc32_braid(crc, buf); } @@ -30,20 +31,20 @@ pub unsafe fn crc32_zbc_riscv64(mut crc: u32, buf: &[u8]) -> u32 { crc = crc32_braid(crc, &buf[..unaligned_len]); } - !crc32_zbc_riscv64_impl(!crc, &buf[unaligned_len..]) + !unsafe { crc32_zbkc_riscv64_impl(!crc, &buf[unaligned_len..]) } } -fn crc32_zbc_riscv64_impl(crc: u32, buf: &[u8]) -> u32 { - // This unwrap is legal because crc32_zbc_riscv64 guarantees the input is at +unsafe fn crc32_zbkc_riscv64_impl(crc: u32, buf: &[u8]) -> u32 { + // This unwrap is legal because crc32_zbkc_riscv64 guarantees the input is at // least 16 bytes. 
let mut low = u64::from_le_bytes(buf[..8].try_into().unwrap()) ^ crc as u64; let mut high = u64::from_le_bytes(buf[8..16].try_into().unwrap()); buf.chunks_exact(16).skip(1).for_each(|chunk| { - let t2 = clmul(CONSTANT_R4, high); - let t3 = clmulh(CONSTANT_R4, high); - let t0_new = clmul(CONSTANT_R3, low); - let t1_new = clmulh(CONSTANT_R3, low); + let t2 = unsafe { clmul(CONSTANT_R4, high) }; + let t3 = unsafe { clmulh(CONSTANT_R4, high) }; + let t0_new = unsafe { clmul(CONSTANT_R3, low) }; + let t1_new = unsafe { clmulh(CONSTANT_R3, low) }; low = t0_new ^ t2; high = t1_new ^ t3; low ^= u64::from_le_bytes(chunk[..8].try_into().unwrap()); @@ -51,30 +52,30 @@ fn crc32_zbc_riscv64_impl(crc: u32, buf: &[u8]) -> u32 { }); // Fold the 128-bit result into 64 bits - let fold_t3 = clmulh(low, CONSTANT_R4); - let fold_t2 = clmul(low, CONSTANT_R4); + let fold_t3 = unsafe { clmulh(low, CONSTANT_R4) }; + let fold_t2 = unsafe { clmul(low, CONSTANT_R4) }; low = high ^ fold_t2; high = fold_t3; // Combine the low and high parts and perform polynomial reduction let combined = (low >> 32) | ((high & MASK32) << 32); - let reduced_low = { clmul(low & MASK32, CONSTANT_R5) } ^ combined; + let reduced_low = unsafe { clmul(low & MASK32, CONSTANT_R5) } ^ combined; // Barrett reduction step - let mut barrett = clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32; - barrett = clmul(barrett, CRCPOLY_TRUE_LE_FULL); + let mut barrett = unsafe { clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32 }; + barrett = unsafe { clmul(barrett, CRCPOLY_TRUE_LE_FULL) }; let ret = barrett ^ reduced_low; (ret >> 32) as u32 } -/// Inline assembly for required instructions, since the intrinsics are nightly -/// compiler only. +/// Inline assembly for required instructions, since the stdarch intrinsics are +/// currently unstable. mod asm { // Returns the lower half of carryless multiplication of rs1 and rs2. 
// See https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmul - #[inline(always)] - pub fn clmul(rs1: u64, rs2: u64) -> u64 { + #[target_feature(enable = "zbkc")] + pub unsafe fn clmul(rs1: u64, rs2: u64) -> u64 { let rd; unsafe { core::arch::asm!( @@ -90,8 +91,8 @@ mod asm { // Returns the upper half of carryless multiplication of rs1 and rs2. // See https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmulh - #[inline(always)] - pub fn clmulh(rs1: u64, rs2: u64) -> u64 { + #[target_feature(enable = "zbkc")] + pub unsafe fn clmulh(rs1: u64, rs2: u64) -> u64 { let rd; unsafe { core::arch::asm!(