Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ jobs:
# powerpc64le-unknown-linux-gnu
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER: powerpc64le-linux-gnu-gcc
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER: qemu-ppc64le -L /usr/powerpc64le-linux-gnu
# riscv64gc-unknown-linux-gnu
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER: riscv64-linux-gnu-gcc
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER: qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64,zbkc=true
CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-Ctarget-feature=+zbkc"
# wasm32-wasip1 (std for wasip2 is unstable)
WASI_SDK_PATH: /tmp/wasi-sdk-24.0-x86_64-linux
CC_wasm32_wasip1: /tmp/wasi-sdk-24.0-x86_64-linux/bin/clang
Expand Down Expand Up @@ -82,6 +86,11 @@ jobs:
codecov: false
packages: gcc-powerpc64le-linux-gnu g++-powerpc64le-linux-gnu qemu-user qemu-user-static

- target: "riscv64gc-unknown-linux-gnu"
os: ubuntu-latest
codecov: false
packages: gcc-riscv64-linux-gnu g++-riscv64-linux-gnu qemu-user qemu-user-static

- target: "wasm32-wasip1"
os: ubuntu-latest
flags: "-p zlib-rs -p libz-rs-sys -p test-libz-rs-sys"
Expand Down
10 changes: 10 additions & 0 deletions zlib-rs/src/cpu_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,13 @@ pub fn is_enabled_simd128() -> bool {

false
}

/// Reports whether the riscv64 `zbkc` target feature may be used.
///
/// The answer is fixed at compile time: it is `true` only when compiling for
/// `riscv64` with the `zbkc` target feature enabled, and `false` on every
/// other target or configuration.
#[inline(always)]
pub fn is_enabled_zbkc() -> bool {
    // FIXME: `std::arch::is_riscv_feature_detected!` is stabilized in 1.78. Switch to runtime
    // feature detection once MSRV is bumped. Until then, zbkc support is compile-time only.
    cfg!(all(target_arch = "riscv64", target_feature = "zbkc"))
}
8 changes: 8 additions & 0 deletions zlib-rs/src/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ mod pclmulqdq;
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "vpclmulqdq")]
mod vpclmulqdq;
#[cfg(target_arch = "riscv64")]
mod zbkc;

pub use combine::{crc32_combine, crc32_combine_gen, crc32_combine_op};

Expand Down Expand Up @@ -83,6 +85,12 @@ impl Crc32Fold {
return;
}

#[cfg(target_arch = "riscv64")]
if crate::cpu_features::is_enabled_zbkc() {
self.value = unsafe { self::zbkc::crc32_zbkc_riscv64(self.value, src) };
return;
}

#[cfg(target_arch = "loongarch64")]
{
self.value = self::loongarch::crc32_loongarch64(self.value, src);
Expand Down
108 changes: 108 additions & 0 deletions zlib-rs/src/crc32/zbkc.rs
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed for this PR, but I suspect an approach like in #514 could work here too? Though aarch64 has 64 x 64 -> 128 bits in one instruction, while here it takes 2 so perhaps it's not actually advantageous.

Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//! crc32 implementation using the riscv64 zbkc ISA extension. Derived from
//! zlib-ng's implementation, see
//! https://github.com/zlib-ng/zlib-ng/blob/da22434b657578c41af1bdf06b27304e4aceb00f/arch/riscv/crc32_zbc.c
//!
//! # Safety
//!
//! The functions in this module must only be executed on a riscv64 system with
//! the zbkc feature.

use crate::crc32::zbkc::asm::{clmul, clmulh};

use super::crc32_braid;

// Inputs shorter than this many bytes are handled entirely by `crc32_braid`.
const CLMUL_MIN_LEN: usize = 16;
// The clmul path consumes the input in chunks of this many bytes; the leading
// `len % CLMUL_CHUNK_LEN` bytes are handled by `crc32_braid` first.
const CLMUL_CHUNK_LEN: usize = 16;

// NOTE(review): the numeric values below are presumably the folding/reduction
// constants from the zlib-ng implementation referenced in the module docs --
// confirm against that source before changing any of them.

// Multiplied with the `low` lane when folding in the next 16-byte chunk.
const CONSTANT_R3: u64 = 0x1751997D0;
// Multiplied with the `high` lane when folding in the next 16-byte chunk, and
// with `low` when folding the final 128-bit state into 64 bits.
const CONSTANT_R4: u64 = 0x0CCAA009E;
// Multiplied with the low 32 bits of the state during the polynomial reduction.
const CONSTANT_R5: u64 = 0x163CD6124;
// Selects the low 32 bits of a 64-bit value.
const MASK32: u64 = 0xFFFFFFFF;
// Second multiplier of the Barrett reduction step.
const CRCPOLY_TRUE_LE_FULL: u64 = 0x1DB710641;
// First multiplier of the Barrett reduction step.
const CONSTANT_RU: u64 = 0x1F7011641;

/// Computes the CRC-32 of `buf`, continuing from the running checksum `crc`.
///
/// Inputs shorter than `CLMUL_MIN_LEN` bytes are handled entirely by
/// `crc32_braid`. For longer inputs, the leading `buf.len() % CLMUL_CHUNK_LEN`
/// bytes go through `crc32_braid`, and the remaining whole chunks are folded
/// with carry-less multiplication.
///
/// # Safety
///
/// Must only be executed on a riscv64 system with the zbkc feature.
pub unsafe fn crc32_zbkc_riscv64(mut crc: u32, buf: &[u8]) -> u32 {
    if buf.len() < CLMUL_MIN_LEN {
        return crc32_braid(crc, buf);
    }

    // Peel off the odd-sized prefix so that `body` is a whole number of chunks.
    let (head, body) = buf.split_at(buf.len() % CLMUL_CHUNK_LEN);
    if !head.is_empty() {
        crc = crc32_braid(crc, head);
    }

    // The clmul kernel is fed the bitwise complement of the running checksum,
    // and its result is complemented again on the way out.
    //
    // SAFETY: the caller guarantees that zbkc is available. `buf` holds at
    // least `CLMUL_MIN_LEN` bytes and `head` is shorter than one chunk, so
    // `body` contains at least one full chunk.
    let folded = unsafe { crc32_zbkc_riscv64_impl(!crc, body) };
    !folded
}

unsafe fn crc32_zbkc_riscv64_impl(crc: u32, buf: &[u8]) -> u32 {
// This unwrap is legal because crc32_zbkc_riscv64 guarantees the input is at
// least 16 bytes.
let mut low = u64::from_le_bytes(buf[..8].try_into().unwrap()) ^ crc as u64;
let mut high = u64::from_le_bytes(buf[8..16].try_into().unwrap());

buf.chunks_exact(16).skip(1).for_each(|chunk| {
let t2 = unsafe { clmul(CONSTANT_R4, high) };
let t3 = unsafe { clmulh(CONSTANT_R4, high) };
let t0_new = unsafe { clmul(CONSTANT_R3, low) };
let t1_new = unsafe { clmulh(CONSTANT_R3, low) };
low = t0_new ^ t2;
high = t1_new ^ t3;
low ^= u64::from_le_bytes(chunk[..8].try_into().unwrap());
high ^= u64::from_le_bytes(chunk[8..].try_into().unwrap());
});

// Fold the 128-bit result into 64 bits
let fold_t3 = unsafe { clmulh(low, CONSTANT_R4) };
let fold_t2 = unsafe { clmul(low, CONSTANT_R4) };
low = high ^ fold_t2;
high = fold_t3;

// Combine the low and high parts and perform polynomial reduction
let combined = (low >> 32) | ((high & MASK32) << 32);
let reduced_low = unsafe { clmul(low & MASK32, CONSTANT_R5) } ^ combined;

// Barrett reduction step
let mut barrett = unsafe { clmul(reduced_low & MASK32, CONSTANT_RU) & MASK32 };
barrett = unsafe { clmul(barrett, CRCPOLY_TRUE_LE_FULL) };
let ret = barrett ^ reduced_low;

(ret >> 32) as u32
}

/// Inline assembly wrappers for the required carry-less multiply instructions,
/// since the stdarch intrinsics are currently unstable.
mod asm {
/// Returns the lower half (the low 64 bits) of the carryless multiplication of
/// `rs1` and `rs2`.
///
/// See <https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmul>
///
/// # Safety
///
/// The `clmul` instruction is emitted unconditionally, so the caller must
/// ensure the code runs on a CPU that implements the zbkc extension.
#[target_feature(enable = "zbkc")]
pub unsafe fn clmul(rs1: u64, rs2: u64) -> u64 {
let rd;
unsafe {
// `pure, nomem, nostack` declares that the instruction has no side effects,
// does not access memory and does not use the stack; its result depends only
// on the two input registers.
core::arch::asm!(
"clmul {rd}, {rs1}, {rs2}",
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
rd = out(reg) rd,
options(pure, nomem, nostack)
);
}
rd
}

/// Returns the upper half (the high 64 bits) of the carryless multiplication of
/// `rs1` and `rs2`.
///
/// See <https://riscv.github.io/riscv-isa-manual/snapshot/spec/#insns-clmulh>
///
/// # Safety
///
/// The `clmulh` instruction is emitted unconditionally, so the caller must
/// ensure the code runs on a CPU that implements the zbkc extension.
#[target_feature(enable = "zbkc")]
pub unsafe fn clmulh(rs1: u64, rs2: u64) -> u64 {
let rd;
unsafe {
// `pure, nomem, nostack` declares that the instruction has no side effects,
// does not access memory and does not use the stack; its result depends only
// on the two input registers.
core::arch::asm!(
"clmulh {rd}, {rs1}, {rs2}",
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
rd = out(reg) rd,
options(pure, nomem, nostack)
);
}
rd
}
}
Loading