diff --git a/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc b/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc index fb145265e..83b02335d 100644 --- a/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc +++ b/src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc @@ -23,6 +23,11 @@ float SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs, size_t size); #endif +#if defined(__riscv_zvfh) +float SquaredEuclideanDistanceRVV(const Float16 *lhs, const Float16 *rhs, + size_t size); +#endif + #if defined(__AVX512FP16__) float SquaredEuclideanDistanceFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs, size_t size); @@ -48,6 +53,8 @@ void SquaredEuclideanDistanceMatrix::Compute(const ValueType *m, float *out) { #if defined(__ARM_NEON) *out = SquaredEuclideanDistanceFp16NEON(m, q, dim); +#elif defined(__riscv_zvfh) + *out = SquaredEuclideanDistanceRVV(m, q, dim); #else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { diff --git a/src/ailego/math/euclidean_distance_matrix_fp16_rvv.cc b/src/ailego/math/euclidean_distance_matrix_fp16_rvv.cc new file mode 100644 index 000000000..366eb3e2b --- /dev/null +++ b/src/ailego/math/euclidean_distance_matrix_fp16_rvv.cc @@ -0,0 +1,58 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_zvfh) +namespace { + +static inline float SquaredEuclideanDistanceRVVImpl(const Float16 *lhs, + const Float16 *rhs, + size_t size) { + const _Float16 *lhs_fp16 = reinterpret_cast(lhs); + const _Float16 *rhs_fp16 = reinterpret_cast(rhs); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e16m4(size); + vfloat16m4_t v_lhs = __riscv_vle16_v_f16m4(lhs_fp16, vl); + vfloat16m4_t v_rhs = __riscv_vle16_v_f16m4(rhs_fp16, vl); + vfloat32m8_t v_diff = __riscv_vfwsub_vv_f32m8(v_lhs, v_rhs, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_diff, v_diff, vl); + lhs_fp16 += vl; + rhs_fp16 += vl; + size -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_red = __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_red); +} + +} // namespace + +float SquaredEuclideanDistanceRVV(const Float16 *lhs, const Float16 *rhs, + size_t size) { + return SquaredEuclideanDistanceRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_zvfh + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc b/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc index cc3044389..c941af7c9 100644 --- a/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc +++ b/src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc @@ -23,6 +23,11 @@ void SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs, size_t size, float *out); #endif +#if defined(__riscv_vector) +float SquaredEuclideanDistanceRVV(const float *lhs, const float *rhs, + size_t size); +#endif + #if defined(__AVX512F__) float SquaredEuclideanDistanceFp32AVX512(const float *lhs, const float *rhs, size_t size); @@ -51,6 +56,8 @@ void SquaredEuclideanDistanceMatrix::Compute(const ValueType *m, float *out) { #if defined(__ARM_NEON) SquaredEuclideanDistanceFp32NEON(m, q, dim, out); +#elif defined(__riscv_vector) + *out = SquaredEuclideanDistanceRVV(m, q, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { diff --git a/src/ailego/math/euclidean_distance_matrix_fp32_rvv.cc b/src/ailego/math/euclidean_distance_matrix_fp32_rvv.cc new file mode 100644 index 000000000..13ac69d7e --- /dev/null +++ b/src/ailego/math/euclidean_distance_matrix_fp32_rvv.cc @@ -0,0 +1,58 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "distance_matrix_euclidean_utility.i" +#include "euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float SquaredEuclideanDistanceRVVImpl(const float *lhs, + const float *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + size_t vl = __riscv_vsetvl_e32m8(size); + vfloat32m8_t v_lhs = __riscv_vle32_v_f32m8(lhs, vl); + vfloat32m8_t v_rhs = __riscv_vle32_v_f32m8(rhs, vl); + vfloat32m8_t v_d = __riscv_vfsub_vv_f32m8(v_lhs, v_rhs, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_d, v_d, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_red = __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_red); +} + +} // namespace + +//! Squared Euclidean Distance +float SquaredEuclideanDistanceRVV(const float *lhs, const float *rhs, + size_t size) { + return SquaredEuclideanDistanceRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc b/src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc index d64ca1efc..7a8ec0cf3 100644 --- a/src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc +++ b/src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc @@ -18,6 +18,11 @@ namespace zvec { namespace ailego { +#if defined(__riscv_vector) +float SquaredEuclideanDistanceRVV(const int8_t *lhs, const int8_t *rhs, + size_t size); +#endif + #if defined(__AVX2__) float SquaredEuclideanDistanceInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size); @@ -36,6 +41,9 @@ void SquaredEuclideanDistanceMatrix::Compute(const ValueType *m, const ValueType *q, size_t dim, float *out) { +#if defined(__riscv_vector) + *out = SquaredEuclideanDistanceRVV(m, q, dim); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { *out = SquaredEuclideanDistanceInt8AVX2(m, q, dim); @@ -51,6 +59,7 @@ void SquaredEuclideanDistanceMatrix::Compute(const ValueType *m, #endif *out = SquaredEuclideanDistanceInt8Scalar(m, q, dim); +#endif // __riscv_vector } //! Compute the distance between matrix and query (INT8, M=1, N=1) diff --git a/src/ailego/math/euclidean_distance_matrix_int8_rvv.cc b/src/ailego/math/euclidean_distance_matrix_int8_rvv.cc new file mode 100644 index 000000000..25481a302 --- /dev/null +++ b/src/ailego/math/euclidean_distance_matrix_int8_rvv.cc @@ -0,0 +1,56 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float SquaredEuclideanDistanceRVVImpl(const int8_t *lhs, + const int8_t *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + vint32m8_t v_sum = __riscv_vmv_v_x_i32m8(0, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e8m2(size); + vint8m2_t v_lhs = __riscv_vle8_v_i8m2(lhs, vl); + vint8m2_t v_rhs = __riscv_vle8_v_i8m2(rhs, vl); + vint16m4_t v_d = __riscv_vwsub_vv_i16m4(v_lhs, v_rhs, vl); + v_sum = __riscv_vwmacc_vv_i32m8_tu(v_sum, v_d, v_d, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + vint32m1_t v_zero = __riscv_vmv_v_x_i32m1(0, 1); + vint32m1_t v_red = __riscv_vredsum_vs_i32m8_i32m1(v_sum, v_zero, vlmax); + return static_cast(__riscv_vmv_x_s_i32m1_i32(v_red)); +} + +} // namespace + +float SquaredEuclideanDistanceRVV(const int8_t *lhs, const int8_t *rhs, + size_t size) { + return SquaredEuclideanDistanceRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/inner_product_matrix_fp16_dispatch.cc b/src/ailego/math/inner_product_matrix_fp16_dispatch.cc index 3c46bc32b..fa5a0ea6f 100644 --- a/src/ailego/math/inner_product_matrix_fp16_dispatch.cc +++ b/src/ailego/math/inner_product_matrix_fp16_dispatch.cc @@ -27,6 +27,11 @@ float MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs, size_t size); #endif +#if defined(__riscv_zvfh) +float InnerProductRVV(const Float16 *lhs, const Float16 *rhs, size_t size); +float MinusInnerProductRVV(const Float16 *lhs, const Float16 *rhs, size_t size); +#endif + #if defined(__AVX__) float InnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, size_t size); float MinusInnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, @@ -58,6 +63,8 @@ void InnerProductMatrix::Compute(const ValueType *m, float *out) { #if defined(__ARM_NEON) *out = InnerProductFp16NEON(m, q, dim); +#elif defined(__riscv_zvfh) + *out = InnerProductRVV(m, q, dim); #else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { @@ -88,6 +95,8 @@ void MinusInnerProductMatrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) *out = MinusInnerProductFp16NEON(m, q, dim); +#elif defined(__riscv_zvfh) + *out = MinusInnerProductRVV(m, q, dim); #else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { diff --git a/src/ailego/math/inner_product_matrix_fp16_rvv.cc b/src/ailego/math/inner_product_matrix_fp16_rvv.cc new file mode 100644 index 000000000..2f059a0f3 --- /dev/null +++ b/src/ailego/math/inner_product_matrix_fp16_rvv.cc @@ -0,0 +1,60 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "inner_product_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_zvfh) +namespace { + +static inline float InnerProductRVVImpl(const Float16 *lhs, const Float16 *rhs, + size_t size) { + const _Float16 *lhs_fp16 = reinterpret_cast(lhs); + const _Float16 *rhs_fp16 = reinterpret_cast(rhs); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e16m4(size); + vfloat16m4_t v_lhs = __riscv_vle16_v_f16m4(lhs_fp16, vl); + vfloat16m4_t v_rhs = __riscv_vle16_v_f16m4(rhs_fp16, vl); + v_sum = __riscv_vfwmacc_vv_f32m8_tu(v_sum, v_lhs, v_rhs, vl); + lhs_fp16 += vl; + rhs_fp16 += vl; + size -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_red = __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_red); +} + +} // namespace + +float InnerProductRVV(const Float16 *lhs, const Float16 *rhs, size_t size) { + return InnerProductRVVImpl(lhs, rhs, size); +} + +float MinusInnerProductRVV(const Float16 *lhs, const Float16 *rhs, + size_t size) { + return -InnerProductRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_zvfh + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/inner_product_matrix_fp32_dispatch.cc b/src/ailego/math/inner_product_matrix_fp32_dispatch.cc index 8b289b6e6..a7de76de4 100644 --- a/src/ailego/math/inner_product_matrix_fp32_dispatch.cc +++ b/src/ailego/math/inner_product_matrix_fp32_dispatch.cc @@ -26,6 +26,11 @@ float MinusInnerProductFp32NEON(const float *lhs, const float *rhs, size_t size); #endif +#if defined(__riscv_vector) +float InnerProductRVV(const float *lhs, const float *rhs, size_t size); +float MinusInnerProductRVV(const float *lhs, const float *rhs, size_t size); +#endif + #if defined(__AVX512F__) float InnerProductFp32AVX512(const float *lhs, const float *rhs, size_t size); float MinusInnerProductFp32AVX512(const float *lhs, const float *rhs, @@ -51,6 +56,8 @@ void InnerProductMatrix::Compute(const float *m, const float *q, size_t dim, float *out) { #if defined(__ARM_NEON) *out = InnerProductFp32NEON(m, q, dim); +#elif defined(__riscv_vector) + *out = InnerProductRVV(m, q, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -82,6 +89,8 @@ void MinusInnerProductMatrix::Compute(const float *m, float *out) { #if defined(__ARM_NEON) *out = MinusInnerProductFp32NEON(m, q, dim); +#elif defined(__riscv_vector) + *out = MinusInnerProductRVV(m, q, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { diff --git a/src/ailego/math/inner_product_matrix_fp32_rvv.cc b/src/ailego/math/inner_product_matrix_fp32_rvv.cc new file mode 100644 index 000000000..c942f6871 --- /dev/null +++ b/src/ailego/math/inner_product_matrix_fp32_rvv.cc @@ -0,0 +1,59 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "inner_product_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float InnerProductRVVImpl(const float *lhs, const float *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + size_t vl = __riscv_vsetvl_e32m8(size); + vfloat32m8_t v_lhs = __riscv_vle32_v_f32m8(lhs, vl); + vfloat32m8_t v_rhs = __riscv_vle32_v_f32m8(rhs, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_lhs, v_rhs, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_red = __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_red); +} + +} // namespace + +//! Inner Product +float InnerProductRVV(const float *lhs, const float *rhs, size_t size) { + return InnerProductRVVImpl(lhs, rhs, size); +} + +//! Minus Inner Product +float MinusInnerProductRVV(const float *lhs, const float *rhs, size_t size) { + return -InnerProductRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/inner_product_matrix_int8_dispatch.cc b/src/ailego/math/inner_product_matrix_int8_dispatch.cc index d2faac29e..63e9daf56 100644 --- a/src/ailego/math/inner_product_matrix_int8_dispatch.cc +++ b/src/ailego/math/inner_product_matrix_int8_dispatch.cc @@ -21,6 +21,11 @@ namespace ailego { //-------------------------------------------------- // Dense //-------------------------------------------------- +#if defined(__riscv_vector) +float InnerProductRVV(const int8_t *lhs, const int8_t *rhs, size_t size); +float MinusInnerProductRVV(const int8_t *lhs, const int8_t *rhs, size_t size); +#endif + #if defined(__AVX2__) float InnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size); float MinusInnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, @@ -39,6 +44,9 @@ float MinusInnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim); //! Compute the distance between matrix and query (INT8, M=1, N=1) void InnerProductMatrix::Compute(const int8_t *m, const int8_t *q, size_t dim, float *out) { +#if defined(__riscv_vector) + *out = InnerProductRVV(m, q, dim); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { *out = InnerProductInt8AVX2(m, q, dim); @@ -55,12 +63,16 @@ void InnerProductMatrix::Compute(const int8_t *m, const int8_t *q, #endif //__SSE4_1__ *out = InnerProductInt8Scalar(m, q, dim); +#endif // __riscv_vector } //! Compute the distance between matrix and query (INT8, M=1, N=1) void MinusInnerProductMatrix::Compute(const int8_t *m, const int8_t *q, size_t dim, float *out) { +#if defined(__riscv_vector) + *out = MinusInnerProductRVV(m, q, dim); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { *out = MinusInnerProductInt8AVX2(m, q, dim); @@ -76,6 +88,7 @@ void MinusInnerProductMatrix::Compute(const int8_t *m, #endif //__SSE4_1__ *out = MinusInnerProductInt8Scalar(m, q, dim); +#endif // __riscv_vector } } // namespace ailego diff --git a/src/ailego/math/inner_product_matrix_int8_rvv.cc b/src/ailego/math/inner_product_matrix_int8_rvv.cc new file mode 100644 index 000000000..4bb97006c --- /dev/null +++ b/src/ailego/math/inner_product_matrix_int8_rvv.cc @@ -0,0 +1,59 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "inner_product_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float InnerProductRVVImpl(const int8_t *lhs, const int8_t *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + vint32m8_t v_sum = __riscv_vmv_v_x_i32m8(0, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e8m2(size); + const vint8m2_t v_lhs8 = __riscv_vle8_v_i8m2(lhs, vl); + const vint8m2_t v_rhs8 = __riscv_vle8_v_i8m2(rhs, vl); + const vint16m4_t v_lhs16 = __riscv_vsext_vf2_i16m4(v_lhs8, vl); + const vint16m4_t v_rhs16 = __riscv_vsext_vf2_i16m4(v_rhs8, vl); + v_sum = __riscv_vwmacc_vv_i32m8_tu(v_sum, v_lhs16, v_rhs16, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + const vint32m1_t v_zero = __riscv_vmv_v_x_i32m1(0, 1); + const vint32m1_t v_red = __riscv_vredsum_vs_i32m8_i32m1(v_sum, v_zero, vlmax); + return static_cast(__riscv_vmv_x_s_i32m1_i32(v_red)); +} + +} // namespace + +float InnerProductRVV(const int8_t *lhs, const int8_t *rhs, size_t size) { + return InnerProductRVVImpl(lhs, rhs, size); +} + +float MinusInnerProductRVV(const int8_t *lhs, const int8_t *rhs, size_t size) { + return -InnerProductRVVImpl(lhs, rhs, size); +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc index 8e40563cf..ad55754de 100644 --- a/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc +++ b/src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc @@ -26,6 +26,16 @@ float MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs, size_t size, float e2); #endif +#if defined(__riscv_zvfh) +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const Float16 *lhs, + const Float16 *rhs, + size_t size, size_t m, + float e2); +float MipsEuclideanDistanceSphericalInjectionRVV(const Float16 *lhs, + const Float16 *rhs, + size_t size, float e2); +#endif + #if defined(__AVX512F__) float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512( const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2); @@ -53,6 +63,8 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) { #if defined(__ARM_NEON) *out = MipsEuclideanDistanceSphericalInjectionFp16NEON(p, q, dim, e2); +#elif defined(__riscv_zvfh) + *out = MipsEuclideanDistanceSphericalInjectionRVV(p, q, dim, e2); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -78,6 +90,8 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( #if defined(__ARM_NEON) *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(p, q, dim, m, e2); +#elif defined(__riscv_zvfh) + *out = MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(p, q, dim, m, e2); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp16_rvv.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp16_rvv.cc new file mode 100644 index 000000000..461d55a12 --- /dev/null +++ b/src/ailego/math/mips_euclidean_distance_matrix_fp16_rvv.cc @@ -0,0 +1,100 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "mips_euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_zvfh) +namespace { + +static inline float HorizontalReduceF32M8(vfloat32m8_t value, size_t vlmax) { + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(value, zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(red); +} + +static inline float InnerProductRVV(const Float16 *lhs, const Float16 *rhs, + size_t size) { + const _Float16 *lhs_fp16 = reinterpret_cast(lhs); + const _Float16 *rhs_fp16 = reinterpret_cast(rhs); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e16m4(size); + vfloat16m4_t v_lhs = __riscv_vle16_v_f16m4(lhs_fp16, vl); + vfloat16m4_t v_rhs = __riscv_vle16_v_f16m4(rhs_fp16, vl); + v_sum = __riscv_vfwmacc_vv_f32m8_tu(v_sum, v_lhs, v_rhs, vl); + lhs_fp16 += vl; + rhs_fp16 += vl; + size -= vl; + } + + return HorizontalReduceF32M8(v_sum, vlmax); +} + +static inline float SquaredNormRVV(const Float16 *src, size_t size) { + const _Float16 *src_fp16 = reinterpret_cast(src); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e16m4(size); + vfloat16m4_t v_src = __riscv_vle16_v_f16m4(src_fp16, vl); + v_sum = __riscv_vfwmacc_vv_f32m8_tu(v_sum, v_src, v_src, vl); + src_fp16 += vl; + size -= vl; + } + + return HorizontalReduceF32M8(v_sum, vlmax); +} + +} // namespace + +float MipsEuclideanDistanceSphericalInjectionRVV(const Float16 *lhs, + const Float16 *rhs, + size_t size, float e2) { + float sum = InnerProductRVV(lhs, rhs, size); + float u2 = SquaredNormRVV(lhs, size); + float v2 = SquaredNormRVV(rhs, size); + return ComputeSphericalInjection(sum, u2, v2, e2); +} + +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const Float16 *lhs, + const Float16 *rhs, + size_t size, size_t m, + float e2) { + float sum = InnerProductRVV(lhs, rhs, size); + float u2 = SquaredNormRVV(lhs, size); + float v2 = SquaredNormRVV(rhs, size); + + sum = e2 * (u2 + v2 - 2.0f * sum); + u2 *= e2; + v2 *= e2; + for (size_t i = 0; i < m; ++i) { + float d = u2 - v2; + sum += d * d; + u2 = u2 * u2; + v2 = v2 * v2; + } + return sum; +} + +#endif // __riscv_zvfh + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc index f48626a3f..38c5e24c8 100644 --- a/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc +++ b/src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc @@ -23,6 +23,16 @@ float InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs, size_t size, float *sql, float *sqr); #endif +#if defined(__riscv_vector) +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const float *lhs, + const float *rhs, + size_t size, size_t m, + float e2); +float MipsEuclideanDistanceSphericalInjectionRVV(const float *lhs, + const float *rhs, size_t size, + float e2); +#endif + #if defined(__AVX512F__) float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512( const float *lhs, const float *rhs, size_t size, size_t m, float e2); @@ -70,6 +80,8 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( *out = ComputeSphericalInjection(sum, u2, v2, e2); return; +#elif defined(__riscv_vector) + *out = MipsEuclideanDistanceSphericalInjectionRVV(p, q, dim, e2); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -113,6 +125,8 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( } *out = sum; return; +#elif defined(__riscv_vector) + *out = MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(p, q, dim, m, e2); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { diff --git a/src/ailego/math/mips_euclidean_distance_matrix_fp32_rvv.cc b/src/ailego/math/mips_euclidean_distance_matrix_fp32_rvv.cc new file mode 100644 index 000000000..892ee01fd --- /dev/null +++ b/src/ailego/math/mips_euclidean_distance_matrix_fp32_rvv.cc @@ -0,0 +1,97 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "mips_euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float HorizontalReduceF32M8(vfloat32m8_t value, size_t vlmax) { + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(value, zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(red); +} + +static inline float InnerProductRVV(const float *lhs, const float *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + size_t vl = __riscv_vsetvl_e32m8(size); + vfloat32m8_t v_lhs = __riscv_vle32_v_f32m8(lhs, vl); + vfloat32m8_t v_rhs = __riscv_vle32_v_f32m8(rhs, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_lhs, v_rhs, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + return HorizontalReduceF32M8(v_sum, vlmax); +} + +static inline float SquaredNormRVV(const float *src, size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (size != 0) { + size_t vl = __riscv_vsetvl_e32m8(size); + vfloat32m8_t v_src = __riscv_vle32_v_f32m8(src, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_src, v_src, vl); + src += vl; + size -= vl; + } + + return HorizontalReduceF32M8(v_sum, vlmax); +} + +} // namespace + +float MipsEuclideanDistanceSphericalInjectionRVV(const float *lhs, + const float *rhs, size_t size, + float e2) { + float sum = InnerProductRVV(lhs, rhs, size); + float u2 = SquaredNormRVV(lhs, size); + float v2 = SquaredNormRVV(rhs, size); + return ComputeSphericalInjection(sum, u2, v2, e2); +} + +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const float *lhs, + const float *rhs, + size_t size, size_t m, + float e2) { + float sum = InnerProductRVV(lhs, rhs, size); + float u2 = SquaredNormRVV(lhs, size); + float v2 = SquaredNormRVV(rhs, size); + + sum = e2 * (u2 + v2 - 2.0f * sum); + u2 *= e2; + v2 *= e2; + for (size_t i = 0; i < m; ++i) { + float d = u2 - v2; + sum += d * d; + u2 = u2 * u2; + v2 = v2 * v2; + } + return sum; +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc b/src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc index f0f744940..5d8047e97 100644 --- a/src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc +++ b/src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc @@ -18,6 +18,16 @@ namespace zvec { namespace ailego { +#if defined(__riscv_vector) +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const int8_t *lhs, + const int8_t *rhs, + size_t size, size_t m, + float e2); +float MipsEuclideanDistanceSphericalInjectionRVV(const int8_t *lhs, + const int8_t *rhs, size_t size, + float e2); +#endif + #if defined(__AVX2__) float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2( const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2); @@ -43,6 +53,9 @@ float MipsEuclideanDistanceSphericalInjectionInt8Scalar(const int8_t *lhs, //! Compute the distance between matrix and query by SphericalInjection void MipsSquaredEuclideanDistanceMatrix::Compute( const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) { +#if defined(__riscv_vector) + *out = MipsEuclideanDistanceSphericalInjectionRVV(p, q, dim, e2); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { *out = MipsEuclideanDistanceSphericalInjectionInt8AVX2(p, q, dim, e2); @@ -58,12 +71,16 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( #endif //__SSE4_1__ *out = MipsEuclideanDistanceSphericalInjectionInt8Scalar(p, q, dim, e2); +#endif // __riscv_vector } //! Compute the distance between matrix and query by RepeatedQuadraticInjection void MipsSquaredEuclideanDistanceMatrix::Compute( const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2, float *out) { +#if defined(__riscv_vector) + *out = MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(p, q, dim, m, e2); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(p, q, dim, m, @@ -81,6 +98,7 @@ void MipsSquaredEuclideanDistanceMatrix::Compute( *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(p, q, dim, m, e2); +#endif // __riscv_vector } } // namespace ailego diff --git a/src/ailego/math/mips_euclidean_distance_matrix_int8_rvv.cc b/src/ailego/math/mips_euclidean_distance_matrix_int8_rvv.cc new file mode 100644 index 000000000..5b96478c4 --- /dev/null +++ b/src/ailego/math/mips_euclidean_distance_matrix_int8_rvv.cc @@ -0,0 +1,100 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "mips_euclidean_distance_matrix.h" + +namespace zvec { +namespace ailego { + +#if defined(__riscv_vector) +namespace { + +static inline float HorizontalReduceI32M8(vint32m8_t value, size_t vlmax) { + const vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, 1); + const vint32m1_t red = __riscv_vredsum_vs_i32m8_i32m1(value, zero, vlmax); + return static_cast(__riscv_vmv_x_s_i32m1_i32(red)); +} + +static inline float InnerProductRVV(const int8_t *lhs, const int8_t *rhs, + size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + vint32m8_t v_sum = __riscv_vmv_v_x_i32m8(0, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e8m2(size); + const vint8m2_t v_lhs8 = __riscv_vle8_v_i8m2(lhs, vl); + const vint8m2_t v_rhs8 = __riscv_vle8_v_i8m2(rhs, vl); + const vint16m4_t v_lhs16 = __riscv_vsext_vf2_i16m4(v_lhs8, vl); + const vint16m4_t v_rhs16 = __riscv_vsext_vf2_i16m4(v_rhs8, vl); + v_sum = __riscv_vwmacc_vv_i32m8_tu(v_sum, v_lhs16, v_rhs16, vl); + lhs += vl; + rhs += vl; + size -= vl; + } + + return HorizontalReduceI32M8(v_sum, vlmax); +} + +static inline float SquaredNormRVV(const int8_t *src, size_t size) { + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + vint32m8_t v_sum = __riscv_vmv_v_x_i32m8(0, vlmax); + + while (size != 0) { + const size_t vl = __riscv_vsetvl_e8m2(size); + const vint8m2_t v_src8 = __riscv_vle8_v_i8m2(src, vl); + const vint16m4_t v_src16 = __riscv_vsext_vf2_i16m4(v_src8, vl); + v_sum = __riscv_vwmacc_vv_i32m8_tu(v_sum, v_src16, v_src16, vl); + src += vl; + size -= vl; + } + + return HorizontalReduceI32M8(v_sum, vlmax); +} + +} // namespace + +float MipsEuclideanDistanceSphericalInjectionRVV(const int8_t *lhs, + const int8_t *rhs, size_t size, + float e2) { + const float sum = InnerProductRVV(lhs, rhs, size); + const float u2 = SquaredNormRVV(lhs, size); + const float v2 = SquaredNormRVV(rhs, size); + return ComputeSphericalInjection(sum, u2, v2, e2); +} + +float MipsEuclideanDistanceRepeatedQuadraticInjectionRVV(const int8_t *lhs, + const int8_t *rhs, + size_t size, size_t m, + float e2) { + float sum = InnerProductRVV(lhs, rhs, size); + float u2 = SquaredNormRVV(lhs, size); + float v2 = SquaredNormRVV(rhs, size); + + sum = e2 * (u2 + v2 - 2.0f * sum); + u2 *= e2; + v2 *= e2; + for (size_t i = 0; i < m; ++i) { + const float d = u2 - v2; + sum += d * d; + u2 = u2 * u2; + v2 = v2 * v2; + } + return sum; +} + +#endif // __riscv_vector + +} // namespace ailego +} // namespace zvec diff --git a/src/ailego/math/norm1_matrix.h b/src/ailego/math/norm1_matrix.h index 7e8d9cbc8..b06f3bb9b 100644 --- a/src/ailego/math/norm1_matrix.h +++ b/src/ailego/math/norm1_matrix.h @@ -116,7 +116,8 @@ struct Norm1Matrix< } }; -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) /*! L1-Norm Matrix (FP32, M=1) */ template <> @@ -127,10 +128,10 @@ struct Norm1Matrix { //! Compute the L1-norm of vectors static void Compute(const ValueType *m, size_t dim, float *out); }; -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) /*! L1-Norm Matrix (FP16, M=1) */ template <> @@ -141,7 +142,7 @@ struct Norm1Matrix { //! Compute the L1-norm of vectors static void Compute(const ValueType *m, size_t dim, float *out); }; -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec diff --git a/src/ailego/math/norm1_matrix_fp16.cc b/src/ailego/math/norm1_matrix_fp16.cc index e75b3e0a8..1941b9325 100644 --- a/src/ailego/math/norm1_matrix_fp16.cc +++ b/src/ailego/math/norm1_matrix_fp16.cc @@ -67,13 +67,38 @@ static const __m512 ABS_MASK_FP32_AVX512 = //! Calculate sum of absolute (NEON) #define SA_FP16_NEON(v_m, v_sum) v_sum = vaddq_f16(vabsq_f16(v_m), v_sum); +#if defined(__riscv_zvfh) +//! Compute the L1-norm of vector (RVV) +static inline float Norm1RVV(const Float16 *m, size_t dim) { + const _Float16 *m_fp16 = reinterpret_cast(m); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e16m4(dim); + vfloat16m4_t v_m = __riscv_vle16_v_f16m4(m_fp16, vl); + vfloat16m4_t v_abs = __riscv_vfabs_v_f16m4(v_m, vl); + v_sum = __riscv_vfwadd_wv_f32m8_tu(v_sum, v_sum, v_abs, vl); + m_fp16 += vl; + dim -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_reduce = + __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_reduce); +} +#endif // __riscv_zvfh + #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) //! Compute the L1-norm of vectors (FP16, M=1) void Norm1Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP16_1_NEON(m, dim, out, ) +#elif defined(__riscv_zvfh) + *out = Norm1RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -84,7 +109,7 @@ void Norm1Matrix::Compute(const ValueType *m, size_t dim, NORM_FP16_1_AVX(m, dim, out, ) #endif } -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec diff --git a/src/ailego/math/norm1_matrix_fp32.cc b/src/ailego/math/norm1_matrix_fp32.cc index 2e7279118..fa1a5d27d 100644 --- a/src/ailego/math/norm1_matrix_fp32.cc +++ b/src/ailego/math/norm1_matrix_fp32.cc @@ -56,12 +56,37 @@ namespace ailego { //! Calculate sum of absolute (NEON) #define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum); -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__riscv_vector) +//! Compute the L1-norm of vector (RVV) +static inline float Norm1RVV(const float *m, size_t dim) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e32m8(dim); + vfloat32m8_t v_m = __riscv_vle32_v_f32m8(m, vl); + vfloat32m8_t v_abs = __riscv_vfabs_v_f32m8(v_m, vl); + v_sum = __riscv_vfadd_vv_f32m8_tu(v_sum, v_sum, v_abs, vl); + m += vl; + dim -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_reduce = + __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_reduce); +} +#endif // __riscv_vector + +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) //! Compute the L1-norm of vectors (FP32, M=1) void Norm1Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP32_1_NEON(m, dim, out, ) +#elif defined(__riscv_vector) + *out = Norm1RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -78,7 +103,7 @@ void Norm1Matrix::Compute(const ValueType *m, size_t dim, NORM_FP32_1_SSE(m, dim, out, ) #endif } -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector } // namespace ailego } // namespace zvec diff --git a/src/ailego/math/norm2_matrix.h b/src/ailego/math/norm2_matrix.h index 3c905147d..f4a598c53 100644 --- a/src/ailego/math/norm2_matrix.h +++ b/src/ailego/math/norm2_matrix.h @@ -371,7 +371,8 @@ struct SquaredNorm2Matrix= 2>::type> { } }; -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) /*! L2-Norm Matrix (FP32, M=1) */ template <> @@ -393,10 +394,10 @@ struct SquaredNorm2Matrix { //! Compute the squared L2-norm of vectors static void Compute(const ValueType *m, size_t dim, float *out); }; -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) /*! L2-Norm Matrix (FP16, M=1) */ template <> @@ -418,7 +419,7 @@ struct SquaredNorm2Matrix { //! Compute the squared L2-norm of vectors static void Compute(const ValueType *m, size_t dim, float *out); }; -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec diff --git a/src/ailego/math/norm2_matrix_fp16.cc b/src/ailego/math/norm2_matrix_fp16.cc index 6bb8dd06c..a8fac9438 100644 --- a/src/ailego/math/norm2_matrix_fp16.cc +++ b/src/ailego/math/norm2_matrix_fp16.cc @@ -52,13 +52,42 @@ namespace ailego { //! Calculate sum of squared (NEON) #define SS_FP16_NEON(v_m, v_sum) v_sum = vfmaq_f16(v_sum, v_m, v_m); +#if defined(__riscv_zvfh) +//! Compute the squared L2-norm of vector (RVV) +static inline float SquaredNorm2RVV(const Float16 *m, size_t dim) { + const _Float16 *m_fp16 = reinterpret_cast(m); + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e16m4(dim); + vfloat16m4_t v_m = __riscv_vle16_v_f16m4(m_fp16, vl); + v_sum = __riscv_vfwmacc_vv_f32m8_tu(v_sum, v_m, v_m, vl); + m_fp16 += vl; + dim -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_reduce = + __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_reduce); +} + +//! Compute the L2-norm of vector (RVV) +static inline float Norm2RVV(const Float16 *m, size_t dim) { + return std::sqrt(SquaredNorm2RVV(m, dim)); +} +#endif // __riscv_zvfh + #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) //! Compute the L2-norm of vectors (FP16, M=1) void Norm2Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP16_1_NEON(m, dim, out, std::sqrt) +#elif defined(__riscv_zvfh) + *out = Norm2RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -75,6 +104,8 @@ void SquaredNorm2Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP16_1_NEON(m, dim, out, ) +#elif defined(__riscv_zvfh) + *out = SquaredNorm2RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -85,7 +116,7 @@ void SquaredNorm2Matrix::Compute(const ValueType *m, size_t dim, NORM_FP16_1_AVX(m, dim, out, ) #endif } -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec \ No newline at end of file diff --git a/src/ailego/math/norm2_matrix_fp32.cc b/src/ailego/math/norm2_matrix_fp32.cc index 8cc76c1f5..b700382ce 100644 --- a/src/ailego/math/norm2_matrix_fp32.cc +++ b/src/ailego/math/norm2_matrix_fp32.cc @@ -43,12 +43,41 @@ namespace ailego { //! Calculate sum of squared (NEON) #define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m); -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__riscv_vector) +//! Compute the squared L2-norm of vector (RVV) +static inline float SquaredNorm2RVV(const float *m, size_t dim) { + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t v_sum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e32m8(dim); + vfloat32m8_t v_m = __riscv_vle32_v_f32m8(m, vl); + v_sum = __riscv_vfmacc_vv_f32m8_tu(v_sum, v_m, v_m, vl); + m += vl; + dim -= vl; + } + + vfloat32m1_t v_zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t v_reduce = + __riscv_vfredusum_vs_f32m8_f32m1(v_sum, v_zero, vlmax); + return __riscv_vfmv_f_s_f32m1_f32(v_reduce); +} + +//! Compute the L2-norm of vector (RVV) +static inline float Norm2RVV(const float *m, size_t dim) { + return std::sqrt(SquaredNorm2RVV(m, dim)); +} +#endif // __riscv_vector + +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) //! Compute the L2-norm of vectors (FP32, M=1) void Norm2Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP32_1_NEON(m, dim, out, std::sqrt) +#elif defined(__riscv_vector) + *out = Norm2RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -71,6 +100,8 @@ void SquaredNorm2Matrix::Compute(const ValueType *m, size_t dim, float *out) { #if defined(__ARM_NEON) NORM_FP32_1_NEON(m, dim, out, ) +#elif defined(__riscv_vector) + *out = SquaredNorm2RVV(m, dim); #else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { @@ -87,7 +118,7 @@ void SquaredNorm2Matrix::Compute(const ValueType *m, size_t dim, NORM_FP32_1_SSE(m, dim, out, ) #endif } -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector } // namespace ailego } // namespace zvec diff --git a/src/ailego/math/normalizer.cc b/src/ailego/math/normalizer.cc index a31a9f350..e3802485e 100644 --- a/src/ailego/math/normalizer.cc +++ b/src/ailego/math/normalizer.cc @@ -17,6 +17,34 @@ namespace zvec { namespace ailego { +#if defined(__riscv_vector) +static inline void NormalizeRVV(float *arr, size_t dim, float norm) { + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e32m8(dim); + vfloat32m8_t v_arr = __riscv_vle32_v_f32m8(arr, vl); + v_arr = __riscv_vfdiv_vf_f32m8(v_arr, norm, vl); + __riscv_vse32_v_f32m8(arr, v_arr, vl); + arr += vl; + dim -= vl; + } +} +#endif // __riscv_vector + +#if defined(__riscv_zvfh) +static inline void NormalizeRVV(Float16 *arr, size_t dim, float norm) { + _Float16 *arr_fp16 = reinterpret_cast<_Float16 *>(arr); + const _Float16 norm_fp16 = static_cast<_Float16>(norm); + while (dim != 0) { + const size_t vl = __riscv_vsetvl_e16m8(dim); + vfloat16m8_t v_arr = __riscv_vle16_v_f16m8(arr_fp16, vl); + v_arr = __riscv_vfdiv_vf_f16m8(v_arr, norm_fp16, vl); + __riscv_vse16_v_f16m8(arr_fp16, v_arr, vl); + arr_fp16 += vl; + dim -= vl; + } +} +#endif // __riscv_zvfh + #if (defined(__ARM_NEON) && defined(__aarch64__)) static inline void NormalizeNEON(float *arr, size_t dim, float norm) { float *last = arr + dim; @@ -392,11 +420,14 @@ static inline void NormalizeSSE(float *arr, size_t dim, float norm) { } #endif // __SSE__ -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) //! Compute the norm of vector void Normalizer::Compute(ValueType *arr, size_t dim, float norm) { #if defined(__ARM_NEON) NormalizeNEON(arr, dim, norm); +#elif defined(__riscv_vector) + NormalizeRVV(arr, dim, norm); #else #if defined(__AVX512F__) if (dim > 15) { @@ -413,14 +444,16 @@ void Normalizer::Compute(ValueType *arr, size_t dim, float norm) { NormalizeSSE(arr, dim, norm); #endif // __ARM_NEON } -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) //! Compute the norm of vector void Normalizer::Compute(ValueType *arr, size_t dim, float norm) { #if defined(__ARM_NEON) NormalizeNEON(reinterpret_cast(arr), dim, norm); +#elif defined(__riscv_zvfh) + NormalizeRVV(arr, dim, norm); #else #if defined(__AVX512F__) if (dim > 31) { @@ -431,7 +464,7 @@ void Normalizer::Compute(ValueType *arr, size_t dim, float norm) { NormalizeAVX(reinterpret_cast(arr), dim, norm); #endif // __ARM_NEON } -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec \ No newline at end of file diff --git a/src/ailego/math/normalizer.h b/src/ailego/math/normalizer.h index 2c191b0e7..25acdce09 100644 --- a/src/ailego/math/normalizer.h +++ b/src/ailego/math/normalizer.h @@ -51,7 +51,8 @@ struct Normalizer { } }; -#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) +#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__)) || \ + defined(__riscv_vector) /*! Normalizer (FP32) */ template <> @@ -78,10 +79,10 @@ struct Normalizer { } } }; -#endif // __SSE__ || (__ARM_NEON && __aarch64__) +#endif // __SSE__ || (__ARM_NEON && __aarch64__) || __riscv_vector #if (defined(__F16C__) && defined(__AVX__)) || \ - (defined(__ARM_NEON) && defined(__aarch64__)) + (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__riscv_zvfh) /*! Normalizer (FP16) */ template <> @@ -108,7 +109,7 @@ struct Normalizer { } } }; -#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) +#endif // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__) || __riscv_zvfh } // namespace ailego } // namespace zvec diff --git a/src/ailego/math_batch/euclidean_distance_batch_dispatch.cc b/src/ailego/math_batch/euclidean_distance_batch_dispatch.cc index 5c8ffb254..345e95c4d 100644 --- a/src/ailego/math_batch/euclidean_distance_batch_dispatch.cc +++ b/src/ailego/math_batch/euclidean_distance_batch_dispatch.cc @@ -87,9 +87,37 @@ void compute_one_to_many_squared_euclidean_avx2_fp16_12( // float *results); #endif +#if defined(__riscv_vector) +void compute_one_to_many_squared_euclidean_rvv_fp32_1( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); + +void compute_one_to_many_squared_euclidean_rvv_fp32_12( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); +#endif + +#if defined(__riscv_zvfh) +void compute_one_to_many_squared_euclidean_rvv_fp16_1( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results); + +void compute_one_to_many_squared_euclidean_rvv_fp16_12( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results); +#endif + void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( const ValueType *query, const ValueType **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_squared_euclidean_rvv_fp32_1( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { return compute_one_to_many_squared_euclidean_avx2_fp32_1( @@ -98,12 +126,17 @@ void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_squared_euclidean_fallback( query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_zvfh) + return compute_one_to_many_squared_euclidean_rvv_fp16_1( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { return compute_one_to_many_squared_euclidean_avx512fp16_fp16_1( @@ -124,11 +157,16 @@ void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_squared_euclidean_fallback( query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_zvfh } void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( const float *query, const float **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_squared_euclidean_rvv_fp32_12( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX512F__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) { return compute_one_to_many_squared_euclidean_avx512f_fp32_12( @@ -144,6 +182,7 @@ void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_squared_euclidean_fallback( query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } void SquaredEuclideanDistanceBatchImpl:: @@ -151,6 +190,10 @@ void SquaredEuclideanDistanceBatchImpl:: const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_zvfh) + return compute_one_to_many_squared_euclidean_rvv_fp16_12( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { return compute_one_to_many_squared_euclidean_avx512fp16_fp16_12( @@ -171,6 +214,7 @@ void SquaredEuclideanDistanceBatchImpl:: #endif return compute_one_to_many_squared_euclidean_fallback( query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_zvfh } // void SquaredEuclideanDistanceBatchImpl::compute_one_to_many( diff --git a/src/ailego/math_batch/euclidean_distance_batch_impl_fp16_rvv.cc b/src/ailego/math_batch/euclidean_distance_batch_impl_fp16_rvv.cc new file mode 100644 index 000000000..612e561b2 --- /dev/null +++ b/src/ailego/math_batch/euclidean_distance_batch_impl_fp16_rvv.cc @@ -0,0 +1,97 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace zvec::ailego::DistanceBatch { + +#if defined(__riscv_zvfh) + +void compute_one_to_many_squared_euclidean_rvv_fp16_1( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results) { + const _Float16 *q_ptr = reinterpret_cast(query); + const _Float16 *m_ptr = reinterpret_cast(ptrs[0]); + + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e16m4(dimensionality - dim); + + vfloat16m4_t q = __riscv_vle16_v_f16m4(q_ptr + dim, vl); + vfloat16m4_t m = __riscv_vle16_v_f16m4(m_ptr + dim, vl); + + // Widen the fp16 difference to fp32 (matches the scalar/AVX reference, + // which converts to fp32 before subtracting), then square-accumulate. + vfloat32m8_t diff = __riscv_vfwsub_vv_f32m8(q, m, vl); + acc = __riscv_vfmacc_vv_f32m8_tu(acc, diff, diff, vl); + + if (prefetch_ptrs[0] != nullptr) { + ailego_prefetch(prefetch_ptrs[0] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[0] = __riscv_vfmv_f_s_f32m1_f32(red); +} + +void compute_one_to_many_squared_euclidean_rvv_fp16_12( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results) { + const _Float16 *q_ptr = reinterpret_cast(query); + + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + + for (size_t channel = 0; channel < 12; ++channel) { + const _Float16 *m_ptr = reinterpret_cast(ptrs[channel]); + + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e16m4(dimensionality - dim); + + vfloat16m4_t q = __riscv_vle16_v_f16m4(q_ptr + dim, vl); + vfloat16m4_t m = __riscv_vle16_v_f16m4(m_ptr + dim, vl); + + // Widen the fp16 difference to fp32 (matches the scalar/AVX reference, + // which converts to fp32 before subtracting), then square-accumulate. + vfloat32m8_t diff = __riscv_vfwsub_vv_f32m8(q, m, vl); + acc = __riscv_vfmacc_vv_f32m8_tu(acc, diff, diff, vl); + + if (prefetch_ptrs[channel] != nullptr) { + ailego_prefetch(prefetch_ptrs[channel] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[channel] = __riscv_vfmv_f_s_f32m1_f32(red); + } +} + +#endif + +} // namespace zvec::ailego::DistanceBatch diff --git a/src/ailego/math_batch/euclidean_distance_batch_impl_fp32_rvv.cc b/src/ailego/math_batch/euclidean_distance_batch_impl_fp32_rvv.cc new file mode 100644 index 000000000..45b6cc888 --- /dev/null +++ b/src/ailego/math_batch/euclidean_distance_batch_impl_fp32_rvv.cc @@ -0,0 +1,91 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace zvec::ailego::DistanceBatch { + +#if defined(__riscv_vector) + +void compute_one_to_many_squared_euclidean_rvv_fp32_1( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const float *q_ptr = query; + const float *m_ptr = ptrs[0]; + + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e32m8(dimensionality - dim); + + vfloat32m8_t q = __riscv_vle32_v_f32m8(q_ptr + dim, vl); + vfloat32m8_t m = __riscv_vle32_v_f32m8(m_ptr + dim, vl); + + vfloat32m8_t diff = __riscv_vfsub_vv_f32m8(q, m, vl); + acc = __riscv_vfmacc_vv_f32m8_tu(acc, diff, diff, vl); + + if (prefetch_ptrs[0] != nullptr) { + ailego_prefetch(prefetch_ptrs[0] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[0] = __riscv_vfmv_f_s_f32m1_f32(red); +} + +void compute_one_to_many_squared_euclidean_rvv_fp32_12( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const float *q_ptr = query; + + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + + for (size_t channel = 0; channel < 12; ++channel) { + const float *m_ptr = ptrs[channel]; + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e32m8(dimensionality - dim); + + vfloat32m8_t q = __riscv_vle32_v_f32m8(q_ptr + dim, vl); + vfloat32m8_t m = __riscv_vle32_v_f32m8(m_ptr + dim, vl); + + vfloat32m8_t diff = __riscv_vfsub_vv_f32m8(q, m, vl); + acc = __riscv_vfmacc_vv_f32m8_tu(acc, diff, diff, vl); + + if (prefetch_ptrs[channel] != nullptr) { + ailego_prefetch(prefetch_ptrs[channel] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[channel] = __riscv_vfmv_f_s_f32m1_f32(red); + } +} + +#endif + +} // namespace zvec::ailego::DistanceBatch diff --git a/src/ailego/math_batch/inner_product_distance_batch_dispatch.cc b/src/ailego/math_batch/inner_product_distance_batch_dispatch.cc index 78376626a..55390fbb6 100644 --- a/src/ailego/math_batch/inner_product_distance_batch_dispatch.cc +++ b/src/ailego/math_batch/inner_product_distance_batch_dispatch.cc @@ -93,9 +93,47 @@ void compute_one_to_many_inner_product_avx2_int8_12( float *results); #endif +#if defined(__riscv_vector) +void compute_one_to_many_inner_product_rvv_fp32_1( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); + +void compute_one_to_many_inner_product_rvv_fp32_12( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); + +void compute_one_to_many_inner_product_rvv_int8_1( + const int8_t *query, const int8_t **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); + +void compute_one_to_many_inner_product_rvv_int8_12( + const int8_t *query, const int8_t **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results); +#endif + +#if defined(__riscv_zvfh) +void compute_one_to_many_inner_product_rvv_fp16_1( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results); + +void compute_one_to_many_inner_product_rvv_fp16_12( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results); +#endif + void InnerProductDistanceBatchImpl::compute_one_to_many( const ValueType *query, const ValueType **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_inner_product_rvv_fp32_1(query, ptrs, + prefetch_ptrs, dim, sums); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { return compute_one_to_many_inner_product_avx2_fp32_1( @@ -104,12 +142,17 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } void InnerProductDistanceBatchImpl::compute_one_to_many( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_zvfh) + return compute_one_to_many_inner_product_rvv_fp16_1(query, ptrs, + prefetch_ptrs, dim, sums); +#else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { return compute_one_to_many_inner_product_avx512fp16_fp16_1( @@ -130,11 +173,16 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_zvfh } void InnerProductDistanceBatchImpl::compute_one_to_many( const int8_t *query, const int8_t **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_inner_product_rvv_int8_1(query, ptrs, + prefetch_ptrs, dim, sums); +#else // #if defined(__AVX512BW__) // TODO: this version is problematic // return compute_one_to_many_avx512_int8( // query, ptrs, prefetch_ptrs, dim, sums); @@ -152,6 +200,7 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } DistanceBatchQueryPreprocessFunc @@ -167,6 +216,10 @@ InnerProductDistanceBatchImpl::GetQueryPreprocessFunc() { void InnerProductDistanceBatchImpl::compute_one_to_many( const ValueType *query, const ValueType **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_inner_product_rvv_fp32_12( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX2__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) { return compute_one_to_many_inner_product_avx2_fp32_12( @@ -175,12 +228,17 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } void InnerProductDistanceBatchImpl::compute_one_to_many( const ailego::Float16 *query, const ailego::Float16 **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_zvfh) + return compute_one_to_many_inner_product_rvv_fp16_12( + query, ptrs, prefetch_ptrs, dim, sums); +#else #if defined(__AVX512FP16__) if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) { return compute_one_to_many_inner_product_avx512fp16_fp16_12( @@ -201,11 +259,16 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_zvfh } void InnerProductDistanceBatchImpl::compute_one_to_many( const int8_t *query, const int8_t **ptrs, std::array &prefetch_ptrs, size_t dim, float *sums) { +#if defined(__riscv_vector) + return compute_one_to_many_inner_product_rvv_int8_12( + query, ptrs, prefetch_ptrs, dim, sums); +#else // #if defined(__AVX512BW__) // TODO: this version is problematic // return compute_one_to_many_avx512_int8( // query, ptrs, prefetch_ptrs, dim, sums); @@ -223,6 +286,7 @@ void InnerProductDistanceBatchImpl::compute_one_to_many( #endif return compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim, sums); +#endif // __riscv_vector } } // namespace zvec::ailego::DistanceBatch diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_fp16_rvv.cc b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16_rvv.cc new file mode 100644 index 000000000..efff8ee56 --- /dev/null +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_fp16_rvv.cc @@ -0,0 +1,91 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace zvec::ailego::DistanceBatch { + +#if defined(__riscv_zvfh) + +void compute_one_to_many_inner_product_rvv_fp16_1( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results) { + const _Float16 *q_ptr = reinterpret_cast(query); + const _Float16 *m_ptr = reinterpret_cast(ptrs[0]); + + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e16m4(dimensionality - dim); + + vfloat16m4_t q = __riscv_vle16_v_f16m4(q_ptr + dim, vl); + vfloat16m4_t m = __riscv_vle16_v_f16m4(m_ptr + dim, vl); + + acc = __riscv_vfwmacc_vv_f32m8_tu(acc, q, m, vl); + + if (prefetch_ptrs[0] != nullptr) { + ailego_prefetch(prefetch_ptrs[0] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[0] = __riscv_vfmv_f_s_f32m1_f32(red); +} + +void compute_one_to_many_inner_product_rvv_fp16_12( + const ailego::Float16 *query, const ailego::Float16 **ptrs, + std::array &prefetch_ptrs, + size_t dimensionality, float *results) { + const _Float16 *q_ptr = reinterpret_cast(query); + + const size_t vlmax = __riscv_vsetvlmax_e16m4(); + + for (size_t channel = 0; channel < 12; ++channel) { + const _Float16 *m_ptr = reinterpret_cast(ptrs[channel]); + + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e16m4(dimensionality - dim); + + vfloat16m4_t q = __riscv_vle16_v_f16m4(q_ptr + dim, vl); + vfloat16m4_t m = __riscv_vle16_v_f16m4(m_ptr + dim, vl); + + acc = __riscv_vfwmacc_vv_f32m8_tu(acc, q, m, vl); + + if (prefetch_ptrs[channel] != nullptr) { + ailego_prefetch(prefetch_ptrs[channel] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[channel] = __riscv_vfmv_f_s_f32m1_f32(red); + } +} + +#endif + +} // namespace zvec::ailego::DistanceBatch diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_fp32_rvv.cc b/src/ailego/math_batch/inner_product_distance_batch_impl_fp32_rvv.cc new file mode 100644 index 000000000..72076567c --- /dev/null +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_fp32_rvv.cc @@ -0,0 +1,89 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace zvec::ailego::DistanceBatch { + +#if defined(__riscv_vector) + +void compute_one_to_many_inner_product_rvv_fp32_1( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const float *q_ptr = query; + const float *m_ptr = ptrs[0]; + + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e32m8(dimensionality - dim); + + vfloat32m8_t q = __riscv_vle32_v_f32m8(q_ptr + dim, vl); + vfloat32m8_t m = __riscv_vle32_v_f32m8(m_ptr + dim, vl); + + acc = __riscv_vfmacc_vv_f32m8_tu(acc, q, m, vl); + + if (prefetch_ptrs[0] != nullptr) { + ailego_prefetch(prefetch_ptrs[0] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[0] = __riscv_vfmv_f_s_f32m1_f32(red); +} + +void compute_one_to_many_inner_product_rvv_fp32_12( + const float *query, const float **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const float *q_ptr = query; + + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + + for (size_t channel = 0; channel < 12; ++channel) { + const float *m_ptr = ptrs[channel]; + vfloat32m8_t acc = __riscv_vfmv_v_f_f32m8(0.0f, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e32m8(dimensionality - dim); + + vfloat32m8_t q = __riscv_vle32_v_f32m8(q_ptr + dim, vl); + vfloat32m8_t m = __riscv_vle32_v_f32m8(m_ptr + dim, vl); + + acc = __riscv_vfmacc_vv_f32m8_tu(acc, q, m, vl); + + if (prefetch_ptrs[channel] != nullptr) { + ailego_prefetch(prefetch_ptrs[channel] + dim); + } + + dim += vl; + } + + vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, 1); + vfloat32m1_t red = __riscv_vfredusum_vs_f32m8_f32m1(acc, zero, vlmax); + results[channel] = __riscv_vfmv_f_s_f32m1_f32(red); + } +} + +#endif + +} // namespace zvec::ailego::DistanceBatch diff --git a/src/ailego/math_batch/inner_product_distance_batch_impl_int8_rvv.cc b/src/ailego/math_batch/inner_product_distance_batch_impl_int8_rvv.cc new file mode 100644 index 000000000..5f23a6003 --- /dev/null +++ b/src/ailego/math_batch/inner_product_distance_batch_impl_int8_rvv.cc @@ -0,0 +1,93 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace zvec::ailego::DistanceBatch { + +#if defined(__riscv_vector) + +void compute_one_to_many_inner_product_rvv_int8_1( + const int8_t *query, const int8_t **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const int8_t *q_ptr = query; + const int8_t *m_ptr = ptrs[0]; + + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + vint32m8_t acc = __riscv_vmv_v_x_i32m8(0, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e8m2(dimensionality - dim); + + vint8m2_t q8 = __riscv_vle8_v_i8m2(q_ptr + dim, vl); + vint8m2_t m8 = __riscv_vle8_v_i8m2(m_ptr + dim, vl); + vint16m4_t q16 = __riscv_vsext_vf2_i16m4(q8, vl); + vint16m4_t m16 = __riscv_vsext_vf2_i16m4(m8, vl); + + acc = __riscv_vwmacc_vv_i32m8_tu(acc, q16, m16, vl); + + if (prefetch_ptrs[0] != nullptr) { + ailego_prefetch(prefetch_ptrs[0] + dim); + } + + dim += vl; + } + + vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, 1); + vint32m1_t red = __riscv_vredsum_vs_i32m8_i32m1(acc, zero, vlmax); + results[0] = static_cast(__riscv_vmv_x_s_i32m1_i32(red)); +} + +void compute_one_to_many_inner_product_rvv_int8_12( + const int8_t *query, const int8_t **ptrs, + std::array &prefetch_ptrs, size_t dimensionality, + float *results) { + const int8_t *q_ptr = query; + + const size_t vlmax = __riscv_vsetvlmax_e8m2(); + + for (size_t channel = 0; channel < 12; ++channel) { + const int8_t *m_ptr = ptrs[channel]; + vint32m8_t acc = __riscv_vmv_v_x_i32m8(0, vlmax); + + size_t dim = 0; + while (dim < dimensionality) { + size_t vl = __riscv_vsetvl_e8m2(dimensionality - dim); + + vint8m2_t q8 = __riscv_vle8_v_i8m2(q_ptr + dim, vl); + vint8m2_t m8 = __riscv_vle8_v_i8m2(m_ptr + dim, vl); + vint16m4_t q16 = __riscv_vsext_vf2_i16m4(q8, vl); + vint16m4_t m16 = __riscv_vsext_vf2_i16m4(m8, vl); + + acc = __riscv_vwmacc_vv_i32m8_tu(acc, q16, m16, vl); + + if (prefetch_ptrs[channel] != nullptr) { + ailego_prefetch(prefetch_ptrs[channel] + dim); + } + + dim += vl; + } + + vint32m1_t zero = __riscv_vmv_v_x_i32m1(0, 1); + vint32m1_t red = __riscv_vredsum_vs_i32m8_i32m1(acc, zero, vlmax); + results[channel] = static_cast(__riscv_vmv_x_s_i32m1_i32(red)); + } +} + +#endif + +} // namespace zvec::ailego::DistanceBatch