Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmake/gen/rvv_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ SET(PROD_RVV_MICROKERNEL_SRCS
src/f32-vbinary/gen/f32-vsubc-rvv-u8v.c
src/f32-vclamp/gen/f32-vclamp-rvv-u8v.c
src/f32-vcmul/gen/f32-vcmul-rvv-u2v.c
src/f32-vcopysign/gen/f32-vcopysign-rvv-u8v.c
src/f32-vcopysign/gen/f32-vcopysignc-rvv-u8v.c
src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u8v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u4v.c
src/f32-vlrelu/gen/f32-vlrelu-rvv-u4v.c
src/f32-vrnd/gen/f32-vrndd-rvv-u4v.c
Expand Down Expand Up @@ -214,6 +217,9 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
src/f32-vclamp/gen/f32-vclamp-rvv-u4v.c
src/f32-vcmul/gen/f32-vcmul-rvv-u1v.c
src/f32-vcmul/gen/f32-vcmul-rvv-u4v.c
src/f32-vcopysign/gen/f32-vcopysign-rvv-u4v.c
src/f32-vcopysign/gen/f32-vcopysignc-rvv-u4v.c
src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u4v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u1v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u2v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u8v.c
Expand Down
16 changes: 16 additions & 0 deletions cmake/gen/rvvfp16arith_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f16-vbinary/gen/f16-vsub-rvvfp16arith-u8v.c
src/f16-vbinary/gen/f16-vsubc-rvvfp16arith-u8v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c
src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u8v.c
src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u8v.c
src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u8v.c
src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u8v.c
src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u8v.c
src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u8v.c
src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u4v.c
src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u8v.c
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u8v.c
src/f16-vunary/gen/f16-vneg-rvvfp16arith-u8v.c
src/f16-vunary/gen/f16-vsqr-rvvfp16arith-u8v.c
Expand Down Expand Up @@ -139,6 +147,14 @@ SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u4v.c
src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u4v.c
src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u4v.c
src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u4v.c
src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u4v.c
src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u4v.c
src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u2v.c
src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u4v.c
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u1v.c
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u2v.c
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u4v.c
Expand Down
6 changes: 6 additions & 0 deletions gen/rvv_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-vbinary/gen/f32-vsubc-rvv-u8v.c",
"src/f32-vclamp/gen/f32-vclamp-rvv-u8v.c",
"src/f32-vcmul/gen/f32-vcmul-rvv-u2v.c",
"src/f32-vcopysign/gen/f32-vcopysign-rvv-u8v.c",
"src/f32-vcopysign/gen/f32-vcopysignc-rvv-u8v.c",
"src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u8v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u4v.c",
"src/f32-vlrelu/gen/f32-vlrelu-rvv-u4v.c",
"src/f32-vrnd/gen/f32-vrndd-rvv-u4v.c",
Expand Down Expand Up @@ -211,6 +214,9 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-vclamp/gen/f32-vclamp-rvv-u4v.c",
"src/f32-vcmul/gen/f32-vcmul-rvv-u1v.c",
"src/f32-vcmul/gen/f32-vcmul-rvv-u4v.c",
"src/f32-vcopysign/gen/f32-vcopysign-rvv-u4v.c",
"src/f32-vcopysign/gen/f32-vcopysignc-rvv-u4v.c",
"src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u4v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u1v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u2v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u8v.c",
Expand Down
16 changes: 16 additions & 0 deletions gen/rvvfp16arith_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f16-vbinary/gen/f16-vsub-rvvfp16arith-u8v.c",
"src/f16-vbinary/gen/f16-vsubc-rvvfp16arith-u8v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c",
"src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u8v.c",
"src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u8v.c",
"src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u8v.c",
"src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u8v.c",
"src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u8v.c",
"src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u8v.c",
"src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u4v.c",
"src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u8v.c",
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u8v.c",
"src/f16-vunary/gen/f16-vneg-rvvfp16arith-u8v.c",
"src/f16-vunary/gen/f16-vsqr-rvvfp16arith-u8v.c",
Expand Down Expand Up @@ -136,6 +144,14 @@ NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c",
"src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u4v.c",
"src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u4v.c",
"src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u4v.c",
"src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u4v.c",
"src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u4v.c",
"src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u4v.c",
"src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u2v.c",
"src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u4v.c",
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u1v.c",
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u2v.c",
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u4v.c",
Expand Down
4 changes: 4 additions & 0 deletions scripts/generate-f16-vhswish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@ tools/xngen src/f16-vhswish/neonfp16arith.c.in -D BATCH_TILE=16 -o src/f16-vhswi
tools/xngen src/f16-vhswish/f16c.c.in -D BATCH_TILE=8 -o src/f16-vhswish/gen/f16-vhswish-f16c-u8.c &
tools/xngen src/f16-vhswish/f16c.c.in -D BATCH_TILE=16 -o src/f16-vhswish/gen/f16-vhswish-f16c-u16.c &

################################### RISC-V Vector #############################
tools/xngen src/f16-vhswish/rvv.c.in -D LMUL=4 -o src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u4v.c &
tools/xngen src/f16-vhswish/rvv.c.in -D LMUL=8 -o src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u8v.c &

wait
4 changes: 4 additions & 0 deletions scripts/generate-f16-vlrelu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,8 @@ tools/xngen src/f16-vlrelu/neonfp16arith.c.in -D BATCH_TILE=16 -o src/f16-vlrelu
tools/xngen src/f16-vlrelu/f16c.c.in -D BATCH_TILE=8 -o src/f16-vlrelu/gen/f16-vlrelu-f16c-u8.c &
tools/xngen src/f16-vlrelu/f16c.c.in -D BATCH_TILE=16 -o src/f16-vlrelu/gen/f16-vlrelu-f16c-u16.c &

################################### RISC-V Vector #############################
tools/xngen src/f16-vlrelu/rvv.c.in -D LMUL=4 -o src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u4v.c &
tools/xngen src/f16-vlrelu/rvv.c.in -D LMUL=8 -o src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u8v.c &

wait
12 changes: 11 additions & 1 deletion scripts/generate-f16-vrnd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ tools/xngen src/f16-vrnd/neonfp16arith.c.in -D OP=RNDU -D BATCH_TILE=16 -o src/
tools/xngen src/f16-vrnd/neonfp16arith.c.in -D OP=RNDD -D BATCH_TILE=8 -o src/f16-vrnd/gen/f16-vrndd-neonfp16arith-u8.c &
tools/xngen src/f16-vrnd/neonfp16arith.c.in -D OP=RNDD -D BATCH_TILE=16 -o src/f16-vrnd/gen/f16-vrndd-neonfp16arith-u16.c &

################################# x86 F16C #################################
################################# x86 F16C ####################################
tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDNE -D BATCH_TILE=8 -o src/f16-vrnd/gen/f16-vrndne-f16c-u8.c &
tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDNE -D BATCH_TILE=16 -o src/f16-vrnd/gen/f16-vrndne-f16c-u16.c &
tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDZ -D BATCH_TILE=8 -o src/f16-vrnd/gen/f16-vrndz-f16c-u8.c &
Expand All @@ -24,4 +24,14 @@ tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDU -D BATCH_TILE=16 -o src/f16-vrnd/
tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDD -D BATCH_TILE=8 -o src/f16-vrnd/gen/f16-vrndd-f16c-u8.c &
tools/xngen src/f16-vrnd/f16c.c.in -D OP=RNDD -D BATCH_TILE=16 -o src/f16-vrnd/gen/f16-vrndd-f16c-u16.c &

################################ RISC-V Vector ################################
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDNE -D LMUL=4 -o src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u4v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDNE -D LMUL=8 -o src/f16-vrnd/gen/f16-vrndne-rvvfp16arith-u8v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDZ -D LMUL=4 -o src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u4v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDZ -D LMUL=8 -o src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u8v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDU -D LMUL=4 -o src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u4v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDU -D LMUL=8 -o src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u8v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDD -D LMUL=4 -o src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u4v.c &
tools/xngen src/f16-vrnd/rvv.c.in -D OP=RNDD -D LMUL=8 -o src/f16-vrnd/gen/f16-vrndd-rvvfp16arith-u8v.c &

wait
6 changes: 5 additions & 1 deletion scripts/generate-f16-vrsqrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

############################### ARM NEONFP16ARITH ##############################
############################### ARM NEONFP16ARITH #############################
tools/xngen src/f16-vrsqrt/neonfp16arith-rsqrt.c.in -D BATCH_TILE=8 -D FULL_ACC=1 -o src/f16-vrsqrt/gen/f16-vrsqrt-neonfp16arith-rsqrt-u8.c &
tools/xngen src/f16-vrsqrt/neonfp16arith-rsqrt.c.in -D BATCH_TILE=16 -D FULL_ACC=1 -o src/f16-vrsqrt/gen/f16-vrsqrt-neonfp16arith-rsqrt-u16.c &
tools/xngen src/f16-vrsqrt/neonfp16arith-rsqrt.c.in -D BATCH_TILE=32 -D FULL_ACC=1 -o src/f16-vrsqrt/gen/f16-vrsqrt-neonfp16arith-rsqrt-u32.c &
Expand All @@ -14,4 +14,8 @@ tools/xngen src/f16-vrsqrt/f16c-rsqrt.c.in -D BATCH_TILE=8 -o src/f16-vrsqrt/ge
tools/xngen src/f16-vrsqrt/f16c-rsqrt.c.in -D BATCH_TILE=16 -o src/f16-vrsqrt/gen/f16-vrsqrt-f16c-rsqrt-u16.c &
tools/xngen src/f16-vrsqrt/f16c-rsqrt.c.in -D BATCH_TILE=32 -o src/f16-vrsqrt/gen/f16-vrsqrt-f16c-rsqrt-u32.c &

############################### RISC-V Vector #################################
tools/xngen src/f16-vrsqrt/rvv.c.in -D LMUL=2 -o src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u2v.c &
tools/xngen src/f16-vrsqrt/rvv.c.in -D LMUL=4 -o src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u4v.c &

wait
4 changes: 4 additions & 0 deletions scripts/generate-f16-vsqrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,8 @@ tools/xngen src/f16-vsqrt/avx512fp16-sqrt.c.in -D BATCH_TILE=32 -o src/f16-vsqr
tools/xngen src/f16-vsqrt/avx512fp16-sqrt.c.in -D BATCH_TILE=64 -o src/f16-vsqrt/gen/f16-vsqrt-avx512fp16-sqrt-u64.c &
tools/xngen src/f16-vsqrt/avx512fp16-sqrt.c.in -D BATCH_TILE=128 -o src/f16-vsqrt/gen/f16-vsqrt-avx512fp16-sqrt-u128.c &

################################ RISC-V Vector ################################
tools/xngen src/f16-vsqrt/rvv.c.in -D LMUL=4 -o src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u4v.c &
tools/xngen src/f16-vsqrt/rvv.c.in -D LMUL=8 -o src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u8v.c &

wait
11 changes: 10 additions & 1 deletion scripts/generate-f32-vcopysign.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ tools/xngen src/f32-vcopysign/copysign.c.in -D ARCH=avx -D BATCH_TILES=8,
tools/xngen src/f32-vcopysign/copysign.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -o src/f32-vcopysign/gen/f32-vcopysign-avx512f.c &
tools/xngen src/f32-vcopysign/copysign.c.in -D ARCH=hvx -D BATCH_TILES=32,64,96,128 -o src/f32-vcopysign/gen/f32-vcopysign-hvx.c &


# Scalar sign
tools/xngen src/f32-vcopysign/copysignc.c.in -D ARCH=scalar -D BATCH_TILES=1,2,4,8 -o src/f32-vcopysign/gen/f32-vcopysignc-scalar.c &
tools/xngen src/f32-vcopysign/copysignc.c.in -D ARCH=sse2 -D BATCH_TILES=4,8,12,16 -o src/f32-vcopysign/gen/f32-vcopysignc-sse2.c &
Expand All @@ -36,4 +35,14 @@ tools/xngen src/f32-vcopysign/copysign.c.in -D ARCH=avx512f -D BATCH_TILES=16
tools/xngen src/f32-vcopysign/copysignc.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -o src/f32-vcopysign/gen/f32-vcopysignc-avx512f.c &
tools/xngen src/f32-vcopysign/rcopysignc.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -o src/f32-vcopysign/gen/f32-vrcopysignc-avx512f.c &

##################################### RISC-V Vector ############################
tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=4 -D OP=COPYSIGN -o src/f32-vcopysign/gen/f32-vcopysign-rvv-u4v.c &
tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=8 -D OP=COPYSIGN -o src/f32-vcopysign/gen/f32-vcopysign-rvv-u8v.c &

tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=4 -D OP=COPYSIGNC -o src/f32-vcopysign/gen/f32-vcopysignc-rvv-u4v.c &
tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=8 -D OP=COPYSIGNC -o src/f32-vcopysign/gen/f32-vcopysignc-rvv-u8v.c &

tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=4 -D OP=RCOPYSIGNC -o src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u4v.c &
tools/xngen src/f32-vcopysign/rvv.c.in -D LMUL=8 -D OP=RCOPYSIGNC -o src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u8v.c &

wait
14 changes: 14 additions & 0 deletions src/configs/binary-elementwise-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,20 @@ static void init_f32_vcopysign_config(void) {
f32_vcopysign_config.ropc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vrcopysignc_ukernel__hvx_u128);
f32_vcopysign_config.element_tile = 128;
}
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector) {
f32_vcopysign_config.op_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysign_ukernel__rvv_u8v);
f32_vcopysign_config.opc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysignc_ukernel__rvv_u8v);
f32_vcopysign_config.ropc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vrcopysignc_ukernel__rvv_u8v);
f32_vcopysign_config.element_tile = 8 * hardware_config->vlenb / sizeof(float);
} else {
f32_vcopysign_config.op_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysign_ukernel__scalar_u2);
f32_vcopysign_config.opc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysignc_ukernel__scalar_u2);
f32_vcopysign_config.ropc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vrcopysignc_ukernel__scalar_u2);
f32_vcopysign_config.element_tile = 8;
}
#else
f32_vcopysign_config.op_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysign_ukernel__scalar_u2);
f32_vcopysign_config.opc_ukernel = XNN_INIT_BINARY_UKERNEL(xnn_f32_vcopysignc_ukernel__scalar_u2);
Expand Down
57 changes: 57 additions & 0 deletions src/configs/unary-elementwise-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,13 @@ static void init_f16_hswish_config(void) {
f16_hswish_config.element_tile = 16;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_hswish_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vhswish_ukernel__rvvfp16arith_u8v);
f16_hswish_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -372,6 +379,14 @@ static void init_f16_lrelu_config(void) {
f16_lrelu_config.init = (xnn_init_unary_uparams_fn) xnn_init_f16_lrelu_scalar_params;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_lrelu_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vlrelu_ukernel__rvvfp16arith_u8v);
f16_lrelu_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
f16_lrelu_config.init = (xnn_init_unary_uparams_fn) xnn_init_f16_lrelu_scalar_params;
}
#endif
}

Expand Down Expand Up @@ -436,6 +451,13 @@ static void init_f16_rndd_config(void) {
f16_rndd_config.element_tile = 16;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_rndd_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vrndd_ukernel__rvvfp16arith_u8v);
f16_rndd_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -466,6 +488,13 @@ static void init_f16_rndne_config(void) {
f16_rndne_config.element_tile = 16;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_rndne_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vrndne_ukernel__rvvfp16arith_u8v);
f16_rndne_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -496,6 +525,13 @@ static void init_f16_rndu_config(void) {
f16_rndu_config.element_tile = 16;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_rndu_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vrndu_ukernel__rvvfp16arith_u8v);
f16_rndu_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -526,6 +562,13 @@ static void init_f16_rndz_config(void) {
f16_rndz_config.element_tile = 16;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_rndz_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vrndz_ukernel__rvvfp16arith_u8v);
f16_rndz_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -556,6 +599,13 @@ static void init_f16_rsqrt_config(void) {
f16_rsqrt_config.element_tile = 32;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_rsqrt_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vrsqrt_ukernel__rvvfp16arith_rsqrt_u4v);
f16_rsqrt_config.element_tile = 4 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down Expand Up @@ -665,6 +715,13 @@ static void init_f16_sqrt_config(void) {
f16_sqrt_config.element_tile = 32;
}
#endif
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
assert(hardware_config != NULL);
if (hardware_config->arch_flags & xnn_arch_riscv_vector_fp16_arith) {
f16_sqrt_config.ukernel = XNN_INIT_UNARY_UKERNEL(xnn_f16_vsqrt_ukernel__rvvfp16arith_sqrt_u8v);
f16_sqrt_config.element_tile = 8 * hardware_config->vlenb / sizeof(xnn_float16);
}
#endif
}

Expand Down
4 changes: 4 additions & 0 deletions src/f16-vhswish/f16-vhswish.inc
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ XNN_UKERNEL(xnn_arch_x86_f16c, xnn_f16_vhswish_ukernel__f16c_u8, 8, false, xnn_f
XNN_UKERNEL(xnn_arch_x86_f16c, xnn_f16_vhswish_ukernel__f16c_u16, 16, false, xnn_float16, struct xnn_f16_default_params, NULL)
#endif // XNN_ENABLE_F16C && (XNN_ARCH_X86 || XNN_ARCH_X86_64)

#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vhswish_ukernel__rvvfp16arith_u4v, 4, true, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vhswish_ukernel__rvvfp16arith_u8v, 8, true, xnn_float16, struct xnn_f16_default_params, NULL)
#endif // XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
Loading
Loading