From cf89f6965c6581ae7383235f2e2f78502f86be0c Mon Sep 17 00:00:00 2001 From: Yolanda Chen Date: Wed, 1 Apr 2026 14:05:39 +0800 Subject: [PATCH] Update qd8 gemm config to enable 4x16c2s2 microkernels for wasm simd --- cmake/gen/wasmsimd_microkernels.cmake | 8 ++-- gen/wasmsimd_microkernels.bzl | 8 ++-- scripts/generate-qs8-igemm.sh | 2 +- src/configs/gemm-config.c | 42 +++++++++++++++---- ...-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c} | 0 5 files changed, 42 insertions(+), 18 deletions(-) rename src/qd8-f32-qc8w-igemm/gen/{qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c => qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c} (100%) diff --git a/cmake/gen/wasmsimd_microkernels.cmake b/cmake/gen/wasmsimd_microkernels.cmake index a705cfd350a..d7de5ed03cd 100644 --- a/cmake/gen/wasmsimd_microkernels.cmake +++ b/cmake/gen/wasmsimd_microkernels.cmake @@ -156,9 +156,13 @@ SET(PROD_WASMSIMD_MICROKERNEL_SRCS src/qd8-f32-qc4w-gemm/gen/qd8-f32-qc4w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc4w-gemm/gen/qd8-f32-qc4w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c + src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c + src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c + src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c + src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c src/qs8-dwconv/gen/qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16-add16.c src/qs8-dwconv/gen/qs8-dwconv-25p16c-minmax-fp32-wasmsimd-mul16-add16.c src/qs8-f32-vcvt/gen/qs8-f32-vcvt-wasmsimd-u32.c @@ -740,7 +744,6 @@ SET(NON_PROD_WASMSIMD_MICROKERNEL_SRCS src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c - src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c @@ -758,13 +761,11 @@ SET(NON_PROD_WASMSIMD_MICROKERNEL_SRCS src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c - src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld128.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c - src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x16c2s2-minmax-wasmsimd-dot16x2.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c @@ -782,7 +783,6 @@ SET(NON_PROD_WASMSIMD_MICROKERNEL_SRCS src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c - src/qd8-f32-qc8w-igemm/gen/qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c src/qs8-dwconv/gen/qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16-add16.c src/qs8-dwconv/gen/qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16.c src/qs8-dwconv/gen/qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16.c diff --git a/gen/wasmsimd_microkernels.bzl b/gen/wasmsimd_microkernels.bzl index 2f000a6e099..066698e646e 100644 --- a/gen/wasmsimd_microkernels.bzl +++ b/gen/wasmsimd_microkernels.bzl @@ -152,9 +152,13 @@ PROD_WASMSIMD_MICROKERNEL_SRCS = [ "src/qd8-f32-qc4w-gemm/gen/qd8-f32-qc4w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc4w-gemm/gen/qd8-f32-qc4w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c", + "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c", + "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld128.c", + "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld128.c", + "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qs8-dwconv/gen/qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16-add16.c", "src/qs8-dwconv/gen/qs8-dwconv-25p16c-minmax-fp32-wasmsimd-mul16-add16.c", "src/qs8-f32-vcvt/gen/qs8-f32-vcvt-wasmsimd-u32.c", @@ -737,7 +741,6 @@ NON_PROD_WASMSIMD_MICROKERNEL_SRCS = [ "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c", - "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", @@ -755,13 +758,11 @@ NON_PROD_WASMSIMD_MICROKERNEL_SRCS = [ "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c", - "src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2-minmax-wasmsimd-dot16x2-ld128.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld128.c", - "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2-minmax-wasmsimd-dot16x2-ld128.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-2x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", @@ -779,7 +780,6 @@ NON_PROD_WASMSIMD_MICROKERNEL_SRCS = [ "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c2s4-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld64.c", "src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x4c8-minmax-wasmsimd-dot16x2-ld128.c", - "src/qd8-f32-qc8w-igemm/gen/qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c", "src/qs8-dwconv/gen/qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16-add16.c", "src/qs8-dwconv/gen/qs8-dwconv-9p8c-minmax-fp32-wasmsimd-mul16.c", "src/qs8-dwconv/gen/qs8-dwconv-9p16c-minmax-fp32-wasmsimd-mul16.c", diff --git a/scripts/generate-qs8-igemm.sh b/scripts/generate-qs8-igemm.sh index 4a01154641e..eeeb7f5ea4d 100755 --- a/scripts/generate-qs8-igemm.sh +++ b/scripts/generate-qs8-igemm.sh @@ -152,7 +152,7 @@ tools/xngen src/qs8-igemm/MRx16c2s2-wasmsimd-dot16x2.c.in -D MR=1 -D NR=16 -D RE tools/xngen src/qs8-igemm/MRx16c2s2-wasmsimd-dot16x2.c.in -D MR=4 -D NR=16 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -o src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x16c2s2-minmax-fp32-wasmsimd-dot16x2.c & tools/xngen src/qs8-igemm/MRx16c2s2-wasmsimd-dot16x2.c.in -D MR=1 -D NR=16 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x16c2s2-minmax-wasmsimd-dot16x2.c & -tools/xngen src/qs8-igemm/MRx16c2s2-wasmsimd-dot16x2.c.in -D MR=4 -D NR=16 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-igemm/gen/qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c & +tools/xngen src/qs8-igemm/MRx16c2s2-wasmsimd-dot16x2.c.in -D MR=4 -D NR=16 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c & ### C8 micro-kernels tools/xngen src/qs8-igemm/MRx4c8-wasmsimd-dot16x2.c.in -D MR=1 -D VARIANT=LD64 -D REQUANTIZATION= -D DATATYPE=QD8 -o src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-1x4c8-minmax-wasmsimd-dot16x2-ld64.c & diff --git a/src/configs/gemm-config.c b/src/configs/gemm-config.c index 36a00a48b16..0d6f2ad57d1 100644 --- a/src/configs/gemm-config.c +++ b/src/configs/gemm-config.c @@ -4540,6 +4540,17 @@ static void init_qd8_f32_qc8w_gemm_config(void) { qd8_f32_qc8w_gemm_config.log2_kr = 3; #endif } else { + #if XNN_ENABLE_WASM_REVECTORIZE + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_4x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_4x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; + qd8_f32_qc8w_gemm_config.mr = 4; + qd8_f32_qc8w_gemm_config.nr = 16; + qd8_f32_qc8w_gemm_config.log2_kr = 1; + qd8_f32_qc8w_gemm_config.log2_sr = 1; + #else qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128); qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); @@ -4549,17 +4560,30 @@ static void init_qd8_f32_qc8w_gemm_config(void) { qd8_f32_qc8w_gemm_config.nr = 4; qd8_f32_qc8w_gemm_config.log2_kr = 1; qd8_f32_qc8w_gemm_config.log2_sr = 2; + #endif } #elif XNN_ARCH_WASMSIMD - qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); - qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128); - qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); - qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128); - qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; - qd8_f32_qc8w_gemm_config.mr = 4; - qd8_f32_qc8w_gemm_config.nr = 4; - qd8_f32_qc8w_gemm_config.log2_kr = 1; - qd8_f32_qc8w_gemm_config.log2_sr = 2; + #if XNN_ENABLE_WASM_REVECTORIZE + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_4x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_4x16c2s2__wasmsimd_dot16x2); + qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; + qd8_f32_qc8w_gemm_config.mr = 4; + qd8_f32_qc8w_gemm_config.nr = 16; + qd8_f32_qc8w_gemm_config.log2_kr = 1; + qd8_f32_qc8w_gemm_config.log2_sr = 1; + #else + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); + qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128); + qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(4)] = XNN_INIT_HMP_DQIGEMM_UKERNEL(xnn_qd8_f32_qc8w_igemm_minmax_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128); + qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; + qd8_f32_qc8w_gemm_config.mr = 4; + qd8_f32_qc8w_gemm_config.nr = 4; + qd8_f32_qc8w_gemm_config.log2_kr = 1; + qd8_f32_qc8w_gemm_config.log2_sr = 2; + #endif #elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = XNN_INIT_HMP_DQGEMM_UKERNEL(xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x4v__rvv); diff --git a/src/qd8-f32-qc8w-igemm/gen/qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c b/src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c similarity index 100% rename from src/qd8-f32-qc8w-igemm/gen/qs8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c rename to src/qd8-f32-qc8w-igemm/gen/qd8-f32-qc8w-igemm-4x16c2s2-minmax-wasmsimd-dot16x2.c