From 3c67a632d6c915d49a5f76857f4c5134cf2baba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Fri, 6 Mar 2026 16:50:10 +0100 Subject: [PATCH] Fix transpose conv buffer size issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DSP/MVE transpose-conv buffer size functions were declared but not defined. As the DSP version is the same as the scalar one, the DSP declaration is removed; the MVE version is now defined. Change-Id: I857cda062c9f5a53d2b532c4f092a6b2da870923 --- Include/arm_nnfunctions.h | 21 +++------- Include/arm_nnsupportfunctions.h | 16 +++++++- .../arm_convolve_get_buffer_sizes_s8.c | 25 ++++++------ .../arm_transpose_conv_get_buffer_sizes_s8.c | 40 +++++++++++++++++-- 4 files changed, 67 insertions(+), 35 deletions(-) diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h index c33a2149..e48c333c 100644 --- a/Include/arm_nnfunctions.h +++ b/Include/arm_nnfunctions.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024, 2026 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_nnfunctions.h * Description: Public header file for CMSIS NN Library * - * $Date: 04 November 2024 - * $Revision: V.18.0.0 + * $Date: 9 March 2026 + * $Revision: V.19.0.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -590,18 +590,6 @@ int32_t arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transp const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); -/** - * @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension. - * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details. - * - * @note Intended for compilation on Host. If compiling for an Arm target, use - * arm_transpose_conv_s8_get_buffer_size().
- * - */ -int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *out_dims); - /** * @brief Get size of additional buffer required by arm_transpose_conv_s8() for Arm(R) Helium Architecture case. * Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details. @@ -610,7 +598,8 @@ int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dim * arm_transpose_conv_s8_get_buffer_size(). * */ -int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, +int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_transpose_conv_params *transposed_conv_params, + const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *out_dims); diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 94846aa5..4a587c62 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 27 Feb 2026 - * $Revision: V.22.8.1 + * $Date: 6 Mar 2026 + * $Revision: V.22.9.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -245,6 +245,18 @@ void arm_s8_to_s16_unordered_with_offset(const int8_t *src, int16_t *dst, int32_ #endif +/** + * @brief Get the required buffer size for optimized s8 convolution. + * This is for processors with MVE extension. + * Refer to arm_convolve_s8_get_buffer_size() for function argument details. + * + * @note Intended for compilation on Host. If compiling for an Arm target, use + * arm_convolve_s8_get_buffer_size(). Note also this is a support function, + * so it is not recommended to call it directly, even on Host.
+ * + */ +int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); + /** * @brief Get the required buffer size for optimized s8 depthwise convolution * function with constraint that in_channel equals out_channel. diff --git a/Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c b/Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c index 50597520..58f6a119 100644 --- a/Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c +++ b/Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c @@ -21,8 +21,8 @@ * Title: arm_convolve_get_buffer_sizes_s8.c * Description: Collection of get buffer size functions for the various s8 convolution layer functions. * - * $Date: 27 Feb 2026 - * $Revision: V.2.2.2 + * $Date: 6 Mar 2026 + * $Revision: V.2.3.0 * * Target : Arm(R) M-Profile Architecture * @@ -50,17 +50,6 @@ __STATIC_INLINE int32_t arm_convolve_1x1_s8_fast_get_buffer_size_dsp(const cmsis #endif } -__STATIC_INLINE int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims) -{ - int32_t col_length = input_dims->c * filter_dims->w * filter_dims->h; - // Get number of complete lanes with int8 elements (multiple of 16) for given col_length. 
This is dependent on - // implementation of arm_nn_mat_mult_nt_t_s8 - col_length = (col_length + 15) / 16; - // 4 -> number of im2col buffers, 16 -> 16 elements per Q register - return 4 * col_length * 16 * (int32_t)sizeof(int8_t); -} - __STATIC_INLINE int32_t arm_convolve_1_x_n_s8_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, @@ -104,6 +93,16 @@ int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const c #endif } +int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) +{ + int32_t col_length = input_dims->c * filter_dims->w * filter_dims->h; + // Get number of complete lanes with int8 elements (multiple of 16) for given col_length. This is dependent on + // implementation of arm_nn_mat_mult_nt_t_s8 + col_length = (col_length + 15) / 16; + // 4 -> number of im2col buffers, 16 -> 16 elements per Q register + return 4 * col_length * 16 * (int32_t)sizeof(int8_t); +} + int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params, const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims, diff --git a/Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c b/Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c index fd29eff7..6cfa6fd4 100644 --- a/Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c +++ b/Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2023-2024, 2026 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_transpose_conv_get_buffer_sizes_s8.c * Description: Collection of get buffer size functions for the transpose convolution layer functions. 
* - * $Date: 29 October 2024 - * $Revision: V.2.0.0 + * $Date: 9 March 2026 + * $Revision: V.2.1.0 * * Target : Arm(R) M-Profile Architecture * @@ -53,7 +53,9 @@ int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_para const cmsis_nn_dims *filter_dims, const cmsis_nn_dims *out_dims) { - +#if defined(ARM_MATH_MVEI) + return arm_transpose_conv_s8_get_buffer_size_mve(transpose_conv_params, input_dims, filter_dims, out_dims); +#else const bool reverse_conv_possible = ((transpose_conv_params->stride.w <= 2) && (transpose_conv_params->stride.h <= 2)); const bool reverse_conv_efficient = (input_dims->c > REVERSE_TCOL_EFFICIENT_THRESHOLD); @@ -74,6 +76,36 @@ int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_para const int32_t buf_y = MAX(filter_dims->h, transpose_conv_params->stride.h); return buf_x * buf_y * sizeof(int32_t); } +#endif +} + +int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_transpose_conv_params *transpose_conv_params, + const cmsis_nn_dims *input_dims, + const cmsis_nn_dims *filter_dims, + const cmsis_nn_dims *out_dims) +{ + + const bool reverse_conv_possible = + ((transpose_conv_params->stride.w <= 2) && (transpose_conv_params->stride.h <= 2)); + const bool reverse_conv_efficient = (input_dims->c > REVERSE_TCOL_EFFICIENT_THRESHOLD); + + if (reverse_conv_possible && reverse_conv_efficient) + { + const cmsis_nn_dims reverse_conv_input_dims = {input_dims->n, + input_dims->h * transpose_conv_params->stride.h, + input_dims->w * transpose_conv_params->stride.w, + input_dims->c}; + + return arm_convolve_s8_get_buffer_size_mve(&reverse_conv_input_dims, filter_dims); + } + else + { + const int32_t buf_x = ((input_dims->w - 1) * transpose_conv_params->stride.w + + MAX(filter_dims->w, transpose_conv_params->stride.h)) * + out_dims->c; + const int32_t buf_y = MAX(filter_dims->h, transpose_conv_params->stride.h); + return buf_x * buf_y * sizeof(int32_t); + } } int32_t
arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transpose_conv_params *transpose_conv_params,