Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 5 additions & 16 deletions Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
* SPDX-FileCopyrightText: Copyright 2010-2024, 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
*
* SPDX-License-Identifier: Apache-2.0
*
Expand All @@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 04 November 2024
* $Revision: V.18.0.0
* $Date: 9 March 2026
* $Revision: V.19.0.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -590,18 +590,6 @@ int32_t arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transp
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims);

/**
* @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension.
*        Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
*
* @return Size of the additional buffer.
*         NOTE(review): the unit is presumably bytes - confirm against the implementation.
*
* @note Intended for compilation on Host. If compiling for an Arm target, use
*       arm_transpose_conv_s8_get_buffer_size().
*
*/
int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *out_dims);

/**
* @brief Get size of additional buffer required by arm_transpose_conv_s8() for Arm(R) Helium Architecture case.
* Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
Expand All @@ -610,7 +598,8 @@ int32_t arm_transpose_conv_s8_get_buffer_size_dsp(const cmsis_nn_dims *input_dim
* arm_transpose_conv_s8_get_buffer_size().
*
*/
int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims,
int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_transpose_conv_params *transpose_conv_params,
                                                  const cmsis_nn_dims *input_dims,
                                                  const cmsis_nn_dims *filter_dims,
                                                  const cmsis_nn_dims *out_dims);

Expand Down
16 changes: 14 additions & 2 deletions Include/arm_nnsupportfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 27 Feb 2026
* $Revision: V.22.8.1
* $Date: 6 Mar 2026
* $Revision: V.22.9.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -245,6 +245,18 @@ void arm_s8_to_s16_unordered_with_offset(const int8_t *src, int16_t *dst, int32_

#endif

/**
* @brief Get the required buffer size for optimized s8 convolution.
*        This is for processors with MVE extension.
*        Refer to arm_convolve_s8_get_buffer_size() for function argument details.
*
* @param[in] input_dims   Input dimensions. Only the channel count (c) is read.
* @param[in] filter_dims  Filter dimensions. Only the width (w) and height (h) are read.
*
* @return Buffer size in bytes: room for four im2col columns, each padded up to a
*         multiple of 16 int8 elements.
*
* @note Intended for compilation on Host. If compiling for an Arm target, use
*       arm_convolve_s8_get_buffer_size(). Note also that this is a support function,
*       so calling it directly is not recommended, even on Host.
*
*/
int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);

/**
* @brief Get the required buffer size for optimized s8 depthwise convolution
* function with constraint that in_channel equals out_channel.
Expand Down
25 changes: 12 additions & 13 deletions Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_convolve_get_buffer_sizes_s8.c
* Description: Collection of get buffer size functions for the various s8 convolution layer functions.
*
* $Date: 27 Feb 2026
* $Revision: V.2.2.2
* $Date: 6 Mar 2026
* $Revision: V.2.3.0
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -50,17 +50,6 @@ __STATIC_INLINE int32_t arm_convolve_1x1_s8_fast_get_buffer_size_dsp(const cmsis
#endif
}

/* Scratch size, in bytes, needed by the MVE variant of the s8 convolution. */
__STATIC_INLINE int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims,
                                                            const cmsis_nn_dims *filter_dims)
{
    // One im2col column holds every input channel for each position of the kernel window.
    const int32_t column_elements = input_dims->c * filter_dims->w * filter_dims->h;

    // Number of 16-element int8 vectors needed to cover one column. The padding to a multiple
    // of 16 follows from how arm_nn_mat_mult_nt_t_s8 is implemented.
    const int32_t vectors_per_column = (column_elements + 15) / 16;

    // 4 im2col buffers, each made of 'vectors_per_column' Q registers of 16 int8 elements.
    return 4 * vectors_per_column * 16 * (int32_t)sizeof(int8_t);
}

__STATIC_INLINE int32_t arm_convolve_1_x_n_s8_get_buffer_size_mve(const cmsis_nn_conv_params *conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
Expand Down Expand Up @@ -104,6 +93,16 @@ int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const c
#endif
}

/*
 * Buffer size, in bytes, required by the MVE implementation of s8 convolution.
 *
 * Only input_dims->c, filter_dims->w and filter_dims->h are read.
 */
int32_t arm_convolve_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
{
    // Length of a single im2col column before padding.
    const int32_t unpadded_len = input_dims->c * filter_dims->w * filter_dims->h;

    // Express the column as a count of 16-lane int8 vectors; a partially filled vector counts
    // as a whole one. This mirrors the implementation of arm_nn_mat_mult_nt_t_s8.
    const int32_t lane_groups = (unpadded_len + 15) / 16;

    // 4 -> number of im2col buffers, 16 -> int8 elements per Q register.
    return 4 * lane_groups * 16 * (int32_t)sizeof(int8_t);
}

int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
* SPDX-FileCopyrightText: Copyright 2023-2024, 2026 Arm Limited and/or its affiliates <open-source-office@arm.com>
*
* SPDX-License-Identifier: Apache-2.0
*
Expand All @@ -21,8 +21,8 @@
* Title: arm_transpose_conv_get_buffer_sizes_s8.c
* Description: Collection of get buffer size functions for the transpose convolution layer functions.
*
* $Date: 29 October 2024
* $Revision: V.2.0.0
* $Date: 9 March 2026
* $Revision: V.2.1.0
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -53,7 +53,9 @@ int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_para
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *out_dims)
{

#if defined(ARM_MATH_MVEI)
return arm_transpose_conv_s8_get_buffer_size_mve(transpose_conv_params, input_dims, filter_dims, out_dims);
#else
const bool reverse_conv_possible =
((transpose_conv_params->stride.w <= 2) && (transpose_conv_params->stride.h <= 2));
const bool reverse_conv_efficient = (input_dims->c > REVERSE_TCOL_EFFICIENT_THRESHOLD);
Expand All @@ -74,6 +76,36 @@ int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_para
const int32_t buf_y = MAX(filter_dims->h, transpose_conv_params->stride.h);
return buf_x * buf_y * sizeof(int32_t);
}
#endif
}

/*
 * Buffer size required by the MVE build of arm_transpose_conv_s8().
 *
 * Two sizing strategies are used:
 *  - When both strides are at most 2 and the input channel count exceeds
 *    REVERSE_TCOL_EFFICIENT_THRESHOLD, the size is delegated to
 *    arm_convolve_s8_get_buffer_size_mve() using input dimensions scaled by the strides.
 *  - Otherwise the size is computed from an int32_t scratch area derived from the input width,
 *    the strides, the filter dimensions and the output channel count.
 */
int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_transpose_conv_params *transpose_conv_params,
                                                  const cmsis_nn_dims *input_dims,
                                                  const cmsis_nn_dims *filter_dims,
                                                  const cmsis_nn_dims *out_dims)
{
    const bool strides_allow_reverse =
        ((transpose_conv_params->stride.w <= 2) && (transpose_conv_params->stride.h <= 2));
    const bool channels_favor_reverse = (input_dims->c > REVERSE_TCOL_EFFICIENT_THRESHOLD);

    if (!strides_allow_reverse || !channels_favor_reverse)
    {
        // NOTE(review): the width term takes MAX() against stride.h rather than stride.w.
        // Behaviour is kept as-is here; confirm it matches what the kernel expects.
        const int32_t row_elements = ((input_dims->w - 1) * transpose_conv_params->stride.w +
                                      MAX(filter_dims->w, transpose_conv_params->stride.h)) *
            out_dims->c;
        const int32_t row_count = MAX(filter_dims->h, transpose_conv_params->stride.h);

        return row_elements * row_count * sizeof(int32_t);
    }

    // Reverse-convolution path: size the buffer as for a regular convolution whose input has
    // been scaled by the strides in height and width.
    const cmsis_nn_dims upscaled_input_dims = {input_dims->n,
                                               input_dims->h * transpose_conv_params->stride.h,
                                               input_dims->w * transpose_conv_params->stride.w,
                                               input_dims->c};

    return arm_convolve_s8_get_buffer_size_mve(&upscaled_input_dims, filter_dims);
}

int32_t arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transpose_conv_params *transpose_conv_params,
Expand Down