Skip to content

Commit 7d2d12d

Browse files
Audio: aec: optimize acoustic echo cancellation processing
This commit optimizes the performance of the Acoustic Echo Cancellation (AEC) implementation. The changes primarily restructure loops and memory copy operations so that processing uses fewer cycles. Signed-off-by: shastry <malladi.sastry@intel.com>
1 parent 3681e09 commit 7d2d12d

2 files changed

Lines changed: 274 additions & 84 deletions

File tree

src/audio/google/google_rtc_audio_processing.c

Lines changed: 207 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
791791
size_t dst_buf_size;
792792

793793
size_t num_of_bytes_to_process;
794-
size_t channel;
795794
size_t buffer_offset;
796795

797796
struct sof_source *ref_stream, *src_stream;
@@ -822,23 +821,57 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
822821
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
823822
* 16int: linearize buffer, skip channels if > Max
824823
*/
824+
/* Reduce cycle waste by streamlining the inner loop,
825+
* converting from array indexing to pointer arithmetic,
826+
* and putting data copy verification outside the loop.
827+
*/
825828
buffer_offset = 0;
826-
for (int i = 0; i < cd->num_frames; i++) {
827-
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
829+
const int16_t *ref_data_end = ref + cd->num_frames * cd->num_aec_reference_channels;
830+
831+
/* Check that ref is within the valid range of the ref_buf buffer */
832+
if (!ref || ref < (int16_t *)ref_buf_start || ref >= (int16_t *)ref_buf_end) {
833+
/* ref does not point to valid int16_t data,
834+
* return -EINVAL immediately to indicate an invalid argument was passed
835+
*/
836+
return -EINVAL;
837+
}
838+
828839
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
829-
cd->aec_reference_buffer_ptrs[channel][i] =
830-
convert_int16_to_float(ref[channel]);
840+
float **ref_ptr = cd->aec_reference_buffer_ptrs;
841+
int s_chan;
842+
int i;
843+
844+
/* Loop over frames and channels, converting data from int16 to float */
845+
for (i = 0; i < cd->num_frames; ++i) {
846+
for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
847+
(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
848+
ref_ptr++;
849+
}
850+
}
851+
831852
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
832-
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
833-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
853+
int16_t *ref_buf = cd->aec_reference_buffer;
854+
size_t sizeofrefbuffer = sizeof(cd->aec_reference_buffer);
834855

835-
}
856+
/* Use memcpy to copy the data from ref buffer to ref_buf buffer until it reaches
857+
* ref_data_end
858+
* This assumes that the data in the ref buffer is contiguous
859+
*/
860+
size_t num_bytes = (ref_data_end - ref) * sizeof(*ref);
836861

837-
ref += cd->num_aec_reference_channels;
838-
if ((void *)ref >= (void *)ref_buf_end)
839-
ref = (void *)ref_buf_start;
862+
if (num_bytes > sizeofrefbuffer) {
863+
// Handle the error: the source data is too large to fit in the destination buffer
864+
return -EINVAL;
840865
}
841866

867+
memcpy(ref_buf, ref, num_bytes);
868+
869+
/* Update the ref and ref_buf pointers */
870+
ref = ref_data_end;
871+
ref_buf += (ref_data_end - ref);
872+
873+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
874+
842875
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
843876
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
844877
(const float **)
@@ -856,22 +889,64 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
856889
assert(!ret);
857890
src_buf_end = src_buf_start + src_buf_size;
858891

892+
/* The second optimization eliminates the inner loop
893+
* and replaces it with pointer arithmetic for speedier access.
894+
* To reduce cycle waste, the data copy check is moved outside of the loop.
895+
*/
859896
buffer_offset = 0;
860-
for (int i = 0; i < cd->num_frames; i++) {
861-
for (channel = 0; channel < cd->num_capture_channels; channel++)
897+
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
898+
899+
/* Check if the calculated end of the source buffer exceeds the actual end of the buffer */
900+
src_end = (int16_t *)cir_buf_wrap((void *)src_end,
901+
(void *)src_buf_start, (void *)src_buf_end);
902+
862903
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
863-
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
904+
/* Declare a pointer to the process buffer */
905+
float **proc_ptr = cd->process_buffer_ptrs;
906+
907+
/* Check for null pointers and buffer overflows */
908+
if (!src || !proc_ptr || src >= (const int16_t *)src_end)
909+
/* If there's an error, return -EINVAL immediately to indicate an
910+
* invalid argument was passed
911+
*/
912+
return -EINVAL;
913+
else
914+
/* If there's no error, continue processing */
915+
while (src != (const int16_t *)src_end) {
916+
/* If the source pointer has reached or exceeded the end of the source
917+
* buffer, wrap it back to the start
918+
*/
919+
src = (int16_t *)cir_buf_wrap((void *)src,
920+
(void *)src_buf_start, (void *)src_buf_end);
921+
/* Convert the source data from int16_t to float and store it in the
922+
* process buffer
923+
*/
924+
*proc_ptr++ = convert_int16_to_float(src++);
925+
}
926+
864927
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
865-
cd->process_buffer[buffer_offset++] = src[channel];
866-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
928+
/* Declare a pointer to the process buffer */
929+
int16_t *proc_buf = cd->process_buffer;
867930

868-
/* move pointer to next frame
869-
* number of incoming channels may be < cd->num_capture_channels
931+
/* Check for null pointers and buffer overflows */
932+
if (!src || !proc_buf || src >= (int16_t *)src_end)
933+
/* If there's an error, return -EINVAL immediately to indicate an
934+
* invalid argument was passed
870935
*/
871-
src += cd->config.output_fmt.channels_count;
872-
if ((void *)src >= (void *)src_buf_end)
873-
src = (void *)src_buf_start;
874-
}
936+
return -EINVAL;
937+
else
938+
/* If there's no error, continue processing */
939+
while (src != (int16_t *)src_end) {
940+
/* If the source pointer has reached or exceeded the end of the source
941+
* buffer, wrap it back to the start
942+
*/
943+
src = (int16_t *)cir_buf_wrap((void *)src,
944+
(void *)src_buf_start, (void *)src_buf_end);
945+
/* Copy the source data to the process buffer */
946+
*proc_buf++ = *src++;
947+
}
948+
949+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
875950

876951
source_release_data(src_stream, num_of_bytes_to_process);
877952

@@ -894,25 +969,48 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
894969

895970
/* process all channels in output stream */
896971
buffer_offset = 0;
897-
for (int i = 0; i < cd->num_frames; i++) {
898-
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
899-
/* set data in processed channels, zeroize not processed */
900-
if (channel < cd->num_capture_channels)
972+
973+
/* Calculate the end of the destination buffer based on the number of frames and
974+
* channels
975+
*/
976+
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
977+
978+
/* Check if the calculated end of the destination buffer exceeds the actual end
979+
* of the buffer
980+
*/
981+
dst_end = (int16_t *)cir_buf_wrap((void *)dst_end,
982+
(void *)dst_buf_start, (void *)dst_buf_end);
983+
901984
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
902-
dst[channel] = convert_float_to_int16(
903-
cd->process_buffer_ptrs[channel][i]);
985+
float **proc_ptr = cd->process_buffer_ptrs;
986+
987+
/* Check for null pointers and buffer overflows */
988+
if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
989+
/* If there's an error, return -EINVAL immediately to indicate an
990+
* invalid argument was passed
991+
*/
992+
return -EINVAL;
993+
994+
/* Convert data from float to int16_t and store it in the destination buffer */
995+
for (; dst != dst_end; ++dst, ++proc_ptr)
996+
*dst = convert_float_to_int16(*proc_ptr);
997+
904998
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
905-
dst[channel] = cd->process_buffer[buffer_offset++];
906-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
907-
else
908-
dst[channel] = 0;
909-
}
999+
int16_t *process_buffer = cd->process_buffer;
9101000

911-
dst += cd->config.output_fmt.channels_count;
912-
if ((void *)dst >= (void *)dst_buf_end)
913-
dst = (void *)dst_buf_start;
914-
}
1001+
/* Check for null pointers and buffer overflows */
1002+
if (!dst || !process_buffer || dst >= dst_end ||
1003+
process_buffer >= process_buffer + cd->num_frames)
1004+
/* If there's an error, return -EINVAL immediately to indicate an
1005+
* invalid argument was passed
1006+
*/
1007+
return -EINVAL;
1008+
1009+
/* Copy the data from the process buffer to the destination buffer */
1010+
for (; dst != dst_end; ++dst, ++process_buffer)
1011+
*dst = *process_buffer;
9151012

1013+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
9161014
sink_commit_buffer(dst_stream, num_of_bytes_to_process);
9171015

9181016
return 0;
@@ -928,6 +1026,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9281026
int16_t *src, *dst, *ref;
9291027
uint32_t num_aec_reference_frames;
9301028
uint32_t num_aec_reference_bytes;
1029+
int ref_channels;
1030+
int aec_ref_product;
9311031
int num_samples_remaining;
9321032
int num_frames_remaining;
9331033
int channel;
@@ -950,25 +1050,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9501050
ref_stream = ref_streamb->data;
9511051
ref = audio_stream_get_rptr(ref_stream);
9521052

1053+
/* Pre-calculate the number of channels in the reference stream for efficiency */
1054+
ref_channels = audio_stream_get_channels(ref_stream);
1055+
1056+
/* Pre-calculate the product of the number of AEC reference channels and the AEC
1057+
* reference frame index
1058+
*/
1059+
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1060+
9531061
num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
9541062
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
9551063

956-
num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
1064+
num_samples_remaining = num_aec_reference_frames * ref_channels;
9571065
while (num_samples_remaining) {
9581066
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
9591067
n = MIN(num_samples_remaining, nmax);
9601068
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
961-
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1069+
j = aec_ref_product;
9621070
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
9631071
cd->aec_reference_buffer[j++] = ref[channel];
964-
965-
ref += audio_stream_get_channels(ref_stream);
1072+
ref += ref_channels;
9661073
++cd->aec_reference_frame_index;
967-
9681074
if (cd->aec_reference_frame_index == cd->num_frames) {
9691075
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
970-
cd->aec_reference_buffer);
1076+
cd->aec_reference_buffer);
9711077
cd->aec_reference_frame_index = 0;
1078+
/* Reset the product as the frame index is reset */
1079+
aec_ref_product = 0;
9721080
}
9731081
}
9741082
num_samples_remaining -= n;
@@ -984,6 +1092,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9841092
src = audio_stream_get_rptr(mic_stream);
9851093
dst = audio_stream_get_wptr(out_stream);
9861094

1095+
/* Move out of loop */
1096+
int mic_stream_channels = audio_stream_get_channels(mic_stream);
9871097
frames = input_buffers[cd->raw_microphone_source].size;
9881098
num_frames_remaining = frames;
9891099

@@ -993,34 +1103,66 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9931103
nmax = audio_stream_frames_without_wrap(out_stream, dst);
9941104
n = MIN(n, nmax);
9951105
for (i = 0; i < n; i++) {
996-
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
997-
cd->num_capture_channels]),
998-
cd->num_frames * cd->num_capture_channels *
999-
sizeof(cd->raw_mic_buffer[0]), src,
1000-
sizeof(int16_t) * cd->num_capture_channels);
1001-
++cd->raw_mic_buffer_frame_index;
1002-
1003-
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1004-
sizeof(cd->output_buffer[0]),
1005-
&(cd->output_buffer[cd->output_buffer_frame_index *
1006-
cd->num_capture_channels]),
1007-
sizeof(int16_t) * cd->num_capture_channels);
1008-
++cd->output_buffer_frame_index;
1009-
1010-
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1011-
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1012-
cd->raw_mic_buffer,
1013-
cd->output_buffer);
1014-
cd->output_buffer_frame_index = 0;
1015-
cd->raw_mic_buffer_frame_index = 0;
1106+
/* If we haven't filled the buffer yet, copy the data */
1107+
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
1108+
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
1109+
size_t buffer_size = sizeof(cd->raw_mic_buffer);
1110+
size_t frame_index = cd->raw_mic_buffer_frame_index;
1111+
size_t buffer_used = frame_index * sizeof(int16_t);
1112+
size_t buffer_remaining = buffer_size - buffer_used;
1113+
1114+
if (num_bytes <= buffer_remaining) {
1115+
int16_t *buffer_start = cd->raw_mic_buffer;
1116+
size_t offset = frame_index * cd->num_capture_channels;
1117+
1118+
buffer_start += offset;
1119+
memcpy(buffer_start, src, num_bytes);
1120+
++cd->raw_mic_buffer_frame_index;
1121+
} else {
1122+
/* The source data is too big to fit in the
1123+
* destination buffer.
1124+
*/
1125+
return -EINVAL;
1126+
}
10161127
}
10171128

1018-
src += audio_stream_get_channels(mic_stream);
1019-
dst += audio_stream_get_channels(out_stream);
1129+
if (cd->output_buffer_frame_index < cd->num_frames) {
1130+
size_t num_bytes = sizeof(int16_t) * cd->num_capture_channels;
1131+
size_t buffer_size = sizeof(cd->output_buffer);
1132+
size_t frame_index = cd->output_buffer_frame_index;
1133+
size_t buffer_used = frame_index * sizeof(int16_t);
1134+
size_t buffer_remaining = buffer_size - buffer_used;
1135+
1136+
if (num_bytes <= buffer_remaining) {
1137+
int16_t *output_start = cd->output_buffer;
1138+
size_t offset = frame_index * cd->num_capture_channels;
1139+
1140+
output_start += offset;
1141+
memcpy(dst, output_start, num_bytes);
1142+
++cd->output_buffer_frame_index;
1143+
} else {
1144+
/* The source data is too big to fit in the
1145+
* destination buffer.
1146+
*/
1147+
return -EINVAL;
1148+
}
1149+
}
1150+
1151+
src += mic_stream_channels;
1152+
dst += mic_stream_channels;
10201153
}
10211154
num_frames_remaining -= n;
10221155
src = audio_stream_wrap(mic_stream, src);
10231156
dst = audio_stream_wrap(out_stream, dst);
1157+
1158+
/* If we've filled the buffer, process the data */
1159+
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1160+
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1161+
cd->raw_mic_buffer,
1162+
cd->output_buffer);
1163+
cd->output_buffer_frame_index = 0;
1164+
cd->raw_mic_buffer_frame_index = 0;
1165+
}
10241166
}
10251167

10261168
module_update_buffer_position(&input_buffers[cd->raw_microphone_source],

0 commit comments

Comments
 (0)