From 73ee7760319afba8f5550820632f297cbc5251e8 Mon Sep 17 00:00:00 2001 From: Eloi Du Bois Date: Fri, 10 Apr 2020 14:22:44 -0500 Subject: [PATCH 1/3] AVX optimizations for the convertBuffer function --- CMakeLists.txt | 24 +++++++++++ RtAudio.cpp | 109 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 117 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b1c506c..4ffb5cc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,30 @@ if (CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") endif (CMAKE_COMPILER_IS_GNUCXX) +#default SIMD configuration uses AVX2 build flags +if(NOT DEFAULT_SIMD_FLAGS) + set(DEFAULT_SIMD_FLAGS "AVX2") +endif() + +SET(ENABLE_SIMD_FLAGS "${DEFAULT_SIMD_FLAGS}" CACHE STRING "Set compiler SIMD flags") +SET_PROPERTY(CACHE ENABLE_SIMD_FLAGS PROPERTY STRINGS none AVX2) + +#set up according to your own system environment +#Windows +if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" + OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC")) + if(${ENABLE_SIMD_FLAGS} MATCHES "AVX2") + add_definitions(/arch:AVX2 /D__AVX__) + message(STATUS "Enabling AVX2 instructions") + endif() +#Linux +else() + if(${ENABLE_SIMD_FLAGS} MATCHES "AVX") + add_definitions(-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -mpopcnt -mavx -mavx2 -mfma) + message(STATUS "Enabling AVX2 instructions") + endif() +endif() + # Add debug flags if (CMAKE_BUILD_TYPE STREQUAL "Debug") add_definitions(-D__RTAUDIO_DEBUG__) diff --git a/RtAudio.cpp b/RtAudio.cpp index 44b1f79e..b0ab7a2b 100644 --- a/RtAudio.cpp +++ b/RtAudio.cpp @@ -49,6 +49,13 @@ #include #include +#if __SSE2__ +#include +#endif +#if __AVX__ +#include +#endif + // Static variable definitions. const unsigned int RtApi::MAX_SAMPLE_RATES = 14; const unsigned int RtApi::SAMPLE_RATES[] = { @@ -537,7 +544,7 @@ struct CoreHandle { bool internalDrain; // Indicates if stop is initiated from callback or not. CoreHandle() - :deviceBuffer(0), drainCounter(0), internalDrain(false) { nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; } + :deviceBuffer(0), drainCounter(0), internalDrain(false) { iStream[0] = 0; iStream[1] = 0; nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; } }; RtApiCore:: RtApiCore() @@ -10386,6 +10393,10 @@ void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel ) void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info ) { + static const float kBias[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; + static const float kScale[8] = {32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f}; + static const float kScaleI[8] = {1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f}; + // This function does format conversion, input/output channel compensation, and // data interleaving/deinterleaving. 24-bit integers are assumed to occupy // the lower three bytes of a 32-bit integer. @@ -10395,6 +10406,12 @@ void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) ) memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) ); + if (info.outFormat == info.inFormat && info.channels == 1) + { + std::memcpy(outBuffer, inBuffer, stream_.bufferSize * formatBytes(info.outFormat)); + return; + } + int j; if (info.outFormat == RTAUDIO_FLOAT64) { Float64 scale; @@ -10493,15 +10510,44 @@ void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info } else if (info.inFormat == RTAUDIO_SINT16) { Int16 *in = (Int16 *)inBuffer; - scale = (Float32) ( 1.0 / 32767.5 ); - for (unsigned int i=0; i Date: Fri, 10 Apr 2020 14:34:53 -0500 Subject: [PATCH 2/3] minor fix --- RtAudio.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RtAudio.cpp b/RtAudio.cpp index b0ab7a2b..c92bd2ef 100644 --- a/RtAudio.cpp +++ b/RtAudio.cpp @@ -10514,7 +10514,7 @@ void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info if (info.channels == 1) { #if __AVX__ - if (stream_.bufferSize % 8 == 0) + if (stream_.bufferSize >= 8 && stream_.bufferSize % 8 == 0) { __m256 _bias = _mm256_broadcast_ss(kBias); __m256 _scale = _mm256_broadcast_ss(kScaleI); @@ -10781,7 +10781,7 @@ void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info if (info.channels == 1) { #if __AVX__ - if (stream_.bufferSize % 8 == 0) + if (stream_.bufferSize >= 8 && stream_.bufferSize % 8 == 0) { __m256 _bias = _mm256_broadcast_ss(kBias); __m256 _scale = _mm256_broadcast_ss(kScale); From fd7ad3f1afb1727f5f60d3a3882d5cbaf138e445 Mon Sep 17 00:00:00 2001 From: Eloi Du Bois Date: Sat, 11 Apr 2020 00:06:05 -0500 Subject: [PATCH 3/3] compilation fix --- RtAudio.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RtAudio.cpp b/RtAudio.cpp index c92bd2ef..bee826af 100644 --- a/RtAudio.cpp +++ b/RtAudio.cpp @@ -10393,9 +10393,11 @@ void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel ) void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info ) { +#ifdef __AVX__ static const float kBias[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; static const float kScale[8] = {32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f, 32767.5f}; static const float kScaleI[8] = {1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f, 1.0f / 32767.5f}; +#endif // This function does format conversion, input/output channel compensation, and // data interleaving/deinterleaving. 24-bit integers are assumed to occupy