VX_RPP Audio support - HIP backend#1626
Conversation
…r nodes and removed memcpy related changes
Remove print statements
…udio_support_gpu
…udio_support_gpu
The unified API changes need to accommodate the HIP backend for the audio augmentations. This commit adds the necessary changes to the Resample node and the audio augmentations in RPP to support the HIP backend.
…udio_support_gpu
rrawther
left a comment
There was a problem hiding this comment.
Please move the low-level kernel implementations to RPP instead
Add support to use rppt API instead of using specific kernels
…in/MIVisionX into fg/audio_support_gpu
Review: VX_RPP Audio Support - HIP Backend. Thank you for this substantial contribution enabling GPU acceleration for audio operations in the AMD OpenVX RPP extension. Summary of Changes: This PR adds HIP backend support to 9 audio processing operations that were previously CPU-only:
Key Technical Highlights1. Audio Tensor Dimension Handling 2. Memory Management Strategy
3. Resample Window LUT 4. Backend Selection Pattern — This cleanly consolidates the CPU/GPU paths and reduces code duplication. 5. Version Updates
API Change Notice: The signature change (adding a parameter) is a breaking change. Projects using this function will need updates. Consider documenting this in the release notes. Suggestions
Assessment: Needs Review — This is a significant feature addition with architectural changes. While the implementation looks solid, given the scope (9 operations, memory management changes, API modifications), this warrants thorough maintainer review and testing validation before merge. The HIP kernel implementations appear well-structured and the memory management patterns are consistent with existing GPU operations in the codebase. |
There was a problem hiding this comment.
Pull request overview
Copilot reviewed 14 out of 14 changed files in this pull request and generated 14 comments.
Comments suppressed due to low confidence (5)
amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp:78
refreshSpectrogram uses roi_tensor_ptr_src/dst after the device-type switch, but those pointers are only set inside #if ENABLE_HIP in the GPU branch. With ENABLE_HIP=0 and GPU affinity, the pointers are uninitialized and updateDstRoi() will dereference invalid memory. Return VX_ERROR_NOT_IMPLEMENTED (or handle OpenCL) when GPU is selected but HIP support is not compiled in.
void *roi_tensor_ptr_src, *roi_tensor_ptr_dst;
if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst)));
#endif
} else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst)));
}
updateDstRoi(data, reinterpret_cast<RpptROI *>(roi_tensor_ptr_src), reinterpret_cast<RpptROI *>(roi_tensor_ptr_dst));
return status;
amd_openvx_extensions/amd_rpp/source/tensor/PreemphasisFilter.cpp:57
- The GPU branch only populates roi_tensor_ptr_src under #if ENABLE_HIP, but roi_tensor_ptr_src is always dereferenced afterwards to compute pSampleSize. In OpenCL-only builds (ENABLE_HIP=0) with GPU affinity this leaves the pointer uninitialized and can crash. Return VX_ERROR_NOT_IMPLEMENTED (or handle OpenCL) when GPU is selected but HIP is not compiled in.
void *roi_tensor_ptr_src;
STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[3], 0, data->pSrcDesc->n, sizeof(float), data->pPreemphCoeff, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
#endif
}
if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
}
RpptROI *src_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_src);
for (int n = 0; n < data->inputTensorDims[0]; n++)
data->pSampleSize[n] = src_roi[n].xywhROI.roiWidth * src_roi[n].xywhROI.roiHeight;
amd_openvx_extensions/amd_rpp/source/tensor/NonSilentRegionDetection.cpp:60
- roi_tensor_ptr is only set inside #if ENABLE_HIP for the GPU path, but it is unconditionally dereferenced afterwards to fill pSrcLength. If ENABLE_HIP=0 (e.g., an OpenCL build) and the node affinity is GPU, this will use an uninitialized pointer. Add an #else return (like VX_ERROR_NOT_IMPLEMENTED) inside the GPU branch for when HIP isn't enabled.
void *roi_tensor_ptr;
if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst1, sizeof(data->pDst1)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &data->pDst2, sizeof(data->pDst2)));
#endif
} else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst1, sizeof(data->pDst1)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &data->pDst2, sizeof(data->pDst2)));
}
unsigned *src_roi = static_cast<unsigned *>(roi_tensor_ptr);
for (unsigned i = 0, j = 0; i < data->inputTensorDims[0]; i++, j += 4)
data->pSrcLength[i] = src_roi[j + 2];
amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp:56
- In the GPU branch, roi_tensor_ptr_src is only set under #if ENABLE_HIP, but it is always dereferenced afterwards to populate pSrcRoi. With ENABLE_HIP=0 and GPU affinity (e.g., an OpenCL build), this can crash due to an uninitialized pointer. Add an #else return VX_ERROR_NOT_IMPLEMENTED (or implement the OpenCL buffer path) inside the GPU branch.
void *roi_tensor_ptr_src;
if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
#endif
} else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
}
RpptROI *src_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_src);
for (int n = 0; n < data->inputTensorDims[0]; n++) {
data->pSrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth;
data->pSrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight;
}
amd_openvx_extensions/amd_rpp/CMakeLists.txt:196
- The audio-support CMake condition mixes OR and AND without parentheses. In CMake, AND binds tighter than OR, so for rpp_VERSION_MAJOR > 3 the check can enable audio support even when RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND is false. Wrap the version comparison in parentheses so the support flag is required in all cases (i.e., (version_ok) AND RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND).
# Audio features for VX_RPP
if((${rpp_VERSION_MAJOR} VERSION_GREATER "3") OR (${rpp_VERSION_MAJOR} VERSION_EQUAL "3" AND ${rpp_VERSION_MINOR} VERSION_GREATER_EQUAL "1" AND ${rpp_VERSION_PATCH} VERSION_GREATER_EQUAL "1")
AND RPP_AUDIO_AUGMENTATIONS_SUPPORT_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAUDIO_SUPPORT=1")
message("-- ${White}vx_rpp Audio Features Included${ColourReset}")
target_compile_definitions(vx_rpp PUBLIC RPP_AUDIO=1)
else()
target_compile_definitions(vx_rpp PUBLIC RPP_AUDIO=0)
message("-- ${Yellow}vx_rpp Audio Features Excluded${ColourReset}")
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
rrawther
left a comment
There was a problem hiding this comment.
Please resolve all review comments
|
@rrawther @LakshmiKumar23 Addressed all review comments, please check |
|
@kiritigowda : Can you please check this PR. It is ready to be merged from my side |
Motivation
Adds HIP backend support for audio processing operations in the AMD OpenVX RPP extension. It enables GPU acceleration for audio augmentation functions that were previously only available on CPU.
Technical Details
Test Plan
Test Result
Submission Checklist