Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions ci/rocm_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ROCm GPU Test Script - based on coverage_test.sh

# Load the shared CI helpers that live next to this script, then call their
# `init` entry point. Both `init` and `parallel_test_base_gpu_test` (used
# below) come from utils.sh, not from this file.
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/utils.sh"
init

# Set ROCm environment
export WITH_ROCM=ON
export WITH_TESTING=ON

# Every later step uses the build directory as its working directory, so stop
# right away if it cannot be entered instead of running tests somewhere else.
mkdir -p "${PADDLE_ROOT}/build"
cd "${PADDLE_ROOT}/build" || exit 1

echo "::group::Install dependencies"
# Best effort: a failed dependency install is deliberately not fatal.
pip install hypothesis 2>/dev/null || true
pip install -r "${PADDLE_ROOT}/python/unittest_py/requirements.txt" 2>/dev/null || true
echo "::endgroup::"

echo "::group::Install paddle"
# Prefer a wheel from the build tree; fall back to one in the top-level dist/.
# The glob is left outside the quotes on purpose so it still expands.
if ls "${PADDLE_ROOT}"/build/python/dist/*whl >/dev/null 2>&1; then
    pip install "${PADDLE_ROOT}"/build/python/dist/*whl --force-reinstall
elif ls "${PADDLE_ROOT}"/dist/*whl >/dev/null 2>&1; then
    pip install "${PADDLE_ROOT}"/dist/*whl --force-reinstall
fi
echo "::endgroup::"

# Copy test support files from source directory (not build directory)
# Note: coverage_test.sh has a bug copying empty file from build dir
cp "${PADDLE_ROOT}/test/legacy_test/testsuite.py" "${PADDLE_ROOT}/build/test/legacy_test/" 2>/dev/null || true
cp "${PADDLE_ROOT}/test/legacy_test/testsuite.py" "${PADDLE_ROOT}/build/python" 2>/dev/null || true
cp -r "${PADDLE_ROOT}/build/test/white_list" "${PADDLE_ROOT}/build/python" 2>/dev/null || true

# Add source test directories to PYTHONPATH for module imports
export PYTHONPATH="${PADDLE_ROOT}/test:${PADDLE_ROOT}/test/legacy_test:${PYTHONPATH}"

# Wall-clock timing around the whole test run, in whole seconds.
ut_total_startTime_s=$(date +%s)

parallel_test_base_gpu_test

ut_total_endTime_s=$(date +%s)
ut_total_elapsed_s=$(( ut_total_endTime_s - ut_total_startTime_s ))
echo "TestCases Total Time: ${ut_total_elapsed_s}s"
echo "ipipe_log_param_TestCases_Total_Time: ${ut_total_elapsed_s}s" >> "${PADDLE_ROOT}/build/build_summary.txt"

# Print the accumulated summary (including the timing line appended above).
if [[ -f "${PADDLE_ROOT}/build/build_summary.txt" ]]; then
    echo "=====================build summary======================"
    cat "${PADDLE_ROOT}/build/build_summary.txt"
    echo "========================================================"
fi
28 changes: 27 additions & 1 deletion ci/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,12 @@ function card_test() {
if [ "${WITH_XPU}" == "ON" ];then
CUDA_DEVICE_COUNT=1
elif [ "${WITH_ROCM}" == "ON" ];then
CUDA_DEVICE_COUNT=$(rocm-smi -i | grep DCU | wc -l)
# Support both DCU (Hygon) and AMD ROCm GPUs
# Each GPU appears multiple times in -i output, count unique GPU IDs
CUDA_DEVICE_COUNT=$(rocm-smi -i 2>/dev/null | grep "^GPU\[" | cut -d']' -f1 | sort -u | wc -l || echo 0)
if [ "${CUDA_DEVICE_COUNT}" -eq 0 ]; then
CUDA_DEVICE_COUNT=1
fi
elif [ "${WITH_IPU}" == "ON" ];then
CUDA_DEVICE_COUNT=1
else
Expand Down Expand Up @@ -821,6 +826,27 @@ set +x
cp -r ${PADDLE_ROOT}/build/CTestCostData.txt ${PADDLE_ROOT}/build/Testing/Temporary/

get_quickly_disable_ut||disable_ut_quickly='disable_ut' # indicate whether the case was in quickly disable list

# ROCm: Disable tests not supported on ROCm platform
if [ "$WITH_ROCM" == "ON" ]; then
# OneDNN/MKL-DNN/cuDNN: Intel/NVIDIA specific
rocm_skip_tests="onednn|mkldnn|cudnn"
# BF16 kernels not registered for ROCm: argsort, mode, randperm
rocm_skip_tests="${rocm_skip_tests}|test_argsort_op|test_mode_op|test_randperm_op"
# FlashAttention not available (libflashattn.so missing)
rocm_skip_tests="${rocm_skip_tests}|test_scaled_dot_product_attention|test_compat_nn_multihead_attention"
# Graphviz dependency (dot command)
rocm_skip_tests="${rocm_skip_tests}|test_capture_backward_subgraph|test_capture_fwd_graph"
# c_embedding: distributed operator not registered
rocm_skip_tests="${rocm_skip_tests}|test_c_embedding_op"
if [ -n "$disable_ut_quickly" ] && [ "$disable_ut_quickly" != "disable_ut" ]; then
disable_ut_quickly="${disable_ut_quickly}|${rocm_skip_tests}"
else
disable_ut_quickly="${rocm_skip_tests}"
fi
echo "ROCm: Skipping OneDNN/MKL-DNN/cuDNN and ROCm-incompatible tests"
fi

test_cases=$(ctest -N -V) # get all test cases

if [ ${WITH_CINN:-OFF} == "ON" ]; then
Expand Down
4 changes: 2 additions & 2 deletions cmake/external/warpctc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ if(WITH_ROCM)
set(WARPCTC_PATCH_ROCM_COMMAND
patch -p1 <
${PADDLE_SOURCE_DIR}/patches/warpctc/CMakeLists.txt.rocm.patch && patch
-p1 < ${PADDLE_SOURCE_DIR}/patches/warpctc/devicetypes.cuh.patch && patch
-p1 < ${PADDLE_SOURCE_DIR}/patches/warpctc/hip.cmake.patch)
-p1 < ${PADDLE_SOURCE_DIR}/patches/warpctc/devicetypes.cuh.patch && cp
${PADDLE_SOURCE_DIR}/patches/warpctc/hip.cmake.rocm70 cmake/hip.cmake)
endif()

set(WARPCTC_INCLUDE_DIR
Expand Down
3 changes: 2 additions & 1 deletion cmake/external/warprnnt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ endif()
if(WITH_ROCM)
set(WARPRNNT_PATCH_ROCM_COMMAND
patch -p1 <
${PADDLE_SOURCE_DIR}/patches/warprnnt/CMakeLists.txt.rocm.patch)
${PADDLE_SOURCE_DIR}/patches/warprnnt/CMakeLists.txt.rocm.patch && cp
${PADDLE_SOURCE_DIR}/patches/warprnnt/hip.cmake.rocm70 cmake/hip.cmake)
endif()
if(NOT WIN32 AND WITH_GPU)
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION}
Expand Down
66 changes: 44 additions & 22 deletions cmake/hip.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,37 @@ if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH
"/opt/rocm"
CACHE PATH "Path to which ROCm has been installed")
set(HIP_PATH
${ROCM_PATH}/hip
CACHE PATH "Path to which HIP has been installed")
set(HIP_CLANG_PATH
${ROCM_PATH}/llvm/bin
CACHE PATH "Path to which clang has been installed")
else()
set(ROCM_PATH
$ENV{ROCM_PATH}
CACHE PATH "Path to which ROCm has been installed")
endif()

# ROCm 7.0+: HIP is now directly under ROCM_PATH, not in a separate hip subdirectory
# Check if we're using newer ROCm layout (7.0+) or older layout
if(EXISTS "${ROCM_PATH}/lib/cmake/hip/FindHIP.cmake")
# ROCm 7.0+ layout
set(HIP_PATH
${ROCM_PATH}
CACHE PATH "Path to which HIP has been installed")
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
elseif(EXISTS "${ROCM_PATH}/hip/cmake")
# Legacy ROCm layout (< 7.0)
set(HIP_PATH
${ROCM_PATH}/hip
CACHE PATH "Path to which HIP has been installed")
set(HIP_CLANG_PATH
${ROCM_PATH}/llvm/bin
CACHE PATH "Path to which clang has been installed")
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
else()
# Fallback: assume ROCm 7.0+ layout
set(HIP_PATH
${ROCM_PATH}
CACHE PATH "Path to which HIP has been installed")
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})

set(HIP_CLANG_PATH
${ROCM_PATH}/llvm/bin
CACHE PATH "Path to which clang has been installed")
set(CMAKE_PREFIX_PATH "${ROCM_PATH}" ${CMAKE_PREFIX_PATH})

find_package(HIP REQUIRED)
Expand Down Expand Up @@ -65,11 +78,23 @@ macro(find_hip_version hip_header_file)
)
endif()
endmacro()
find_hip_version(${HIP_PATH}/include/hip/hip_version.h)
# ROCm 7.0+: hip_version.h is directly under ROCM_PATH/include
if(EXISTS "${ROCM_PATH}/include/hip/hip_version.h")
find_hip_version(${ROCM_PATH}/include/hip/hip_version.h)
elseif(EXISTS "${HIP_PATH}/include/hip/hip_version.h")
find_hip_version(${HIP_PATH}/include/hip/hip_version.h)
else()
message(WARNING "Cannot find hip_version.h")
endif()

macro(find_package_and_include PACKAGE_NAME)
find_package("${PACKAGE_NAME}" REQUIRED)
include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
# ROCm 7.0+ uses /opt/rocm/include/<package>/ instead of /opt/rocm/<package>/include/
if(EXISTS "${ROCM_PATH}/include/${PACKAGE_NAME}")
include_directories("${ROCM_PATH}/include/${PACKAGE_NAME}")
elseif(EXISTS "${ROCM_PATH}/${PACKAGE_NAME}/include")
include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
endif()
message(STATUS "${PACKAGE_NAME} version: ${${PACKAGE_NAME}_VERSION}")
endmacro()

Expand All @@ -93,10 +118,10 @@ endif()

# set CXX flags for HIP
set(CMAKE_C_FLAGS
"${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DROCM_NO_WRAPPER_HEADER_WARNING"
"${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -D__HIP__=1 -DROCM_NO_WRAPPER_HEADER_WARNING"
)
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DROCM_NO_WRAPPER_HEADER_WARNING"
"${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -D__HIP__=1 -DROCM_NO_WRAPPER_HEADER_WARNING"
)
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
Expand All @@ -106,6 +131,7 @@ set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_AMD__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP__=1)
# Note(qili93): HIP has compile conflicts of float16.h as platform::float16 overload std::is_floating_point and std::is_integer
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -DROCM_NO_WRAPPER_HEADER_WARNING)
Expand Down Expand Up @@ -159,15 +185,11 @@ set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx906) # Z100 (ZIFANG)
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx926) # K100 (KONGING)
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx928) # K100_AI (KONGING_AI)
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx936) # BW1000 (BOWEN)
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx942) # MI300
list(APPEND HIP_HCC_FLAGS --offload-arch=gfx950) # MI350X
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx906) # Z100 (ZIFANG)
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx926) # K100 (KONGING)
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx928) # K100_AI (KONGING_AI)
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx936) # BW1000 (BOWEN)
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx942) # MI300
list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx950) # MI350X

if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
Expand Down
27 changes: 21 additions & 6 deletions cmake/rccl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,35 @@ if(WITH_RCCL)
set(RCCL_ROOT
${ROCM_PATH}/rccl
CACHE PATH "RCCL ROOT")
find_path(
RCCL_INCLUDE_DIR rccl.h
PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include
$ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include
# ROCm 7.0+: rccl.h is under include/rccl/ directory
# First try to find rccl.h directly (handles both old and new layouts)
find_file(
RCCL_HEADER_FILE rccl.h
PATHS ${ROCM_PATH}/include/rccl
${ROCM_PATH}/include
${RCCL_ROOT}
${RCCL_ROOT}/include
${RCCL_ROOT}/local/include
$ENV{RCCL_ROOT}
$ENV{RCCL_ROOT}/include
$ENV{RCCL_ROOT}/local/include
NO_DEFAULT_PATH)

file(READ ${RCCL_INCLUDE_DIR}/rccl.h RCCL_VERSION_FILE_CONTENTS)
if(NOT RCCL_HEADER_FILE)
message(FATAL_ERROR "Cannot find rccl.h. Please check RCCL installation.")
endif()

# Get the directory containing rccl.h
get_filename_component(RCCL_INCLUDE_DIR ${RCCL_HEADER_FILE} DIRECTORY)

file(READ ${RCCL_HEADER_FILE} RCCL_VERSION_FILE_CONTENTS)

string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" RCCL_VERSION
"${RCCL_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" RCCL_VERSION
"${RCCL_VERSION}")

# 2604 for ROCM3.5 and 2708 for ROCM 3.9
message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. "
message(STATUS "Current RCCL header is ${RCCL_HEADER_FILE}. "
"Current RCCL version is v${RCCL_VERSION}. ")
endif()
14 changes: 5 additions & 9 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -406,15 +406,11 @@ endif()

list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog
extern_xxhash)
list(
APPEND
third_party_deps
extern_zlib
extern_dlpack
extern_warpctc
extern_warprnnt
extern_threadpool
extern_lapack)
list(APPEND third_party_deps extern_zlib extern_dlpack extern_threadpool
extern_lapack)
if(NOT WITH_ROCM)
list(APPEND third_party_deps extern_warpctc extern_warprnnt)
endif()

if(WITH_MAGMA)
list(APPEND third_party_deps extern_magma)
Expand Down
54 changes: 33 additions & 21 deletions cmake/thrust.cmake
Original file line number Diff line number Diff line change
@@ -1,26 +1,38 @@
function(add_thrust_patches_if_necessary)
set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu)
file(
WRITE ${thrust_detect_file}
""
"#include \"thrust/version.h\"\n"
"#include \"thrust/shuffle.h\"\n"
"#include \"stdio.h\"\n"
"int main() {\n"
" int version = THRUST_VERSION;\n"
" printf(\"%d\", version);\n"
" return 0;\n"
"}\n")
# ROCm 7.0+ has rocThrust with shuffle support built-in, so no patches needed
if(WITH_ROCM)
# Check if rocThrust has shuffle.h
if(EXISTS "${ROCM_PATH}/include/thrust/shuffle.h")
message(STATUS "ROCm thrust has native shuffle support, skipping patches")
return()
endif()
endif()

# For CUDA, check if thrust has shuffle support
if(WITH_GPU)
set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu)
file(
WRITE ${thrust_detect_file}
""
"#include \"thrust/version.h\"\n"
"#include \"thrust/shuffle.h\"\n"
"#include \"stdio.h\"\n"
"int main() {\n"
" int version = THRUST_VERSION;\n"
" printf(\"%d\", version);\n"
" return 0;\n"
"}\n")

execute_process(
COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${thrust_detect_file}"
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res
ERROR_QUIET)
if(NOT nvcc_res EQUAL 0)
set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust")
message(STATUS "Add thrust patches: ${thrust_patches}")
include_directories(${thrust_patches})
execute_process(
COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${thrust_detect_file}"
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE nvcc_res
ERROR_QUIET)
if(NOT nvcc_res EQUAL 0)
set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust")
message(STATUS "Add thrust patches: ${thrust_patches}")
include_directories(${thrust_patches})
endif()
endif()
endfunction()

Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/platform/enforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,12 @@ limitations under the License. */
#include <hiprand/hiprand.h>
#include <miopen/miopen.h>
#include <rocblas/rocblas.h>
// thrust headers require hipcc (rocThrust 7.0+ pulls in rocprim)
#ifdef __HIPCC__
#include <thrust/system/hip/error.h>
#include <thrust/system_error.h> // NOLINT
#endif
#endif

#include <fstream>
#include <iomanip>
Expand Down
3 changes: 2 additions & 1 deletion paddle/phi/api/lib/tensor_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ PADDLE_API phi::Place GetPlaceFromPtr(void* data) {
#else
hipPointerAttribute_t attr = {};
hipError_t status = hipPointerGetAttributes(&attr, data);
if (status == hipSuccess && attr.memoryType == hipMemoryTypeDevice) {
// ROCm 7.0+ uses 'type' instead of 'memoryType'
if (status == hipSuccess && attr.type == hipMemoryTypeDevice) {
return phi::GPUPlace(attr.device);
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion paddle/phi/backends/dynload/magma.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ limitations under the License. */

#ifdef PADDLE_WITH_HIP
#include <hip/hip_complex.h>
#include <thrust/complex.h>
// Note: thrust/complex.h is not needed for magma type definitions
// and should only be included when compiled with hipcc
typedef hipDoubleComplex magmaDoubleComplex;
typedef hipFloatComplex magmaFloatComplex;
#endif // PADDLE_WITH_HIP
Expand Down
Loading
Loading