diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6b80b24..6bcdd0a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -94,25 +94,44 @@ else()
list(APPEND CMAKE_HOST_FLAGS "-O3;-march=native")
endif()
-# GPU arch targets
-set(TARGETS "gfx900;gfx906")
-if(HIP_VERSION VERSION_GREATER_EQUAL "3.7")
- set(TARGETS "${TARGETS};gfx908")
-endif()
-if(HIP_VERSION VERSION_GREATER_EQUAL "4.3")
- set(TARGETS "${TARGETS};gfx90a")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "5.7")
- set(TARGETS "${TARGETS};gfx942")
-endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "6.5")
- set(TARGETS "${TARGETS};gfx950;gfx1100")
+# GPU arch targets: a non-empty HPL_BUILD_ARCH (comma-separated) wins; otherwise probe rocminfo.
+set(ARCHS "") # use plural to indicate list
+if(DEFINED HPL_BUILD_ARCH AND NOT HPL_BUILD_ARCH STREQUAL "")
+  string(REPLACE "," ";" ARCHS "${HPL_BUILD_ARCH}")
+  list(TRANSFORM ARCHS STRIP)
+  list(REMOVE_DUPLICATES ARCHS)
+  message(STATUS "Using manually specified GPU targets: ${ARCHS}")
+else()
+  message(STATUS "Detecting available architecture")
+  ############ Find using rocminfo #####################
+  find_program(ROCMINFO_EXECUTABLE rocminfo)
+  if(ROCMINFO_EXECUTABLE)
+    execute_process(
+      COMMAND ${ROCMINFO_EXECUTABLE}
+      OUTPUT_VARIABLE ROCMINFO_OUTPUT
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    # 1) Only match entries where the gfx token directly follows "Name:" and whitespace
+    string(REGEX MATCHALL "Name:[ \t]+gfx[0-9a-z]+" ARCH_MATCHES "${ROCMINFO_OUTPUT}")
+
+    # 2) Strip the leading "Name: " from every match to keep just the gfx tokens
+    string(REGEX REPLACE "Name:[ \t]+" "" ARCHS "${ARCH_MATCHES}")
+
+    # 3) Remove duplicates (the same gfx token can be matched more than once)
+    list(REMOVE_DUPLICATES ARCHS)
+  endif()
endif()
-if (HIP_VERSION VERSION_GREATER_EQUAL "7.0")
- set(TARGETS "${TARGETS};gfx1201")
-endif()
-foreach(target ${TARGETS})
+if("${ARCHS}" STREQUAL "")
+  message(FATAL_ERROR "No GPU architectures detected via rocminfo and HPL_BUILD_ARCH was not specified. Use ./install.sh --arch=gfxXXX or pass -DHPL_BUILD_ARCH=gfxXXX to cmake")
+endif()
+
+message(STATUS "Building for GPU architecture: ${ARCHS}")
+
+# Generate HIP_HIPCC_FLAGS: one --offload-arch flag per selected architecture
+foreach(target ${ARCHS})
list(APPEND HIP_HIPCC_FLAGS "--offload-arch=${target}")
endforeach()
diff --git a/README.md b/README.md
index 2f7fb27..6152147 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ cd rocHPL
# --with-rocm=
- Path to ROCm install (Default: /opt/rocm)
# --with-rocblas= - Path to rocBLAS library (Default: /opt/rocm/rocblas)
# --with-mpi= - Path to external MPI install (Default: clone+build OpenMPI)
+# --arch="arch-list" - Specify comma-separated architecture list to build (Default: detect from rocminfo)
# --verbose-print - Verbose output during HPL setup (Default: true)
# --progress-report - Print progress report to terminal during HPL run (Default: true)
# --detailed-timing - Record detailed timers during HPL run (Default: true)
diff --git a/install.sh b/install.sh
index 2dfb4c5..fdcf9c6 100755
--- a/install.sh
+++ b/install.sh
@@ -17,6 +17,7 @@ function display_help()
echo " [--with-rocm=] Path to ROCm install (Default: /opt/rocm)"
echo " [--with-rocblas=] Path to rocBLAS library (Default: /opt/rocm/rocblas)"
echo " [--with-mpi=] Path to external MPI install (Default: clone+build OpenMPI)"
+ echo " [--arch=] Specify comma-separated architecture list to build (Default: detect from rocminfo)"
echo " [--with-mpi-gtl=] Path to external MPI-GTL install (Optional: defaults to no gtl support)"
echo " [--verbose-print] Verbose output during HPL setup (Default: true)"
echo " [--progress-report] Print progress report to terminal during HPL run (Default: true)"
@@ -232,7 +233,7 @@ enable_tracing=false
# check if we have a modern version of getopt that can handle whitespace and long parameters
getopt -T
if [[ $? -eq 4 ]]; then
- GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
+ GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,debug,prefix:,with-rocm:,with-mpi:,with-mpi-gtl:,with-rocblas:,verbose-print:,arch:,progress-report:,detailed-timing:,enable-tracing: --options hg -- "$@")
else
echo "Need a new version of getopt"
exit_with_error 1
@@ -263,6 +264,9 @@ while true; do
--with-mpi)
with_mpi=${2}
shift 2 ;;
+ --arch)
+ arch=${2}
+ shift 2 ;;
--with-mpi-gtl)
with_mpi_gtl=${2}
shift 2 ;;
@@ -316,6 +320,7 @@ pushd .
fi
+
# #################################################
# configure & build
# #################################################
@@ -333,7 +338,6 @@ pushd .
else
cmake_common_options="${cmake_common_options} -DCMAKE_BUILD_TYPE=Debug"
fi
-
shopt -s nocasematch
if [[ "${verbose_print}" == on || "${verbose_print}" == true || "${verbose_print}" == 1 || "${verbose_print}" == enabled ]]; then
cmake_common_options="${cmake_common_options} -DHPL_VERBOSE_PRINT=ON"
@@ -347,6 +351,9 @@ pushd .
if [[ "${enable_tracing}" == on || "${enable_tracing}" == true || "${enable_tracing}" == 1 || "${enable_tracing}" == enabled ]]; then
cmake_common_options="${cmake_common_options} -DHPL_TRACING=ON"
fi
+ if [[ -n "${arch//[[:space:]]/}" ]]; then
+ cmake_common_options="${cmake_common_options} -DHPL_BUILD_ARCH=${arch//[[:space:]]/}"
+ fi
shopt -u nocasematch
# Build library with AMD toolchain because of existence of device kernels