Pass ROCm target architectures per source file instead of per target.

* cuda/kernel.cmake: intersect each kernel's ROCm archs with ROCM_ARCHS,
  log the result, and append one `--offload-arch=<arch>` compile option
  (restricted to the HIP compile language) to every .cu/.hip source of
  the kernel.
* cuda/preamble.cmake: update HIP_SUPPORTED_ARCHS (gfx940/gfx941 removed;
  gfx950, gfx1200 and gfx1201 added) and set ROCM_ARCHS from the
  GPU_ARCHES value produced by override_gpu_arches.
* utils.cmake: copy COMPILE_OPTIONS from the original source onto the
  hipified source, and set HIP_ARCHITECTURES to `off` on the target for
  HIP builds because the arch flags are now passed per source file.

diff --git a/build2cmake/src/templates/cuda/kernel.cmake b/build2cmake/src/templates/cuda/kernel.cmake
index 8205c959..c91e065c 100644
--- a/build2cmake/src/templates/cuda/kernel.cmake
+++ b/build2cmake/src/templates/cuda/kernel.cmake
@@ -49,7 +49,21 @@ if(GPU_LANG STREQUAL "CUDA")
   list(APPEND SRC {{'"${' + kernel_name + '_SRC}"'}})
 {% if supports_hipify %}
 elseif(GPU_LANG STREQUAL "HIP")
-  hip_archs_loose_intersection({{kernel_name}}_ARCHS "{{ rocm_archs|join(";") }}" ${ROCM_ARCHS})
+  hip_archs_loose_intersection({{kernel_name}}_ARCHS "{{ rocm_archs|join(";") }}" "${ROCM_ARCHS}")
+  message(STATUS "Archs for kernel {{kernel_name}}: {{ '${' + kernel_name + '_ARCHS}'}}")
+
+  foreach(_KERNEL_SRC {{'${' + kernel_name + '_SRC}'}})
+    if(_KERNEL_SRC MATCHES ".*\\.(cu|hip)$")
+      foreach(_ROCM_ARCH {{ '${' + kernel_name + '_ARCHS}'}})
+        set_property(
+          SOURCE ${_KERNEL_SRC}
+          APPEND PROPERTY
+          COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:HIP>:--offload-arch=${_ROCM_ARCH}>"
+        )
+      endforeach()
+    endif()
+  endforeach()
+
   list(APPEND SRC {{'"${' + kernel_name + '_SRC}"'}})
 {% endif %}
 endif()
diff --git a/build2cmake/src/templates/cuda/preamble.cmake b/build2cmake/src/templates/cuda/preamble.cmake
index 4abea735..78becb9a 100644
--- a/build2cmake/src/templates/cuda/preamble.cmake
+++ b/build2cmake/src/templates/cuda/preamble.cmake
@@ -11,7 +11,7 @@ message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
 
 set(CUDA_SUPPORTED_ARCHS "{{ cuda_supported_archs }}")
 
-set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101")
+set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201")
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
 
@@ -85,11 +85,9 @@ if(GPU_LANG STREQUAL "CUDA")
 
   add_compile_definitions(CUDA_KERNEL)
 elseif(GPU_LANG STREQUAL "HIP")
-  set(ROCM_ARCHS "${HIP_SUPPORTED_ARCHS}")
-  # TODO: remove this once we can set specific archs per source file set.
-  override_gpu_arches(GPU_ARCHES
-    ${GPU_LANG}
-    "${${GPU_LANG}_SUPPORTED_ARCHS}")
+  override_gpu_arches(GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS})
+  set(ROCM_ARCHS ${GPU_ARCHES})
+  message(STATUS "ROCM supported target architectures: ${ROCM_ARCHS}")
 
   add_compile_definitions(ROCM_KERNEL)
 else()
diff --git a/build2cmake/src/templates/utils.cmake b/build2cmake/src/templates/utils.cmake
index dc522755..d4d4cef2 100644
--- a/build2cmake/src/templates/utils.cmake
+++ b/build2cmake/src/templates/utils.cmake
@@ -74,6 +74,7 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
   set(HIP_SRCS)
   foreach (SRC ${SRCS})
     get_source_file_property(include_dirs "${SRC}" INCLUDE_DIRECTORIES)
+    get_source_file_property(compile_options "${SRC}" COMPILE_OPTIONS)
     string(REGEX REPLACE "\.cu$" "\.hip" SRC ${SRC})
     string(REGEX REPLACE "cuda" "hip" SRC ${SRC})
 
@@ -84,6 +85,12 @@ function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
         PROPERTIES INCLUDE_DIRECTORIES "${include_dirs}")
     endif()
 
+    if(compile_options)
+      set_source_files_properties(
+        ${SRC}
+        PROPERTIES COMPILE_OPTIONS "${compile_options}")
+    endif()
+
     list(APPEND HIP_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${SRC}")
   endforeach()
 
@@ -516,8 +523,13 @@ function (define_gpu_extension_target GPU_MOD_NAME)
   endif()
 
   if (GPU_ARCHITECTURES)
-    set_target_properties(${GPU_MOD_NAME} PROPERTIES
-      ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
+    if (GPU_LANGUAGE STREQUAL "HIP")
+      # Clear target architectures, we are passing arch flags per source file.
+      set_property(TARGET ${GPU_MOD_NAME} PROPERTY HIP_ARCHITECTURES off)
+    else()
+      set_target_properties(${GPU_MOD_NAME} PROPERTIES
+        ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
+    endif()
   endif()
 
   set_property(TARGET ${GPU_MOD_NAME} PROPERTY CXX_STANDARD 17)