From 27e5e555e404cda606bc709a721206d3b042aad5 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Tue, 21 Mar 2023 14:49:34 +0100 Subject: [PATCH 01/44] [clang][SYCL] Fix LIT test for Windows (#8710) Due to incorrect substitution wrong command line was formed on Windows. --- clang/test/CodeGenSYCL/fpga-attr-do-while-loops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/fpga-attr-do-while-loops.cpp b/clang/test/CodeGenSYCL/fpga-attr-do-while-loops.cpp index ae8dc07ae5006..91414bcfe87ea 100644 --- a/clang/test/CodeGenSYCL/fpga-attr-do-while-loops.cpp +++ b/clang/test/CodeGenSYCL/fpga-attr-do-while-loops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang++ -fsycl-device-only -fintelfpga -S %s -o - | FileCheck %s +// RUN: %clang -fsycl-device-only -fintelfpga -S %s -o - | FileCheck %s #include "Inputs/sycl.hpp" From 7c7efeee209205657283f1d054c8d031589aa46e Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 21 Mar 2023 14:38:58 +0000 Subject: [PATCH 02/44] [SYCL][Reduction] Make reducer uncopyable and immovable (#8654) According to the SYCL 2020 specification, the reducer class should be neither moveable nor copyable. This commit deletes these constructors and assignment operators. Fixes https://github.com/intel/llvm/issues/6065 --------- Signed-off-by: Larsen, Steffen --- sycl/include/sycl/reduction.hpp | 56 ++++- .../reduction/reducer_copy_move.cpp | 79 +++++++ .../reduction/reducer_copy_move_negative.cpp | 221 ++++++++++++++++++ 3 files changed, 354 insertions(+), 2 deletions(-) create mode 100644 sycl/test/basic_tests/reduction/reducer_copy_move.cpp create mode 100644 sycl/test/basic_tests/reduction/reducer_copy_move_negative.cpp diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 94fa6fadae41a..47dc36c3e0bc6 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -422,6 +422,13 @@ class ReductionIdentityContainer< static constexpr bool has_identity = false; }; +// Token class to help with the in-place construction of reducers. +template +struct ReducerToken { + const IdentityContainerT &IdentityContainer; + const BinaryOperation BOp; +}; + } // namespace detail /// Specialization of the generic class 'reducer'. It is used for reductions @@ -458,6 +465,14 @@ class reducer< reducer(const IdentityContainerT &IdentityContainer, BinaryOperation BOp) : MValue(GetInitialValue(IdentityContainer)), MIdentity(IdentityContainer), MBinaryOp(BOp) {} + reducer( + const detail::ReducerToken &Token) + : reducer(Token.IdentityContainer, Token.BOp) {} + + reducer(const reducer &) = delete; + reducer(reducer &&) = delete; + reducer &operator=(const reducer &) = delete; + reducer &operator=(reducer &&) = delete; reducer &combine(const T &Partial) { if constexpr (has_identity) @@ -515,6 +530,14 @@ class reducer< reducer() : MValue(getIdentity()) {} reducer(const IdentityContainerT & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} + reducer( + const detail::ReducerToken &Token) + : reducer(Token.IdentityContainer, Token.BOp) {} + + reducer(const reducer &) = delete; + reducer(reducer &&) = delete; + reducer &operator=(const reducer &) = delete; + reducer &operator=(reducer &&) = delete; reducer &combine(const T &Partial) { BinaryOperation BOp; @@ -553,6 +576,14 @@ class reducer &Token) + : reducer(Token.IdentityContainer, Token.BOp) {} + + reducer(const reducer &) = delete; + reducer(reducer &&) = delete; + reducer &operator=(const reducer &) = delete; + reducer &operator=(reducer &&) = delete; reducer &combine(const T &Partial) { if constexpr (has_identity) @@ -599,6 +630,14 @@ class reducer< reducer(const IdentityContainerT &IdentityContainer, BinaryOperation BOp) : MValue(GetInitialValue(IdentityContainer)), MIdentity(IdentityContainer), MBinaryOp(BOp) {} + reducer( + const detail::ReducerToken &Token) + : reducer(Token.IdentityContainer, Token.BOp) {} + + reducer(const reducer &) = delete; + reducer(reducer &&) = delete; + reducer &operator=(const reducer &) = delete; + reducer &operator=(reducer &&) = delete; reducer operator[](size_t Index) { @@ -650,6 +689,14 @@ class reducer< reducer() : MValue(getIdentity()) {} reducer(const IdentityContainerT & /* Identity */, BinaryOperation) : MValue(getIdentity()) {} + reducer( + const detail::ReducerToken &Token) + : reducer(Token.IdentityContainer, Token.BOp) {} + + reducer(const reducer &) = delete; + reducer(reducer &&) = delete; + reducer &operator=(const reducer &) = delete; + reducer &operator=(reducer &&) = delete; // SYCL 2020 revision 4 says this should be const, but this is a bug // see https://github.com/KhronosGroup/SYCL-Docs/pull/252 @@ -746,6 +793,8 @@ class reduction_impl_algo { using identity_container_type = ReductionIdentityContainer; + using reducer_token_type = + detail::ReducerToken; using reducer_type = reducer; using result_type = T; @@ -2062,8 +2111,11 @@ void reduCGFuncMulti(handler &CGH, KernelType KernelFunc, // Pass all reductions to user's lambda in the same order as supplied // Each reducer initializes its own storage auto ReduIndices = std::index_sequence_for(); - auto ReducersTuple = std::tuple{typename Reductions::reducer_type{ - std::get(IdentitiesTuple), std::get(BOPsTuple)}...}; + auto ReducerTokensTuple = + std::tuple{typename Reductions::reducer_token_type{ + std::get(IdentitiesTuple), std::get(BOPsTuple)}...}; + auto ReducersTuple = std::tuple{ + std::get(ReducerTokensTuple)...}; std::apply([&](auto &...Reducers) { KernelFunc(NDIt, Reducers...); }, ReducersTuple); diff --git a/sycl/test/basic_tests/reduction/reducer_copy_move.cpp b/sycl/test/basic_tests/reduction/reducer_copy_move.cpp new file mode 100644 index 0000000000000..707a880f51979 --- /dev/null +++ b/sycl/test/basic_tests/reduction/reducer_copy_move.cpp @@ -0,0 +1,79 @@ +// RUN: %clangxx -fsycl -fsyntax-only %s + +// Tests that the reducer class is neither movable nor copyable. + +#include + +#include + +template struct PlusWithoutIdentity { + T operator()(const T &A, const T &B) const { return A + B; } +}; + +template static constexpr void checkReducer() { + static_assert(!std::is_copy_constructible_v); + static_assert(!std::is_move_constructible_v); + static_assert(!std::is_copy_assignable_v); + static_assert(!std::is_move_assignable_v); +} + +int main() { + sycl::queue Q; + + int *ScalarMem = sycl::malloc_shared(1, Q); + int *SpanMem = sycl::malloc_shared(8, Q); + auto ScalarRed1 = sycl::reduction(ScalarMem, std::plus{}); + auto ScalarRed2 = sycl::reduction(ScalarMem, PlusWithoutIdentity{}); + auto SpanRed1 = + sycl::reduction(sycl::span{SpanMem, 8}, std::plus{}); + auto SpanRed2 = sycl::reduction(sycl::span{SpanMem, 8}, + PlusWithoutIdentity{}); + + Q.parallel_for(sycl::range<1>{1024}, ScalarRed1, + [=](sycl::item<1>, auto &Reducer) { + checkReducer>(); + }); + + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed1, + [=](sycl::nd_item<1>, auto &Reducer) { + checkReducer>(); + }); + + Q.parallel_for(sycl::range<1>{1024}, ScalarRed2, + [=](sycl::item<1>, auto &Reducer) { + checkReducer>(); + }); + + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed2, + [=](sycl::nd_item<1>, auto &Reducer) { + checkReducer>(); + }); + + Q.parallel_for( + sycl::range<1>{1024}, SpanRed1, [=](sycl::item<1>, auto &Reducer) { + checkReducer>(); + checkReducer>(); + }); + + Q.parallel_for( + sycl::nd_range<1>{1024, 1024}, SpanRed1, + [=](sycl::nd_item<1>, auto &Reducer) { + checkReducer>(); + checkReducer>(); + }); + + Q.parallel_for( + sycl::range<1>{1024}, SpanRed2, [=](sycl::item<1>, auto &Reducer) { + checkReducer>(); + checkReducer>(); + }); + + Q.parallel_for( + sycl::nd_range<1>{1024, 1024}, SpanRed2, + [=](sycl::nd_item<1>, auto &Reducer) { + checkReducer>(); + checkReducer>(); + }); + + return 0; +} diff --git a/sycl/test/basic_tests/reduction/reducer_copy_move_negative.cpp b/sycl/test/basic_tests/reduction/reducer_copy_move_negative.cpp new file mode 100644 index 0000000000000..32b1d08e50f53 --- /dev/null +++ b/sycl/test/basic_tests/reduction/reducer_copy_move_negative.cpp @@ -0,0 +1,221 @@ +// RUN: %clangxx -fsycl -fsyntax-only -ferror-limit=0 -Xclang -verify %s -Xclang -verify-ignore-unexpected=note + +// Tests the errors emitted from using the deleted copy and move assignment +// operators and constructors. + +#include + +#include + +template struct PlusWithoutIdentity { + T operator()(const T &A, const T &B) const { return A + B; } +}; + +int main() { + sycl::queue Q; + + int *ScalarMem = sycl::malloc_shared(1, Q); + int *SpanMem = sycl::malloc_shared(8, Q); + auto ScalarRed1 = sycl::reduction(ScalarMem, std::plus{}); + auto ScalarRed2 = sycl::reduction(ScalarMem, PlusWithoutIdentity{}); + auto SpanRed1 = + sycl::reduction(sycl::span{SpanMem, 8}, std::plus{}); + auto SpanRed2 = sycl::reduction(sycl::span{SpanMem, 8}, + PlusWithoutIdentity{}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 0{{.*}}>'}} + Q.parallel_for(sycl::range<1>{1024}, ScalarRed1, + [=](sycl::item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 0{{.*}}>'}} + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed1, + [=](sycl::nd_item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 0{{.*}}>'}} + Q.parallel_for(sycl::range<1>{1024}, ScalarRed2, + [=](sycl::item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 0{{.*}}>'}} + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed2, + [=](sycl::nd_item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 1{{.*}}>'}} + Q.parallel_for(sycl::range<1>{1024}, SpanRed1, + [=](sycl::item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 1{{.*}}>'}} + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, SpanRed1, + [=](sycl::nd_item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 1{{.*}}>'}} + Q.parallel_for(sycl::range<1>{1024}, SpanRed2, + [=](sycl::item<1>, auto Reducer) {}); + + // expected-error-re@sycl/reduction.hpp:* {{call to deleted constructor of 'sycl::reducer, 1{{.*}}>'}} + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, SpanRed2, + [=](sycl::nd_item<1>, auto Reducer) {}); + + Q.parallel_for(sycl::range<1>{1024}, ScalarRed1, + [=](sycl::item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + }); + + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed1, + [=](sycl::nd_item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + }); + + Q.parallel_for(sycl::range<1>{1024}, ScalarRed2, + [=](sycl::item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + }); + + Q.parallel_for(sycl::nd_range<1>{1024, 1024}, ScalarRed2, + [=](sycl::nd_item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + }); + + Q.parallel_for( + sycl::range<1>{1024}, SpanRed1, [=](sycl::item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + + using reducer_subscript_t = + std::remove_reference_t; + reducer_subscript_t ReducerSubscript = Reducer[0]; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyAssign = ReducerSubscript; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveAssign = + std::move(ReducerSubscript); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyCtor{ReducerSubscript}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveCtor{ + std::move(ReducerSubscript)}; + }); + + Q.parallel_for( + sycl::nd_range<1>{1024, 1024}, SpanRed1, + [=](sycl::nd_item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + + using reducer_subscript_t = + std::remove_reference_t; + reducer_subscript_t ReducerSubscript = Reducer[0]; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyAssign = ReducerSubscript; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveAssign = + std::move(ReducerSubscript); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyCtor{ReducerSubscript}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveCtor{ + std::move(ReducerSubscript)}; + }); + + Q.parallel_for( + sycl::range<1>{1024}, SpanRed2, [=](sycl::item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + + using reducer_subscript_t = + std::remove_reference_t; + reducer_subscript_t ReducerSubscript = Reducer[0]; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyAssign = ReducerSubscript; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveAssign = + std::move(ReducerSubscript); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyCtor{ReducerSubscript}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveCtor{ + std::move(ReducerSubscript)}; + }); + + Q.parallel_for( + sycl::nd_range<1>{1024, 1024}, SpanRed2, + [=](sycl::nd_item<1>, auto &Reducer) { + using reducer_t = std::remove_reference_t; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyAssign = Reducer; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveAssign = std::move(Reducer); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerCopyCtor{Reducer}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_t' (aka 'sycl::reducer, 1{{.*}}>')}} + reducer_t ReducerMoveCtor{std::move(Reducer)}; + + using reducer_subscript_t = + std::remove_reference_t; + reducer_subscript_t ReducerSubscript = Reducer[0]; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyAssign = ReducerSubscript; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveAssign = + std::move(ReducerSubscript); + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptCopyCtor{ReducerSubscript}; + // expected-error-re@+1 {{call to deleted constructor of 'reducer_subscript_t' (aka 'sycl::reducer, 0{{.*}}>')}} + reducer_subscript_t ReducerSubscriptMoveCtor{ + std::move(ReducerSubscript)}; + }); + + return 0; +} From c5c7ac2ea55b23c019120885174a6a10f1506282 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 21 Mar 2023 15:53:14 +0100 Subject: [PATCH 03/44] [SYCL] Add marray support to common + some math functions (#8631) This patch adds marray support to all functions from Table 179 of SYCL 2020 spec + to functions fabs, ilogb, fmax, fmin, ldexp, pown, rootn from Table 175 + to function exp10 from Table 177. E2E tests: https://github.com/intel/llvm-test-suite/pull/1656 --------- Co-authored-by: KornevNikita --- sycl/include/sycl/builtins.hpp | 182 +++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index fe6eeaaec2e4e..9ff111c14583a 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -122,10 +122,27 @@ __SYCL_MATH_FUNCTION_OVERLOAD_FM(log2) __SYCL_MATH_FUNCTION_OVERLOAD_FM(log10) __SYCL_MATH_FUNCTION_OVERLOAD_FM(sqrt) __SYCL_MATH_FUNCTION_OVERLOAD_FM(rsqrt) +__SYCL_MATH_FUNCTION_OVERLOAD_FM(fabs) #undef __SYCL_MATH_FUNCTION_OVERLOAD_FM #undef __SYCL_MATH_FUNCTION_OVERLOAD_IMPL +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ilogb(marray x) __NOEXC { + marray res; + for (size_t i = 0; i < N / 2; i++) { + vec partial_res = + __sycl_std::__invoke_ilogb>(detail::to_vec2(x, i * 2)); + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); + } + if (N % 2) { + res[N - 1] = __sycl_std::__invoke_ilogb(x[N - 1]); + } + return res; +} + #define __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL(NAME) \ marray res; \ for (size_t i = 0; i < N / 2; i++) { \ @@ -170,6 +187,98 @@ inline __SYCL_ALWAYS_INLINE #undef __SYCL_MATH_FUNCTION_2_OVERLOAD_IMPL +#define __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(NAME) \ + template \ + inline __SYCL_ALWAYS_INLINE \ + std::enable_if_t::value, marray> \ + NAME(marray x, T y) __NOEXC { \ + marray res; \ + sycl::vec y_vec{y, y}; \ + for (size_t i = 0; i < N / 2; i++) { \ + auto partial_res = __sycl_std::__invoke_##NAME>( \ + detail::to_vec2(x, i * 2), y_vec); \ + std::memcpy(&res[i * 2], &partial_res, sizeof(vec)); \ + } \ + if (N % 2) { \ + res[N - 1] = __sycl_std::__invoke_##NAME(x[N - 1], y_vec[0]); \ + } \ + return res; \ + } + +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmax) + // clang-format off +__SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD(fmin) + +#undef __SYCL_MATH_FUNCTION_2_SGENFLOAT_Y_OVERLOAD + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ldexp(marray x, marray k) __NOEXC { + // clang-format on + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k[i]); + } + return res; +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + ldexp(marray x, int k) __NOEXC { + marray res; + for (size_t i = 0; i < N; i++) { + res[i] = __sycl_std::__invoke_ldexp(x[i], k); + } + return res; +} + +#define __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y[i]); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + pown(marray x, marray y) __NOEXC { + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + rootn(marray x, marray y) __NOEXC { + __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL(rootn) +} + +#undef __SYCL_MATH_FUNCTION_2_GENINT_Y_OVERLOAD_IMPL + +#define __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(NAME) \ + marray res; \ + for (size_t i = 0; i < N; i++) { \ + res[i] = __sycl_std::__invoke_##NAME(x[i], y); \ + } \ + return res; + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + pown(marray x, int y) __NOEXC { + __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(pown) +} + +template +inline __SYCL_ALWAYS_INLINE + std::enable_if_t::value, marray> + rootn(marray x, + int y) __NOEXC{__SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL(rootn)} + +#undef __SYCL_MATH_FUNCTION_2_INT_Y_OVERLOAD_IMPL + #define __SYCL_MATH_FUNCTION_3_OVERLOAD(NAME) \ template \ inline __SYCL_ALWAYS_INLINE \ @@ -789,6 +898,78 @@ detail::enable_if_t::value, T> sign(T x) __NOEXC { return __sycl_std::__invoke_sign(x); } +// marray common functions + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + T res; \ + for (int i = 0; i < T::size(); i++) { \ + res[i] = NAME(__VA_ARGS__); \ + } \ + return res; + +#define __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(NAME, ARG, ...) \ + template ::value>> \ + T NAME(ARG) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(degrees, T radians, radians[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(radians, T degrees, degrees[i]) +__SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(sign, T x, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template ::value>> \ + T NAME(ARG1, ARG2) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(min, T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(min, T x, + detail::marray_element_type y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(max, T x, T y, x[i], y[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(max, T x, + detail::marray_element_type y, + x[i], y) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, T edge, T x, edge[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD( + step, detail::marray_element_type edge, T x, edge, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD + +#define __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(NAME, ARG1, ARG2, ARG3, \ + ...) \ + template ::value>> \ + T NAME(ARG1, ARG2, ARG3) __NOEXC { \ + __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(clamp, T x, T minval, T maxval, + x[i], minval[i], maxval[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( + clamp, T x, detail::marray_element_type minval, + detail::marray_element_type maxval, x[i], minval, maxval) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, T a, x[i], y[i], + a[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, + detail::marray_element_type a, + x[i], y[i], a) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(smoothstep, T edge0, T edge1, T x, + edge0[i], edge1[i], x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( + smoothstep, detail::marray_element_type edge0, + detail::marray_element_type edge1, T x, edge0, edge1, x[i]) + +#undef __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD +#undef __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL + /* --------------- 4.13.4 Integer functions. --------------------------------*/ // ugeninteger abs (geninteger x) template @@ -1724,6 +1905,7 @@ __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(cos) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(tan) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp2) +__SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(exp10) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log2) __SYCL_HALF_PRECISION_MATH_FUNCTION_OVERLOAD(log10) From 12a4566a95224eaebb9cc72250f0b8f67f8de4da Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 21 Mar 2023 17:29:49 +0000 Subject: [PATCH 04/44] [SYCL] Fix sycl::vec constructor ambiguity (#8608) Recent changes to the sycl::vec class added a private constructor taking and array. This resulted in constructor ambiguity when passing an initializer list, despite the constructor being private. This commit removes the constructor, making the implementation use the constructor taking both an array and an index sequence directly. --------- Signed-off-by: Larsen, Steffen Co-authored-by: Alexey Sachkov --- sycl/include/sycl/types.hpp | 12 ++++++------ sycl/test/regression/vec_init_list_ctor.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 sycl/test/regression/vec_init_list_ctor.cpp diff --git a/sycl/include/sycl/types.hpp b/sycl/include/sycl/types.hpp index 05cea2cf0f043..ae1b2655d7320 100644 --- a/sycl/include/sycl/types.hpp +++ b/sycl/include/sycl/types.hpp @@ -729,9 +729,6 @@ template class vec { std::index_sequence) : m_Data{Arr[Is]...} {} - constexpr vec(const std::array, NumElements> &Arr) - : vec{Arr, std::make_index_sequence()} {} - public: using element_type = DataT; using rel_t = detail::rel_t; @@ -796,7 +793,8 @@ template class vec { template explicit constexpr vec(const EnableIfHostHalf &arg) : vec{detail::RepeatValue( - static_cast>(arg))} {} + static_cast>(arg)), + std::make_index_sequence()} {} template typename detail::enable_if_t< @@ -812,7 +810,8 @@ template class vec { #else explicit constexpr vec(const DataT &arg) : vec{detail::RepeatValue( - static_cast>(arg))} {} + static_cast>(arg)), + std::make_index_sequence()} {} template typename detail::enable_if_t< @@ -883,7 +882,8 @@ template class vec { template , typename = EnableIfSuitableNumElements> constexpr vec(const argTN &...args) - : vec{VecArgArrayCreator, argTN...>::Create(args...)} {} + : vec{VecArgArrayCreator, argTN...>::Create(args...), + std::make_index_sequence()} {} // TODO: Remove, for debug purposes only. void dump() { diff --git a/sycl/test/regression/vec_init_list_ctor.cpp b/sycl/test/regression/vec_init_list_ctor.cpp new file mode 100644 index 0000000000000..740075bc0fe80 --- /dev/null +++ b/sycl/test/regression/vec_init_list_ctor.cpp @@ -0,0 +1,12 @@ +// RUN: %clang -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +// Regression test checking that the vector ctor taking an initializer list +// doesn't cause warnings or errors. + +#include + +int main() { + sycl::vec V({1, 2}); + return 0; +} From 86c08b3cc4a3bb1815d202937c40a0cc13aad672 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 21 Mar 2023 17:29:58 +0000 Subject: [PATCH 05/44] [SYCL] Fix get_specialization_constant segmentation fault (#8542) In certain cases get_specialization_constant would cause a segmentation fault, likely due to strict aliasing violations. This commit changes the implementation to use memcpy of the data into the resulting object, as this can be assumed to be valid due to specialization constants being device-copyable. --------- Signed-off-by: Larsen, Steffen --- sycl/include/sycl/kernel_bundle.hpp | 12 +++++++----- sycl/unittests/kernel-and-program/Cache.cpp | 6 +++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 6eed7b270edca..12651407a537f 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -308,17 +308,19 @@ class kernel_bundle : public detail::kernel_bundle_plain, template typename std::remove_reference_t::value_type get_specialization_constant() const { - const char *SpecSymName = detail::get_spec_constant_symbolic_ID(); - if (!is_specialization_constant_set(SpecSymName)) - return SpecName.getDefaultValue(); - using SCType = typename std::remove_reference_t::value_type; + const char *SpecSymName = detail::get_spec_constant_symbolic_ID(); + SCType Res{SpecName.getDefaultValue()}; + if (!is_specialization_constant_set(SpecSymName)) + return Res; + std::array RetValue; get_specialization_constant_impl(SpecSymName, RetValue.data()); + std::memcpy(&Res, RetValue.data(), sizeof(SCType)); - return *reinterpret_cast(RetValue.data()); + return Res; } /// \returns an iterator to the first device image kernel_bundle contains diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index a4cc58ea360ae..0002aa75e332f 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -68,7 +68,11 @@ template <> const char *get_spec_constant_symbolic_ID() { static sycl::unittest::PiImage generateDefaultImage() { using namespace sycl::unittest; + std::vector SpecConstData; + PiProperty SC1 = makeSpecConstant(SpecConstData, "SC1", {0}, {0}, {42}); + PiPropertySet PropSet; + addSpecConstants({SC1}, std::move(SpecConstData), PropSet); std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data @@ -256,7 +260,7 @@ TEST_F(KernelAndProgramCacheTest, SpecConstantCacheNegative) { detail::KernelProgramCache::ProgramCache &Cache = CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); - EXPECT_EQ(Cache.size(), 1U) << "Expect non-empty cache"; + EXPECT_EQ(Cache.size(), 2U) << "Expect an entry for each build in the cache."; } // Check that kernel_bundle created through join() is not cached. From 0b853c27b3de8e08c2ea406145e29dbbaa13979c Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 21 Mar 2023 17:30:10 +0000 Subject: [PATCH 06/44] [SYCL][NFC] Fix typo in handler 2D copy comment (#8553) Signed-off-by: Larsen, Steffen --- sycl/include/sycl/handler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 0cf58142e43a5..b98567b3cf94b 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -2507,8 +2507,8 @@ class __SYCL_EXPORT handler { // Do the following: // 1. If both are host, use host_task to copy. - // 2. If either pointer is host or of the backend supports native memcpy2d, - // use special command. + // 2. If either pointer is host or the backend supports native memcpy2d, use + // special command. // 3. Otherwise, launch a kernel for copying. if (SrcIsHost && DestIsHost) { commonUSMCopy2DFallbackHostTask(Src, SrcPitch, Dest, DestPitch, Width, From 3be2e426b76dae076f40e6a99a9a11275a710467 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 21 Mar 2023 17:54:10 +0000 Subject: [PATCH 07/44] [SYCL][Docs] Clarify that weak_object is only available on host (#8713) This commit makes a clarification to sycl_ext_oneapi_weak_object that the new interfaces are only available on the host application. --------- Signed-off-by: Larsen, Steffen Co-authored-by: Greg Lueck --- .../extensions/supported/sycl_ext_oneapi_weak_object.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_weak_object.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_weak_object.asciidoc index 77f3b8cf179c8..1b58d93091c47 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_weak_object.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_weak_object.asciidoc @@ -330,3 +330,5 @@ they are both empty `weak_object` instances. |=== +The `weak_object` class, the `ext_oneapi_owner_before` member functions and the +`owner_less` function object type must not be used in device code. From 61e51015a516f587eb2b8728aabe7d15b9fdb69e Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 21 Mar 2023 23:48:34 +0100 Subject: [PATCH 08/44] [SYCL] Filter out unneeded device images with lower state than requested (#8523) When fetching device images compatible with non-input states, we can ignore an image if another one with a higher state is available for all the possible kernel-device pairs. This patch adds the logic for filtering out such unnecessary images so that we can avoid JIT compilation if both AOT and SPIRV images are present. --- .../program_manager/program_manager.cpp | 130 ++++++++--- sycl/unittests/SYCL2020/CMakeLists.txt | 1 + .../SYCL2020/KernelBundleStateFiltering.cpp | 213 ++++++++++++++++++ sycl/unittests/helpers/MockKernelInfo.hpp | 28 +++ sycl/unittests/helpers/PiImage.hpp | 1 + sycl/unittests/helpers/TestKernel.hpp | 13 +- sycl/unittests/kernel-and-program/Cache.cpp | 19 +- 7 files changed, 354 insertions(+), 51 deletions(-) create mode 100644 sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp create mode 100644 sycl/unittests/helpers/MockKernelInfo.hpp diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a197193d35432..86800d49711f8 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1683,46 +1683,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( } assert(BinImages.size() > 0 && "Expected to find at least one device image"); + // Ignore images with incompatible state. Image is considered compatible + // with a target state if an image is already in the target state or can + // be brought to target state by compiling/linking/building. + // + // Example: an image in "executable" state is not compatible with + // "input" target state - there is no operation to convert the image it + // to "input" state. An image in "input" state is compatible with + // "executable" target state because it can be built to get into + // "executable" state. + for (auto It = BinImages.begin(); It != BinImages.end();) { + if (getBinImageState(*It) > TargetState) + It = BinImages.erase(It); + else + ++It; + } + std::vector SYCLDeviceImages; - for (RTDeviceBinaryImage *BinImage : BinImages) { - const bundle_state ImgState = getBinImageState(BinImage); - - // Ignore images with incompatible state. Image is considered compatible - // with a target state if an image is already in the target state or can - // be brought to target state by compiling/linking/building. - // - // Example: an image in "executable" state is not compatible with - // "input" target state - there is no operation to convert the image it - // to "input" state. An image in "input" state is compatible with - // "executable" target state because it can be built to get into - // "executable" state. - if (ImgState > TargetState) - continue; - for (const sycl::device &Dev : Devs) { + // If a non-input state is requested, we can filter out some compatible + // images and return only those with the highest compatible state for each + // device-kernel pair. This map tracks how many kernel-device pairs need each + // image, so that any unneeded ones are skipped. + // TODO this has no effect if the requested state is input, consider having + // a separate branch for that case to avoid unnecessary tracking work. + struct DeviceBinaryImageInfo { + std::shared_ptr> KernelIDs; + bundle_state State = bundle_state::input; + int RequirementCounter = 0; + }; + std::unordered_map ImageInfoMap; + + for (const sycl::device &Dev : Devs) { + // Track the highest image state for each requested kernel. + using StateImagesPairT = + std::pair>; + using KernelImageMapT = + std::map; + KernelImageMapT KernelImageMap; + if (!KernelIDs.empty()) + for (const kernel_id &KernelID : KernelIDs) + KernelImageMap.insert({KernelID, {}}); + + for (RTDeviceBinaryImage *BinImage : BinImages) { if (!compatibleWithDevice(BinImage, Dev) || !doesDevSupportDeviceRequirements(Dev, *BinImage)) continue; - std::shared_ptr> KernelIDs; - // Collect kernel names for the image - { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - KernelIDs = m_BinImg2KernelIDs[BinImage]; - // If the image does not contain any non-service kernels we can skip it. - if (!KernelIDs || KernelIDs->empty()) - continue; + auto InsertRes = ImageInfoMap.insert({BinImage, {}}); + DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second; + if (InsertRes.second) { + ImgInfo.State = getBinImageState(BinImage); + // Collect kernel names for the image + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; + } } + const bundle_state ImgState = ImgInfo.State; + const std::shared_ptr> &ImageKernelIDs = + ImgInfo.KernelIDs; + int &ImgRequirementCounter = ImgInfo.RequirementCounter; - DeviceImageImplPtr Impl = std::make_shared( - BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr); + // If the image does not contain any non-service kernels we can skip it. + if (!ImageKernelIDs || ImageKernelIDs->empty()) + continue; - SYCLDeviceImages.push_back( - createSyclObjFromImpl(Impl)); - break; + // Update tracked information. + for (kernel_id &KernelID : *ImageKernelIDs) { + StateImagesPairT *StateImagesPair; + // If only specific kernels are requested, ignore the rest. + if (!KernelIDs.empty()) { + auto It = KernelImageMap.find(KernelID); + if (It == KernelImageMap.end()) + continue; + StateImagesPair = &It->second; + } else + StateImagesPair = &KernelImageMap[KernelID]; + + auto &[KernelImagesState, KernelImages] = *StateImagesPair; + + if (KernelImages.empty()) { + KernelImagesState = ImgState; + KernelImages.push_back(BinImage); + ++ImgRequirementCounter; + } else if (KernelImagesState < ImgState) { + for (RTDeviceBinaryImage *Img : KernelImages) { + auto It = ImageInfoMap.find(Img); + assert(It != ImageInfoMap.end()); + assert(It->second.RequirementCounter > 0); + --(It->second.RequirementCounter); + } + KernelImages.clear(); + KernelImages.push_back(BinImage); + KernelImagesState = ImgState; + ++ImgRequirementCounter; + } else if (KernelImagesState == ImgState) { + KernelImages.push_back(BinImage); + ++ImgRequirementCounter; + } + } } } + for (const auto &ImgInfoPair : ImageInfoMap) { + if (ImgInfoPair.second.RequirementCounter == 0) + continue; + + DeviceImageImplPtr Impl = std::make_shared( + ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State, + ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); + + SYCLDeviceImages.push_back(createSyclObjFromImpl(Impl)); + } + return SYCLDeviceImages; } diff --git a/sycl/unittests/SYCL2020/CMakeLists.txt b/sycl/unittests/SYCL2020/CMakeLists.txt index 9e22f73abfa00..09530b5f65a96 100644 --- a/sycl/unittests/SYCL2020/CMakeLists.txt +++ b/sycl/unittests/SYCL2020/CMakeLists.txt @@ -4,6 +4,7 @@ add_sycl_unittest(SYCL2020Tests OBJECT GetNativeOpenCL.cpp SpecializationConstant.cpp KernelBundle.cpp + KernelBundleStateFiltering.cpp KernelID.cpp HasExtension.cpp IsCompatible.cpp diff --git a/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp b/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp new file mode 100644 index 0000000000000..fb4e7f3315be9 --- /dev/null +++ b/sycl/unittests/SYCL2020/KernelBundleStateFiltering.cpp @@ -0,0 +1,213 @@ +//==---- KernelBundleStateFiltering.cpp --- Kernel bundle unit test --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +class KernelA; +class KernelB; +class KernelC; +class KernelD; +class KernelE; +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "KernelA"; } +}; +template <> struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "KernelB"; } +}; +template <> struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "KernelC"; } +}; +template <> struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "KernelD"; } +}; +template <> struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "KernelE"; } +}; +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +namespace { + +std::set TrackedImages; +sycl::unittest::PiImage +generateDefaultImage(std::initializer_list KernelNames, + pi_device_binary_type BinaryType, + const char *DeviceTargetSpec) { + using namespace sycl::unittest; + + PiPropertySet PropSet; + + static unsigned char NImage = 0; + std::vector Bin{NImage++}; + + PiArray Entries = makeEmptyKernels(KernelNames); + + PiImage Img{BinaryType, // Format + DeviceTargetSpec, + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + const void *BinaryPtr = Img.getBinaryPtr(); + TrackedImages.insert(BinaryPtr); + + return Img; +} + +// Image 0: input, KernelA KernelB +// Image 1: exe, KernelA +// Image 2: input, KernelC +// Image 3: exe, KernelC +// Image 4: input, KernelD +// Image 5: input, KernelE +// Image 6: exe, KernelE +// Image 7: exe. KernelE +sycl::unittest::PiImage Imgs[] = { + generateDefaultImage({"KernelA", "KernelB"}, PI_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64), + generateDefaultImage({"KernelA"}, PI_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64), + generateDefaultImage({"KernelC"}, PI_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64), + generateDefaultImage({"KernelC"}, PI_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64), + generateDefaultImage({"KernelD"}, PI_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64), + generateDefaultImage({"KernelE"}, PI_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64), + generateDefaultImage({"KernelE"}, PI_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64), + generateDefaultImage({"KernelE"}, PI_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64)}; + +sycl::unittest::PiImageArray ImgArray{Imgs}; +std::vector UsedImageIndices; + +void redefinedPiProgramCreateCommon(const void *bin) { + if (TrackedImages.count(bin) != 0) { + unsigned char ImgIdx = *reinterpret_cast(bin); + UsedImageIndices.push_back(ImgIdx); + } +} + +pi_result redefinedPiProgramCreate(pi_context context, const void *il, + size_t length, pi_program *res_program) { + redefinedPiProgramCreateCommon(il); + return PI_SUCCESS; +} + +pi_result redefinedPiProgramCreateWithBinary( + pi_context context, pi_uint32 num_devices, const pi_device *device_list, + const size_t *lengths, const unsigned char **binaries, + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program) { + redefinedPiProgramCreateCommon(binaries[0]); + return PI_SUCCESS; +} + +pi_result redefinedDevicesGet(pi_platform platform, pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices) { + if (num_devices) + *num_devices = 2; + + if (devices) { + devices[0] = reinterpret_cast(1); + devices[1] = reinterpret_cast(2); + } + + return PI_SUCCESS; +} + +pi_result redefinedExtDeviceSelectBinary(pi_device device, + pi_device_binary *binaries, + pi_uint32 num_binaries, + pi_uint32 *selected_binary_ind) { + EXPECT_EQ(num_binaries, 1U); + // Treat image 3 as incompatible with one of the devices. + if (TrackedImages.count(binaries[0]->BinaryStart) != 0 && + *binaries[0]->BinaryStart == 3 && + device == reinterpret_cast(2)) { + return PI_ERROR_INVALID_BINARY; + } + *selected_binary_ind = 0; + return PI_SUCCESS; +} + +void verifyImageUse(const std::vector &ExpectedImages) { + std::sort(UsedImageIndices.begin(), UsedImageIndices.end()); + EXPECT_TRUE(std::is_sorted(ExpectedImages.begin(), ExpectedImages.end())); + EXPECT_EQ(UsedImageIndices, ExpectedImages); + UsedImageIndices.clear(); +} + +TEST(KernelBundle, DeviceImageStateFiltering) { + sycl::unittest::PiMock Mock; + Mock.redefineAfter( + redefinedPiProgramCreate); + Mock.redefineAfter( + redefinedPiProgramCreateWithBinary); + + // No kernel ids specified. + { + const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + sycl::context Ctx{Dev}; + + sycl::kernel_bundle KernelBundle = + sycl::get_kernel_bundle(Ctx, {Dev}); + verifyImageUse({0, 1, 3, 4, 6, 7}); + } + + sycl::kernel_id KernelAID = sycl::get_kernel_id(); + sycl::kernel_id KernelCID = sycl::get_kernel_id(); + sycl::kernel_id KernelDID = sycl::get_kernel_id(); + + // Request specific kernel ids. + { + const sycl::device Dev = Mock.getPlatform().get_devices()[0]; + sycl::context Ctx{Dev}; + + sycl::kernel_bundle KernelBundle = + sycl::get_kernel_bundle( + Ctx, {Dev}, {KernelAID, KernelCID, KernelDID}); + verifyImageUse({1, 3, 4}); + } + + // Check the case where some executable images are unsupported by one of + // the devices. + { + Mock.redefine(redefinedDevicesGet); + Mock.redefine( + redefinedExtDeviceSelectBinary); + const std::vector Devs = Mock.getPlatform().get_devices(); + sycl::context Ctx{Devs}; + + sycl::kernel_bundle KernelBundle = + sycl::get_kernel_bundle( + Ctx, Devs, {KernelAID, KernelCID, KernelDID}); + verifyImageUse({1, 2, 3, 4}); + } +} +} // namespace diff --git a/sycl/unittests/helpers/MockKernelInfo.hpp b/sycl/unittests/helpers/MockKernelInfo.hpp new file mode 100644 index 0000000000000..501cb0bd27f8a --- /dev/null +++ b/sycl/unittests/helpers/MockKernelInfo.hpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace unittest { +struct MockKernelInfoBase { + static constexpr unsigned getNumParams() { return 0; } + static const detail::kernel_param_desc_t &getParamDesc(int) { + static detail::kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr bool isESIMD() { return false; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return 1; } +}; + +} // namespace unittest +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/unittests/helpers/PiImage.hpp b/sycl/unittests/helpers/PiImage.hpp index 38b06eef6d242..5a536c96a569c 100644 --- a/sycl/unittests/helpers/PiImage.hpp +++ b/sycl/unittests/helpers/PiImage.hpp @@ -262,6 +262,7 @@ class PiImage { MPropertySet.end(), }; } + const unsigned char *getBinaryPtr() { return &*MBinary.begin(); } private: uint16_t MVersion; diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index 3db52b419af37..abeeeaa53e9b5 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -8,6 +8,7 @@ #pragma once +#include "MockKernelInfo.hpp" #include "PiImage.hpp" template class TestKernel; @@ -15,16 +16,10 @@ template class TestKernel; namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -template struct KernelInfo> { - static constexpr unsigned getNumParams() { return 0; } - static const kernel_param_desc_t &getParamDesc(int) { - static kernel_param_desc_t Dummy; - return Dummy; - } +template +struct KernelInfo> + : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "TestKernel"; } - static constexpr bool isESIMD() { return false; } - static constexpr bool callsThisItem() { return false; } - static constexpr bool callsAnyThisFreeFunction() { return false; } static constexpr int64_t getKernelSize() { return KernelSize; } static constexpr const char *getFileName() { return "TestKernel.hpp"; } static constexpr const char *getFunctionName() { diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index 0002aa75e332f..0bee628686087 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -15,6 +15,7 @@ #include "detail/kernel_program_cache.hpp" #include "detail/program_impl.hpp" #include "sycl/detail/pi.h" +#include #include #include #include @@ -40,22 +41,12 @@ namespace sycl { const static specialization_id SpecConst1{42}; __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { -struct MockKernelInfo { - static constexpr unsigned getNumParams() { return 0; } - static const kernel_param_desc_t &getParamDesc(int) { - static kernel_param_desc_t Dummy; - return Dummy; - } - static constexpr bool isESIMD() { return false; } - static constexpr bool callsThisItem() { return false; } - static constexpr bool callsAnyThisFreeFunction() { return false; } - static constexpr int64_t getKernelSize() { return 1; } -}; - -template <> struct KernelInfo : public MockKernelInfo { +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "CacheTestKernel"; } }; -template <> struct KernelInfo : public MockKernelInfo { +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr const char *getName() { return "CacheTestKernel2"; } }; template <> const char *get_spec_constant_symbolic_ID() { From d31623ab586291f6d49b3aabe72d9c0fe5f7f87b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 22 Mar 2023 02:08:07 -0700 Subject: [PATCH 09/44] [SYCL][Doc] Fix link in sycl_ext_oneapi_root_group (#8715) Signed-off-by: John Pennycook --- .../doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc index b2dfa639b3f75..fdf3ced15afde 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_root_group.asciidoc @@ -44,7 +44,7 @@ This extension also depends on the following other SYCL extensions: * link:../experimental/sycl_ext_oneapi_properties.asciidoc[ sycl_ext_oneapi_properties] -* link:../proposed/sycl_ext_oneapi_kernel_properties.asciidoc[ +* link:../experimental/sycl_ext_oneapi_kernel_properties.asciidoc[ sycl_ext_oneapi_kernel_properties] From 93a629abc6944640dfc5151c5770a8049ea651be Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 22 Mar 2023 12:15:35 +0000 Subject: [PATCH 10/44] [SYCL] Implement two-run aspect propagation (#8681) This commit splits aspect propagation into two runs: 1. First run propagates all aspects, except fp64. Warnings are still issued for fp64 as if it was fully propagated, but the resulting metadata will not reflect it. This run before optimizations. 2. Second run propagates all aspects, including fp64. This should not have any effect on already propagated aspects. This run will not issue warnings as any conflicts would have been reported by the first pass. See the [design document](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OptionalDeviceFeatures.md#pre--and-post-optimization-aspect-propagation) for more information. --------- Signed-off-by: Larsen, Steffen --- clang/lib/CodeGen/BackendUtil.cpp | 7 +- .../SYCLLowerIR/SYCLPropagateAspectsUsage.h | 8 +- .../SYCLLowerIR/SYCLPropagateAspectsUsage.cpp | 61 +++++++- .../double-prop-after-exclude.ll | 48 ++++++ .../PropagateAspectsUsage/exclude-aspect.ll | 139 ++++++++++++++++++ 5 files changed, 254 insertions(+), 9 deletions(-) create mode 100644 llvm/test/SYCLLowerIR/PropagateAspectsUsage/double-prop-after-exclude.ll create mode 100644 llvm/test/SYCLLowerIR/PropagateAspectsUsage/exclude-aspect.ll diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d7b1dcf72e8ca..47b9d4e39f1a6 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -911,7 +911,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PB.registerPipelineStartEPCallback( [&](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(ESIMDVerifierPass(LangOpts.SYCLESIMDForceStatelessMem)); - MPM.addPass(SYCLPropagateAspectsUsagePass()); + MPM.addPass( + SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{"fp64"})); }); // Add the InferAddressSpaces pass for all the SPIR[V] targets @@ -1026,6 +1027,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (LangOpts.EnableDAEInSpirKernels) MPM.addPass(DeadArgumentEliminationSYCLPass()); + // Rerun aspect propagation without warning diagnostics. + MPM.addPass(SYCLPropagateAspectsUsagePass(/*ExcludeAspects=*/{}, + /*ValidateAspects=*/false)); + // Add SPIRITTAnnotations pass to the pass manager if // -fsycl-instrument-device-code option was passed. This option can be // used only with spir triple. diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h b/llvm/include/llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h index 4845ad6e97bf1..f8f1078b0669a 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLPropagateAspectsUsage.h @@ -20,13 +20,19 @@ namespace llvm { class SYCLPropagateAspectsUsagePass : public PassInfoMixin { public: - SYCLPropagateAspectsUsagePass(StringRef OptionsString = {}) { + SYCLPropagateAspectsUsagePass(std::set ExcludeAspects = {}, + bool ValidateAspects = true, + StringRef OptionsString = {}) + : ExcludedAspects{std::move(ExcludeAspects)}, + ValidateAspectUsage{ValidateAspects} { OptionsString.split(this->TargetFixedAspects, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); }; PreservedAnalyses run(Module &M, ModuleAnalysisManager &); private: + std::set ExcludedAspects; + const bool ValidateAspectUsage; SmallVector TargetFixedAspects; }; diff --git a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp index b13e480170fec..0f32fd78b68de 100644 --- a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp @@ -53,6 +53,11 @@ static cl::opt ClSyclFixedTargets( "is expected to be runnable on"), cl::Hidden, cl::init("")); +static cl::opt ClSyclExcludeAspects( + "sycl-propagate-aspects-usage-exclude-aspects", + cl::desc("Specify aspects to exclude when propagating aspect usage"), + cl::Hidden, cl::init("")); + namespace { using AspectsSetTy = SmallSet; @@ -293,15 +298,37 @@ getAspectUsageChain(const Function *F, const FunctionToAspectsMapTy &AspectsMap, return CallChain; } -void createUsedAspectsMetadataForFunctions(FunctionToAspectsMapTy &Map) { +void createUsedAspectsMetadataForFunctions( + FunctionToAspectsMapTy &Map, const AspectsSetTy &ExcludeAspectVals) { for (auto &[F, Aspects] : Map) { if (Aspects.empty()) continue; LLVMContext &C = F->getContext(); + // Create a set of unique aspects. First we add the ones from the found + // aspects that have not been excluded. + AspectsSetTy UniqueAspects; + for (const int &A : Aspects) + if (!ExcludeAspectVals.contains(A)) + UniqueAspects.insert(A); + + // If there are no new aspects, we can just keep the old metadata. + if (UniqueAspects.empty()) + continue; + + // If there is new metadata, merge it with the old aspects. We preserve + // the excluded ones. + if (const MDNode *ExistingAspects = F->getMetadata("sycl_used_aspects")) { + for (const MDOperand &MDOp : ExistingAspects->operands()) { + const Constant *C = cast(MDOp)->getValue(); + UniqueAspects.insert(cast(C)->getSExtValue()); + } + } + + // Create new metadata. SmallVector AspectsMetadata; - for (const auto &A : Aspects) + for (const int &A : UniqueAspects) AspectsMetadata.push_back(ConstantAsMetadata::get( ConstantInt::getSigned(Type::getInt32Ty(C), A))); @@ -506,7 +533,8 @@ void setSyclFixedTargetsMD(const std::vector &EntryPoints, FunctionToAspectsMapTy buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects, const AspectValueToNameMapTy &AspectValues, - const std::vector &EntryPoints) { + const std::vector &EntryPoints, + bool ValidateAspects) { FunctionToAspectsMapTy FunctionToUsedAspects; FunctionToAspectsMapTy FunctionToDeclaredAspects; CallGraphTy CG; @@ -522,8 +550,9 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects, for (Function *F : EntryPoints) propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited); - validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues, - EntryPoints, CG); + if (ValidateAspects) + validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues, + EntryPoints, CG); // The set of aspects from FunctionToDeclaredAspects should be merged to the // set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to @@ -558,6 +587,14 @@ SYCLPropagateAspectsUsagePass::run(Module &M, ModuleAnalysisManager &MAM) { StringRef(ClSyclFixedTargets) .split(TargetFixedAspects, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + if (ClSyclExcludeAspects.getNumOccurrences() > 0) { + SmallVector ExcludedAspectsVec; + StringRef(ClSyclExcludeAspects) + .split(ExcludedAspectsVec, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + ExcludedAspects.insert(ExcludedAspectsVec.begin(), + ExcludedAspectsVec.end()); + } + std::vector EntryPoints; for (Function &F : M.functions()) if (isEntryPoint(F)) @@ -566,9 +603,19 @@ SYCLPropagateAspectsUsagePass::run(Module &M, ModuleAnalysisManager &MAM) { propagateAspectsToOtherTypesInModule(M, TypesWithAspects, AspectValues); FunctionToAspectsMapTy FunctionToUsedAspects = buildFunctionsToAspectsMap( - M, TypesWithAspects, AspectValues, EntryPoints); + M, TypesWithAspects, AspectValues, EntryPoints, ValidateAspectUsage); + + // Create a set of excluded aspect values. + AspectsSetTy ExcludedAspectVals; + for (const StringRef &AspectName : ExcludedAspects) { + const auto AspectValIter = AspectValues.find(AspectName); + assert(AspectValIter != AspectValues.end() && + "Excluded aspect does not have a corresponding value."); + ExcludedAspectVals.insert(AspectValIter->second); + } - createUsedAspectsMetadataForFunctions(FunctionToUsedAspects); + createUsedAspectsMetadataForFunctions(FunctionToUsedAspects, + ExcludedAspectVals); setSyclFixedTargetsMD(EntryPoints, TargetFixedAspects, AspectValues); diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/double-prop-after-exclude.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/double-prop-after-exclude.ll new file mode 100644 index 0000000000000..8eb2512b507d6 --- /dev/null +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/double-prop-after-exclude.ll @@ -0,0 +1,48 @@ +; RUN: opt -passes=sycl-propagate-aspects-usage -sycl-propagate-aspects-usage-exclude-aspects=fp64 < %s -S -o %t_first.ll +; RUN: opt -passes=sycl-propagate-aspects-usage < %t_first.ll -S -o %t_second.ll +; FileCheck %s --input-file %t_first.ll --check-prefix=CHECK-FIRST +; FileCheck %s --input-file %t_second.ll --check-prefix=CHECK-SECOND +; +; Test checks that fp64 usage is correctly propagate in the two-run model. + +%composite = type { double } + +; CHECK-FIRST-NOT: spir_kernel void @kernel() {{.*}} !sycl_used_aspects +; CHECK-SECOND: spir_kernel void @kernel() !sycl_used_aspects ![[MDID:]] +define spir_kernel void @kernel() { + call spir_func void @func() + ret void +} + +; CHECK-FIRST-NOT: spir_func void @func() {{.*}} !sycl_used_aspects +; CHECK-SECOND: spir_func void @func() !sycl_used_aspects ![[MDID]] { +define spir_func void @func() { + %tmp = alloca double + ret void +} + +; CHECK-FIRST-NOT: spir_func void @func.array() {{.*}} !sycl_used_aspects +; CHECK-SECOND: spir_func void @func.array() !sycl_used_aspects ![[MDID]] { +define spir_func void @func.array() { + %tmp = alloca [4 x double] + ret void +} + +; CHECK-FIRST-NOT: spir_func void @func.vector() {{.*}} !sycl_used_aspects +; CHECK-SECOND: spir_func void @func.vector() !sycl_used_aspects ![[MDID]] { +define spir_func void @func.vector() { + %tmp = alloca <4 x double> + ret void +} + +; CHECK-FIRST-NOT: spir_func void @func.composite() {{.*}} !sycl_used_aspects +; CHECK-SECOND: spir_func void @func.composite() !sycl_used_aspects ![[MDID]] { +define spir_func void @func.composite() { + %tmp = alloca %composite + ret void +} + +!sycl_aspects = !{!0} +!0 = !{!"fp64", i32 6} + +; CHECK-SECOND: ![[MDID]] = !{i32 6} diff --git a/llvm/test/SYCLLowerIR/PropagateAspectsUsage/exclude-aspect.ll b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/exclude-aspect.ll new file mode 100644 index 0000000000000..59c7964cd60ad --- /dev/null +++ b/llvm/test/SYCLLowerIR/PropagateAspectsUsage/exclude-aspect.ll @@ -0,0 +1,139 @@ +; RUN: opt -passes=sycl-propagate-aspects-usage -sycl-propagate-aspects-usage-exclude-aspects=aspect4,aspect1 -S < %s | FileCheck %s +; +; Test checks that the pass is able to collect all aspects used in a function + +%A = type { i32 } +%B = type { i32 } +%C = type { i32 } +%D = type { i32 } + +; None of funcA's aspects are excluded. +; CHECK: define spir_func void @funcA() !sycl_used_aspects ![[#ID0:]] { +define spir_func void @funcA() { + %tmp = alloca %A + ret void +} + +; funcB uses "aspect1" which is excluded, so the resulting aspects are the same +; as for funcA. +; CHECK: define spir_func void @funcB() !sycl_used_aspects ![[#ID0]] { +define spir_func void @funcB() { + %tmp = alloca %B + call spir_func void @funcA() + ret void +} + +; funcC has an aspect excluded, propagated from funcB. +; CHECK: define spir_func void @funcC() !sycl_used_aspects ![[#ID1:]] { +define spir_func void @funcC() { + %tmp = alloca %C + call spir_func void @funcB() + ret void +} + +; funcD has two aspects excluded; one from the use of D and one from propagated. +; from funcB and funcC. +; CHECK: define spir_func void @funcD() !sycl_used_aspects ![[#ID2:]] { +define spir_func void @funcD() { + %tmp = alloca %D + call spir_func void @funcC() + ret void +} + +; kernel1 has the same aspects as funcD. +; CHECK: define spir_kernel void @kernel1() !sycl_used_aspects ![[#ID2]] +define spir_kernel void @kernel1() { + call spir_func void @funcD() + ret void +} + +; funcE should get none of its explicitly declared aspects in its +; sycl_used_aspects +; CHECK: define spir_func void @funcE() !sycl_declared_aspects ![[#DA1:]] { +define spir_func void @funcE() !sycl_declared_aspects !10 { + ret void +} + +; funcF should have the same aspects as funcE +; CHECK-NOT: define spir_func void @funcF() {{.*}} !sycl_used_aspects +define spir_func void @funcF() { + call spir_func void @funcE() + ret void +} + +; funcG only keeps one aspect, the rest are excluded +; CHECK: define spir_func void @funcG() !sycl_declared_aspects ![[#DA2:]] !sycl_used_aspects ![[#ID3:]] +define spir_func void @funcG() !sycl_declared_aspects !11 { + ret void +} + +; funcH should have the same aspects as funcG +; CHECK: define spir_func void @funcH() !sycl_used_aspects ![[#ID3]] +define spir_func void @funcH() { + call spir_func void @funcG() + ret void +} + +; CHECK: define spir_kernel void @kernel2() !sycl_used_aspects ![[#ID3]] +define spir_kernel void @kernel2() { + call spir_func void @funcF() + call spir_func void @funcH() + ret void +} + +; CHECK: define spir_func void @funcI() !sycl_used_aspects ![[#DA1]] { +define spir_func void @funcI() !sycl_used_aspects !10 { + ret void +} + +; CHECK-NOT: define spir_func void @funcJ() {{.*}} !sycl_used_aspects +define spir_func void @funcJ() { + call spir_func void @funcI() + ret void +} + +; +; Note that the listed aspects can be reordered due to the merging of the +; aspect sets. +; CHECK: define spir_func void @funcK() !sycl_used_aspects ![[#ID4:]] { +define spir_func void @funcK() !sycl_used_aspects !11 { + ret void +} + +; CHECK: define spir_func void @funcL() !sycl_used_aspects ![[#ID3]] +define spir_func void @funcL() { + call spir_func void @funcK() + ret void +} + +; CHECK: define spir_kernel void @kernel3() !sycl_used_aspects ![[#ID3]] +define spir_kernel void @kernel3() { + call spir_func void @funcK() + call spir_func void @funcL() + ret void +} + +!sycl_types_that_use_aspects = !{!0, !1, !2, !3} +!0 = !{!"A", i32 0} +!1 = !{!"B", i32 1} +!2 = !{!"C", i32 2} +!3 = !{!"D", i32 3, i32 4} + +!sycl_aspects = !{!4, !5, !6, !7, !8, !9} +!4 = !{!"aspect0", i32 0} +!5 = !{!"aspect1", i32 1} +!6 = !{!"aspect2", i32 2} +!7 = !{!"aspect3", i32 3} +!8 = !{!"aspect4", i32 4} +!9 = !{!"fp64", i32 5} + +!10 = !{i32 1} +!11 = !{i32 4, i32 2, i32 1} +; CHECK-DAG: ![[#DA1]] = !{i32 1} +; CHECK-DAG: ![[#DA2]] = !{i32 4, i32 2, i32 1} + +; CHECK-DAG: ![[#ID0]] = !{i32 0} +; CHECK-DAG: ![[#ID1]] = !{i32 2, i32 0} +; CHECK-DAG: ![[#ID2]] = !{i32 0, i32 2, i32 3} +; CHECK-DAG: ![[#ID3]] = !{i32 2} +; CHECK-DAG: ![[#ID4]] = !{i32 2, i32 4, i32 1} From ea269223aa2a932278937bd669cbc50f041c103e Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 22 Mar 2023 14:18:33 +0000 Subject: [PATCH 11/44] [SYCL] Revert any and all return types to int for vectors (#8591) SYCL 2020 has any and all return bool for scalar and marray arguments, but int for vector arguments. Currently enabling SYCL2020_CONFORMANT_APIS switches all versions of any and all to return bool. This commit changes it so that the variants taking vector arguments are unaffected by SYCL2020_CONFORMANT_APIS and language version. Signed-off-by: Larsen, Steffen --- sycl/include/sycl/builtins.hpp | 6 ++---- sycl/test/basic_tests/relational_builtins.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 9ff111c14583a..df6e1d7aa2dd0 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -1614,8 +1614,7 @@ any(T x) __NOEXC { // int any (vigeninteger x) template -detail::enable_if_t::value, detail::anyall_ret_t> -any(T x) __NOEXC { +detail::enable_if_t::value, int> any(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( __sycl_std::__invoke_Any>( detail::rel_sign_bit_test_arg_t(x))); @@ -1630,8 +1629,7 @@ all(T x) __NOEXC { // int all (vigeninteger x) template -detail::enable_if_t::value, detail::anyall_ret_t> -all(T x) __NOEXC { +detail::enable_if_t::value, int> all(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( __sycl_std::__invoke_All>( detail::rel_sign_bit_test_arg_t(x))); diff --git a/sycl/test/basic_tests/relational_builtins.cpp b/sycl/test/basic_tests/relational_builtins.cpp index 88b97d3980412..33f3ddb671d54 100644 --- a/sycl/test/basic_tests/relational_builtins.cpp +++ b/sycl/test/basic_tests/relational_builtins.cpp @@ -280,28 +280,28 @@ void foo() { // any CHECK(int, bool, any, int16_t) - CHECK(int, bool, any, int16v) + CHECK(int, int, any, int16v) CHECK2020(_, bool, any, int16m) CHECK(int, bool, any, int32_t) - CHECK(int, bool, any, int32v) + CHECK(int, int, any, int32v) CHECK2020(_, bool, any, int32m) CHECK(int, bool, any, int64_t) - CHECK(int, bool, any, int64v) + CHECK(int, int, any, int64v) CHECK2020(_, bool, any, int64m) // all CHECK(int, bool, all, int16_t) - CHECK(int, bool, all, int16v) + CHECK(int, int, all, int16v) CHECK2020(_, bool, all, int16m) CHECK(int, bool, all, int32_t) - CHECK(int, bool, all, int32v) + CHECK(int, int, all, int32v) CHECK2020(_, bool, all, int32m) CHECK(int, bool, all, int64_t) - CHECK(int, bool, all, int64v) + CHECK(int, int, all, int64v) CHECK2020(_, bool, all, int64m) // bitselect From 488c7c9e679451be3de8b32d87391c42fe10940a Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 22 Mar 2023 14:18:47 +0000 Subject: [PATCH 12/44] [SYCL] Fix weak_object move and copy assignment (#8716) This commit fixes an issue where the copy and move assignment operators of the weak_object class would be implicitly deleted due to them being missing from the base class. Signed-off-by: Larsen, Steffen --- .../sycl/ext/oneapi/weak_object_base.hpp | 3 ++ sycl/unittests/Extensions/WeakObject.cpp | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/weak_object_base.hpp b/sycl/include/sycl/ext/oneapi/weak_object_base.hpp index 52877d9d64c9f..7dc10e7e86e1a 100644 --- a/sycl/include/sycl/ext/oneapi/weak_object_base.hpp +++ b/sycl/include/sycl/ext/oneapi/weak_object_base.hpp @@ -33,6 +33,9 @@ template class weak_object_base { weak_object_base(const weak_object_base &Other) noexcept = default; weak_object_base(weak_object_base &&Other) noexcept = default; + weak_object_base &operator=(const weak_object_base &Other) noexcept = default; + weak_object_base &operator=(weak_object_base &&Other) noexcept = default; + void reset() noexcept { MObjWeakPtr.reset(); } void swap(weak_object_base &Other) noexcept { MObjWeakPtr.swap(Other.MObjWeakPtr); diff --git a/sycl/unittests/Extensions/WeakObject.cpp b/sycl/unittests/Extensions/WeakObject.cpp index 2357ab5febf8a..8af9974bdba4a 100644 --- a/sycl/unittests/Extensions/WeakObject.cpp +++ b/sycl/unittests/Extensions/WeakObject.cpp @@ -198,6 +198,39 @@ template struct WeakObjectCheckOwnerLessMap { } }; +template struct WeakObjectCheckCopy { + void operator()(SyclObjT Obj) { + sycl::ext::oneapi::weak_object WeakObj{Obj}; + + sycl::ext::oneapi::weak_object WeakObjCopyCtor{WeakObj}; + sycl::ext::oneapi::weak_object WeakObjCopyAssign = WeakObj; + + EXPECT_FALSE(WeakObjCopyCtor.expired()); + EXPECT_FALSE(WeakObjCopyAssign.expired()); + + EXPECT_TRUE(WeakObjCopyCtor.lock() == Obj); + EXPECT_TRUE(WeakObjCopyAssign.lock() == Obj); + } +}; + +template struct WeakObjectCheckMove { + void operator()(SyclObjT Obj) { + sycl::ext::oneapi::weak_object WeakObj1{Obj}; + sycl::ext::oneapi::weak_object WeakObj2{Obj}; + + sycl::ext::oneapi::weak_object WeakObjMoveCtor{ + std::move(WeakObj1)}; + sycl::ext::oneapi::weak_object WeakObjMoveAssign = + std::move(WeakObj2); + + EXPECT_FALSE(WeakObjMoveCtor.expired()); + EXPECT_FALSE(WeakObjMoveAssign.expired()); + + EXPECT_TRUE(WeakObjMoveCtor.lock() == Obj); + EXPECT_TRUE(WeakObjMoveAssign.lock() == Obj); + } +}; + template