From a34b09c30b2b402fd9d12e495ac473cfcb87cbd0 Mon Sep 17 00:00:00 2001 From: JP Lehr Date: Fri, 23 Jan 2026 05:23:57 -0600 Subject: [PATCH 1/7] [Compiler] Addressing new compiler warnings Clang enables new lifetime warnings in production and we see build errors due to this with the staging compiler. The attributes added in this PR are suggested by the compiler. However, I'm not very familiar with the code base, so the changes may be incorrect. --- include/ck/host_utility/io.hpp | 3 ++- include/ck/library/utility/host_tensor.hpp | 2 +- include/ck/tensor_operation/gpu/device/tensor_layout.hpp | 2 +- include/ck/utility/amd_wave_read_first_lane.hpp | 3 ++- include/ck/utility/pipeline_enum.hpp | 3 ++- include/ck/utility/scheduler_enum.hpp | 3 ++- include/ck/utility/tuple.hpp | 2 +- 7 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp index db45199b173..50f2d6bac6a 100644 --- a/include/ck/host_utility/io.hpp +++ b/include/ck/host_utility/io.hpp @@ -27,7 +27,8 @@ std::ostream& operator<<(std::ostream& os, const std::array& v) } template -std::ostream& operator<<(std::ostream& os, const TensorDescriptor& desc) +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const TensorDescriptor& desc) { constexpr index_t nDim = remove_cvref_t::GetNumOfDimension(); diff --git a/include/ck/library/utility/host_tensor.hpp b/include/ck/library/utility/host_tensor.hpp index 1dda0a48639..fbbc6f2109b 100644 --- a/include/ck/library/utility/host_tensor.hpp +++ b/include/ck/library/utility/host_tensor.hpp @@ -26,7 +26,7 @@ namespace ck { template -std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim) +std::ostream& LogRange([[clang::lifetimebound]] std::ostream& os, Range&& range, std::string delim) { bool first = true; for(auto&& v : range) diff --git a/include/ck/tensor_operation/gpu/device/tensor_layout.hpp b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp index 
7018bbd251c..4e05ca50b5e 100644 --- a/include/ck/tensor_operation/gpu/device/tensor_layout.hpp +++ b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp @@ -455,7 +455,7 @@ struct G_NDHW : public BaseConvolutionLayout template < typename Layout, typename std::enable_if::value, bool>::type = false> -std::ostream& operator<<(std::ostream& os, const Layout&) +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const Layout&) { os << Layout::name; return os; diff --git a/include/ck/utility/amd_wave_read_first_lane.hpp b/include/ck/utility/amd_wave_read_first_lane.hpp index 44259f0601f..4b64b76cc76 100644 --- a/include/ck/utility/amd_wave_read_first_lane.hpp +++ b/include/ck/utility/amd_wave_read_first_lane.hpp @@ -44,7 +44,8 @@ struct get_carrier<3> // replacement of host std::copy_n() template - __device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to) + __device__ static OutputIterator + copy_n(InputIterator from, Size size, [[clang::lifetimebound]] OutputIterator to) { if(0 < size) { diff --git a/include/ck/utility/pipeline_enum.hpp b/include/ck/utility/pipeline_enum.hpp index 4421386f599..a224011a04f 100644 --- a/include/ck/utility/pipeline_enum.hpp +++ b/include/ck/utility/pipeline_enum.hpp @@ -25,7 +25,8 @@ enum struct PipelineVersion } // namespace ck #if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC) -inline std::ostream& operator<<(std::ostream& os, const ck::PipelineVersion& p) +inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const ck::PipelineVersion& p) { switch(p) { diff --git a/include/ck/utility/scheduler_enum.hpp b/include/ck/utility/scheduler_enum.hpp index 0c4bfabaf3d..67c5c3b50a0 100644 --- a/include/ck/utility/scheduler_enum.hpp +++ b/include/ck/utility/scheduler_enum.hpp @@ -70,7 +70,8 @@ enum struct TailNumber } // namespace ck #if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC) -inline std::ostream& operator<<(std::ostream& os, const ck::LoopScheduler& s) 
+inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const ck::LoopScheduler& s) { switch(s) { diff --git a/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp index 16575950307..94585459433 100644 --- a/include/ck/utility/tuple.hpp +++ b/include/ck/utility/tuple.hpp @@ -51,7 +51,7 @@ get_tuple_element_data_reference(const TupleElementKeyData& x) // for write access of tuple element template __host__ __device__ constexpr Data& -get_tuple_element_data_reference(TupleElementKeyData& x) +get_tuple_element_data_reference([[clang::lifetimebound]] TupleElementKeyData& x) { return x.mData; } From dc34993a41148eab763df1ea4a4473d43cac7e17 Mon Sep 17 00:00:00 2001 From: JP Lehr Date: Fri, 23 Jan 2026 16:15:45 -0600 Subject: [PATCH 2/7] Update some more instances --- example/ck_tile/01_fmha/bias.hpp | 2 +- example/ck_tile/01_fmha/mask.hpp | 2 +- include/ck/host_utility/io.hpp | 2 +- include/ck/library/utility/convolution_parameter.hpp | 3 ++- include/ck/library/utility/host_tensor.hpp | 5 +++-- include/ck_tile/core/container/tuple.hpp | 7 ++++--- .../ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp | 6 ++++-- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/example/ck_tile/01_fmha/bias.hpp b/example/ck_tile/01_fmha/bias.hpp index 33f398cc2a9..b5262043843 100644 --- a/example/ck_tile/01_fmha/bias.hpp +++ b/example/ck_tile/01_fmha/bias.hpp @@ -106,7 +106,7 @@ struct bias_info return info; } - friend std::ostream& operator<<(std::ostream& os, const bias_info& bi) + friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const bias_info& bi) { bi.serialize(os); return os; diff --git a/example/ck_tile/01_fmha/mask.hpp b/example/ck_tile/01_fmha/mask.hpp index f85b811116b..c780bf7b6bd 100644 --- a/example/ck_tile/01_fmha/mask.hpp +++ b/example/ck_tile/01_fmha/mask.hpp @@ -191,7 +191,7 @@ struct mask_info return area; } - friend std::ostream& operator<<(std::ostream& os, const mask_info& mi) + friend 
std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const mask_info& mi) { mi.serialize(os); return os; diff --git a/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp index 50f2d6bac6a..22d744ff159 100644 --- a/include/ck/host_utility/io.hpp +++ b/include/ck/host_utility/io.hpp @@ -13,7 +13,7 @@ namespace ck { template -std::ostream& operator<<(std::ostream& os, const std::vector& v) +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, const std::vector& v) { std::copy(std::begin(v), std::end(v), std::ostream_iterator(os, " ")); return os; diff --git a/include/ck/library/utility/convolution_parameter.hpp b/include/ck/library/utility/convolution_parameter.hpp index 354b1120400..a25002409bd 100644 --- a/include/ck/library/utility/convolution_parameter.hpp +++ b/include/ck/library/utility/convolution_parameter.hpp @@ -110,4 +110,5 @@ ConvParam parse_conv_param(int num_dim_spatial, int arg_idx, char* const argv[]) } // namespace utils } // namespace ck -std::ostream& operator<<(std::ostream& os, const ck::utils::conv::ConvParam& p); +std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const ck::utils::conv::ConvParam& p); diff --git a/include/ck/library/utility/host_tensor.hpp b/include/ck/library/utility/host_tensor.hpp index fbbc6f2109b..b6558ee7f8b 100644 --- a/include/ck/library/utility/host_tensor.hpp +++ b/include/ck/library/utility/host_tensor.hpp @@ -580,8 +580,9 @@ struct HostTensorDescriptor return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0}); } - friend std::ostream& operator<<(std::ostream& os, const HostTensorDescriptor& desc); - friend std::ostream& operator<<(std::ostream& os, ChosenLayout tag); + friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const HostTensorDescriptor& desc); + friend std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, ChosenLayout tag); private: std::vector mLens; diff --git 
a/include/ck_tile/core/container/tuple.hpp b/include/ck_tile/core/container/tuple.hpp index 7f8176d5ec3..79a063a7cf0 100644 --- a/include/ck_tile/core/container/tuple.hpp +++ b/include/ck_tile/core/container/tuple.hpp @@ -98,13 +98,14 @@ CK_TILE_HOST_DEVICE constexpr T getv(const tuple_object&) } template -CK_TILE_HOST_DEVICE constexpr const T& getv(const tuple_object& x) +CK_TILE_HOST_DEVICE constexpr const T& +getv([[clang::lifetimebound]] const tuple_object& x) { return x.element; } template -CK_TILE_HOST_DEVICE constexpr T& getv(tuple_object& x) +CK_TILE_HOST_DEVICE constexpr T& getv([[clang::lifetimebound]] tuple_object& x) { return x.element; } @@ -292,7 +293,7 @@ struct tuple : impl::tuple_base, T...> //template CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(index_t i) const { TP_COM_(); return reinterpret_cast&>(*this).at(i); } template CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(number) { TP_COM_(); return reinterpret_cast&>(*this).at(number{}); } template CK_TILE_HOST_DEVICE constexpr decltype(auto) get_as(number) const { TP_COM_(); return reinterpret_cast&>(*this).at(number{}); } - + // template CK_TILE_HOST_DEVICE constexpr void set_as(index_t i, const Tx & x) { TP_COM_(); reinterpret_cast&>(*this).at(i) = x; } template CK_TILE_HOST_DEVICE constexpr void set_as(number, const Tx & x) { TP_COM_(); reinterpret_cast&>(*this).at(number{}) = x; } diff --git a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp index 957cf7ab8f3..987704e4336 100644 --- a/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp +++ b/include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_scheduler.hpp @@ -41,7 +41,8 @@ enum struct TailNumber } // namespace ck_tile -inline std::ostream& operator<<(std::ostream& os, const ck_tile::GemmPipelineScheduler& s) +inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const 
ck_tile::GemmPipelineScheduler& s) { switch(s) { @@ -53,7 +54,8 @@ inline std::ostream& operator<<(std::ostream& os, const ck_tile::GemmPipelineSch return os; } -inline std::ostream& operator<<(std::ostream& os, const ck_tile::TailNumber& s) +inline std::ostream& operator<<([[clang::lifetimebound]] std::ostream& os, + const ck_tile::TailNumber& s) { switch(s) { From d1dece64ad97325161a864c60e2f351634d82a1b Mon Sep 17 00:00:00 2001 From: JP Lehr Date: Thu, 29 Jan 2026 03:59:07 -0600 Subject: [PATCH 3/7] Adds file-level ignores via clang diagnostic pragma The number of instances was large, so I decided to use file-level scope to disable the warning via pragma clang diagnostic ignored. It also showed this warning coming from the gtest dependency. For that, I did add the respective command line flag to the CMake variables. I don't know if this is acceptable or not. --- cmake/gtest.cmake | 1 + example/ck_tile/01_fmha/quant.hpp | 4 +++ include/ck/library/utility/host_tensor.hpp | 5 ++++ include/ck/tensor/static_tensor.hpp | 3 +++ .../multi_index_transform.hpp | 25 +++++++++++++------ .../ck/tensor_description/tensor_adaptor.hpp | 3 ++- .../tensor_description/tensor_descriptor.hpp | 13 +++++++--- .../block/blockwise_gemm_pipeline_xdlops.hpp | 3 +++ .../blockwise_gemm_pipeline_xdlops_base.hpp | 4 +++ .../gpu/block/blockwise_gemm_xdlops.hpp | 3 +++ .../blockwise_gemm_xdlops_skip_b_lds.hpp | 4 +++ .../grid/gridwise_gemm_xdlops_bwd_weight.hpp | 4 +++ include/ck/utility/dtype_vector.hpp | 21 +++++++++------- include/ck/utility/env.hpp | 5 +++- include/ck/utility/static_buffer.hpp | 5 +++- include/ck/utility/tuple.hpp | 5 ++-- .../core/algorithm/coordinate_transform.hpp | 4 +++ include/ck_tile/core/container/map.hpp | 4 +++ include/ck_tile/core/container/tuple.hpp | 4 +++ include/ck_tile/core/numeric/e8m0.hpp | 4 +++ include/ck_tile/core/numeric/pk_fp4.hpp | 4 +++ .../core/tensor/static_distributed_tensor.hpp | 4 +++ .../ck_tile/core/tensor/tensor_adaptor.hpp | 4 +++ 
.../core/tensor/tensor_adaptor_coordinate.hpp | 4 +++ include/ck_tile/core/tensor/tensor_view.hpp | 4 +++ .../ck_tile/core/tensor/tile_distribution.hpp | 4 +++ include/ck_tile/core/utility/env.hpp | 4 +++ include/ck_tile/core/utility/functional.hpp | 3 +++ include/ck_tile/host/arg_parser.hpp | 4 +++ include/ck_tile/host/host_tensor.hpp | 4 +++ .../gemm_streamk/gemm_streamk_benchmark.hpp | 4 +++ 31 files changed, 142 insertions(+), 25 deletions(-) diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake index 993330f9897..cfb8486643a 100644 --- a/cmake/gtest.cmake +++ b/cmake/gtest.cmake @@ -68,6 +68,7 @@ set(GTEST_CXX_FLAGS -Wno-deprecated -Wno-unsafe-buffer-usage -Wno-float-equal + -Wno-lifetime-safety-intra-tu-suggestions ) if(WIN32) diff --git a/example/ck_tile/01_fmha/quant.hpp b/example/ck_tile/01_fmha/quant.hpp index 59d4ac17073..ee917bd5f05 100644 --- a/example/ck_tile/01_fmha/quant.hpp +++ b/example/ck_tile/01_fmha/quant.hpp @@ -8,6 +8,9 @@ #include "ck_tile/core.hpp" #include "ck_tile/ops/fmha.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + // keep sync with BlockAttentionQuantScaleEnum enum class quant_scale_enum { @@ -51,3 +54,4 @@ struct quant_scale_info return os; } }; +#pragma clang diagnostic pop diff --git a/include/ck/library/utility/host_tensor.hpp b/include/ck/library/utility/host_tensor.hpp index b6558ee7f8b..2e95ee8cf3c 100644 --- a/include/ck/library/utility/host_tensor.hpp +++ b/include/ck/library/utility/host_tensor.hpp @@ -23,6 +23,10 @@ #include "ck/tensor_operation/gpu/device/tensor_layout.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" +#pragma clang diagnostic ignored "-Wlifetime-safety-cross-tu-suggestions" + namespace ck { template @@ -1172,3 +1176,4 @@ struct Tensor }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor/static_tensor.hpp b/include/ck/tensor/static_tensor.hpp index 
529745e3b94..c3f3bd0c916 100644 --- a/include/ck/tensor/static_tensor.hpp +++ b/include/ck/tensor/static_tensor.hpp @@ -4,6 +4,8 @@ #ifndef CK_STATIC_TENSOR_HPP #define CK_STATIC_TENSOR_HPP +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck { // StaticTensor for Scalar @@ -270,4 +272,5 @@ __host__ __device__ constexpr auto make_static_tensor(TensorDesc, X invalid_elem } } // namespace ck +#pragma clang diagnostic pop #endif diff --git a/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp index 19a47487328..0b533b09256 100644 --- a/include/ck/tensor_description/multi_index_transform.hpp +++ b/include/ck/tensor_description/multi_index_transform.hpp @@ -6,6 +6,9 @@ #include "ck/utility/common_header.hpp" #include "ck/utility/multi_index.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template @@ -29,7 +32,8 @@ struct PassThrough __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -305,7 +309,8 @@ struct RightPad __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -403,7 +408,8 @@ struct Embed __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } - __host__ __device__ 
constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1074,7 +1080,8 @@ struct Merge_v2_magic_division __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1366,7 +1373,8 @@ struct Merge_v3_division_mod __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1480,7 +1488,8 @@ struct UnMerge __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1640,7 +1649,8 @@ struct ConvBwdDataImplicitGemmOutTransform __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 3; } - __host__ __device__ constexpr const auto& GetUpperLengths() const { return up_lengths_; } + __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] + { return up_lengths_; } template __host__ __device__ constexpr auto CalculateLowerIndexN(const UpIdx& idx_up) const @@ 
-2236,3 +2246,4 @@ struct Xor } }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp index 79c5881d48a..dac29f31ffc 100644 --- a/include/ck/tensor_description/tensor_adaptor.hpp +++ b/include/ck/tensor_description/tensor_adaptor.hpp @@ -23,7 +23,8 @@ struct TensorAdaptor { __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); } - __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; } + __host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]] + { return transforms_; } __host__ __device__ static constexpr auto GetLowerDimensionHiddenIdss() { diff --git a/include/ck/tensor_description/tensor_descriptor.hpp b/include/ck/tensor_description/tensor_descriptor.hpp index 2437132d114..eab21cf6e1e 100644 --- a/include/ck/tensor_description/tensor_descriptor.hpp +++ b/include/ck/tensor_description/tensor_descriptor.hpp @@ -7,6 +7,8 @@ #include "ck/utility/sequence_helper.hpp" #include "ck/tensor_description/multi_index_transform.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck { template @@ -179,7 +181,8 @@ struct TensorDescriptor } // TODO make these private - __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; } + __host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]] + { return transforms_; } __host__ __device__ static constexpr auto GetLowerDimensionIdss() { @@ -253,9 +256,10 @@ struct TensorCoordinate __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; } // TODO make these private - __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; } + __host__ __device__ constexpr const auto& GetHiddenIndex() const [[clang::lifetimebound]] + { return 
idx_hidden_; } - __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; } + __host__ __device__ auto& GetHiddenIndex() [[clang::lifetimebound]] { return idx_hidden_; } __host__ __device__ constexpr auto GetVisibleIndex() const { @@ -284,7 +288,7 @@ struct TensorCoordinateStep __host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); } // TODO make these private - __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const + __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const [[clang::lifetimebound]] { return idx_diff_visible_; } @@ -613,3 +617,4 @@ using TensorCoordinateStep_t = decltype(make_tensor_coordinate_step( TensorDesc{}, MultiIndex::GetNumOfDimension()>{})); } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp index 81492f0a048..fa0a69ed1f0 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp @@ -13,6 +13,8 @@ // Prefetech 2 stage // Local prefetch 1 stage +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck { template @@ -1031,3 +1033,4 @@ struct BlockwiseGemmXdlops_v2 }; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp index 1dba7f67a13..65a326e3e76 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp @@ -8,6 +8,9 @@ #include "ck/tensor_operation/gpu/warp/xdlops_gemm.hpp" #include "ck/tensor_description/tensor_adaptor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic 
ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { template ()>> __host__ __device__ constexpr vector_type(type v) : data_{v} {} template - __host__ __device__ constexpr const auto& AsType() const + __host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value, "Something went wrong, please check src and dst types."); @@ -136,7 +138,7 @@ struct vector_type()>> } template - __host__ __device__ constexpr auto& AsType() + __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value, "Something went wrong, please check src and dst types."); @@ -248,7 +250,7 @@ struct vector_type()>> __host__ __device__ constexpr vector_type(type v) : data_{v} {} template - __host__ __device__ constexpr const auto& AsType() const + __host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value, "Something went wrong, please check src and dst types."); @@ -272,7 +274,7 @@ struct vector_type()>> } template - __host__ __device__ constexpr auto& AsType() + __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value, "Something went wrong, please check src and dst types."); @@ -583,7 +585,7 @@ struct vector_type()>> } template - __host__ __device__ constexpr auto& AsType() + __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value || is_same::value, @@ -754,7 +756,7 @@ struct vector_type()>> } template - __host__ __device__ constexpr auto& AsType() + __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value || is_same::value || @@ -1427,7 +1429,7 @@ struct non_native_vector_base< } template - __host__ __device__ constexpr auto& AsType() 
+ __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same_v || is_same_v || is_same_v, "Something went wrong, please check src and dst types."); @@ -1627,7 +1629,7 @@ struct vector_type()>> __host__ __device__ constexpr vector_type(type v) : data_{v} {} template - __host__ __device__ constexpr const auto& AsType() const + __host__ __device__ constexpr const auto& AsType() const [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value, @@ -1797,7 +1799,7 @@ struct vector_type()>> } template - __host__ __device__ constexpr auto& AsType() + __host__ __device__ constexpr auto& AsType() [[clang::lifetimebound]] { static_assert(is_same::value || is_same::value || is_same::value || is_same::value || @@ -2284,3 +2286,4 @@ using pk_i4x4_t = typename vector_type::type; using pk_i4x8_t = typename vector_type::type; } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/utility/env.hpp b/include/ck/utility/env.hpp index 0cb0b4caf83..4cabd89e33c 100644 --- a/include/ck/utility/env.hpp +++ b/include/ck/utility/env.hpp @@ -9,6 +9,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck { namespace internal { template @@ -188,5 +191,5 @@ void UpdateEnvVar(EnvVar, const std::string_view& val) // environment variable to enable logging: // export CK_LOGGING=ON or CK_LOGGING=1 or CK_LOGGING=ENABLED CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING) - +#pragma clang diagnostic pop #endif diff --git a/include/ck/utility/static_buffer.hpp b/include/ck/utility/static_buffer.hpp index d49817eb8f5..7e47da5bf89 100644 --- a/include/ck/utility/static_buffer.hpp +++ b/include/ck/utility/static_buffer.hpp @@ -5,6 +5,8 @@ #include "statically_indexed_array.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck { // static buffer for scalar @@ -104,7 +106,7 @@ struct 
StaticBufferTupleOfVector // Set S // i is offset of S template - __host__ __device__ constexpr S& operator()(Number i) + __host__ __device__ constexpr S& operator()(Number i) [[clang::lifetimebound]] { constexpr auto i_v = i / s_per_v; constexpr auto i_s = i % s_per_v; @@ -195,3 +197,4 @@ __host__ __device__ constexpr auto make_static_buffer(LongNumber) } } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp index 94585459433..16cd35e1d64 100644 --- a/include/ck/utility/tuple.hpp +++ b/include/ck/utility/tuple.hpp @@ -106,6 +106,7 @@ struct TupleImpl, Xs...> : TupleElementKeyData __host__ __device__ constexpr auto& GetElementDataByKey(TupleElementKey) + [[clang::lifetimebound]] { return get_tuple_element_data_reference>(*this); } @@ -147,7 +148,7 @@ struct Tuple : detail::TupleImpl - __host__ __device__ constexpr auto& At(Number) + __host__ __device__ constexpr auto& At(Number) [[clang::lifetimebound]] { static_assert(I < base::Size(), "wrong! 
out of range"); return base::GetElementDataByKey(detail::TupleElementKey{}); @@ -162,7 +163,7 @@ struct Tuple : detail::TupleImpl - __host__ __device__ constexpr auto& operator()(Number i) + __host__ __device__ constexpr auto& operator()(Number i) [[clang::lifetimebound]] { return At(i); } diff --git a/include/ck_tile/core/algorithm/coordinate_transform.hpp b/include/ck_tile/core/algorithm/coordinate_transform.hpp index 732799cef82..30c93b8f005 100644 --- a/include/ck_tile/core/algorithm/coordinate_transform.hpp +++ b/include/ck_tile/core/algorithm/coordinate_transform.hpp @@ -11,6 +11,9 @@ #include "ck_tile/core/utility/magic_div.hpp" #include "ck_tile/core/utility/print.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { enum struct coord_transform_enum @@ -1776,3 +1779,4 @@ make_indexing_transform_with_adaptor(const UpLength& up_lengths, const IndexingA } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/container/map.hpp b/include/ck_tile/core/container/map.hpp index d342235b383..8c861ceeb6a 100644 --- a/include/ck_tile/core/container/map.hpp +++ b/include/ck_tile/core/container/map.hpp @@ -8,6 +8,9 @@ #include "ck_tile/core/container/sequence.hpp" #include "ck_tile/core/container/tuple.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { // naive map @@ -157,3 +160,4 @@ CK_TILE_HOST_DEVICE static void print(const map& m) } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/container/tuple.hpp b/include/ck_tile/core/container/tuple.hpp index 79a063a7cf0..11e7b1e52f3 100644 --- a/include/ck_tile/core/container/tuple.hpp +++ b/include/ck_tile/core/container/tuple.hpp @@ -13,6 +13,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + #ifndef 
CK_TILE_TUPLE_IMPL #define CK_TILE_TUPLE_IMPL 1 #endif @@ -865,3 +868,4 @@ struct tuple_element> } \ }() #endif +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/numeric/e8m0.hpp b/include/ck_tile/core/numeric/e8m0.hpp index 41aeb8ffabf..ee125242838 100644 --- a/include/ck_tile/core/numeric/e8m0.hpp +++ b/include/ck_tile/core/numeric/e8m0.hpp @@ -6,6 +6,9 @@ #include "ck_tile/core/config.hpp" #include "ck_tile/core/numeric/mxfp_convert.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /** @@ -100,3 +103,4 @@ CK_TILE_HOST_DEVICE constexpr e8m0_bexp_t::operator float() const } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/numeric/pk_fp4.hpp b/include/ck_tile/core/numeric/pk_fp4.hpp index cc23ce71a83..309b41675c5 100644 --- a/include/ck_tile/core/numeric/pk_fp4.hpp +++ b/include/ck_tile/core/numeric/pk_fp4.hpp @@ -8,6 +8,9 @@ #include "ck_tile/core/numeric/half.hpp" #include "ck_tile/core/numeric/mxfp_convert.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + #if defined(__gfx950__) #define CK_TILE_FP4_CVT_DEVICE 1 #else @@ -431,3 +434,4 @@ CK_TILE_HOST_DEVICE constexpr fp16x2_t pk_fp4_t::to_fp16x2(float scale) const #endif } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/static_distributed_tensor.hpp b/include/ck_tile/core/tensor/static_distributed_tensor.hpp index 10c7587bcb4..bdd81dae07c 100644 --- a/include/ck_tile/core/tensor/static_distributed_tensor.hpp +++ b/include/ck_tile/core/tensor/static_distributed_tensor.hpp @@ -14,6 +14,9 @@ #include "ck_tile/core/tensor/tile_distribution.hpp" #include "ck_tile/core/container/thread_buffer.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -266,3 +269,4 @@ inline constexpr bool 
is_similiar_distributed_tensor_v = } // namespace detail } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/tensor_adaptor.hpp b/include/ck_tile/core/tensor/tensor_adaptor.hpp index 78160b800da..e6cdb66ef9d 100644 --- a/include/ck_tile/core/tensor/tensor_adaptor.hpp +++ b/include/ck_tile/core/tensor/tensor_adaptor.hpp @@ -12,6 +12,9 @@ #include "ck_tile/core/utility/type_traits.hpp" #include "ck_tile/core/numeric/numeric.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { // Transforms: Tuple @@ -950,3 +953,4 @@ CK_TILE_HOST_DEVICE constexpr auto chain_tensor_adaptors(const X& x, const Xs&.. remove_cvref_t, \ remove_cvref_t>{trans}; \ }() +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/tensor_adaptor_coordinate.hpp b/include/ck_tile/core/tensor/tensor_adaptor_coordinate.hpp index 2ea76a3814d..6d33bde83e4 100644 --- a/include/ck_tile/core/tensor/tensor_adaptor_coordinate.hpp +++ b/include/ck_tile/core/tensor/tensor_adaptor_coordinate.hpp @@ -14,6 +14,9 @@ #include "ck_tile/core/utility/type_traits.hpp" #include "ck_tile/core/utility/print.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -367,3 +370,4 @@ CK_TILE_HOST_DEVICE void print(const tensor_adaptor_coordinate& coord) detail::CK_PRINT_X_<>{}(coord); } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/tensor_view.hpp b/include/ck_tile/core/tensor/tensor_view.hpp index 837f2b87a6a..833a7f44135 100644 --- a/include/ck_tile/core/tensor/tensor_view.hpp +++ b/include/ck_tile/core/tensor/tensor_view.hpp @@ -14,6 +14,9 @@ #include "ck_tile/core/utility/functional.hpp" #include "ck_tile/core/utility/type_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace 
ck_tile { /* @@ -582,3 +585,4 @@ pad_tensor_view(const TensorView& tensor_view, const TileLengths& tile_lengths, } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/tensor/tile_distribution.hpp b/include/ck_tile/core/tensor/tile_distribution.hpp index f9c2aba5023..aa5714e5c24 100644 --- a/include/ck_tile/core/tensor/tile_distribution.hpp +++ b/include/ck_tile/core/tensor/tile_distribution.hpp @@ -15,6 +15,9 @@ #include "ck_tile/core/utility/functional.hpp" #include "ck_tile/core/utility/type_traits.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -731,3 +734,4 @@ CK_TILE_HOST_DEVICE void print(const tile_distribution #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -206,3 +209,4 @@ void UpdateEnvVar(EnvVar, const std::string_view& val) // environment variable to enable logging: // export CK_TILE_LOGGING=ON or CK_TILE_LOGGING=1 or CK_TILE_LOGGING=ENABLED CK_TILE_DECLARE_ENV_VAR_BOOL(CK_TILE_LOGGING) +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/utility/functional.hpp b/include/ck_tile/core/utility/functional.hpp index 898d21574e5..33ba4a943a9 100644 --- a/include/ck_tile/core/utility/functional.hpp +++ b/include/ck_tile/core/utility/functional.hpp @@ -10,6 +10,8 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck_tile { namespace detail { @@ -258,3 +260,4 @@ constexpr auto conditional_expr(X&& x, Y&& y) } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/host/arg_parser.hpp b/include/ck_tile/host/arg_parser.hpp index 8c45d2b1755..fee7f7779bb 100644 --- a/include/ck_tile/host/arg_parser.hpp +++ b/include/ck_tile/host/arg_parser.hpp @@ -13,6 +13,9 @@ #include #include +#pragma clang diagnostic push 
+#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { /* * a host side utility, arg parser for, either @@ -234,3 +237,4 @@ class ArgParser std::vector keys; }; } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/include/ck_tile/host/host_tensor.hpp b/include/ck_tile/host/host_tensor.hpp index d26686ec37b..ddeb3ad7812 100644 --- a/include/ck_tile/host/host_tensor.hpp +++ b/include/ck_tile/host/host_tensor.hpp @@ -17,6 +17,9 @@ #include "ck_tile/host/joinable_thread.hpp" #include "ck_tile/host/ranges.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile { template @@ -859,3 +862,4 @@ auto get_default_stride(std::size_t row, return stride; } } // namespace ck_tile +#pragma clang diagnostic pop diff --git a/tile_engine/ops/gemm_streamk/gemm_streamk_benchmark.hpp b/tile_engine/ops/gemm_streamk/gemm_streamk_benchmark.hpp index 45beb0accec..d877f174b2c 100644 --- a/tile_engine/ops/gemm_streamk/gemm_streamk_benchmark.hpp +++ b/tile_engine/ops/gemm_streamk/gemm_streamk_benchmark.hpp @@ -17,6 +17,9 @@ // Data types and Layouts are defined by the generated kernel headers // No hardcoded type definitions here to avoid conflicts +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + enum class Metric { LATENCY = 0, @@ -199,3 +202,4 @@ void gemm_host_reference(int verify, c_m_n_gpu_buf_ref.FromDevice(c_m_n_host_result.data()); } } +#pragma clang diagnostic pop From 1a9b02855dd103bc12982b3ddc92f1c567067982 Mon Sep 17 00:00:00 2001 From: JP Lehr Date: Thu, 29 Jan 2026 10:02:21 -0600 Subject: [PATCH 4/7] This adds the remaining instances For a build on gfx90a. 
--- cmake/gtest.cmake | 1 + include/ck/wrapper/layout.hpp | 4 ++++ include/ck/wrapper/tensor.hpp | 4 ++++ include/ck_tile/core/arch/mma/amdgcn_mma.hpp | 4 ++++ profiler/src/profiler_operation_registry.hpp | 4 ++++ test/position_embedding/position_embedding.cpp | 4 ++++ 6 files changed, 21 insertions(+) diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake index cfb8486643a..51e0359ab6b 100644 --- a/cmake/gtest.cmake +++ b/cmake/gtest.cmake @@ -69,6 +69,7 @@ set(GTEST_CXX_FLAGS -Wno-unsafe-buffer-usage -Wno-float-equal -Wno-lifetime-safety-intra-tu-suggestions + -Wno-lifetime-safety-cross-tu-suggestions ) if(WIN32) diff --git a/include/ck/wrapper/layout.hpp b/include/ck/wrapper/layout.hpp index 334d5851db0..6d99f4e5e32 100644 --- a/include/ck/wrapper/layout.hpp +++ b/include/ck/wrapper/layout.hpp @@ -5,6 +5,9 @@ #include "ck/wrapper/utils/layout_utils.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + // Disable from doxygen docs generation /// @cond INTERNAL namespace ck { @@ -482,3 +485,4 @@ struct Layout } // namespace wrapper } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck/wrapper/tensor.hpp b/include/ck/wrapper/tensor.hpp index 9f8278a3578..ed7f2fa23d4 100644 --- a/include/ck/wrapper/tensor.hpp +++ b/include/ck/wrapper/tensor.hpp @@ -7,6 +7,9 @@ #include "utils/tensor_partition.hpp" #include "utils/layout_utils.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + // Disable from doxygen docs generation /// @cond INTERNAL namespace ck { @@ -441,3 +444,4 @@ struct Tensor } // namespace wrapper } // namespace ck +#pragma clang diagnostic pop diff --git a/include/ck_tile/core/arch/mma/amdgcn_mma.hpp b/include/ck_tile/core/arch/mma/amdgcn_mma.hpp index 4c9ef7d6bae..1eef5819bc0 100644 --- a/include/ck_tile/core/arch/mma/amdgcn_mma.hpp +++ b/include/ck_tile/core/arch/mma/amdgcn_mma.hpp @@ -7,6 +7,9 @@ #include 
"ck_tile/core/numeric/vector_type.hpp" #include "ck_tile/core/utility/ignore.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + namespace ck_tile::core::arch::mma { /** @@ -112,6 +115,7 @@ struct amdgcn_mma }; } // namespace ck_tile::core::arch::mma +#pragma clang diagnostic pop // Include the implementations #include "wmma/wmma.hpp" diff --git a/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp index 28674554a19..fd698ee3403 100644 --- a/profiler/src/profiler_operation_registry.hpp +++ b/profiler/src/profiler_operation_registry.hpp @@ -9,6 +9,9 @@ #include #include +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + class ProfilerOperationRegistry final { ProfilerOperationRegistry() = default; @@ -83,3 +86,4 @@ class ProfilerOperationRegistry final ::ProfilerOperationRegistry::GetInstance().Add(name, description, operation) \ _Pragma("clang diagnostic pop") // clang-format on +#pragma clang diagnostic pop diff --git a/test/position_embedding/position_embedding.cpp b/test/position_embedding/position_embedding.cpp index 134d2e5f371..689a7a799a9 100644 --- a/test/position_embedding/position_embedding.cpp +++ b/test/position_embedding/position_embedding.cpp @@ -9,6 +9,9 @@ #include "ck_tile/core.hpp" #include "ck_tile/ops/fmha.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + #ifndef TEST_ALIBI_VERBOSE #define TEST_ALIBI_VERBOSE 0 #endif @@ -213,3 +216,4 @@ int main() // clang-format on return rtn ? 
0 : -1; } +#pragma clang diagnostic pop From 673ff17bfb6af5026f69e3fd9073d202b59409af Mon Sep 17 00:00:00 2001 From: illsilin_amdeng Date: Thu, 29 Jan 2026 09:32:55 -0800 Subject: [PATCH 5/7] fix clang format --- .../multi_index_transform.hpp | 28 ++++++++++++++----- .../ck/tensor_description/tensor_adaptor.hpp | 4 ++- .../tensor_description/tensor_descriptor.hpp | 8 ++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp index 0b533b09256..5a6c335b2ca 100644 --- a/include/ck/tensor_description/multi_index_transform.hpp +++ b/include/ck/tensor_description/multi_index_transform.hpp @@ -33,7 +33,9 @@ struct PassThrough __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -310,7 +312,9 @@ struct RightPad __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ static constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -409,7 +413,9 @@ struct Embed __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1081,7 +1087,9 @@ struct Merge_v2_magic_division __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ constexpr const auto& GetUpperLengths() const 
[[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1374,7 +1382,9 @@ struct Merge_v3_division_mod __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1489,7 +1499,9 @@ struct UnMerge __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ constexpr void CalculateLowerIndex(LowIdx& idx_low, @@ -1650,7 +1662,9 @@ struct ConvBwdDataImplicitGemmOutTransform __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 3; } __host__ __device__ constexpr const auto& GetUpperLengths() const [[clang::lifetimebound]] - { return up_lengths_; } + { + return up_lengths_; + } template __host__ __device__ constexpr auto CalculateLowerIndexN(const UpIdx& idx_up) const diff --git a/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp index dac29f31ffc..ee8c7ed71bf 100644 --- a/include/ck/tensor_description/tensor_adaptor.hpp +++ b/include/ck/tensor_description/tensor_adaptor.hpp @@ -24,7 +24,9 @@ struct TensorAdaptor __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); } __host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]] - { return transforms_; } + { + return transforms_; + } __host__ __device__ static constexpr auto GetLowerDimensionHiddenIdss() { diff --git a/include/ck/tensor_description/tensor_descriptor.hpp 
b/include/ck/tensor_description/tensor_descriptor.hpp index eab21cf6e1e..a237c4219dc 100644 --- a/include/ck/tensor_description/tensor_descriptor.hpp +++ b/include/ck/tensor_description/tensor_descriptor.hpp @@ -182,7 +182,9 @@ struct TensorDescriptor // TODO make these private __host__ __device__ constexpr const auto& GetTransforms() const [[clang::lifetimebound]] - { return transforms_; } + { + return transforms_; + } __host__ __device__ static constexpr auto GetLowerDimensionIdss() { @@ -257,7 +259,9 @@ struct TensorCoordinate // TODO make these private __host__ __device__ constexpr const auto& GetHiddenIndex() const [[clang::lifetimebound]] - { return idx_hidden_; } + { + return idx_hidden_; + } __host__ __device__ auto& GetHiddenIndex() [[clang::lifetimebound]] { return idx_hidden_; } From d677cdef3b4acf5c7c0dd319cd73d20033a43a5d Mon Sep 17 00:00:00 2001 From: JP Lehr Date: Thu, 29 Jan 2026 17:00:19 -0600 Subject: [PATCH 6/7] Adding couple more instances from gfx1200 build --- .../gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp | 3 +++ include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp | 4 ++++ tile_engine/ops/gemm/gemm_universal/gemm_benchmark.hpp | 3 +++ 3 files changed, 10 insertions(+) diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp index f831c0f6cf8..e41cf8c82d6 100644 --- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp +++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_wmmaops_base.hpp @@ -10,6 +10,8 @@ #include "ck/tensor_operation/gpu/warp/wmma_gemm.hpp" #include "ck/tensor_description/tensor_adaptor.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" namespace ck { template Date: Sat, 31 Jan 2026 03:41:50 -0600 Subject: [PATCH 7/7] Fixed another few instances --- 
tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.hpp | 4 ++++ .../ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.hpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.hpp b/tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.hpp index f8c196e32af..b0d8445c16f 100644 --- a/tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.hpp +++ b/tile_engine/ops/gemm/gemm_multi_d/gemm_multi_d_benchmark.hpp @@ -13,6 +13,9 @@ #include "ck_tile/host.hpp" #include "gemm_multi_d_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + // Data types and Layouts are defined by the generated kernel headers // No hardcoded type definitions here to avoid conflicts @@ -230,3 +233,4 @@ void gemm_multi_d_host_reference(int verify, a_m_k, b_k_n, {d0_m_n, d1_m_n}, c_m_n_host_result); } } +#pragma clang diagnostic pop diff --git a/tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.hpp b/tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.hpp index 748fe581d35..41ccc4a01bb 100644 --- a/tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.hpp +++ b/tile_engine/ops/gemm/gemm_preshuffle/gemm_preshuffle_benchmark.hpp @@ -7,6 +7,9 @@ #include "ck_tile/host.hpp" #include "gemm_preshuffle_common.hpp" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlifetime-safety-intra-tu-suggestions" + //[TODO] Move parts of this File to commons enum class Metric { @@ -234,3 +237,4 @@ void gemm_host_reference(int verify, c_m_n_gpu_buf_ref.FromDevice(c_m_n_ref.data()); } } +#pragma clang diagnostic pop