diff --git a/src/shammodels/ramses/include/shammodels/ramses/modules/AMRGridRefinementHandler.hpp b/src/shammodels/ramses/include/shammodels/ramses/modules/AMRGridRefinementHandler.hpp index bab7476bb7..49d1e986b9 100644 --- a/src/shammodels/ramses/include/shammodels/ramses/modules/AMRGridRefinementHandler.hpp +++ b/src/shammodels/ramses/include/shammodels/ramses/modules/AMRGridRefinementHandler.hpp @@ -42,6 +42,7 @@ namespace shammodels::basegodunov::modules { using AMRBlock = typename Config::AMRBlock; using BlockCoord = shamrock::amr::AMRBlockCoord; using OrientedAMRGraph = OrientedAMRGraph; + using TgridUint = typename std::make_unsigned>::type; ShamrockCtx &context; Config &solver_config; @@ -53,6 +54,19 @@ namespace shammodels::basegodunov::modules { void update_refinement(); private: + /** + * @brief build histrogram for amr block levels + */ + void amr_block_levels_histogram( + const sham::DeviceScheduler_ptr &sched, + const sham::DeviceBuffer &amr_levels, + const u32 nb_levels, + sham::DeviceBuffer &block_count_per_level, + sham::DeviceBuffer &block_reordered_indx_map, + const u32 len, + TgridUint level_min, + TgridUint level_max); + /** * @brief Generate the list of blocks that need to be refined or derefined. * diff --git a/src/shammodels/ramses/src/modules/AMRGridRefinementHandler.cpp b/src/shammodels/ramses/src/modules/AMRGridRefinementHandler.cpp index d43f70b7c9..97568f8053 100644 --- a/src/shammodels/ramses/src/modules/AMRGridRefinementHandler.cpp +++ b/src/shammodels/ramses/src/modules/AMRGridRefinementHandler.cpp @@ -16,10 +16,91 @@ #include "shammodels/ramses/modules/AMRGridRefinementHandler.hpp" #include "shamalgs/details/algorithm/algorithm.hpp" +#include "shamalgs/details/numeric/numeric.hpp" +#include "shambackends/DeviceBuffer.hpp" #include "shamcomm/logs.hpp" #include "shammodels/ramses/modules/AMRSortBlocks.hpp" #include +/** + * @brief build histrogram for amr block levels + * @tparam Tvec + * @tparam TgridVec + * @param sched pointer to device scheduler + * @param amr_levels block's amr level + * @param nb_levels number of levels from level_min to level_max + * @param block_count_per_level buffer of levels ordered from level_min to level_max + * @param block_reordered_indx_map + */ +template +void shammodels::basegodunov::modules::AMRGridRefinementHandler:: + amr_block_levels_histogram( + const sham::DeviceScheduler_ptr &sched, + const sham::DeviceBuffer &amr_levels, + const u32 nb_levels, + sham::DeviceBuffer &block_count_per_level, + sham::DeviceBuffer &block_reordered_indx_map, + const u32 len, + TgridUint level_min, + TgridUint level_max) { + + SHAM_ASSERT(amr_levels.get_size() == len); + SHAM_ASSERT(block_count_per_level.get_size() == nb_levels); + + // compute histogramm to count the number of blocks per level + block_count_per_level.fill(0); + auto &q1 = shambase::get_check_ref(sched).get_queue(); + sham::kernel_call( + q1, + sham::MultiRef{amr_levels}, + sham::MultiRef{block_count_per_level}, + len, + [](u32 i, const TgridUint *__restrict amr_levels, u32 *__restrict block_count_per_level) { + auto cur_level = amr_levels[i]; + sycl::atomic_ref< + u32, + sycl::memory_order::relaxed, + sycl::memory_scope::system, + sycl::access::address_space::global_space> + bin_count(block_count_per_level[cur_level]); + bin_count++; + }); + // computes offsets for each level + sham::DeviceBuffer offset_buf + = shamalgs::numeric::scan_exclusive(sched, block_count_per_level, nb_levels); + + // // build block index map such that from the left to right + // // amr_levl[reordered_blocks] ===> L_min,L_min,L_min...| L1,L1,L1...| .... |L_max L_max L_max + block_reordered_indx_map.fill(0); + sham::DeviceBuffer idx_counter_per_lev(nb_levels, sched); + idx_counter_per_lev.fill(0); + auto &q2 = shambase::get_check_ref(sched).get_queue(); + sham::kernel_call( + q2, + sham::MultiRef{amr_levels, block_count_per_level, offset_buf}, + sham::MultiRef{idx_counter_per_lev, block_reordered_indx_map}, + len, + [level_min]( + u32 i, + const TgridUint *__restrict amr_levels, + const u32 *__restrict block_cnt_per_lev, + const u32 *__restrict offsets, + u32 *__restrict idx_counter_per_lev, + u32 *__restrict reordered_blocks) { + auto amr_lev = amr_levels[i]; + sycl::atomic_ref< + u32, + sycl::memory_order::relaxed, + sycl::memory_scope::device, + sycl::access::address_space::global_space> + atomic_cnt_idx_per_lev(idx_counter_per_lev[amr_lev]); + u32 old_loc_pos = atomic_cnt_idx_per_lev.fetch_add(static_cast(1)); + + u32 glob_pos_idx = offsets[i] + old_loc_pos; + reordered_blocks[glob_pos_idx] = i; + }); +} + template template void shammodels::basegodunov::modules::AMRGridRefinementHandler::