From 5d07c4393aea45f023f0124f2010a15e714a434f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 15 Mar 2026 13:08:01 +0100 Subject: [PATCH 01/26] cleanup --- examples/benchmarks/sph_weak_scale_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index be488c3bd..90bfacbc2 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -159,13 +159,24 @@ result_text += f"res_cnts = {res_cnts}\n" result_text += f"step time = {step_time}\n" + dic_out = { + "world_size": shamrock.sys.world_size(), + "rate": res_rate, + "cnt": res_cnt, + "step_time": step_time, + } + # print the system metrics result_text += "system metrics:\n" for key, value in max_rate_system_metrics.items(): result_text += f"{key}: {value} J\n" + dic_out[key] = value for key, value in max_rate_system_metrics.items(): result_text += f"avg power {key} / step time : {value / step_time} W\n" + dic_out[f"power_{key}"] = value / step_time + + result_text += f"dic_out = {dic_out}\n" print("current results:") print(result_text) From de797a610e3beab44d777f2e17e11c9335caa9a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 15 Mar 2026 13:11:14 +0100 Subject: [PATCH 02/26] cleanup --- env/machine/argonne/aurora/env_oneapi.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/env/machine/argonne/aurora/env_oneapi.sh b/env/machine/argonne/aurora/env_oneapi.sh index 1fdebde40..c59bc8eef 100644 --- a/env/machine/argonne/aurora/env_oneapi.sh +++ b/env/machine/argonne/aurora/env_oneapi.sh @@ -6,6 +6,8 @@ module load python module load ninja function shamconfigure { + # note that the -g flag is set. In principle there is no impact on the perf, but if you run on + # aurora it is better to enable debug symbols for crash reporting. cmake \ -S $SHAMROCK_DIR \ -B $BUILD_DIR \ @@ -14,10 +16,11 @@ function shamconfigure { -DINTEL_LLVM_PATH=$(dirname $(which icpx))/.. \ -DCMAKE_CXX_COMPILER=$(which icpx) \ -DCMAKE_C_COMPILER=$(which icx) \ - -DCMAKE_CXX_FLAGS="-fsycl -fp-model=precise" \ + -DCMAKE_CXX_FLAGS="-g -fsycl -fp-model=precise" \ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--copy-dt-needed-entries" \ -DCMAKE_BUILD_TYPE="${SHAMROCK_BUILD_TYPE}" \ -DBUILD_TEST=Yes \ + -DSHAMROCK_USE_CPPTRACE=Yes \ "${CMAKE_OPT[@]}" } From b161128b7ac5a31e6fc4b7df759db6d11dcae743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 15 Mar 2026 22:35:37 +0100 Subject: [PATCH 03/26] better ? --- src/shamsys/src/system_metrics.cpp | 36 +++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index aefcfd60d..ee4437a9a 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -16,6 +16,7 @@ #include "shambase/aliases_int.hpp" #include "shambase/popen.hpp" #include "shamcomm/local_rank.hpp" +#include "shamcomm/wrapper.hpp" #include "shamsys/system_metrics.hpp" #include @@ -24,35 +25,47 @@ namespace shamsys { class AuroraSystemMetricReporter : public ISystemMetricReporter { public: std::optional get_rank_energy_consummed() override { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread BOARD_ENERGY board 0"); - return std::stoull(output.c_str()); + ret = std::stoull(output.c_str()); } - return std::nullopt; + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + return ret; } std::optional get_gpu_energy_consummed() override { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread GPU_ENERGY board 0"); - return std::stoull(output.c_str()); + ret = std::stoull(output.c_str()); } - return std::nullopt; + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + return ret; } std::optional get_cpu_energy_consummed() override { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread CPU_ENERGY board 0"); - return std::stoull(output.c_str()); + ret = std::stoull(output.c_str()); } - return std::nullopt; + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + return ret; } std::optional get_dram_energy_consummed() override { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread DRAM_ENERGY board 0"); - return std::stoull(output.c_str()); + ret = std::stoull(output.c_str()); } - return std::nullopt; + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + return ret; } bool support_rank_energy_consummed() override { return true; } @@ -64,12 +77,15 @@ namespace shamsys { class IntelRAPLSystemMetricReport : public ISystemMetricReporter { public: std::optional get_rank_energy_consummed() override { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output( "cat /sys/class/powercap/intel-rapl:0/energy_uj"); - return f64(std::stoull(output.c_str())) * 1e-6; + ret = f64(std::stoull(output.c_str())) * 1e-6; } - return std::nullopt; + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + return ret; } std::optional get_gpu_energy_consummed() override { return std::nullopt; } From 58b4695751b780529cdeaad20c00f5e49dba87b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 15 Mar 2026 23:08:07 +0100 Subject: [PATCH 04/26] better reporting ? --- src/shammodels/sph/src/Solver.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/shammodels/sph/src/Solver.cpp b/src/shammodels/sph/src/Solver.cpp index d2e042373..b6467de41 100644 --- a/src/shammodels/sph/src/Solver.cpp +++ b/src/shammodels/sph/src/Solver.cpp @@ -1579,10 +1579,12 @@ void shammodels::sph::Solver::update_sync_load_values() { template class Kern> shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() { - sham::MemPerfInfos mem_perf_infos_start = sham::details::get_mem_perf_info(); - f64 mpi_timer_start = shamcomm::mpi::get_timer("total"); + // has to be first since there is a barrier that may mess the other timers shamsys::SystemMetrics system_metrics_start = shamsys::get_system_metrics(); + sham::MemPerfInfos mem_perf_infos_start = sham::details::get_mem_perf_info(); + f64 mpi_timer_start = shamcomm::mpi::get_timer("total"); + Tscal t_current = solver_config.get_time(); Tscal dt = solver_config.get_dt_sph(); From ff5a902e22f4f6cafbaf0a56bbedc08b43f5f2fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 15 Mar 2026 23:37:48 +0100 Subject: [PATCH 05/26] better reporting ? --- examples/benchmarks/sph_weak_scale_test.py | 11 +++--- src/shammodels/common/src/timestep_report.cpp | 27 +++++++++----- src/shammodels/sph/src/Solver.cpp | 6 ++-- src/shammodels/sph/src/SolverLog.cpp | 5 +++ src/shammodels/sph/src/pySPHModel.cpp | 1 + .../include/shamsys/system_metrics.hpp | 16 +++++++-- src/shamsys/src/system_metrics.cpp | 36 ++++++------------- 7 files changed, 60 insertions(+), 42 deletions(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 90bfacbc2..9278ae739 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -167,14 +167,17 @@ } # print the system metrics + metrics_duration = max_rate_system_metrics["duration"] result_text += "system metrics:\n" for key, value in max_rate_system_metrics.items(): - result_text += f"{key}: {value} J\n" - dic_out[key] = value + if not key == "duration": + result_text += f"{key}: {value} J\n" + dic_out[key] = value for key, value in max_rate_system_metrics.items(): - result_text += f"avg power {key} / step time : {value / step_time} W\n" - dic_out[f"power_{key}"] = value / step_time + if not key == "duration": + result_text += f"avg power {key} / step time : {value / metrics_duration} W\n" + dic_out[f"power_{key}"] = value / metrics_duration result_text += f"dic_out = {dic_out}\n" diff --git a/src/shammodels/common/src/timestep_report.cpp b/src/shammodels/common/src/timestep_report.cpp index 2d9ede830..a617e777f 100644 --- a/src/shammodels/common/src/timestep_report.cpp +++ b/src/shammodels/common/src/timestep_report.cpp @@ -69,6 +69,9 @@ std::string shammodels::report_perf_timestep( = optional_gather_power(system_metrics.cpu_energy_consummed); std::vector dram_energy_consummed_all_ranks = optional_gather_power(system_metrics.dram_energy_consummed); + std::vector metric_time_all_ranks + = (report_power_usage) ? shamalgs::collective::gather(system_metrics.wall_time) + : std::vector{}; if (shamcomm::world_rank() != 0) { return ""; @@ -88,6 +91,10 @@ std::string shammodels::report_perf_timestep( = std::accumulate(max_mem_device_all_ranks.begin(), max_mem_device_all_ranks.end(), 0_u64); size_t sum_mem_host_total = std::accumulate(max_mem_host_all_ranks.begin(), max_mem_host_all_ranks.end(), 0_u64); + f64 metric_tmax + = (report_power_usage) + ? *std::max_element(metric_time_all_ranks.begin(), metric_time_all_ranks.end()) + : 0._f64; std::vector rank_power_step_all_ranks; std::vector rank_gpu_power_step_all_ranks; @@ -101,28 +108,32 @@ std::string shammodels::report_perf_timestep( for (u32 i = 0; i < shamcomm::world_size(); i++) { if (rank_energy_consummed_all_ranks[i] > 0._f64) { rank_power_step_all_ranks.push_back( - shambase::format("{:.1f} W", f64(rank_energy_consummed_all_ranks[i]) / max_t)); + shambase::format( + "{:.1f} W", f64(rank_energy_consummed_all_ranks[i]) / metric_tmax)); } else { rank_power_step_all_ranks.push_back("N/A"); } if (gpu_energy_consummed_all_ranks[i] > 0._f64) { rank_gpu_power_step_all_ranks.push_back( - shambase::format("{:.1f} W", f64(gpu_energy_consummed_all_ranks[i]) / max_t)); + shambase::format( + "{:.1f} W", f64(gpu_energy_consummed_all_ranks[i]) / metric_tmax)); } else { rank_gpu_power_step_all_ranks.push_back("N/A"); } if (cpu_energy_consummed_all_ranks[i] > 0._f64) { rank_cpu_power_step_all_ranks.push_back( - shambase::format("{:.1f} W", f64(cpu_energy_consummed_all_ranks[i]) / max_t)); + shambase::format( + "{:.1f} W", f64(cpu_energy_consummed_all_ranks[i]) / metric_tmax)); } else { rank_cpu_power_step_all_ranks.push_back("N/A"); } if (dram_energy_consummed_all_ranks[i] > 0._f64) { rank_dram_power_step_all_ranks.push_back( - shambase::format("{:.1f} W", f64(dram_energy_consummed_all_ranks[i]) / max_t)); + shambase::format( + "{:.1f} W", f64(dram_energy_consummed_all_ranks[i]) / metric_tmax)); } else { rank_dram_power_step_all_ranks.push_back("N/A"); } @@ -135,10 +146,10 @@ std::string shammodels::report_perf_timestep( cpu_energy_consummed_all_ranks.begin(), cpu_energy_consummed_all_ranks.end(), 0._f64); f64 sum_dram_energy_consummed = std::accumulate( dram_energy_consummed_all_ranks.begin(), dram_energy_consummed_all_ranks.end(), 0._f64); - sum_power_step = shambase::format("{:.1e} W", sum_rank_energy_consummed / max_t); - sum_gpu_power_step = shambase::format("{:.1e} W", sum_gpu_energy_consummed / max_t); - sum_cpu_power_step = shambase::format("{:.1e} W", sum_cpu_energy_consummed / max_t); - sum_dram_power_step = shambase::format("{:.1e} W", sum_dram_energy_consummed / max_t); + sum_power_step = shambase::format("{:.1e} W", sum_rank_energy_consummed / metric_tmax); + sum_gpu_power_step = shambase::format("{:.1e} W", sum_gpu_energy_consummed / metric_tmax); + sum_cpu_power_step = shambase::format("{:.1e} W", sum_cpu_energy_consummed / metric_tmax); + sum_dram_power_step = shambase::format("{:.1e} W", sum_dram_energy_consummed / metric_tmax); } u32 cols_count = 9_u32; diff --git a/src/shammodels/sph/src/Solver.cpp b/src/shammodels/sph/src/Solver.cpp index b6467de41..d783cef92 100644 --- a/src/shammodels/sph/src/Solver.cpp +++ b/src/shammodels/sph/src/Solver.cpp @@ -2650,12 +2650,14 @@ shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() tstep.end(); - sham::MemPerfInfos mem_perf_infos_end = sham::details::get_mem_perf_info(); + f64 delta_mpi_timer = shamcomm::mpi::get_timer("total") - mpi_timer_start; + sham::MemPerfInfos mem_perf_infos_end = sham::details::get_mem_perf_info(); + + /// must be after the mpi timer to not count the barrier of the system metrics std::optional rank_energy_consummed_end = shamsys::get_rank_energy_consummed(); shamsys::SystemMetrics system_metrics_end = shamsys::get_system_metrics(); shamsys::SystemMetrics system_metrics_delta = system_metrics_end - system_metrics_start; - f64 delta_mpi_timer = shamcomm::mpi::get_timer("total") - mpi_timer_start; f64 t_dev_alloc = (mem_perf_infos_end.time_alloc_device - mem_perf_infos_start.time_alloc_device) + (mem_perf_infos_end.time_free_device - mem_perf_infos_start.time_free_device); diff --git a/src/shammodels/sph/src/SolverLog.cpp b/src/shammodels/sph/src/SolverLog.cpp index 9390574c4..c377163c4 100644 --- a/src/shammodels/sph/src/SolverLog.cpp +++ b/src/shammodels/sph/src/SolverLog.cpp @@ -74,6 +74,8 @@ shamsys::SystemMetrics shammodels::sph::SolverLog::get_last_system_metrics() { = optional_gather_power(last_log.system_metrics.cpu_energy_consummed); std::vector dram_energy_consummed_all_ranks = optional_gather_power(last_log.system_metrics.dram_energy_consummed); + std::vector metric_time_all_ranks + = shamalgs::collective::gather(last_log.system_metrics.wall_time); f64 sum_rank_energy_consummed = std::accumulate( rank_energy_consummed_all_ranks.begin(), rank_energy_consummed_all_ranks.end(), 0._f64); @@ -83,8 +85,11 @@ shamsys::SystemMetrics shammodels::sph::SolverLog::get_last_system_metrics() { cpu_energy_consummed_all_ranks.begin(), cpu_energy_consummed_all_ranks.end(), 0._f64); f64 sum_dram_energy_consummed = std::accumulate( dram_energy_consummed_all_ranks.begin(), dram_energy_consummed_all_ranks.end(), 0._f64); + f64 metric_time_all + = *std::max_element(metric_time_all_ranks.begin(), metric_time_all_ranks.end()); shamsys::SystemMetrics system_metrics; + system_metrics.wall_time = metric_time_all; system_metrics.rank_energy_consummed = (shamsys::support_rank_energy_consummed()) ? sum_rank_energy_consummed : std::optional{}; diff --git a/src/shammodels/sph/src/pySPHModel.cpp b/src/shammodels/sph/src/pySPHModel.cpp index 5b3c07ddf..74da727eb 100644 --- a/src/shammodels/sph/src/pySPHModel.cpp +++ b/src/shammodels/sph/src/pySPHModel.cpp @@ -1132,6 +1132,7 @@ void add_instance(py::module &m, std::string name_config, std::string name_model [&](T &self) { auto system_metrics = self.solver.solve_logs.get_last_system_metrics(); py::dict ret; + ret["duration"] = system_metrics.wall_time; if (system_metrics.rank_energy_consummed.has_value()) { ret["rank_energy_consummed"] = system_metrics.rank_energy_consummed.value(); } diff --git a/src/shamsys/include/shamsys/system_metrics.hpp b/src/shamsys/include/shamsys/system_metrics.hpp index a900fc293..4abd1f836 100644 --- a/src/shamsys/include/shamsys/system_metrics.hpp +++ b/src/shamsys/include/shamsys/system_metrics.hpp @@ -17,7 +17,9 @@ #include "shambase/aliases_float.hpp" #include "shambase/memory.hpp" +#include "shambase/stacktrace.hpp" #include "shamcmdopt/env.hpp" +#include "shamcomm/wrapper.hpp" #include #include @@ -76,18 +78,27 @@ namespace shamsys { } struct SystemMetrics { + f64 wall_time; std::optional rank_energy_consummed; std::optional gpu_energy_consummed; std::optional cpu_energy_consummed; std::optional dram_energy_consummed; }; - inline SystemMetrics get_system_metrics() { - return SystemMetrics{ + inline SystemMetrics get_system_metrics(bool barrier = true) { + if (barrier) { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + } + auto ret = SystemMetrics{ + shambase::details::get_wtime(), get_rank_energy_consummed(), get_gpu_energy_consummed(), get_cpu_energy_consummed(), get_dram_energy_consummed()}; + if (barrier) { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + } + return ret; } inline SystemMetrics operator-(const SystemMetrics &lhs, const SystemMetrics &rhs) { @@ -98,6 +109,7 @@ namespace shamsys { : std::nullopt; }; return SystemMetrics{ + lhs.wall_time - rhs.wall_time, optional_sub(lhs.rank_energy_consummed, rhs.rank_energy_consummed), optional_sub(lhs.gpu_energy_consummed, rhs.gpu_energy_consummed), optional_sub(lhs.cpu_energy_consummed, rhs.cpu_energy_consummed), diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index ee4437a9a..aefcfd60d 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -16,7 +16,6 @@ #include "shambase/aliases_int.hpp" #include "shambase/popen.hpp" #include "shamcomm/local_rank.hpp" -#include "shamcomm/wrapper.hpp" #include "shamsys/system_metrics.hpp" #include @@ -25,47 +24,35 @@ namespace shamsys { class AuroraSystemMetricReporter : public ISystemMetricReporter { public: std::optional get_rank_energy_consummed() override { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread BOARD_ENERGY board 0"); - ret = std::stoull(output.c_str()); + return std::stoull(output.c_str()); } - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - return ret; + return std::nullopt; } std::optional get_gpu_energy_consummed() override { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread GPU_ENERGY board 0"); - ret = std::stoull(output.c_str()); + return std::stoull(output.c_str()); } - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - return ret; + return std::nullopt; } std::optional get_cpu_energy_consummed() override { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread CPU_ENERGY board 0"); - ret = std::stoull(output.c_str()); + return std::stoull(output.c_str()); } - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - return ret; + return std::nullopt; } std::optional get_dram_energy_consummed() override { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output("geopmread DRAM_ENERGY board 0"); - ret = std::stoull(output.c_str()); + return std::stoull(output.c_str()); } - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - return ret; + return std::nullopt; } bool support_rank_energy_consummed() override { return true; } @@ -77,15 +64,12 @@ namespace shamsys { class IntelRAPLSystemMetricReport : public ISystemMetricReporter { public: std::optional get_rank_energy_consummed() override { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - std::optional ret = std::nullopt; if (shamcomm::is_main_node_rank()) { std::string output = shambase::popen_fetch_output( "cat /sys/class/powercap/intel-rapl:0/energy_uj"); - ret = f64(std::stoull(output.c_str())) * 1e-6; + return f64(std::stoull(output.c_str())) * 1e-6; } - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - return ret; + return std::nullopt; } std::optional get_gpu_energy_consummed() override { return std::nullopt; } From e0cd91d1dfd56ccace69e22623658307ca067cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 00:20:41 +0100 Subject: [PATCH 06/26] streamline that shit --- src/shammodels/common/src/timestep_report.cpp | 97 ++--------- src/shammodels/sph/src/SolverLog.cpp | 47 +----- .../include/shamsys/system_metrics.hpp | 37 +++-- src/shamsys/src/system_metrics.cpp | 155 ++++++++++++++++++ 4 files changed, 194 insertions(+), 142 deletions(-) diff --git a/src/shammodels/common/src/timestep_report.cpp b/src/shammodels/common/src/timestep_report.cpp index a617e777f..23db6d3ec 100644 --- a/src/shammodels/common/src/timestep_report.cpp +++ b/src/shammodels/common/src/timestep_report.cpp @@ -56,22 +56,8 @@ std::string shammodels::report_perf_timestep( std::vector max_mem_device_all_ranks = shamalgs::collective::gather(max_mem_device); std::vector max_mem_host_all_ranks = shamalgs::collective::gather(max_mem_host); - auto optional_gather_power = [&](const std::optional &value) -> std::vector { - return (report_power_usage) ? shamalgs::collective::gather(value ? value.value() : 0._f64) - : std::vector{}; - }; - - std::vector rank_energy_consummed_all_ranks - = optional_gather_power(system_metrics.rank_energy_consummed); - std::vector gpu_energy_consummed_all_ranks - = optional_gather_power(system_metrics.gpu_energy_consummed); - std::vector cpu_energy_consummed_all_ranks - = optional_gather_power(system_metrics.cpu_energy_consummed); - std::vector dram_energy_consummed_all_ranks - = optional_gather_power(system_metrics.dram_energy_consummed); - std::vector metric_time_all_ranks - = (report_power_usage) ? shamalgs::collective::gather(system_metrics.wall_time) - : std::vector{}; + auto rank_metrics = (report_power_usage) ? shamsys::gather_rank_metrics(system_metrics) + : std::vector{}; if (shamcomm::world_rank() != 0) { return ""; @@ -91,65 +77,16 @@ std::string shammodels::report_perf_timestep( = std::accumulate(max_mem_device_all_ranks.begin(), max_mem_device_all_ranks.end(), 0_u64); size_t sum_mem_host_total = std::accumulate(max_mem_host_all_ranks.begin(), max_mem_host_all_ranks.end(), 0_u64); - f64 metric_tmax - = (report_power_usage) - ? *std::max_element(metric_time_all_ranks.begin(), metric_time_all_ranks.end()) - : 0._f64; - std::vector rank_power_step_all_ranks; - std::vector rank_gpu_power_step_all_ranks; - std::vector rank_cpu_power_step_all_ranks; - std::vector rank_dram_power_step_all_ranks; - std::string sum_power_step; - std::string sum_gpu_power_step; - std::string sum_cpu_power_step; - std::string sum_dram_power_step; - if (report_power_usage) { - for (u32 i = 0; i < shamcomm::world_size(); i++) { - if (rank_energy_consummed_all_ranks[i] > 0._f64) { - rank_power_step_all_ranks.push_back( - shambase::format( - "{:.1f} W", f64(rank_energy_consummed_all_ranks[i]) / metric_tmax)); - } else { - rank_power_step_all_ranks.push_back("N/A"); - } - - if (gpu_energy_consummed_all_ranks[i] > 0._f64) { - rank_gpu_power_step_all_ranks.push_back( - shambase::format( - "{:.1f} W", f64(gpu_energy_consummed_all_ranks[i]) / metric_tmax)); - } else { - rank_gpu_power_step_all_ranks.push_back("N/A"); - } + shamsys::SystemMetrics aggregated_metrics = shamsys::aggregate_rank_metrics(rank_metrics); - if (cpu_energy_consummed_all_ranks[i] > 0._f64) { - rank_cpu_power_step_all_ranks.push_back( - shambase::format( - "{:.1f} W", f64(cpu_energy_consummed_all_ranks[i]) / metric_tmax)); - } else { - rank_cpu_power_step_all_ranks.push_back("N/A"); - } - - if (dram_energy_consummed_all_ranks[i] > 0._f64) { - rank_dram_power_step_all_ranks.push_back( - shambase::format( - "{:.1f} W", f64(dram_energy_consummed_all_ranks[i]) / metric_tmax)); - } else { - rank_dram_power_step_all_ranks.push_back("N/A"); - } + std::vector formatted_rank_metrics{}; + shamsys::FormattedSystemMetrics formatted_aggregated_metrics = {}; + if (report_power_usage) { + for (const auto &metric : rank_metrics) { + formatted_rank_metrics.push_back(shamsys::format_system_metrics(metric)); } - f64 sum_rank_energy_consummed = std::accumulate( - rank_energy_consummed_all_ranks.begin(), rank_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_gpu_energy_consummed = std::accumulate( - gpu_energy_consummed_all_ranks.begin(), gpu_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_cpu_energy_consummed = std::accumulate( - cpu_energy_consummed_all_ranks.begin(), cpu_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_dram_energy_consummed = std::accumulate( - dram_energy_consummed_all_ranks.begin(), dram_energy_consummed_all_ranks.end(), 0._f64); - sum_power_step = shambase::format("{:.1e} W", sum_rank_energy_consummed / metric_tmax); - sum_gpu_power_step = shambase::format("{:.1e} W", sum_gpu_energy_consummed / metric_tmax); - sum_cpu_power_step = shambase::format("{:.1e} W", sum_cpu_energy_consummed / metric_tmax); - sum_dram_power_step = shambase::format("{:.1e} W", sum_dram_energy_consummed / metric_tmax); + formatted_aggregated_metrics = shamsys::format_system_metrics(aggregated_metrics); } u32 cols_count = 9_u32; @@ -217,16 +154,16 @@ std::string shammodels::report_perf_timestep( }; if (report_power_usage) { if (shamsys::support_rank_energy_consummed()) { - row.push_back(rank_power_step_all_ranks[i]); + row.push_back(formatted_rank_metrics[i].rank_power.value_or("N/A")); } if (shamsys::support_gpu_energy_consummed()) { - row.push_back(rank_gpu_power_step_all_ranks[i]); + row.push_back(formatted_rank_metrics[i].gpu_power.value_or("N/A")); } if (shamsys::support_cpu_energy_consummed()) { - row.push_back(rank_cpu_power_step_all_ranks[i]); + row.push_back(formatted_rank_metrics[i].cpu_power.value_or("N/A")); } if (shamsys::support_dram_energy_consummed()) { - row.push_back(rank_dram_power_step_all_ranks[i]); + row.push_back(formatted_rank_metrics[i].dram_power.value_or("N/A")); } } table.add_data(row, Table::right); @@ -265,16 +202,16 @@ std::string shammodels::report_perf_timestep( }; if (report_power_usage) { if (shamsys::support_rank_energy_consummed()) { - all_row.push_back(sum_power_step); + all_row.push_back(formatted_aggregated_metrics.rank_power.value_or("N/A")); } if (shamsys::support_gpu_energy_consummed()) { - all_row.push_back(sum_gpu_power_step); + all_row.push_back(formatted_aggregated_metrics.gpu_power.value_or("N/A")); } if (shamsys::support_cpu_energy_consummed()) { - all_row.push_back(sum_cpu_power_step); + all_row.push_back(formatted_aggregated_metrics.cpu_power.value_or("N/A")); } if (shamsys::support_dram_energy_consummed()) { - all_row.push_back(sum_dram_power_step); + all_row.push_back(formatted_aggregated_metrics.dram_power.value_or("N/A")); } } table.add_data(all_row, Table::right); diff --git a/src/shammodels/sph/src/SolverLog.cpp b/src/shammodels/sph/src/SolverLog.cpp index c377163c4..bc0f6bdc2 100644 --- a/src/shammodels/sph/src/SolverLog.cpp +++ b/src/shammodels/sph/src/SolverLog.cpp @@ -59,49 +59,6 @@ shamsys::SystemMetrics shammodels::sph::SolverLog::get_last_system_metrics() { auto &last_log = step_logs.back(); - bool report_power_usage = shamsys::has_reporter(); - - auto optional_gather_power = [&](const std::optional &value) -> std::vector { - return (report_power_usage) ? shamalgs::collective::gather(value ? value.value() : 0._f64) - : std::vector{}; - }; - - std::vector rank_energy_consummed_all_ranks - = optional_gather_power(last_log.system_metrics.rank_energy_consummed); - std::vector gpu_energy_consummed_all_ranks - = optional_gather_power(last_log.system_metrics.gpu_energy_consummed); - std::vector cpu_energy_consummed_all_ranks - = optional_gather_power(last_log.system_metrics.cpu_energy_consummed); - std::vector dram_energy_consummed_all_ranks - = optional_gather_power(last_log.system_metrics.dram_energy_consummed); - std::vector metric_time_all_ranks - = shamalgs::collective::gather(last_log.system_metrics.wall_time); - - f64 sum_rank_energy_consummed = std::accumulate( - rank_energy_consummed_all_ranks.begin(), rank_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_gpu_energy_consummed = std::accumulate( - gpu_energy_consummed_all_ranks.begin(), gpu_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_cpu_energy_consummed = std::accumulate( - cpu_energy_consummed_all_ranks.begin(), cpu_energy_consummed_all_ranks.end(), 0._f64); - f64 sum_dram_energy_consummed = std::accumulate( - dram_energy_consummed_all_ranks.begin(), dram_energy_consummed_all_ranks.end(), 0._f64); - f64 metric_time_all - = *std::max_element(metric_time_all_ranks.begin(), metric_time_all_ranks.end()); - - shamsys::SystemMetrics system_metrics; - system_metrics.wall_time = metric_time_all; - system_metrics.rank_energy_consummed = (shamsys::support_rank_energy_consummed()) - ? sum_rank_energy_consummed - : std::optional{}; - system_metrics.gpu_energy_consummed = (shamsys::support_gpu_energy_consummed()) - ? sum_gpu_energy_consummed - : std::optional{}; - system_metrics.cpu_energy_consummed = (shamsys::support_cpu_energy_consummed()) - ? sum_cpu_energy_consummed - : std::optional{}; - system_metrics.dram_energy_consummed = (shamsys::support_dram_energy_consummed()) - ? sum_dram_energy_consummed - : std::optional{}; - - return system_metrics; + auto rank_metrics = shamsys::gather_rank_metrics(last_log.system_metrics); + return shamsys::aggregate_rank_metrics(rank_metrics); } diff --git a/src/shamsys/include/shamsys/system_metrics.hpp b/src/shamsys/include/shamsys/system_metrics.hpp index 4abd1f836..a4481e7ed 100644 --- a/src/shamsys/include/shamsys/system_metrics.hpp +++ b/src/shamsys/include/shamsys/system_metrics.hpp @@ -17,9 +17,7 @@ #include "shambase/aliases_float.hpp" #include "shambase/memory.hpp" -#include "shambase/stacktrace.hpp" #include "shamcmdopt/env.hpp" -#include "shamcomm/wrapper.hpp" #include #include @@ -85,21 +83,26 @@ namespace shamsys { std::optional dram_energy_consummed; }; - inline SystemMetrics get_system_metrics(bool barrier = true) { - if (barrier) { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - } - auto ret = SystemMetrics{ - shambase::details::get_wtime(), - get_rank_energy_consummed(), - get_gpu_energy_consummed(), - get_cpu_energy_consummed(), - get_dram_energy_consummed()}; - if (barrier) { - shamcomm::mpi::Barrier(MPI_COMM_WORLD); - } - return ret; - } + SystemMetrics get_system_metrics(bool barrier = true); + + std::vector gather_rank_metrics(const SystemMetrics &input); + + SystemMetrics aggregate_rank_metrics(const std::vector &input); + + struct FormattedSystemMetrics { + std::string wall_time; + std::optional rank_energy_consummed; + std::optional gpu_energy_consummed; + std::optional cpu_energy_consummed; + std::optional dram_energy_consummed; + std::optional rank_power; + std::optional gpu_power; + std::optional cpu_power; + std::optional dram_power; + }; + + /// Only to be used on deltas, not the raw one + FormattedSystemMetrics format_system_metrics(const SystemMetrics &input); inline SystemMetrics operator-(const SystemMetrics &lhs, const SystemMetrics &rhs) { auto optional_sub = [](const std::optional &lhs, diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index aefcfd60d..4c78b0078 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -15,7 +15,10 @@ #include "shambase/aliases_int.hpp" #include "shambase/popen.hpp" +#include "shambase/stacktrace.hpp" +#include "shamalgs/collective/reduction.hpp" #include "shamcomm/local_rank.hpp" +#include "shamcomm/wrapper.hpp" #include "shamsys/system_metrics.hpp" #include @@ -145,4 +148,156 @@ namespace shamsys { } return reporter; } + + SystemMetrics get_system_metrics(bool barrier) { + if (barrier) { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + } + auto ret = SystemMetrics{ + shambase::details::get_wtime(), + get_rank_energy_consummed(), + get_gpu_energy_consummed(), + get_cpu_energy_consummed(), + get_dram_energy_consummed()}; + if (barrier) { + shamcomm::mpi::Barrier(MPI_COMM_WORLD); + } + return ret; + } + + std::vector gather_rank_metrics(const SystemMetrics &input) { + std::vector ret(shamcomm::world_size()); + + auto optional_gather_power = [&](const std::optional &value) -> std::vector { + return shamalgs::collective::gather(value ? value.value() : 0._f64); + }; + + std::vector rank_energy_consummed_all_ranks + = optional_gather_power(input.rank_energy_consummed); + std::vector gpu_energy_consummed_all_ranks + = optional_gather_power(input.gpu_energy_consummed); + std::vector cpu_energy_consummed_all_ranks + = optional_gather_power(input.cpu_energy_consummed); + std::vector dram_energy_consummed_all_ranks + = optional_gather_power(input.dram_energy_consummed); + std::vector metric_time_all_ranks = shamalgs::collective::gather(input.wall_time); + + for (u32 i = 0; i < shamcomm::world_size(); i++) { + ret[i] = SystemMetrics{ + metric_time_all_ranks[i], + (shamsys::support_rank_energy_consummed()) + ? std::optional{rank_energy_consummed_all_ranks[i]} + : std::nullopt, + (shamsys::support_gpu_energy_consummed()) + ? std::optional{gpu_energy_consummed_all_ranks[i]} + : std::nullopt, + (shamsys::support_cpu_energy_consummed()) + ? std::optional{cpu_energy_consummed_all_ranks[i]} + : std::nullopt, + (shamsys::support_dram_energy_consummed()) + ? std::optional{dram_energy_consummed_all_ranks[i]} + : std::nullopt, + }; + } + + return ret; + } + + SystemMetrics aggregate_rank_metrics(const std::vector &input) { + f64 sum_rank_energy_consummed = 0._f64; + f64 sum_gpu_energy_consummed = 0._f64; + f64 sum_cpu_energy_consummed = 0._f64; + f64 sum_dram_energy_consummed = 0._f64; + f64 metric_time_all = 0._f64; + + for (const auto &m : input) { + sum_rank_energy_consummed + += (m.rank_energy_consummed ? m.rank_energy_consummed.value() : 0._f64); + sum_gpu_energy_consummed + += (m.gpu_energy_consummed ? m.gpu_energy_consummed.value() : 0._f64); + sum_cpu_energy_consummed + += (m.cpu_energy_consummed ? m.cpu_energy_consummed.value() : 0._f64); + sum_dram_energy_consummed + += (m.dram_energy_consummed ? m.dram_energy_consummed.value() : 0._f64); + metric_time_all = std::max(metric_time_all, m.wall_time); + } + + SystemMetrics system_metrics; + system_metrics.wall_time = metric_time_all; + system_metrics.rank_energy_consummed = (shamsys::support_rank_energy_consummed()) + ? sum_rank_energy_consummed + : std::optional{}; + system_metrics.gpu_energy_consummed = (shamsys::support_gpu_energy_consummed()) + ? sum_gpu_energy_consummed + : std::optional{}; + system_metrics.cpu_energy_consummed = (shamsys::support_cpu_energy_consummed()) + ? sum_cpu_energy_consummed + : std::optional{}; + system_metrics.dram_energy_consummed = (shamsys::support_dram_energy_consummed()) + ? sum_dram_energy_consummed + : std::optional{}; + + return system_metrics; + } + + FormattedSystemMetrics format_system_metrics(const SystemMetrics &input) { + FormattedSystemMetrics ret{ + shambase::format("{:.1f} s", input.wall_time), + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + }; + + if (input.rank_energy_consummed.has_value()) { + if (input.wall_time > 0._f64 && input.rank_energy_consummed.value() > 0._f64) { + f64 consumed_energy = input.rank_energy_consummed.value(); + f64 power = consumed_energy / input.wall_time; + ret.rank_power = shambase::format("{:.1f} W", power); + ret.rank_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + } else { + ret.rank_power = "N/A"; + ret.rank_energy_consummed = "N/A"; + } + } + if (input.gpu_energy_consummed.has_value()) { + if (input.wall_time > 0._f64 && input.gpu_energy_consummed.value() > 0._f64) { + f64 consumed_energy = input.gpu_energy_consummed.value(); + f64 power = consumed_energy / input.wall_time; + ret.gpu_power = shambase::format("{:.1f} W", power); + ret.gpu_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + } else { + ret.gpu_power = "N/A"; + ret.gpu_energy_consummed = "N/A"; + } + } + if (input.cpu_energy_consummed.has_value()) { + if (input.wall_time > 0._f64 && input.cpu_energy_consummed.value() > 0._f64) { + f64 consumed_energy = input.cpu_energy_consummed.value(); + f64 power = consumed_energy / input.wall_time; + ret.cpu_power = shambase::format("{:.1f} W", power); + ret.cpu_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + } else { + ret.cpu_power = "N/A"; + ret.cpu_energy_consummed = "N/A"; + } + } + if (input.dram_energy_consummed.has_value()) { + if (input.wall_time > 0._f64 && input.dram_energy_consummed.value() > 0._f64) { + f64 consumed_energy = input.dram_energy_consummed.value(); + f64 power = consumed_energy / input.wall_time; + ret.dram_power = shambase::format("{:.1f} W", power); + ret.dram_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + } else { + ret.dram_power = "N/A"; + ret.dram_energy_consummed = "N/A"; + } + } + + return ret; + } } // namespace shamsys From 6bc2845a8039138aa50aa12f74bad7d3cb84f6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 00:49:54 +0100 Subject: [PATCH 07/26] whoopsi --- src/shamsys/src/system_metrics.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index 4c78b0078..8db366a97 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -258,7 +258,7 @@ namespace shamsys { f64 consumed_energy = input.rank_energy_consummed.value(); f64 power = consumed_energy / input.wall_time; ret.rank_power = shambase::format("{:.1f} W", power); - ret.rank_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + ret.rank_energy_consummed = shambase::format("{:.1f} J", consumed_energy); } else { ret.rank_power = "N/A"; ret.rank_energy_consummed = "N/A"; @@ -269,7 +269,7 @@ namespace shamsys { f64 consumed_energy = input.gpu_energy_consummed.value(); f64 power = consumed_energy / input.wall_time; ret.gpu_power = shambase::format("{:.1f} W", power); - ret.gpu_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + ret.gpu_energy_consummed = shambase::format("{:.1f} J", consumed_energy); } else { ret.gpu_power = "N/A"; ret.gpu_energy_consummed = "N/A"; @@ -280,7 +280,7 @@ namespace shamsys { f64 consumed_energy = input.cpu_energy_consummed.value(); f64 power = consumed_energy / input.wall_time; ret.cpu_power = shambase::format("{:.1f} W", power); - ret.cpu_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + ret.cpu_energy_consummed = shambase::format("{:.1f} J", consumed_energy); } else { ret.cpu_power = "N/A"; ret.cpu_energy_consummed = "N/A"; @@ -291,7 +291,7 @@ namespace shamsys { f64 consumed_energy = input.dram_energy_consummed.value(); f64 power = consumed_energy / input.wall_time; ret.dram_power = shambase::format("{:.1f} W", power); - ret.dram_energy_consummed = shambase::format("{:.1f} W", consumed_energy); + ret.dram_energy_consummed = shambase::format("{:.1f} J", consumed_energy); } else { ret.dram_power = "N/A"; ret.dram_energy_consummed = "N/A"; From ee3632a5df41f410804bfda080b6ce9e861281a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 01:03:07 +0100 Subject: [PATCH 08/26] correct max mem usage --- examples/benchmarks/sph_weak_scale_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 9278ae739..7cc979606 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -14,6 +14,8 @@ result_text = "" for N_target_base in [32e6]: + shamrock.backends.reset_mem_info_max() + gamma = 5.0 / 3.0 rho_g = 1 target_tot_u = 1 @@ -117,6 +119,8 @@ model.set_cfl_multipler(1e-4) model.set_cfl_mult_stiffness(1e6) + shamrock.backends.reset_mem_info_max() + # converge smoothing length and compute initial dt model.timestep() From f815d5f529acbdbf3a08f85df4b503c3c34a69c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 11:30:46 +0100 Subject: [PATCH 09/26] try to permute time mesure and power mesure --- src/shamsys/src/system_metrics.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index 8db366a97..9bc539647 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -154,11 +154,12 @@ namespace shamsys { shamcomm::mpi::Barrier(MPI_COMM_WORLD); } auto ret = SystemMetrics{ - shambase::details::get_wtime(), + 0, get_rank_energy_consummed(), get_gpu_energy_consummed(), get_cpu_energy_consummed(), get_dram_energy_consummed()}; + ret.wall_time = shambase::details::get_wtime(); if (barrier) { shamcomm::mpi::Barrier(MPI_COMM_WORLD); } From cdca66abe4e3fa414071628a39fc959534e94eab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 12:11:15 +0100 Subject: [PATCH 10/26] attempt --- src/shammodels/sph/src/Solver.cpp | 9 ++++++--- src/shamsys/src/system_metrics.cpp | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/shammodels/sph/src/Solver.cpp b/src/shammodels/sph/src/Solver.cpp index d783cef92..daf214c95 100644 --- a/src/shammodels/sph/src/Solver.cpp +++ b/src/shammodels/sph/src/Solver.cpp @@ -1580,7 +1580,10 @@ template class Kern> shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() { // has to be first since there is a barrier that may mess the other timers + shambase::Timer timer_system_metrics; + timer_system_metrics.start(); shamsys::SystemMetrics system_metrics_start = shamsys::get_system_metrics(); + timer_system_metrics.end(); sham::MemPerfInfos mem_perf_infos_start = sham::details::get_mem_perf_info(); f64 mpi_timer_start = shamcomm::mpi::get_timer("total"); @@ -2654,9 +2657,9 @@ shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() sham::MemPerfInfos mem_perf_infos_end = sham::details::get_mem_perf_info(); /// must be after the mpi timer to not count the barrier of the system metrics - std::optional rank_energy_consummed_end = shamsys::get_rank_energy_consummed(); - shamsys::SystemMetrics system_metrics_end = shamsys::get_system_metrics(); - shamsys::SystemMetrics system_metrics_delta = system_metrics_end - system_metrics_start; + shamsys::SystemMetrics system_metrics_end = shamsys::get_system_metrics(); + shamsys::SystemMetrics system_metrics_delta = system_metrics_end - system_metrics_start; + system_metrics_delta.wall_time -= timer_system_metrics.elasped_sec(); f64 t_dev_alloc = (mem_perf_infos_end.time_alloc_device - mem_perf_infos_start.time_alloc_device) diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index 9bc539647..ddb5275d5 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -153,13 +153,13 @@ namespace shamsys { if (barrier) { shamcomm::mpi::Barrier(MPI_COMM_WORLD); } - auto ret = SystemMetrics{ - 0, + f64 wall_time = shambase::details::get_wtime(); + auto ret = SystemMetrics{ + wall_time, get_rank_energy_consummed(), get_gpu_energy_consummed(), get_cpu_energy_consummed(), get_dram_energy_consummed()}; - ret.wall_time = shambase::details::get_wtime(); if (barrier) { shamcomm::mpi::Barrier(MPI_COMM_WORLD); } From d811e5f0bd5a42742f5558106702132713b5d118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 13:22:44 +0100 Subject: [PATCH 11/26] attempt --- src/shammodels/sph/src/Solver.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/shammodels/sph/src/Solver.cpp b/src/shammodels/sph/src/Solver.cpp index daf214c95..6e5450863 100644 --- a/src/shammodels/sph/src/Solver.cpp +++ b/src/shammodels/sph/src/Solver.cpp @@ -1580,10 +1580,7 @@ template class Kern> shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() { // has to be first since there is a barrier that may mess the other timers - shambase::Timer timer_system_metrics; - timer_system_metrics.start(); shamsys::SystemMetrics system_metrics_start = shamsys::get_system_metrics(); - timer_system_metrics.end(); sham::MemPerfInfos mem_perf_infos_start = sham::details::get_mem_perf_info(); f64 mpi_timer_start = shamcomm::mpi::get_timer("total"); @@ -2659,7 +2656,6 @@ shammodels::sph::TimestepLog shammodels::sph::Solver::evolve_once() /// must be after the mpi timer to not count the barrier of the system metrics shamsys::SystemMetrics system_metrics_end = shamsys::get_system_metrics(); shamsys::SystemMetrics system_metrics_delta = system_metrics_end - system_metrics_start; - system_metrics_delta.wall_time -= timer_system_metrics.elasped_sec(); f64 t_dev_alloc = (mem_perf_infos_end.time_alloc_device - mem_perf_infos_start.time_alloc_device) From 775d26ac258ebbf8b571c840f94d1d50634df29a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 16 Mar 2026 14:41:50 +0100 Subject: [PATCH 12/26] add duration --- examples/benchmarks/sph_weak_scale_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 7cc979606..5a0bda2a9 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -183,6 +183,8 @@ result_text += f"avg power {key} / step time : {value / metrics_duration} W\n" dic_out[f"power_{key}"] = value / metrics_duration + dic_out["system_metric_duration"] = metrics_duration + result_text += f"dic_out = {dic_out}\n" print("current results:") From 1d533025d5c4061a51fb1a404db8b1c99c99cb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 17 Mar 2026 09:58:48 +0100 Subject: [PATCH 13/26] add support for linked geopm --- src/shamsys/CMakeLists.txt | 16 +++++++++ src/shamsys/src/system_metrics.cpp | 55 ++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/shamsys/CMakeLists.txt b/src/shamsys/CMakeLists.txt index 76aa8d691..f7a631b8a 100644 --- a/src/shamsys/CMakeLists.txt +++ b/src/shamsys/CMakeLists.txt @@ -44,6 +44,22 @@ if(SHAMROCK_USE_CPPTRACE) target_compile_definitions(shamsys PUBLIC -DSHAMROCK_USE_CPPTRACE) endif() +include(CheckIncludeFileCXX) + +check_include_file_cxx(geopm/PlatformIO.hpp HAVE_GEOPM_HEADER) +find_library(GEOPM_LIB geopmd) + +if(HAVE_GEOPM_HEADER AND GEOPM_LIB) + option(SHAMROCK_USE_GEOPM "use geopm tooling" Off) + + message("-- SHAMROCK_USE_GEOPM is set to ${SHAMROCK_USE_GEOPM}") + + if(SHAMROCK_USE_GEOPM) + target_link_libraries(shamsys PUBLIC ${GEOPM_LIB}) + target_compile_definitions(shamsys PUBLIC SHAMROCK_USE_GEOPM) + endif() +endif() + target_include_directories(shamsys PUBLIC "$" "$") diff --git a/src/shamsys/src/system_metrics.cpp b/src/shamsys/src/system_metrics.cpp index d4096af5a..b92581960 100644 --- a/src/shamsys/src/system_metrics.cpp +++ b/src/shamsys/src/system_metrics.cpp @@ -22,8 +22,52 @@ #include "shamsys/system_metrics.hpp" #include +#ifdef SHAMROCK_USE_GEOPM + #include + #include +#endif + namespace shamsys { +#ifdef SHAMROCK_USE_GEOPM + + class AuroraSystemMetricReporterLinked : public ISystemMetricReporter { + public: + std::optional get_rank_energy_consummed() override { + if (shamcomm::is_main_node_rank()) { + return geopm::platform_io().read_signal("BOARD_ENERGY", GEOPM_DOMAIN_BOARD, 0); + } + return std::nullopt; + } + + std::optional get_gpu_energy_consummed() override { + if (shamcomm::is_main_node_rank()) { + return geopm::platform_io().read_signal("GPU_ENERGY", GEOPM_DOMAIN_BOARD, 0); + } + return std::nullopt; + } + + std::optional get_cpu_energy_consummed() override { + if (shamcomm::is_main_node_rank()) { + return geopm::platform_io().read_signal("CPU_ENERGY", GEOPM_DOMAIN_BOARD, 0); + } + return std::nullopt; + } + + std::optional get_dram_energy_consummed() override { + if (shamcomm::is_main_node_rank()) { + return geopm::platform_io().read_signal("DRAM_ENERGY", GEOPM_DOMAIN_BOARD, 0); + } + return std::nullopt; + } + + bool support_rank_energy_consummed() override { return true; } + bool support_gpu_energy_consummed() override { return true; } + bool support_cpu_energy_consummed() override { return true; } + bool support_dram_energy_consummed() override { return true; } + }; +#endif + class AuroraSystemMetricReporter : public ISystemMetricReporter { public: std::optional get_rank_energy_consummed() override { @@ -112,14 +156,18 @@ namespace shamsys { std::unique_ptr make_reporter(std::string_view reporter_name) { if (reporter_name == "aurora") { return std::make_unique(); +#ifdef SHAMROCK_USE_GEOPM + } else if (reporter_name == "aurora-linked") { + return std::make_unique(); +#endif } else if (reporter_name == "intel-rapl") { return std::make_unique(); } else if (reporter_name == "noop" || reporter_name == "none" || reporter_name == "") { return std::make_unique(); } else { throw shambase::make_except_with_loc(shambase::format( - "Unknown system metrics reporter: {}, valid reporters are: aurora, intel-rapl, " - "noop", + "Unknown system metrics reporter: {}, valid reporters are: aurora, aurora-linked, " + "intel-rapl, noop", reporter_name)); } return std::make_unique(); @@ -150,6 +198,9 @@ namespace shamsys { } SystemMetrics get_system_metrics(bool barrier) { + // Ensure that barriers aren't used if there is no reporter + barrier = barrier && has_reporter(); + if (barrier) { shamcomm::mpi::Barrier(MPI_COMM_WORLD); } From 9011b2ad163790489d574400f96310973f45f2bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 17 Mar 2026 10:00:41 +0100 Subject: [PATCH 14/26] enable geopm on aurora --- env/machine/argonne/aurora/env_oneapi.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/env/machine/argonne/aurora/env_oneapi.sh b/env/machine/argonne/aurora/env_oneapi.sh index c59bc8eef..65bb36f34 100644 --- a/env/machine/argonne/aurora/env_oneapi.sh +++ b/env/machine/argonne/aurora/env_oneapi.sh @@ -21,6 +21,7 @@ function shamconfigure { -DCMAKE_BUILD_TYPE="${SHAMROCK_BUILD_TYPE}" \ -DBUILD_TEST=Yes \ -DSHAMROCK_USE_CPPTRACE=Yes \ + -DSHAMROCK_USE_GEOPM=Yes \ "${CMAKE_OPT[@]}" } From 9dc904d977a5cc139e20a0d9c33001fd90f29ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 17 Mar 2026 11:12:42 +0100 Subject: [PATCH 15/26] shut up warnings --- src/pylib/shamrock/utils/plot/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pylib/shamrock/utils/plot/__init__.py b/src/pylib/shamrock/utils/plot/__init__.py index a0e302908..f5ef6899a 100644 --- a/src/pylib/shamrock/utils/plot/__init__.py +++ b/src/pylib/shamrock/utils/plot/__init__.py @@ -15,7 +15,7 @@ _HAS_MATPLOTLIB = True except ImportError: _HAS_MATPLOTLIB = False - print("Warning: matplotlib is not installed, some Shamrock functions will not be available") + # print("Warning: matplotlib is not installed, some Shamrock functions will not be available") try: from PIL import Image @@ -23,7 +23,7 @@ _HAS_PIL = True except ImportError: _HAS_PIL = False - print("Warning: PIL is not installed, some Shamrock functions will not be available") + # print("Warning: PIL is not installed, some Shamrock functions will not be available") if _HAS_MATPLOTLIB and _HAS_PIL: __all__.append("show_image_sequence") From dcd76b050e596cfbc4bb1ac9af39c42effd64ec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Thu, 19 Mar 2026 17:34:31 +0100 Subject: [PATCH 16/26] better ? --- examples/benchmarks/sph_weak_scale_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 5a0bda2a9..69767b5cc 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -13,7 +13,7 @@ result_text = "" -for N_target_base in [32e6]: +for N_target_base in [1e6]: shamrock.backends.reset_mem_info_max() gamma = 5.0 / 3.0 @@ -25,7 +25,7 @@ compute_multiplier = shamrock.sys.world_size() # compute_multiplier = 12 - scheduler_split_val = int(2e7) + scheduler_split_val = int(1e5) scheduler_merge_val = int(1) N_target = N_target_base * compute_multiplier @@ -102,7 +102,7 @@ model.set_value_in_a_box("uint", "f64", 0, bmin, bmax) - rinj = 8 * dr + rinj = 16 * dr u_inj = 1 model.add_kernel_value("uint", "f64", u_inj, (0, 0, 0), rinj) @@ -116,7 +116,7 @@ model.set_cfl_cour(0.1) model.set_cfl_force(0.1) - model.set_cfl_multipler(1e-4) + model.set_cfl_multipler(1e-6) model.set_cfl_mult_stiffness(1e6) shamrock.backends.reset_mem_info_max() From 2ebb27a63e682f266c56a666159ab7d887d81c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Fri, 20 Mar 2026 00:04:32 +0100 Subject: [PATCH 17/26] more debug infos --- src/shamalgs/src/collective/sparse_exchange.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/shamalgs/src/collective/sparse_exchange.cpp b/src/shamalgs/src/collective/sparse_exchange.cpp index 6e6625544..970d35d05 100644 --- a/src/shamalgs/src/collective/sparse_exchange.cpp +++ b/src/shamalgs/src/collective/sparse_exchange.cpp @@ -56,6 +56,7 @@ namespace shamalgs::collective { /// fetch u64_2 from global message data std::vector fetch_global_message_data( const std::vector &messages_send) { + __shamrock_stack_entry(); std::vector local_data = std::vector(messages_send.size()); @@ -84,6 +85,7 @@ namespace shamalgs::collective { /// decode message to get message std::vector decode_all_message(const std::vector &global_data) { + __shamrock_stack_entry(); std::vector message_all(global_data.size()); for (u64 i = 0; i < global_data.size(); i++) { message_all[i] = unpack(global_data[i]); @@ -94,6 +96,7 @@ namespace shamalgs::collective { /// compute message tags void compute_tags(std::vector &message_all) { + __shamrock_stack_entry(); std::vector tag_map(shamcomm::world_size(), 0); From 8f7087bd5f7ac93b28fd9b0532c9c5764c2d3541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Fri, 20 Mar 2026 00:20:13 +0100 Subject: [PATCH 18/26] dammit --- examples/benchmarks/sph_weak_scale_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 69767b5cc..74944a270 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -13,7 +13,7 @@ result_text = "" -for N_target_base in [1e6]: +for N_target_base in [32e6]: shamrock.backends.reset_mem_info_max() gamma = 5.0 / 3.0 From 36b2516fc5624e661c024fee9af511b8cbf62e45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Fri, 20 Mar 2026 00:20:58 +0100 Subject: [PATCH 19/26] dammit --- examples/benchmarks/sph_weak_scale_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 74944a270..c21bea71e 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -25,7 +25,7 @@ compute_multiplier = shamrock.sys.world_size() # compute_multiplier = 12 - scheduler_split_val = int(1e5) + scheduler_split_val = int(2e7) scheduler_merge_val = int(1) N_target = N_target_base * compute_multiplier From 5e2d837da403dd12356eaef780d3ce8e11ce0ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sat, 21 Mar 2026 00:11:14 +0100 Subject: [PATCH 20/26] more steps --- examples/benchmarks/sph_weak_scale_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index c21bea71e..4e9ba9dfb 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -129,7 +129,7 @@ res_cnts = [] res_system_metrics = [] - for i in range(5): + for i in range(10): shamrock.sys.mpi_barrier() model.timestep() From e698051a384dc7aca4f506480d9e3f4b795ad429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Sun, 22 Mar 2026 21:56:36 +0100 Subject: [PATCH 21/26] better ? --- examples/benchmarks/sph_weak_scale_test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/benchmarks/sph_weak_scale_test.py b/examples/benchmarks/sph_weak_scale_test.py index 4e9ba9dfb..a71226a6b 100644 --- a/examples/benchmarks/sph_weak_scale_test.py +++ b/examples/benchmarks/sph_weak_scale_test.py @@ -63,7 +63,6 @@ ) cfg.set_boundary_periodic() cfg.set_eos_adiabatic(gamma) - cfg.set_max_neigh_cache_size(int(100e9)) cfg.print_status() model.set_solver_config(cfg) model.init_scheduler(scheduler_split_val, scheduler_merge_val) @@ -116,9 +115,6 @@ model.set_cfl_cour(0.1) model.set_cfl_force(0.1) - model.set_cfl_multipler(1e-6) - model.set_cfl_mult_stiffness(1e6) - shamrock.backends.reset_mem_info_max() # converge smoothing length and compute initial dt @@ -131,6 +127,9 @@ for i in range(10): shamrock.sys.mpi_barrier() + + # To replay the same step + model.set_next_dt(0.0) model.timestep() tmp_res_rate, tmp_res_cnt, tmp_system_metrics = ( From 907933994822f927f124ebebd51389f96cd1f9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Mon, 23 Mar 2026 00:00:17 +0100 Subject: [PATCH 22/26] more omp --- src/shamrock/include/shamrock/scheduler/SerialPatchTree.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/shamrock/include/shamrock/scheduler/SerialPatchTree.hpp b/src/shamrock/include/shamrock/scheduler/SerialPatchTree.hpp index 8449755de..b2decdfb4 100644 --- a/src/shamrock/include/shamrock/scheduler/SerialPatchTree.hpp +++ b/src/shamrock/include/shamrock/scheduler/SerialPatchTree.hpp @@ -269,10 +269,13 @@ class SerialPatchTree { sycl::queue &queue, shamrock::patch::PatchField pfield, Func &&reducer) { + __shamrock_stack_entry(); + shamrock::patch::PatchtreeField ptfield; ptfield.allocate(get_element_count()); { + __shamrock_stack_entry(); sycl::host_accessor lpid{ shambase::get_check_ref(linked_patch_ids_buf), sycl::read_only}; sycl::host_accessor tree_field{ @@ -280,6 +283,8 @@ class SerialPatchTree { // init reduction std::unordered_map &idp_to_gid = sched.patch_list.id_patch_to_global_idx; + +#pragma omp parallel for for (u64 idx = 0; idx < get_element_count(); idx++) { tree_field[idx] = (lpid[idx] != u64_max) ? pfield.get(lpid[idx]) : T(); } From 05bf4f70a1eba58135c51d62362605ec9528a350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 24 Mar 2026 15:26:30 +0100 Subject: [PATCH 23/26] print --- src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp | 4 ++-- src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp | 5 ++++- src/shammodels/sph/src/BasicSPHGhosts.cpp | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp b/src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp index 070fc6f85..531a6e750 100644 --- a/src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp +++ b/src/shammodels/gsph/src/modules/GSPHGhostHandler.cpp @@ -344,13 +344,13 @@ auto GSPHGhostHandler::gen_id_table_interfaces(GeneratorMap &&gen) for (auto &[k, v] : send_count_stats) { if (v > 0.2) { - warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v); + // warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v); has_warn = true; } } if (has_warn && shamcomm::world_rank() == 0) { - warn_log = "\n This can lead to high mpi " + warn_log = "\n High interf/patch volume. This can lead to high mpi " "overhead, try to increase the patch split crit" + warn_log; } diff --git a/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp b/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp index af5f17a9b..64f7b0ed4 100644 --- a/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp +++ b/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp @@ -86,7 +86,10 @@ namespace shammodels::sph { PatchField interactR_patch = sched.map_owned_to_patch_field_simple( [&](const Patch p, PatchDataLayer &pdat) -> flt { if (!pdat.is_empty()) { - return pdat.get_field(ihpart).compute_max() * h_evol_max * Rkern; + auto tmp = pdat.get_field(ihpart).compute_max() * h_evol_max * Rkern; + shamcomm::logs::raw_ln( + shambase::format("patch {}, Rghost = {}", p.id_patch, tmp)); + return tmp; } else { return shambase::VectorProperties::get_min(); } diff --git a/src/shammodels/sph/src/BasicSPHGhosts.cpp b/src/shammodels/sph/src/BasicSPHGhosts.cpp index 6b6357761..8f5e1a164 100644 --- a/src/shammodels/sph/src/BasicSPHGhosts.cpp +++ b/src/shammodels/sph/src/BasicSPHGhosts.cpp @@ -560,13 +560,13 @@ auto BasicSPHGhostHandler::gen_id_table_interfaces(GeneratorMap &&gen) for (auto &[k, v] : send_count_stats) { if (v > 0.2) { - warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v); + // warn_log += shambase::format("\n patch {} high interf/patch volume: {}", k, v); has_warn = true; } } if (has_warn && shamcomm::world_rank() == 0) { - warn_log = "\n This can lead to high mpi " + warn_log = "\n High interf/patch volume. This can lead to high mpi " "overhead, try to increase the patch split crit" + warn_log; } From b6228ead81a7410022330f97742ef02e4e6c6b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 24 Mar 2026 15:46:45 +0100 Subject: [PATCH 24/26] print --- .../src/solvergraph/ExchangeGhostLayer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp index c87577db9..0aa22f670 100644 --- a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp +++ b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp @@ -30,6 +30,21 @@ void shamrock::solvergraph::ExchangeGhostLayer::_impl_evaluate_internal() { auto &ghost_layer = edges.ghost_layer; const shamrock::solvergraph::RankGetter &rank_owner = edges.rank_owner; + std::unordered_map msg_sizes_send; + + std::stringstream ss; + ss << "Rank " << shamcomm::world_rank() << " is sending " + << ghost_layer.patchdatas.get_native().size() << " patches sizes:"; + for (auto &pdat : ghost_layer.patchdatas.get_native()) { + // ss << pdat.first.first << " " << pdat.first.second << " " << pdat.second.get_obj_cnt() << + // "\n"; + msg_sizes_send[rank_owner.get_rank_owner(pdat.first.first)] += pdat.second.get_obj_cnt(); + } + for (auto &[rank, size] : msg_sizes_send) { + ss << "\n" << "msg size to rank " << rank << " is " << size; + } + shamcomm::logs::raw_ln(ss.str()); + shambase::DistributedDataShared recv_dat; shamalgs::collective::serialize_sparse_comm( From e0a117d03d1b2c1ee7ad42ffde20b680ee62b64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 24 Mar 2026 16:59:56 +0100 Subject: [PATCH 25/26] print --- src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp index 0aa22f670..afd80fc5a 100644 --- a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp +++ b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp @@ -31,17 +31,22 @@ void shamrock::solvergraph::ExchangeGhostLayer::_impl_evaluate_internal() { const shamrock::solvergraph::RankGetter &rank_owner = edges.rank_owner; std::unordered_map msg_sizes_send; + std::unordered_map msg_sizes_max_send; std::stringstream ss; ss << "Rank " << shamcomm::world_rank() << " is sending " << ghost_layer.patchdatas.get_native().size() << " patches sizes:"; for (auto &pdat : ghost_layer.patchdatas.get_native()) { + u64 key = rank_owner.get_rank_owner(pdat.first.first); // ss << pdat.first.first << " " << pdat.first.second << " " << pdat.second.get_obj_cnt() << // "\n"; - msg_sizes_send[rank_owner.get_rank_owner(pdat.first.first)] += pdat.second.get_obj_cnt(); + msg_sizes_send[key] += pdat.second.get_obj_cnt(); + msg_sizes_max_send[key] = std::max(msg_sizes_max_send[key], u64(pdat.second.get_obj_cnt())); } for (auto &[rank, size] : msg_sizes_send) { - ss << "\n" << "msg size to rank " << rank << " is " << size; + ss << "\n" + << "msg size from rank " << rank << " is " << size << " max is " + << msg_sizes_max_send[rank]; } shamcomm::logs::raw_ln(ss.str()); From 6ec6ec7fda3bb0886be23af8ccd597e4ea8e549c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20David--Cl=C3=A9ris?= Date: Tue, 24 Mar 2026 17:36:50 +0100 Subject: [PATCH 26/26] print --- src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp | 4 ++++ src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp b/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp index 64f7b0ed4..80dacb3af 100644 --- a/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp +++ b/src/shammodels/sph/include/shammodels/sph/SPHUtilities.hpp @@ -86,10 +86,14 @@ namespace shammodels::sph { PatchField interactR_patch = sched.map_owned_to_patch_field_simple( [&](const Patch p, PatchDataLayer &pdat) -> flt { if (!pdat.is_empty()) { +#if false auto tmp = pdat.get_field(ihpart).compute_max() * h_evol_max * Rkern; shamcomm::logs::raw_ln( shambase::format("patch {}, Rghost = {}", p.id_patch, tmp)); return tmp; +#else + return pdat.get_field(ihpart).compute_max() * h_evol_max * Rkern; +#endif } else { return shambase::VectorProperties::get_min(); } diff --git a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp index afd80fc5a..5edb41815 100644 --- a/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp +++ b/src/shamrock/src/solvergraph/ExchangeGhostLayer.cpp @@ -30,6 +30,7 @@ void shamrock::solvergraph::ExchangeGhostLayer::_impl_evaluate_internal() { auto &ghost_layer = edges.ghost_layer; const shamrock::solvergraph::RankGetter &rank_owner = edges.rank_owner; +#if false std::unordered_map msg_sizes_send; std::unordered_map msg_sizes_max_send; @@ -49,6 +50,7 @@ void shamrock::solvergraph::ExchangeGhostLayer::_impl_evaluate_internal() { << msg_sizes_max_send[rank]; } shamcomm::logs::raw_ln(ss.str()); +#endif shambase::DistributedDataShared recv_dat;