From f804045966607555fd626e95c50a161fa6305543 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 1 Dec 2025 00:03:24 +0000 Subject: [PATCH 1/6] naive concept for dynamic domain update in pgen --- pgens/shock/pgen.hpp | 91 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/pgens/shock/pgen.hpp b/pgens/shock/pgen.hpp index fc579777d..3d374470b 100644 --- a/pgens/shock/pgen.hpp +++ b/pgens/shock/pgen.hpp @@ -323,5 +323,96 @@ namespace user { inj_box); } }; + + // update domain if needed + // todo: implement in main code like CustomPostStep + void CustomUpdateDomain(const SimulationParams& params, Domain& local_domain, Domain& global_domain) { + + // todo: check if update is needed at this time step + + // compute size of local and global domains + const auto local_size = local_domain->mesh.n_active()[in::x1]; + const auto local_offset = local_domain->offset_ncells()[in::x1]; + const auto global_size = global_domain->mesh.n_active()[in::x1]; + const auto ni2 = global_domain->mesh.n_active(in::x2); + + // compute number density field + // todo: define buffers properly + auto scatter_buff = Kokkos::Experimental::create_scatter_view(buffer); + for (const auto& sp : specs) { + auto& prtl_spec = prtl_species[sp - 1]; + // clang-format off + Kokkos::parallel_for( + "ComputeMoments", + prtl_spec.rangeActiveParticles(), + kernel::ParticleMoments_kernel({}, scatter_buff, buff_idx, + prtl_spec.i1, prtl_spec.i2, prtl_spec.i3, + prtl_spec.dx1, prtl_spec.dx2, prtl_spec.dx3, + prtl_spec.ux1, prtl_spec.ux2, prtl_spec.ux3, + prtl_spec.phi, prtl_spec.weight, prtl_spec.tag, + prtl_spec.mass(), prtl_spec.charge(), + false, + mesh.metric, mesh.flds_bc(), + ni2, ONE, 0)); + // clang-format on + } + Kokkos::Experimental::contribute(buffer, scatter_buff); + + // compute particle profile along x1 + index_t Nx[global_size] = { 0 }; + + for (auto i = 0; i < local_size; ++i) { + for (auto d = 0u; d < M::Dim; ++d) { + // todo: sum over other dimensions + 
Nx[local_offset + i] += buffer(i, j, buff_idx); + } + } + // todo: perform MPI allreduce to get global Nx + index_t Nx_global[global_size] = { 0 }; + MPI_ALLREDUCE(MPI_SUM, Nx, Nx_global, global_size, MPI_TYPE_INT_T, MPI_COMM_WORLD); + + // compute mean particle load + int total_N = 0; + for (auto i = 0; i < global_size; ++i) { + total_N += Nx_global[i]; + } + + // get threshold number of particles + auto N_1_ranks += global_domain.ndomains_per_dim()[d] + auto N_23_ranks = 0; + for (auto d = 1u; d < M::Dim; ++d) { + N_23ranks += global_domain.ndomains_per_dim()[d]; + } + // todo: as parameter + real_t tolerance = 1.05; // allow 5% tolerance + index_t target_N = total_N / (N_1_ranks + N_23ranks) * tolerance; + // compute new domain boundaries + index_t bound_start[N_ranks]; + index_t bound_end[N_ranks]; + + bound_start[0] = 0; + for (auto r = 0; r < N_ranks-1; ++r) { + real_t cum_N = 0; + for (auto i = bound_start[r]; i < global_size; ++i) { + cum_N += static_cast(Nx_global[i]) / N_23_ranks; + if (cum_N >= target_N) { + bound_end[r] = i; + // check if we have more than 5 cells + index_t Ncells = bound_end[r] - bound_start[r] + 1; + if (Ncells < 5) { + bound_end[r] = bound_start[r] + 5; + } + bound_start[r+1] = bound_end[r]+1; + break; + } + } + } + bound_end[N_ranks-1] = global_size - 1; + + // todo: bounds in other directions + + // todo: reshuffling of particles according to new domain boundaries + + } } // namespace user #endif From 80c4a3a108ec8a274a41f8b4b8e25821e62c3cde Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Sat, 21 Feb 2026 19:35:43 +0000 Subject: [PATCH 2/6] alternative density construction method on particle basis --- pgens/shock/pgen.hpp | 169 +++++++++++++++++++++++++++++++------------ 1 file changed, 121 insertions(+), 48 deletions(-) diff --git a/pgens/shock/pgen.hpp b/pgens/shock/pgen.hpp index 0ceb40782..f817a54ff 100644 --- a/pgens/shock/pgen.hpp +++ b/pgens/shock/pgen.hpp @@ -330,72 +330,132 @@ namespace user { // update domain if needed 
// todo: implement in main code like CustomPostStep - void CustomUpdateDomain(const SimulationParams& params, Domain& local_domain, Domain& global_domain) { + void CustomUpdateDomain(const SimulationParams& params, Domain& local_domain, Domain& new_domain, Domain& global_domain) { - // todo: check if update is needed at this time step + // check if the injector should be active + // ToDo: read parameter into global variable + if (step % params.template get("setup.domain_decomposition_frequency") != 0) { + return; + } // compute size of local and global domains const auto local_size = local_domain->mesh.n_active()[in::x1]; const auto local_offset = local_domain->offset_ncells()[in::x1]; const auto global_size = global_domain->mesh.n_active()[in::x1]; - const auto ni2 = global_domain->mesh.n_active(in::x2); - - // compute number density field - // todo: define buffers properly - auto scatter_buff = Kokkos::Experimental::create_scatter_view(buffer); - for (const auto& sp : specs) { - auto& prtl_spec = prtl_species[sp - 1]; - // clang-format off - Kokkos::parallel_for( - "ComputeMoments", - prtl_spec.rangeActiveParticles(), - kernel::ParticleMoments_kernel({}, scatter_buff, buff_idx, - prtl_spec.i1, prtl_spec.i2, prtl_spec.i3, - prtl_spec.dx1, prtl_spec.dx2, prtl_spec.dx3, - prtl_spec.ux1, prtl_spec.ux2, prtl_spec.ux3, - prtl_spec.phi, prtl_spec.weight, prtl_spec.tag, - prtl_spec.mass(), prtl_spec.charge(), - false, - mesh.metric, mesh.flds_bc(), - ni2, ONE, 0)); - // clang-format on - } - Kokkos::Experimental::contribute(buffer, scatter_buff); - // compute particle profile along x1 - index_t Nx[global_size] = { 0 }; + // global number density field along x1 + index_t Nx_global[global_size] = { 0 }; + + /* + Option 1: Use built-in particle counting kernel to compute number density field and perform MPI allreduce to get global number density field. 
+ Then compute new domain boundaries based on the global number density field and perform reshuffling of particles according to new domain boundaries. + */ + // tuple_t local_cells{ 0 }, global_x_min { 0 }, global_x_max { 0 }; + // for (auto d = 0; d < M::Dim; ++d) { + // local_cells[d] = local_domain->mesh.n_active(d); + // global_x_min[d] = local_domain->offset_ncells(d); + // global_x_max[d] = local_domain->mesh.n_active(d) + local_domain->offset_ncells(d); + // } + + // // compute number density field + // array_t NumberOfParticles("num_particles", local_cells); + // auto scatter_buff = Kokkos::Experimental::create_scatter_view(NumberOfParticles); + // for (const auto& sp : specs) { + // auto& prtl_spec = prtl_species[sp - 1]; + // // clang-format off + // Kokkos::parallel_for( + // "ComputeMoments", + // prtl_spec.rangeActiveParticles(), + // kernel::ParticleMoments_kernel({}, scatter_buff, buff_idx, + // prtl_spec.i1, prtl_spec.i2, prtl_spec.i3, + // prtl_spec.dx1, prtl_spec.dx2, prtl_spec.dx3, + // prtl_spec.ux1, prtl_spec.ux2, prtl_spec.ux3, + // prtl_spec.phi, prtl_spec.weight, prtl_spec.tag, + // prtl_spec.mass(), prtl_spec.charge(), + // false, + // mesh.metric, mesh.flds_bc(), + // ni2, ONE, 0)); + // // clang-format on + // } + // Kokkos::Experimental::contribute(NumberOfParticles, scatter_buff); + + // // compute particle profile along x1 + // index_t Nx[global_size] = { 0 }; + + // for (auto i = 0; i < local_size; ++i) { + // for (auto d = 0u; d < M::Dim; ++d) { + // // todo: sum over other dimensions + // Nx[local_offset + i] += buffer(i, j, buff_idx); + // } + // } + // // todo: perform MPI allreduce to get global Nx + // index_t Nx_global[global_size] = { 0 }; + // MPI_ALLREDUCE(MPI_SUM, Nx, Nx_global, global_size, MPI_TYPE_INT_T, MPI_COMM_WORLD); + + /* + Option 2: Loop over particles and compute number density field manually. Then perform MPI allreduce to get global number density field. 
+ Then compute new domain boundaries based on the global number density field and perform reshuffling of particles according to new domain boundaries. + */ + // store total number of particles in each cell in x1 direction + array_t NumberOfParticles("num_particles", local_size); + // loop over particle species + for (auto s { 0u }; s < 2; ++s) { + // get particle properties + auto& species = local_domain.species[s]; + auto i1 = species.i1; + auto tag = species.tag; + + auto NumParts_scatter = Kokkos::Experimental::create_scatter_view( + NumberOfParticles); + Kokkos::parallel_for( + "ComputePPC", + species.rangeActiveParticles(), + Lambda(index_t p) { + if (tag(p) != ParticleTag::alive) { + return; + } + auto NumPart_acc = NumParts_scatter.access(); + NumPart_acc(i1(p)) += 1; + }); + Kokkos::Experimental::contribute(NumberOfParticles, NumParts_scatter); + } + + // construct contribution to global number density field along x1 direction + index_t Nx_local[global_size] = { 0 }; for (auto i = 0; i < local_size; ++i) { - for (auto d = 0u; d < M::Dim; ++d) { - // todo: sum over other dimensions - Nx[local_offset + i] += buffer(i, j, buff_idx); - } + Nx_local[i+local_offset] = NumberOfParticles(i); } - // todo: perform MPI allreduce to get global Nx - index_t Nx_global[global_size] = { 0 }; - MPI_ALLREDUCE(MPI_SUM, Nx, Nx_global, global_size, MPI_TYPE_INT_T, MPI_COMM_WORLD); - + // sum up all ranks + MPI_ALLREDUCE(MPI_SUM, Nx_local, Nx_global, global_size, MPI_TYPE_INT_T, MPI_COMM_WORLD); + // compute mean particle load - int total_N = 0; + npart_t total_N = 0; for (auto i = 0; i < global_size; ++i) { total_N += Nx_global[i]; } // get threshold number of particles - auto N_1_ranks += global_domain.ndomains_per_dim()[d] + auto N_1_ranks = global_domain.ndomains_per_dim()[in::x1]; auto N_23_ranks = 0; for (auto d = 1u; d < M::Dim; ++d) { - N_23ranks += global_domain.ndomains_per_dim()[d]; + N_23_ranks += global_domain.ndomains_per_dim()[d]; + } + + // maximum allowed load 
imbalance + real_t tolerance = params.load_balancing_tolerance; + index_t target_N = total_N / (N_1_ranks + N_23_ranks) * tolerance; + // compute new domain boundaries in x1 direction + index_t bound_start[N_1_ranks]; + index_t bound_end[N_1_ranks]; + + // overwrite N_23_ranks to be 1 if it's initally 0 to avoid division by zero + if (N_23_ranks == 0) { + N_23_ranks = 1; } - // todo: as parameter - real_t tolerance = 1.05; // allow 5% tolerance - index_t target_N = total_N / (N_1_ranks + N_23ranks) * tolerance; - // compute new domain boundaries - index_t bound_start[N_ranks]; - index_t bound_end[N_ranks]; bound_start[0] = 0; - for (auto r = 0; r < N_ranks-1; ++r) { + for (auto r = 0; r < N_1_ranks-1; ++r) { real_t cum_N = 0; for (auto i = bound_start[r]; i < global_size; ++i) { cum_N += static_cast(Nx_global[i]) / N_23_ranks; @@ -411,9 +471,22 @@ namespace user { } } } - bound_end[N_ranks-1] = global_size - 1; - - // todo: bounds in other directions + // rest of the domain goes to the last rank + bound_end[N_1_ranks-1] = global_size - 1; + + // compute maximum load imbalance after reshuffling + index_t max_N = 0; + for (auto r = 0; r < N_1_ranks; ++r) { + index_t N_r = 0; + for (auto i = bound_start[r]; i < bound_end[r]; ++i) { + N_r += Nx_global[i]; + } + if (N_r > max_N) { + max_N = N_r; + } + } + real_t imbalance = static_cast(max_N) / (total_N / N_1_ranks); + // todo: reshuffling of particles according to new domain boundaries From 188f6affaabe129482d5cb812c1d65b92fc31815 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Sat, 21 Mar 2026 19:46:12 +0000 Subject: [PATCH 3/6] first attempt at general dynamic load balancing --- input.example.toml | 23 ++ src/engines/engine.hpp | 10 +- src/framework/CMakeLists.txt | 1 + src/framework/domain/metadomain.h | 2 + src/framework/domain/metadomain_lb.cpp | 352 ++++++++++++++++++++++++ src/framework/parameters/algorithms.cpp | 1 + src/framework/parameters/algorithms.h | 1 + src/framework/parameters/grid.cpp | 38 +++ 
src/framework/parameters/grid.h | 5 + 9 files changed, 432 insertions(+), 1 deletion(-) create mode 100644 src/framework/domain/metadomain_lb.cpp diff --git a/input.example.toml b/input.example.toml index f890143a6..5d0af7f55 100644 --- a/input.example.toml +++ b/input.example.toml @@ -28,6 +28,29 @@ # @example: [2, 2, 2] (total of 8 domains) decomposition = "" + [simulation.domain.load_balancing] + # Enable load balancing + # @type: bool + # @default: false + enable = "" + # Interval between load balancing steps + # @type: int + # @default: 10 + interval = "" + # Dimensions to balance + # @type: array + # @default: [] + # @note: Empty array means balance all dimensions + dimensions = "" + # Maximum number of iterations + # @type: int + # @default: 1 + max_iterations = "" + # Tolerance for load balancing + # @type: float + # @default: 0.1 + tolerance = "" + [grid] # Spatial resolution of the grid # @required diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index 597de5171..bce033e6d 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -250,7 +250,7 @@ namespace ntt { "ParticleBoundaries", "Communications", "Injector", "Custom", "ParticleSort", "Output", - "Checkpoint" }, + "Checkpoint", "LoadBalancing" }, []() { Kokkos::fence(); }, @@ -285,6 +285,14 @@ namespace ntt { time += dt; ++step; + const auto lb_enable = m_params.template get("simulation.domain.load_balancing.enable"); + const auto lb_interval = m_params.template get("simulation.domain.load_balancing.interval"); + if (lb_enable && lb_interval > 0 && step % lb_interval == 0) { + timers.start("LoadBalancing"); + m_metadomain.BalanceLoad(m_params); + timers.stop("LoadBalancing"); + } + auto print_output = false; auto print_checkpoint = false; #if defined(OUTPUT_ENABLED) diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index df2bf4c69..826ce2165 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -56,6 +56,7 @@ set(SOURCES 
${SRC_DIR}/domain/metadomain.cpp ${SRC_DIR}/domain/metadomain_comm.cpp ${SRC_DIR}/domain/metadomain_sort.cpp + ${SRC_DIR}/domain/metadomain_lb.cpp ${SRC_DIR}/domain/metadomain_stats.cpp ${SRC_DIR}/containers/particles.cpp ${SRC_DIR}/containers/particles_sort.cpp diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index d04433fcf..29154c35d 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -106,6 +106,8 @@ namespace ntt { const SimulationParams&, Domain&) const; + void BalanceLoad(const SimulationParams&); + /** * @param global_ndomains total number of domains * @param global_decomposition decomposition of the global domain diff --git a/src/framework/domain/metadomain_lb.cpp b/src/framework/domain/metadomain_lb.cpp new file mode 100644 index 000000000..6df3c6b73 --- /dev/null +++ b/src/framework/domain/metadomain_lb.cpp @@ -0,0 +1,352 @@ +#include "framework/domain/metadomain.h" +#include "framework/domain/domain.h" +#include "framework/specialization_registry.h" +#include "arch/mpi_tags.h" +#include "utils/numeric.h" +#include "utils/reporter.h" +#include "framework/parameters/parameters.h" + +#include +#include +#include + +#if defined(MPI_ENABLED) + #include +#endif + +namespace ntt { + + // Load balancing helper based on the 1D julia model + bool negotiate_boundary_single(const std::vector& N, std::vector& bounds, int i, int n_ghost, double tol) { + int left_start = bounds[i]; + int mid = bounds[i+1]; + int right_end = bounds[i+2]; + + double w1 = 0; + for (int k = left_start; k < mid; ++k) w1 += N[k]; + double w2 = 0; + for (int k = mid; k < right_end; ++k) w2 += N[k]; + + if (std::abs(w1 - w2) <= tol) return false; + + int L_min = 2 * n_ghost + 1; + double best_diff = std::abs(w1 - w2); + int best_shift = 0; + + if (w1 > w2) { + int max_shift = (mid - left_start) - L_min; + double current_transfer = 0.0; + for (int s = 1; s <= max_shift; ++s) { + current_transfer += N[mid - s]; + double 
new_diff = std::abs((w1 - current_transfer) - (w2 + current_transfer)); + if (new_diff < best_diff) { + best_diff = new_diff; + best_shift = s; + } else { + break; + } + } + if (best_shift > 0) { + bounds[i+1] -= best_shift; + return true; + } + } else if (w2 > w1) { + int max_shift = (right_end - mid) - L_min; + double current_transfer = 0.0; + for (int s = 1; s <= max_shift; ++s) { + current_transfer += N[mid + s - 1]; // mid is inclusive for right domain. + double new_diff = std::abs((w1 + current_transfer) - (w2 - current_transfer)); + if (new_diff < best_diff) { + best_diff = new_diff; + best_shift = s; + } else { + break; + } + } + if (best_shift > 0) { + bounds[i+1] += best_shift; + return true; + } + } + return false; + } + + template + requires IsCompatibleWithMetadomain + void Metadomain::BalanceLoad(const SimulationParams& params) { + const auto lb_dims = params.template get>("simulation.domain.load_balancing.dimensions"); + const auto lb_max_iters = 1; //params.template get("simulation.domain.load_balancing.max_iterations", 10); + const auto lb_tol = 0.1; //params.template get("simulation.domain.load_balancing.tolerance", 0.1); + + if (lb_dims.empty()) return; + + for (int dim : lb_dims) { + if (dim < 1 || dim > D) continue; + int d = dim - 1; + + int nx_domains = g_ndomains_per_dim[d]; + if (nx_domains < 2) continue; + + int global_ncells = g_mesh.n_active(static_cast(d)); + Kokkos::View d_N("N", global_ncells); + + // 1. Gather particles histogram natively on the GPU device + runOnLocalDomains([&](auto& dom) { + for (const auto& sp : dom.species) { + if (sp.npart() == 0) continue; + + auto global_offset = dom.offset_ncells()[d]; + auto i_view = (d == 0) ? sp.i1 : ((d == 1) ? 
sp.i2 : sp.i3); + + Kokkos::parallel_for("GatherHistogram", sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + int local_cell = i_view(p) - N_GHOSTS; + int global_cell = global_offset + local_cell; + if (global_cell >= 0 && global_cell < global_ncells) { + Kokkos::atomic_add(&d_N(global_cell), ONE); + } + }); + } + }); + + auto h_N = Kokkos::create_mirror_view(d_N); + Kokkos::deep_copy(h_N, d_N); + std::vector N(global_ncells, ZERO); + for (int i = 0; i < global_ncells; ++i) { + N[i] = h_N(i); + } + +#if defined(MPI_ENABLED) + std::vector N_global(global_ncells, ZERO); + MPI_Allreduce(N.data(), N_global.data(), global_ncells, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + N = N_global; +#endif + + // 2. Setup bounds vector + std::vector bounds(nx_domains + 1, 0); + bounds[0] = 0; + bounds[nx_domains] = global_ncells; + + for (int i = 0; i < nx_domains - 1; ++i) { + std::vector target_idx(D, 0); + target_idx[d] = i; + unsigned int flatten_idx = g_domain_offset2index[target_idx]; + const auto& dom = g_subdomains[flatten_idx]; + bounds[i+1] = dom.offset_ncells()[d] + dom.mesh.n_active(static_cast(d)); + } + + std::vector old_bounds = bounds; + + // 3. Negotiate boundaries using iterative method + for (int iter = 1; iter <= lb_max_iters; ++iter) { + bool moved_global = false; + // RED phase + for (int i = 1; i < nx_domains; i += 2) { + moved_global |= negotiate_boundary_single(N, bounds, i - 1, N_GHOSTS, lb_tol); + } + // BLACK phase + for (int i = 2; i < nx_domains; i += 2) { + moved_global |= negotiate_boundary_single(N, bounds, i - 1, N_GHOSTS, lb_tol); + } + if (!moved_global) break; + } + + bool any_change = false; + for (int i = 0; i <= nx_domains; ++i) { + if (bounds[i] != old_bounds[i]) any_change = true; + } + + if (!any_change) continue; + + info::Print(fmt::format("Load Balancing shifted boundaries in dimension {}", dim), true); + + // 4. 
Update domains if boundary changed + std::vector> new_subdomains; + for (unsigned int idx = 0; idx < g_ndomains; ++idx) { + auto& old_dom = g_subdomains[idx]; + + std::vector new_ncells = old_dom.mesh.n_active(); + std::vector old_offset_ncells = old_dom.offset_ncells(); + std::vector new_offset_ncells = old_offset_ncells; + + int grid_i = old_dom.offset_ndomains()[d]; + + new_offset_ncells[d] = bounds[grid_i]; + new_ncells[d] = bounds[grid_i+1] - bounds[grid_i]; + + boundaries_t new_extent = old_dom.mesh.extent(); + real_t x_min = 0, x_max = 0; + if (d == 0) { + x_min = g_mesh.metric.template convert<1, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); + x_max = g_mesh.metric.template convert<1, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); + } else if (d == 1) { + if constexpr (D == Dim::_2D || D == Dim::_3D) { + x_min = g_mesh.metric.template convert<2, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); + x_max = g_mesh.metric.template convert<2, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); + } + } else if (d == 2) { + if constexpr (D == Dim::_3D) { + x_min = g_mesh.metric.template convert<3, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); + x_max = g_mesh.metric.template convert<3, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); + } + } + new_extent[d].first = x_min; + new_extent[d].second = x_max; + + if (old_dom.is_placeholder()) { + new_subdomains.emplace_back(true, idx, old_dom.offset_ndomains(), new_offset_ncells, new_ncells, new_extent, g_metric_params, g_species_params); + } else { + new_subdomains.emplace_back(idx, old_dom.offset_ndomains(), new_offset_ncells, new_ncells, new_extent, g_metric_params, g_species_params); + auto& new_dom = new_subdomains.back(); + + auto new_em = new_dom.fields.em; + auto old_em = old_dom.fields.em; + auto new_bckp = new_dom.fields.bckp; + auto old_bckp = old_dom.fields.bckp; + + int new_off_0 = new_offset_ncells[0]; + int new_off_1 = (D > 1) ? new_offset_ncells[1] : 0; + int new_off_2 = (D > 2) ? 
new_offset_ncells[2] : 0; + + int old_off_0 = old_offset_ncells[0]; + int old_off_1 = (D > 1) ? old_offset_ncells[1] : 0; + int old_off_2 = (D > 2) ? old_offset_ncells[2] : 0; + + int old_ext_0 = old_em.extent(0); + int old_ext_1 = (D > 1) ? old_em.extent(1) : 0; + int old_ext_2 = (D > 2) ? old_em.extent(2) : 0; + + if constexpr (D == Dim::_1D) { + Kokkos::parallel_for("CopyFields_LB_1D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1) { + int gx1 = new_off_0 + i1 - N_GHOSTS; + int old_i1 = gx1 - old_off_0 + N_GHOSTS; + if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS) { + for (int comp = 0; comp < 6; ++comp) { + new_em(i1, comp) = old_em(old_i1, comp); + new_bckp(i1, comp) = old_bckp(old_i1, comp); + } + } + }); + } else if constexpr (D == Dim::_2D) { + Kokkos::parallel_for("CopyFields_LB_2D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1, int i2) { + int gx1 = new_off_0 + i1 - N_GHOSTS; + int gx2 = new_off_1 + i2 - N_GHOSTS; + int old_i1 = gx1 - old_off_0 + N_GHOSTS; + int old_i2 = gx2 - old_off_1 + N_GHOSTS; + if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS && + old_i2 >= N_GHOSTS && old_i2 < old_ext_1 - N_GHOSTS) { + for (int comp = 0; comp < 6; ++comp) { + new_em(i1, i2, comp) = old_em(old_i1, old_i2, comp); + new_bckp(i1, i2, comp) = old_bckp(old_i1, old_i2, comp); + } + } + }); + } else if constexpr (D == Dim::_3D) { + Kokkos::parallel_for("CopyFields_LB_3D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1, int i2, int i3) { + int gx1 = new_off_0 + i1 - N_GHOSTS; + int gx2 = new_off_1 + i2 - N_GHOSTS; + int gx3 = new_off_2 + i3 - N_GHOSTS; + int old_i1 = gx1 - old_off_0 + N_GHOSTS; + int old_i2 = gx2 - old_off_1 + N_GHOSTS; + int old_i3 = gx3 - old_off_2 + N_GHOSTS; + if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS && + old_i2 >= N_GHOSTS && old_i2 < old_ext_1 - N_GHOSTS && + old_i3 >= N_GHOSTS && old_i3 < old_ext_2 - N_GHOSTS) { + for (int comp = 0; comp < 6; ++comp) { + new_em(i1, i2, i3, comp) = old_em(old_i1, 
old_i2, old_i3, comp); + new_bckp(i1, i2, i3, comp) = old_bckp(old_i1, old_i2, old_i3, comp); + } + } + }); + } + + for(size_t s_idx = 0; s_idx < g_species_params.size(); ++s_idx) { + auto& old_sp = old_dom.species[s_idx]; + auto& new_sp = new_dom.species[s_idx]; + new_sp.set_npart(old_sp.npart()); + + Kokkos::deep_copy(new_sp.i1, old_sp.i1); + if(D>1) Kokkos::deep_copy(new_sp.i2, old_sp.i2); + if(D>2) Kokkos::deep_copy(new_sp.i3, old_sp.i3); + Kokkos::deep_copy(new_sp.dx1, old_sp.dx1); + if(D>1)Kokkos::deep_copy(new_sp.dx2, old_sp.dx2); + if(D>2)Kokkos::deep_copy(new_sp.dx3, old_sp.dx3); + Kokkos::deep_copy(new_sp.ux1, old_sp.ux1); + Kokkos::deep_copy(new_sp.ux2, old_sp.ux2); + Kokkos::deep_copy(new_sp.ux3, old_sp.ux3); + Kokkos::deep_copy(new_sp.weight, old_sp.weight); + + int offset_diff1 = old_offset_ncells[0] - new_offset_ncells[0]; + if constexpr (D == Dim::_1D) { + if (offset_diff1 != 0) { + auto i1_view = new_sp.i1; + auto tag_view = new_sp.tag; + int ni1 = new_ncells[0]; + Kokkos::parallel_for("ShiftParticles_1D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + i1_view(p) += offset_diff1; +#if defined(MPI_ENABLED) + tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1); +#endif + }); + } + } else if constexpr (D == Dim::_2D) { + int offset_diff2 = old_offset_ncells[1] - new_offset_ncells[1]; + if (offset_diff1 != 0 || offset_diff2 != 0) { + auto i1_view = new_sp.i1; + auto i2_view = new_sp.i2; + auto tag_view = new_sp.tag; + int ni1 = new_ncells[0]; + int ni2 = new_ncells[1]; + Kokkos::parallel_for("ShiftParticles_2D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + i1_view(p) += offset_diff1; + i2_view(p) += offset_diff2; +#if defined(MPI_ENABLED) + tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1, i2_view(p) < 0, i2_view(p) >= ni2); +#endif + }); + } + } else if constexpr (D == Dim::_3D) { + int offset_diff2 = old_offset_ncells[1] - new_offset_ncells[1]; + int offset_diff3 = 
old_offset_ncells[2] - new_offset_ncells[2]; + if (offset_diff1 != 0 || offset_diff2 != 0 || offset_diff3 != 0) { + auto i1_view = new_sp.i1; + auto i2_view = new_sp.i2; + auto i3_view = new_sp.i3; + auto tag_view = new_sp.tag; + int ni1 = new_ncells[0]; + int ni2 = new_ncells[1]; + int ni3 = new_ncells[2]; + Kokkos::parallel_for("ShiftParticles_3D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + i1_view(p) += offset_diff1; + i2_view(p) += offset_diff2; + i3_view(p) += offset_diff3; +#if defined(MPI_ENABLED) + tag_view(p) = mpi::SendTag(tag_view(p), + i1_view(p) < 0, i1_view(p) >= ni1, + i2_view(p) < 0, i2_view(p) >= ni2, + i3_view(p) < 0, i3_view(p) >= ni3); +#endif + }); + } + } + } + } + } + g_subdomains = std::move(new_subdomains); + + redefineNeighbors(); + redefineBoundaries(); + + runOnLocalDomains([&](auto& dom){ + CommunicateParticles(dom); + CommunicateFields(dom, Comm::E | Comm::B | Comm::J); + }); + } + } + +#define METADOMAIN_LB(S, M, D) \ + template void Metadomain>::BalanceLoad(const SimulationParams&); + + NTT_FOREACH_SPECIALIZATION(METADOMAIN_LB) +#undef METADOMAIN_LB + +} // namespace ntt diff --git a/src/framework/parameters/algorithms.cpp b/src/framework/parameters/algorithms.cpp index 856d05fc7..9ea0dc4a4 100644 --- a/src/framework/parameters/algorithms.cpp +++ b/src/framework/parameters/algorithms.cpp @@ -125,6 +125,7 @@ namespace ntt { "larmor_max", ZERO); } + } void Algorithms::setParams(const std::map& extra, diff --git a/src/framework/parameters/algorithms.h b/src/framework/parameters/algorithms.h index a496cb7b6..fbf49b9ab 100644 --- a/src/framework/parameters/algorithms.h +++ b/src/framework/parameters/algorithms.h @@ -46,6 +46,7 @@ namespace ntt { real_t synchrotron_gamma_rad; real_t compton_gamma_rad; + void read(real_t, const std::map&, const toml::value&); void setParams(const std::map&, SimulationParams*) const; }; diff --git a/src/framework/parameters/grid.cpp b/src/framework/parameters/grid.cpp index 9e5d39df5..e01765926 
100644 --- a/src/framework/parameters/grid.cpp +++ b/src/framework/parameters/grid.cpp @@ -385,6 +385,39 @@ namespace ntt { "decomposition", std::vector { -1, -1, -1 }); + load_balancing_enable = toml::find_or(toml_data, + "simulation", + "domain", + "load_balancing", + "enable", + false); + load_balancing_interval = toml::find_or(toml_data, + "simulation", + "domain", + "load_balancing", + "interval", + 0u); + load_balancing_dimensions = toml::find_or>( + toml_data, + "simulation", + "domain", + "load_balancing", + "dimensions", + std::vector {}); + + load_balancing_max_iterations = toml::find_or(toml_data, + "simulation", + "domain", + "load_balancing", + "max_iterations", + 10); + load_balancing_tolerance = toml::find_or(toml_data, + "simulation", + "domain", + "load_balancing", + "tolerance", + 0.1); + /* resolution and dimension ------------------------------------------- */ resolution = toml::find>(toml_data, "grid", "resolution"); raise::ErrorIf(resolution.size() < 1 || resolution.size() > 3, @@ -533,6 +566,11 @@ namespace ntt { void Grid::setParams(SimulationParams* params) const { params->set("simulation.domain.number", number_of_domains); params->set("simulation.domain.decomposition", domain_decomposition); + params->set("simulation.domain.load_balancing.enable", load_balancing_enable); + params->set("simulation.domain.load_balancing.interval", load_balancing_interval); + params->set("simulation.domain.load_balancing.dimensions", load_balancing_dimensions); + params->set("simulation.domain.load_balancing.max_iterations", load_balancing_max_iterations); + params->set("simulation.domain.load_balancing.tolerance", load_balancing_tolerance); params->set("grid.resolution", resolution); params->set("grid.dim", dim); diff --git a/src/framework/parameters/grid.h b/src/framework/parameters/grid.h index 978b7f329..e44554f7d 100644 --- a/src/framework/parameters/grid.h +++ b/src/framework/parameters/grid.h @@ -56,6 +56,11 @@ namespace ntt { struct Grid { unsigned 
int number_of_domains; std::vector domain_decomposition; + unsigned int load_balancing_interval; + std::vector load_balancing_dimensions; + bool load_balancing_enable; + unsigned int load_balancing_max_iterations; + real_t load_balancing_tolerance; std::vector resolution; Dimension dim; From 48e2a2ef8cd3bd8013488f8c5f43bcafdc31f6a6 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 23 Mar 2026 15:53:08 +0000 Subject: [PATCH 4/6] bugfixes --- src/framework/domain/metadomain_lb.cpp | 45 ++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/framework/domain/metadomain_lb.cpp b/src/framework/domain/metadomain_lb.cpp index 6df3c6b73..221ceb08f 100644 --- a/src/framework/domain/metadomain_lb.cpp +++ b/src/framework/domain/metadomain_lb.cpp @@ -74,9 +74,9 @@ namespace ntt { template requires IsCompatibleWithMetadomain void Metadomain::BalanceLoad(const SimulationParams& params) { - const auto lb_dims = params.template get>("simulation.domain.load_balancing.dimensions"); - const auto lb_max_iters = 1; //params.template get("simulation.domain.load_balancing.max_iterations", 10); - const auto lb_tol = 0.1; //params.template get("simulation.domain.load_balancing.tolerance", 0.1); + const auto lb_dims = params.template get>("simulation.domain.load_balancing.dimensions"); + const auto lb_max_iters = static_cast(params.template get("simulation.domain.load_balancing.max_iterations")); + const auto lb_tol = static_cast(params.template get("simulation.domain.load_balancing.tolerance")); if (lb_dims.empty()) return; @@ -150,6 +150,21 @@ namespace ntt { if (!moved_global) break; } + // Clamp bounds to ensure every domain is at least 2*N_GHOSTS+1 cells wide + const int L_min = 2 * N_GHOSTS + 1; + for (int i = 1; i < nx_domains; ++i) { + // ensure domain i-1 (left) is at least L_min wide + if (bounds[i] - bounds[i - 1] < L_min) { + bounds[i] = bounds[i - 1] + L_min; + } + } + // walk backwards to ensure domain nx_domains-1 (rightmost) is at 
least L_min wide + for (int i = nx_domains - 1; i >= 1; --i) { + if (bounds[i + 1] - bounds[i] < L_min) { + bounds[i] = bounds[i + 1] - L_min; + } + } + bool any_change = false; for (int i = 0; i <= nx_domains; ++i) { if (bounds[i] != old_bounds[i]) any_change = true; @@ -161,6 +176,7 @@ namespace ntt { // 4. Update domains if boundary changed std::vector> new_subdomains; + new_subdomains.reserve(g_ndomains); for (unsigned int idx = 0; idx < g_ndomains; ++idx) { auto& old_dom = g_subdomains[idx]; @@ -274,15 +290,28 @@ namespace ntt { Kokkos::deep_copy(new_sp.ux2, old_sp.ux2); Kokkos::deep_copy(new_sp.ux3, old_sp.ux3); Kokkos::deep_copy(new_sp.weight, old_sp.weight); + + // Reset all copied particle tags to 'alive': particles with + // send-direction tags from the previous pusher step must not be + // re-sent by CommunicateParticles; ShiftParticles below will + // re-tag any particle that is now out of the new domain bounds. + { + auto tag_view = new_sp.tag; + Kokkos::parallel_for("ResetTags_LB", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + tag_view(p) = ParticleTag::alive; + }); + } int offset_diff1 = old_offset_ncells[0] - new_offset_ncells[0]; if constexpr (D == Dim::_1D) { if (offset_diff1 != 0) { auto i1_view = new_sp.i1; + auto i1_prev_view = new_sp.i1_prev; auto tag_view = new_sp.tag; int ni1 = new_ncells[0]; Kokkos::parallel_for("ShiftParticles_1D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { i1_view(p) += offset_diff1; + i1_prev_view(p) += offset_diff1; #if defined(MPI_ENABLED) tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1); #endif @@ -292,13 +321,17 @@ namespace ntt { int offset_diff2 = old_offset_ncells[1] - new_offset_ncells[1]; if (offset_diff1 != 0 || offset_diff2 != 0) { auto i1_view = new_sp.i1; + auto i1_prev_view = new_sp.i1_prev; auto i2_view = new_sp.i2; + auto i2_prev_view = new_sp.i2_prev; auto tag_view = new_sp.tag; int ni1 = new_ncells[0]; int ni2 = new_ncells[1]; 
Kokkos::parallel_for("ShiftParticles_2D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { i1_view(p) += offset_diff1; i2_view(p) += offset_diff2; + i1_prev_view(p) += offset_diff1; + i2_prev_view(p) += offset_diff2; #if defined(MPI_ENABLED) tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1, i2_view(p) < 0, i2_view(p) >= ni2); #endif @@ -311,6 +344,9 @@ namespace ntt { auto i1_view = new_sp.i1; auto i2_view = new_sp.i2; auto i3_view = new_sp.i3; + auto i1_prev_view = new_sp.i1_prev; + auto i2_prev_view = new_sp.i2_prev; + auto i3_prev_view = new_sp.i3_prev; auto tag_view = new_sp.tag; int ni1 = new_ncells[0]; int ni2 = new_ncells[1]; @@ -319,6 +355,9 @@ namespace ntt { i1_view(p) += offset_diff1; i2_view(p) += offset_diff2; i3_view(p) += offset_diff3; + i1_prev_view(p) += offset_diff1; + i2_prev_view(p) += offset_diff2; + i3_prev_view(p) += offset_diff3; #if defined(MPI_ENABLED) tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1, From 2dd5eaeb5b963b5c4ae1ed0c1a88911c61290ad7 Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 23 Mar 2026 16:38:44 +0000 Subject: [PATCH 5/6] added missing copys --- src/framework/domain/metadomain_lb.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/framework/domain/metadomain_lb.cpp b/src/framework/domain/metadomain_lb.cpp index 221ceb08f..1051d279f 100644 --- a/src/framework/domain/metadomain_lb.cpp +++ b/src/framework/domain/metadomain_lb.cpp @@ -281,15 +281,22 @@ namespace ntt { new_sp.set_npart(old_sp.npart()); Kokkos::deep_copy(new_sp.i1, old_sp.i1); + Kokkos::deep_copy(new_sp.i1_prev, old_sp.i1_prev); if(D>1) Kokkos::deep_copy(new_sp.i2, old_sp.i2); + if(D>1) Kokkos::deep_copy(new_sp.i2_prev, old_sp.i2_prev); if(D>2) Kokkos::deep_copy(new_sp.i3, old_sp.i3); + if(D>2) Kokkos::deep_copy(new_sp.i3_prev, old_sp.i3_prev); Kokkos::deep_copy(new_sp.dx1, old_sp.dx1); + Kokkos::deep_copy(new_sp.dx1_prev, old_sp.dx1_prev); if(D>1)Kokkos::deep_copy(new_sp.dx2, 
old_sp.dx2); + if(D>1)Kokkos::deep_copy(new_sp.dx2_prev, old_sp.dx2_prev); if(D>2)Kokkos::deep_copy(new_sp.dx3, old_sp.dx3); + if(D>2)Kokkos::deep_copy(new_sp.dx3_prev, old_sp.dx3_prev); Kokkos::deep_copy(new_sp.ux1, old_sp.ux1); Kokkos::deep_copy(new_sp.ux2, old_sp.ux2); Kokkos::deep_copy(new_sp.ux3, old_sp.ux3); Kokkos::deep_copy(new_sp.weight, old_sp.weight); + Kokkos::deep_copy(new_sp.tag, old_sp.tag); // Reset all copied particle tags to 'alive': particles with // send-direction tags from the previous pusher step must not be From 7baac55700bf5a098c36588b2584aec59da78e5c Mon Sep 17 00:00:00 2001 From: LudwigBoess Date: Mon, 23 Mar 2026 22:03:08 +0000 Subject: [PATCH 6/6] simplified computation of new boundaries and number of cells --- src/framework/domain/metadomain_lb.cpp | 352 +++++++++++-------------- 1 file changed, 150 insertions(+), 202 deletions(-) diff --git a/src/framework/domain/metadomain_lb.cpp b/src/framework/domain/metadomain_lb.cpp index 1051d279f..90289a449 100644 --- a/src/framework/domain/metadomain_lb.cpp +++ b/src/framework/domain/metadomain_lb.cpp @@ -78,11 +78,21 @@ namespace ntt { const auto lb_max_iters = static_cast(params.template get("simulation.domain.load_balancing.max_iterations")); const auto lb_tol = static_cast(params.template get("simulation.domain.load_balancing.tolerance")); + // if no dimensions specified, skip load balancing if (lb_dims.empty()) return; + auto global_boundaries = std::vector> {}; + auto offset_ncells = std::vector> {}; + auto global_ncells = std::vector> {}; + + // track if any boundary changed across all dimensions to avoid unnecessary domain updates + bool any_change = false; + + // loop over all dimensions to be load balanced for (int dim : lb_dims) { + // ToDo: fallback options for dimensions that should not be load-balanced. 
if (dim < 1 || dim > D) continue; - int d = dim - 1; + int d = dim - 1; int nx_domains = g_ndomains_per_dim[d]; if (nx_domains < 2) continue; @@ -122,7 +132,7 @@ #endif // 2. Setup bounds vector - std::vector bounds(nx_domains + 1, 0); + std::vector bounds(nx_domains + 1, 0); bounds[0] = 0; bounds[nx_domains] = global_ncells; @@ -131,10 +141,11 @@ target_idx[d] = i; unsigned int flatten_idx = g_domain_offset2index[target_idx]; const auto& dom = g_subdomains[flatten_idx]; - bounds[i+1] = dom.offset_ncells()[d] + dom.mesh.n_active(static_cast(d)); + // compute global cell index of the right boundary of this domain + bounds[i+1] = dom.offset_ncells()[d] + dom.mesh.n_active(static_cast(d)); } - std::vector old_bounds = bounds; + std::vector old_bounds = bounds; // 3. Negotiate boundaries using iterative method for (int iter = 1; iter <= lb_max_iters; ++iter) { @@ -150,215 +161,160 @@ if (!moved_global) break; } - // Clamp bounds to ensure every domain is at least 2*N_GHOSTS+1 cells wide - const int L_min = 2 * N_GHOSTS + 1; - for (int i = 1; i < nx_domains; ++i) { - // ensure domain i-1 (left) is at least L_min wide - if (bounds[i] - bounds[i - 1] < L_min) { - bounds[i] = bounds[i - 1] + L_min; - } - } - // walk backwards to ensure domain nx_domains-1 (rightmost) is at least L_min wide - for (int i = nx_domains - 1; i >= 1; --i) { - if (bounds[i + 1] - bounds[i] < L_min) { - bounds[i] = bounds[i + 1] - L_min; - } - } - - bool any_change = false; + // check if any boundary changed for (int i = 0; i <= nx_domains; ++i) { if (bounds[i] != old_bounds[i]) any_change = true; } - if (!any_change) continue; + // store updated boundaries for this dimension + global_boundaries.push_back(bounds); + // store offsets for this dimension (for later use in shifting particles) + std::vector offsets(nx_domains, 0); + // offsets[i] = shift of the left boundary of domain i, old minus new, so that + // i1 += offset moves particles into the new local frame (matches the previous + // old_offset_ncells - new_offset_ncells convention); bounds[0] is fixed, so offsets[0] == 0 + for (int i = 0; i < nx_domains; ++i) { + offsets[i] = old_bounds[i] - bounds[i]; + } + 
offset_ncells.push_back(offsets); + + // store number of cells for each domain in this dimension (for later use in domain updates) + std::vector new_ncells(nx_domains, 0); + for (int i = 0; i < nx_domains; ++i) { + new_ncells[i] = bounds[i+1] - bounds[i]; + } + global_ncells.push_back(new_ncells); info::Print(fmt::format("Load Balancing shifted boundaries in dimension {}", dim), true); - // 4. Update domains if boundary changed - std::vector> new_subdomains; - new_subdomains.reserve(g_ndomains); - for (unsigned int idx = 0; idx < g_ndomains; ++idx) { - auto& old_dom = g_subdomains[idx]; - - std::vector new_ncells = old_dom.mesh.n_active(); - std::vector old_offset_ncells = old_dom.offset_ncells(); - std::vector new_offset_ncells = old_offset_ncells; - - int grid_i = old_dom.offset_ndomains()[d]; - - new_offset_ncells[d] = bounds[grid_i]; - new_ncells[d] = bounds[grid_i+1] - bounds[grid_i]; - - boundaries_t new_extent = old_dom.mesh.extent(); - real_t x_min = 0, x_max = 0; - if (d == 0) { - x_min = g_mesh.metric.template convert<1, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); - x_max = g_mesh.metric.template convert<1, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); - } else if (d == 1) { - if constexpr (D == Dim::_2D || D == Dim::_3D) { - x_min = g_mesh.metric.template convert<2, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); - x_max = g_mesh.metric.template convert<2, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); - } - } else if (d == 2) { - if constexpr (D == Dim::_3D) { - x_min = g_mesh.metric.template convert<3, Crd::Cd, Crd::Ph>(new_offset_ncells[d]); - x_max = g_mesh.metric.template convert<3, Crd::Cd, Crd::Ph>(new_offset_ncells[d] + new_ncells[d]); - } - } - new_extent[d].first = x_min; - new_extent[d].second = x_max; + } // loop over dimensions - if (old_dom.is_placeholder()) { - new_subdomains.emplace_back(true, idx, old_dom.offset_ndomains(), new_offset_ncells, new_ncells, new_extent, g_metric_params, g_species_params); - } else { - 
new_subdomains.emplace_back(idx, old_dom.offset_ndomains(), new_offset_ncells, new_ncells, new_extent, g_metric_params, g_species_params); - auto& new_dom = new_subdomains.back(); - - auto new_em = new_dom.fields.em; - auto old_em = old_dom.fields.em; - auto new_bckp = new_dom.fields.bckp; - auto old_bckp = old_dom.fields.bckp; - - int new_off_0 = new_offset_ncells[0]; - int new_off_1 = (D > 1) ? new_offset_ncells[1] : 0; - int new_off_2 = (D > 2) ? new_offset_ncells[2] : 0; - - int old_off_0 = old_offset_ncells[0]; - int old_off_1 = (D > 1) ? old_offset_ncells[1] : 0; - int old_off_2 = (D > 2) ? old_offset_ncells[2] : 0; - - int old_ext_0 = old_em.extent(0); - int old_ext_1 = (D > 1) ? old_em.extent(1) : 0; - int old_ext_2 = (D > 2) ? old_em.extent(2) : 0; - - if constexpr (D == Dim::_1D) { - Kokkos::parallel_for("CopyFields_LB_1D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1) { - int gx1 = new_off_0 + i1 - N_GHOSTS; - int old_i1 = gx1 - old_off_0 + N_GHOSTS; - if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS) { - for (int comp = 0; comp < 6; ++comp) { - new_em(i1, comp) = old_em(old_i1, comp); - new_bckp(i1, comp) = old_bckp(old_i1, comp); - } - } - }); - } else if constexpr (D == Dim::_2D) { - Kokkos::parallel_for("CopyFields_LB_2D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1, int i2) { - int gx1 = new_off_0 + i1 - N_GHOSTS; - int gx2 = new_off_1 + i2 - N_GHOSTS; - int old_i1 = gx1 - old_off_0 + N_GHOSTS; - int old_i2 = gx2 - old_off_1 + N_GHOSTS; - if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS && - old_i2 >= N_GHOSTS && old_i2 < old_ext_1 - N_GHOSTS) { - for (int comp = 0; comp < 6; ++comp) { - new_em(i1, i2, comp) = old_em(old_i1, old_i2, comp); - new_bckp(i1, i2, comp) = old_bckp(old_i1, old_i2, comp); - } - } - }); - } else if constexpr (D == Dim::_3D) { - Kokkos::parallel_for("CopyFields_LB_3D", new_dom.mesh.rangeActiveCells(), KOKKOS_LAMBDA(int i1, int i2, int i3) { - int gx1 = new_off_0 + i1 - N_GHOSTS; - int gx2 = 
new_off_1 + i2 - N_GHOSTS; - int gx3 = new_off_2 + i3 - N_GHOSTS; - int old_i1 = gx1 - old_off_0 + N_GHOSTS; - int old_i2 = gx2 - old_off_1 + N_GHOSTS; - int old_i3 = gx3 - old_off_2 + N_GHOSTS; - if (old_i1 >= N_GHOSTS && old_i1 < old_ext_0 - N_GHOSTS && - old_i2 >= N_GHOSTS && old_i2 < old_ext_1 - N_GHOSTS && - old_i3 >= N_GHOSTS && old_i3 < old_ext_2 - N_GHOSTS) { - for (int comp = 0; comp < 6; ++comp) { - new_em(i1, i2, i3, comp) = old_em(old_i1, old_i2, old_i3, comp); - new_bckp(i1, i2, i3, comp) = old_bckp(old_i1, old_i2, old_i3, comp); - } - } - }); - } + // no changes, skip domain updates + if (!any_change) return; - for(size_t s_idx = 0; s_idx < g_species_params.size(); ++s_idx) { - auto& old_sp = old_dom.species[s_idx]; - auto& new_sp = new_dom.species[s_idx]; - new_sp.set_npart(old_sp.npart()); - - Kokkos::deep_copy(new_sp.i1, old_sp.i1); - Kokkos::deep_copy(new_sp.i1_prev, old_sp.i1_prev); - if(D>1) Kokkos::deep_copy(new_sp.i2, old_sp.i2); - if(D>1) Kokkos::deep_copy(new_sp.i2_prev, old_sp.i2_prev); - if(D>2) Kokkos::deep_copy(new_sp.i3, old_sp.i3); - if(D>2) Kokkos::deep_copy(new_sp.i3_prev, old_sp.i3_prev); - Kokkos::deep_copy(new_sp.dx1, old_sp.dx1); - Kokkos::deep_copy(new_sp.dx1_prev, old_sp.dx1_prev); - if(D>1)Kokkos::deep_copy(new_sp.dx2, old_sp.dx2); - if(D>1)Kokkos::deep_copy(new_sp.dx2_prev, old_sp.dx2_prev); - if(D>2)Kokkos::deep_copy(new_sp.dx3, old_sp.dx3); - if(D>2)Kokkos::deep_copy(new_sp.dx3_prev, old_sp.dx3_prev); - Kokkos::deep_copy(new_sp.ux1, old_sp.ux1); - Kokkos::deep_copy(new_sp.ux2, old_sp.ux2); - Kokkos::deep_copy(new_sp.ux3, old_sp.ux3); - Kokkos::deep_copy(new_sp.weight, old_sp.weight); - Kokkos::deep_copy(new_sp.tag, old_sp.tag); - - // Reset all copied particle tags to 'alive': particles with - // send-direction tags from the previous pusher step must not be - // re-sent by CommunicateParticles; ShiftParticles below will - // re-tag any particle that is now out of the new domain bounds. 
- { - auto tag_view = new_sp.tag; - Kokkos::parallel_for("ResetTags_LB", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { - tag_view(p) = ParticleTag::alive; - }); - } - - int offset_diff1 = old_offset_ncells[0] - new_offset_ncells[0]; - if constexpr (D == Dim::_1D) { - if (offset_diff1 != 0) { - auto i1_view = new_sp.i1; - auto i1_prev_view = new_sp.i1_prev; - auto tag_view = new_sp.tag; - int ni1 = new_ncells[0]; - Kokkos::parallel_for("ShiftParticles_1D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { - i1_view(p) += offset_diff1; - i1_prev_view(p) += offset_diff1; + // ToDo: Mesh update + + + // ToDo: Field update + for (unsigned int idx { 0 }; idx < g_ndomains; ++idx) { +#if defined(MPI_ENABLED) + // !TODO: need to change to support multiple domains per rank + // assuming ONE local subdomain + const auto local = ((int)idx == g_mpi_rank); + if (local) { + auto nxnew = std::vector(D, 0); + auto nxold = std::vector(D, 0); + for (auto d { 0 }; d < (short)D; ++d) { + nxnew.push_back(new_ncells[d][idx]); + nxold.push_back(nxnew - offset_ncells[d][idx]); + } + if (offset_ncells[d][idx] < 0) { + // domain is shrinking -> right boundary moves left, need to send data to the right neighbor + // ToDo: define fields + ndfield_t new_fields {... }; + ndfield_t send_fields {... 
}; + + if constexpr (D == Dim::_1D) { + Kokkos::deep_copy(new_fields, Kokkos::slice(old_fields, { 0, nxnew[0] })); + Kokkos::deep_copy(send_fields, Kokkos::slice(old_fields, { nxnew[0], nxold[0] })); + } else if constexpr (D == Dim::_2D) { + Kokkos::deep_copy(new_fields, Kokkos::slice(old_fields, { 0, nxnew[0] }, { 0, nxnew[1] })); + Kokkos::deep_copy(send_fields, Kokkos::slice(old_fields, { nxnew[0], nxold[0] }, { nxnew[1], nxold[1] })); + } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy(new_fields, Kokkos::slice(old_fields, { 0, nxnew[0] }, { 0, nxnew[1] }, { 0, nxnew[2] })); + Kokkos::deep_copy(send_fields, Kokkos::slice(old_fields, { nxnew[0], nxold[0] }, { nxnew[1], nxold[1] }, { nxnew[2], nxold[2] })); + } + + MPI_SEND(send_fields, ...); + } else if (offset_ncells[d][idx] > 0) { + // domain is growing -> right boundary moves right, need to receive data from the left neighbor + ndfield_t new_fields {... }; + ndfield_t recv_fields {... }; + + if constexpr (D == Dim::_1D) { + Kokkos::deep_copy(Kokkos::slice(new_fields, { nxnew[0] - nxold[0], nxnew[0] }), old_fields); + } else if constexpr (D == Dim::_2D) { + Kokkos::deep_copy(Kokkos::slice(new_fields, { nxnew[0] - nxold[0], nxnew[0] }, { nxnew[1] - nxold[1], nxnew[1] }), old_fields); + } else if constexpr (D == Dim::_3D) { + Kokkos::deep_copy(Kokkos::slice(new_fields, { nxnew[0] - nxold[0], nxnew[0] }, { nxnew[1] - nxold[1], nxnew[1] }, + { nxnew[2] - nxold[2], nxnew[2] }), old_fields); + } + + MPI_RECV(recv_fields, ...); + Kokkos::deep_copy(Kokkos::slice(new_fields, { 0, nxnew[0] - nxold[0] }), recv_fields); + } + } + + g_subdomains.back().set_mpi_rank(idx); + if (g_subdomains.back().mpi_rank() == g_mpi_rank) { + g_local_subdomain_indices.push_back(idx); + } +#endif // MPI_ENABLED + + } + + // ToDo: Particle update + for(size_t s_idx = 0; s_idx < g_species_params.size(); ++s_idx) { + auto& sp = dom.species[s_idx]; + + // Reset all copied particle tags to 'alive': particles with + // send-direction tags 
from the previous pusher step must not be + // re-sent by CommunicateParticles; ShiftParticles below will + // re-tag any particle that is now out of the new domain bounds. + { + auto tag_view = sp.tag; + Kokkos::parallel_for("ResetTags_LB", sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + tag_view(p) = ParticleTag::alive; + }); + } + + int offset_diff1 = offset_ncells[0][idx]; + if constexpr (D == Dim::_1D) { + if (offset_diff1 != 0) { + auto i1_view = sp.i1; + auto i1_prev_view = sp.i1_prev; + auto tag_view = sp.tag; + int ni1 = new_ncells[0]; + Kokkos::parallel_for("ShiftParticles_1D", sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + i1_view(p) += offset_diff1; + i1_prev_view(p) += offset_diff1; #if defined(MPI_ENABLED) - tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1); + tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1); #endif - }); - } - } else if constexpr (D == Dim::_2D) { - int offset_diff2 = old_offset_ncells[1] - new_offset_ncells[1]; - if (offset_diff1 != 0 || offset_diff2 != 0) { - auto i1_view = new_sp.i1; - auto i1_prev_view = new_sp.i1_prev; - auto i2_view = new_sp.i2; - auto i2_prev_view = new_sp.i2_prev; - auto tag_view = new_sp.tag; - int ni1 = new_ncells[0]; - int ni2 = new_ncells[1]; - Kokkos::parallel_for("ShiftParticles_2D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { - i1_view(p) += offset_diff1; - i2_view(p) += offset_diff2; - i1_prev_view(p) += offset_diff1; - i2_prev_view(p) += offset_diff2; + }); + } + } else if constexpr (D == Dim::_2D) { + int offset_diff2 = offset_ncells[domain_idx][1]; + if (offset_diff1 != 0 || offset_diff2 != 0) { + auto i1_view = sp.i1; + auto i1_prev_view = sp.i1_prev; + auto i2_view = sp.i2; + auto i2_prev_view = sp.i2_prev; + auto tag_view = sp.tag; + int ni1 = new_ncells[0]; + int ni2 = new_ncells[1]; + Kokkos::parallel_for("ShiftParticles_2D", sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + i1_view(p) += offset_diff1; + i2_view(p) += 
offset_diff2; + i1_prev_view(p) += offset_diff1; + i2_prev_view(p) += offset_diff2; #if defined(MPI_ENABLED) tag_view(p) = mpi::SendTag(tag_view(p), i1_view(p) < 0, i1_view(p) >= ni1, i2_view(p) < 0, i2_view(p) >= ni2); #endif }); } } else if constexpr (D == Dim::_3D) { - int offset_diff2 = old_offset_ncells[1] - new_offset_ncells[1]; - int offset_diff3 = old_offset_ncells[2] - new_offset_ncells[2]; + int offset_diff2 = offset_ncells[domain_idx][1]; + int offset_diff3 = offset_ncells[domain_idx][2]; if (offset_diff1 != 0 || offset_diff2 != 0 || offset_diff3 != 0) { - auto i1_view = new_sp.i1; - auto i2_view = new_sp.i2; - auto i3_view = new_sp.i3; - auto i1_prev_view = new_sp.i1_prev; - auto i2_prev_view = new_sp.i2_prev; - auto i3_prev_view = new_sp.i3_prev; - auto tag_view = new_sp.tag; + auto i1_view = sp.i1; + auto i2_view = sp.i2; + auto i3_view = sp.i3; + auto i1_prev_view = sp.i1_prev; + auto i2_prev_view = sp.i2_prev; + auto i3_prev_view = sp.i3_prev; + auto tag_view = sp.tag; int ni1 = new_ncells[0]; int ni2 = new_ncells[1]; int ni3 = new_ncells[2]; - Kokkos::parallel_for("ShiftParticles_3D", new_sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { + Kokkos::parallel_for("ShiftParticles_3D", sp.rangeActiveParticles(), KOKKOS_LAMBDA(int p) { i1_view(p) += offset_diff1; i2_view(p) += offset_diff2; i3_view(p) += offset_diff3; @@ -375,17 +331,9 @@ namespace ntt { } } } - } - } - g_subdomains = std::move(new_subdomains); - redefineNeighbors(); - redefineBoundaries(); - - runOnLocalDomains([&](auto& dom){ - CommunicateParticles(dom); - CommunicateFields(dom, Comm::E | Comm::B | Comm::J); - }); + CommunicateParticles(dom); + CommunicateFields(dom, Comm::E | Comm::B | Comm::J); } }