Merge pull request mcdc-project#271 from alexandermote/dev

ilhamv · web-flow · commit 36b03323cfbd · 2025-03-18T09:37:17.000+07:00
Variable-sized subdomains on CPU/GPU
diff --git a/.github/workflows/regression_test-numba_cpu_mpi.yml b/.github/workflows/regression_test-numba_cpu_mpi.yml
@@ -26,5 +26,6 @@ jobs:
         pip list 
     - name: Regression Test - Numba and MPI
       run: |
-          cd test/regression
-          python run.py --mode=numba --mpiexec=4
+        cd test/regression
+        python run.py --mode=numba --mpiexec=4
+        python run.py --mode=numba --mpiexec=16 --name=slab_reed_dd_3d
diff --git a/.github/workflows/regression_test-python_mpi.yml b/.github/workflows/regression_test-python_mpi.yml
@@ -28,3 +28,4 @@ jobs:
       run: |
         cd test/regression
         python run.py --mpiexec=4
+        python run.py --mpiexec=16 --name=slab_reed_dd_3d
diff --git a/mcdc/main.py b/mcdc/main.py
@@ -861,15 +861,25 @@ def prepare():
                 )
 
         else:  # decomposed mesh filters
+            mcdc["technique"]["dd_xsum"] = len(input_deck.mesh_tallies[i].x) - 1
+            mcdc["technique"]["dd_ysum"] = len(input_deck.mesh_tallies[i].y) - 1
+            mcdc["technique"]["dd_zsum"] = len(input_deck.mesh_tallies[i].z) - 1
+
             mxn, mxp, myn, myp, mzn, mzp = dd_mesh_bounds(i)
 
             # Filters
             new_x = input_deck.mesh_tallies[i].x[mxn:mxp]
             new_y = input_deck.mesh_tallies[i].y[myn:myp]
             new_z = input_deck.mesh_tallies[i].z[mzn:mzp]
-            mcdc["mesh_tallies"][i]["filter"]["x"] = new_x
-            mcdc["mesh_tallies"][i]["filter"]["y"] = new_y
-            mcdc["mesh_tallies"][i]["filter"]["z"] = new_z
+            xlen = len(new_x)
+            ylen = len(new_y)
+            zlen = len(new_z)
+            mcdc["mesh_tallies"][i]["filter"]["x"][:xlen] = new_x
+            mcdc["mesh_tallies"][i]["filter"]["y"][:ylen] = new_y
+            mcdc["mesh_tallies"][i]["filter"]["z"][:zlen] = new_z
+            mcdc["technique"]["dd_xlen"] = xlen - 1
+            mcdc["technique"]["dd_ylen"] = ylen - 1
+            mcdc["technique"]["dd_zlen"] = zlen - 1
             for name in ["t", "mu", "azi", "g"]:
                 N = len(getattr(input_deck.mesh_tallies[i], name))
                 mcdc["mesh_tallies"][i]["filter"][name][:N] = getattr(
@@ -1632,9 +1642,9 @@ def dd_mergetally(mcdc, data):
     d_Nz = input_deck.technique["dd_mesh"]["z"].size - 1
 
     # capture tally lengths for reorganizing later
-    xlen = len(mcdc["mesh_tallies"][0]["filter"]["x"]) - 1
-    ylen = len(mcdc["mesh_tallies"][0]["filter"]["y"]) - 1
-    zlen = len(mcdc["mesh_tallies"][0]["filter"]["z"]) - 1
+    xlen = mcdc["technique"]["dd_xlen"]
+    ylen = mcdc["technique"]["dd_ylen"]
+    zlen = mcdc["technique"]["dd_zlen"]
 
     # MPI gather
     if (d_Nx * d_Ny * d_Nz) == MPI.COMM_WORLD.Get_size():
@@ -1648,6 +1658,10 @@ def dd_mergetally(mcdc, data):
             MPI.COMM_WORLD.Gatherv(
                 sendbuf=tally[i], recvbuf=(dd_tally[i], sendcounts), root=0
             )
+        # gather tally lengths for proper recombination
+        xlens = MPI.COMM_WORLD.gather(xlen, root=0)
+        ylens = MPI.COMM_WORLD.gather(ylen, root=0)
+        zlens = MPI.COMM_WORLD.gather(zlen, root=0)
 
     # MPI gather for multiprocessor subdomains
     else:
@@ -1669,6 +1683,10 @@ def dd_mergetally(mcdc, data):
             # gather tallies
             for i, t in enumerate(tally):
                 dd_comm.Gatherv(tally[i], (dd_tally[i], sendcounts), root=0)
+            # gather tally lengths for proper recombination
+            xlens = dd_comm.gather(xlen, root=0)
+            ylens = dd_comm.gather(ylen, root=0)
+            zlens = dd_comm.gather(zlen, root=0)
         dd_group.Free()
         if MPI.COMM_NULL != dd_comm:
             dd_comm.Free()
@@ -1678,20 +1696,39 @@ def dd_mergetally(mcdc, data):
         # reorganize tally data
         # TODO: find/develop a more efficient algorithm for this
         tally_idx = 0
+        offset = 0
+        ysum = mcdc["technique"]["dd_ysum"]
+        zsum = mcdc["technique"]["dd_zsum"]
         for di in range(0, d_Nx * d_Ny * d_Nz):
             dz = di // (d_Nx * d_Ny)
             dy = (di % (d_Nx * d_Ny)) // d_Nx
             dx = di % d_Nx
+
+            offset = 0
+            # calculate subdomain offset
+            for i in range(0, dx):
+                offset += xlens[i] * ysum * zsum
+
+            for i in range(0, dy):
+                y_ind = i * d_Nx
+                offset += ylens[y_ind] * zsum
+
+            for i in range(0, dz):
+                z_ind = i * d_Nx * d_Ny
+                offset += zlens[z_ind]
+
+            # look up this subdomain's own tally extents (they vary by subdomain)
+            xlen = xlens[di]
+            ylen = ylens[di]
+            zlen = zlens[di]
             for xi in range(0, xlen):
                 for yi in range(0, ylen):
                     for zi in range(0, zlen):
                         # calculate reorganized index
-                        ind_x = xi * (ylen * d_Ny * zlen * d_Nz) + dx * (
-                            xlen * ylen * d_Ny * zlen * d_Nz
-                        )
-                        ind_y = yi * (xlen * d_Nx) + dy * (ylen * xlen * d_Nx)
-                        ind_z = zi + dz * zlen
-                        buff_idx = ind_x + ind_y + ind_z
+                        ind_x = xi * ysum * zsum
+                        ind_y = yi * zsum
+                        ind_z = zi
+                        buff_idx = offset + ind_x + ind_y + ind_z
                         # place tally value in correct position
                         buff[:, buff_idx] = dd_tally[:, tally_idx]
                         tally_idx += 1
@@ -1716,11 +1753,11 @@ def dd_mergemesh(mcdc, data):
             MPI.COMM_WORLD.gather(len(mcdc["mesh_tallies"][0]["filter"]["x"]), root=0)
         )
         if mcdc["mpi_master"]:
-            x_filter = np.zeros((mcdc["mesh_tallies"].shape, sum(sendcounts)))
+            x_filter = np.zeros((mcdc["mesh_tallies"].shape[0], sum(sendcounts)))
         else:
-            x_filter = np.empty((mcdc["mesh_tallies"].shape))  # dummy tally
+            x_filter = np.empty((mcdc["mesh_tallies"].shape[0]))  # dummy tally
         # gather mesh
-        for i in range(mcdc["mesh_tallies"].shape):
+        for i in range(mcdc["mesh_tallies"].shape[0]):
             MPI.COMM_WORLD.Gatherv(
                 sendbuf=mcdc["mesh_tallies"][i]["filter"]["x"],
                 recvbuf=(x_filter[i], sendcounts),
@@ -1740,7 +1777,7 @@ def dd_mergemesh(mcdc, data):
         else:
             y_filter = np.empty((mcdc["mesh_tallies"].shape[0]))  # dummy tally
         # gather mesh
-        for i in range(mcdc["mesh_tallies"].shape):
+        for i in range(mcdc["mesh_tallies"].shape[0]):
             MPI.COMM_WORLD.Gatherv(
                 sendbuf=mcdc["mesh_tallies"][i]["filter"]["y"],
                 recvbuf=(y_filter[i], sendcounts),
@@ -1786,7 +1823,7 @@ def dd_mergemesh(mcdc, data):
         if d_Nz > 1:
             dd_mesh.append(z_final)
         else:
-            dd_mesh.append("z", mcdc["mesh_tallies"][:]["filter"]["z"])
+            dd_mesh.append(mcdc["mesh_tallies"][:]["filter"]["z"])
     return dd_mesh
 
 
@@ -1876,9 +1913,9 @@ def generate_hdf5(data, mcdc):
                 # Set tally shape
                 N_score = tally["N_score"]
                 if mcdc["technique"]["domain_decomposition"]:
-                    Nx *= input_deck.technique["dd_mesh"]["x"].size - 1
-                    Ny *= input_deck.technique["dd_mesh"]["y"].size - 1
-                    Nz *= input_deck.technique["dd_mesh"]["z"].size - 1
+                    Nx = mcdc["technique"]["dd_xsum"]
+                    Ny = mcdc["technique"]["dd_ysum"]
+                    Nz = mcdc["technique"]["dd_zsum"]
                 if not mcdc["technique"]["uq"]:
                     shape = (3, Nmu, N_azi, Ng, Nt, Nx, Ny, Nz, N_score)
                 else:
diff --git a/mcdc/type_.py b/mcdc/type_.py
@@ -742,6 +742,12 @@ def dd_meshtally(input_deck):
         Nx = max(Nx, len(new_x))
         Ny = max(Ny, len(new_y))
         Nz = max(Nz, len(new_z))
+
+        # take the maximum filter length across all ranks so that every
+        # subdomain uses the same tally size (needed for domain decomp on GPUs)
+        Nx = MPI.COMM_WORLD.allreduce(Nx, MPI.MAX)
+        Ny = MPI.COMM_WORLD.allreduce(Ny, MPI.MAX)
+        Nz = MPI.COMM_WORLD.allreduce(Nz, MPI.MAX)
     return Nx, Ny, Nz
 
 
@@ -1108,6 +1114,12 @@ def make_type_technique(input_deck):
     # Mesh
     mesh, Nx, Ny, Nz, Nt, Nmu, N_azi, Ng = make_type_mesh(card["dd_mesh"])
     struct += [("dd_mesh", mesh)]
+    struct += [("dd_xlen", int64)]
+    struct += [("dd_ylen", int64)]
+    struct += [("dd_zlen", int64)]
+    struct += [("dd_xsum", int64)]
+    struct += [("dd_ysum", int64)]
+    struct += [("dd_zsum", int64)]
     struct += [("dd_idx", int64)]
     struct += [("dd_sent", int64)]
     struct += [("dd_work_ratio", int64, (len(card["dd_work_ratio"]),))]
diff --git a/test/regression/cooper_dd/answer.h5 b/test/regression/cooper_dd/answer.h5
diff --git a/test/regression/cooper_dd/input.py b/test/regression/cooper_dd/input.py
diff --git a/test/regression/run.py b/test/regression/run.py
@@ -63,15 +63,6 @@
             + "Note: Skipping %s (require multiple of 16 MPI ranks)" % name
             + Style.RESET_ALL
         )
-    elif name == "cooper_dd" and (
-        not parallel_run or not (mpiexec % 8 == 0 and srun % 8 == 0)
-    ):
-        temp.remove(name)
-        print(
-            Fore.YELLOW
-            + "Note: Skipping %s (require multiple of 8 MPI ranks)" % name
-            + Style.RESET_ALL
-        )
 
 names = temp
 
diff --git a/test/regression/slab_reed_dd_3d/answer.h5 b/test/regression/slab_reed_dd_3d/answer.h5
diff --git a/test/regression/slab_reed_dd_3d/input.py b/test/regression/slab_reed_dd_3d/input.py
@@ -60,10 +60,10 @@
 
 # Isotropic source in the first half of the outermost medium,
 # with 1/100 strength
-mcdc.source(x=[0.0, 4.0], y=[0.0, 4.0], z=[4.0, 6.0], isotropic=True, prob=0.5)
-mcdc.source(x=[4.0, 8.0], y=[0.0, 4.0], z=[4.0, 6.0], isotropic=True, prob=0.5)
-mcdc.source(x=[0.0, 4.0], y=[4.0, 8.0], z=[4.0, 6.0], isotropic=True, prob=0.5)
-mcdc.source(x=[4.0, 8.0], y=[4.0, 8.0], z=[4.0, 6.0], isotropic=True, prob=0.5)
+mcdc.source(x=[0.0, 4.0], y=[0.0, 4.0], z=[5.0, 6.0], isotropic=True, prob=0.5)
+mcdc.source(x=[4.0, 8.0], y=[0.0, 4.0], z=[5.0, 6.0], isotropic=True, prob=0.5)
+mcdc.source(x=[0.0, 4.0], y=[4.0, 8.0], z=[5.0, 6.0], isotropic=True, prob=0.5)
+mcdc.source(x=[4.0, 8.0], y=[4.0, 8.0], z=[5.0, 6.0], isotropic=True, prob=0.5)
 
 # =============================================================================
 # Set tally, setting, and run mcdc