diff --git a/src/gt4py/cartesian/backend/dace_backend.py b/src/gt4py/cartesian/backend/dace_backend.py
index b7a9a0cece..5bcb8e9e08 100644
--- a/src/gt4py/cartesian/backend/dace_backend.py
+++ b/src/gt4py/cartesian/backend/dace_backend.py
@@ -920,7 +920,7 @@ def generate_extension(self) -> None:
 
 @register
 class DaceGPUBackend(BaseDaceBackend):
-    """DaCe python backend using gt4py.cartesian.gtc."""
+    """GPU DaCe python backend with an optimal KJI loop layout."""
 
     name = "dace:gpu"
     languages: ClassVar[dict] = {"computation": "cuda", "bindings": ["python"]}
@@ -933,3 +933,20 @@ class DaceGPUBackend(BaseDaceBackend):
 
     def generate_extension(self) -> None:
         return self.make_extension(uses_cuda=True)
+
+
+@register
+class DaceGPUBackendIJK(BaseDaceBackend):
+    """GPU DaCe python backend with an optimal IJK loop layout."""
+
+    name = "dace:gpu_IJK"  # also the key of this backend's layout_registry entry
+    languages: ClassVar[dict] = {"computation": "cuda", "bindings": ["python"]}
+    storage_info: ClassVar[layout.LayoutInfo] = layout_registry.from_name(name)
+    MODULE_GENERATOR_CLASS = DaCeCUDAPyExtModuleGenerator
+    options: ClassVar[GTBackendOptions] = {
+        **BaseGTBackend.GT_BACKEND_OPTS,
+        "device_sync": {"versioning": True, "type": bool},
+    }
+
+    def generate_extension(self) -> None:
+        return self.make_extension(uses_cuda=True)
diff --git a/src/gt4py/cartesian/utils/compiler.py b/src/gt4py/cartesian/utils/compiler.py
index 409a4c7e09..ec8dc20de0 100644
--- a/src/gt4py/cartesian/utils/compiler.py
+++ b/src/gt4py/cartesian/utils/compiler.py
@@ -137,7 +137,7 @@ class GPUCompilerName(enum.Enum):
 class GPUConfiguration:
     name: GPUCompilerName
     """Name identifier of the compiler"""
-    gpu_compile_flags: list[str]
+    gpu_compile_flags: str  # gpu_configuration() joins the flags with single spaces
     """Compile flags for device code"""
     binary_path: str
     """Path to binaries for GPU compiler & tools"""
@@ -181,7 +181,7 @@ def gpu_configuration(optimization_level: str) -> GPUConfiguration:
 
     return GPUConfiguration(
         name=name,
-        gpu_compile_flags=gpu_compile_flags,
+        gpu_compile_flags=" ".join(gpu_compile_flags).strip(),  # flags as one string
         binary_path=os.path.join(cuda_root, "bin"),
         include_path=os.path.join(cuda_root, "include"),
         library_path=library_path,
diff --git a/src/gt4py/storage/cartesian/layout_registry.py b/src/gt4py/storage/cartesian/layout_registry.py
index 4fadc0f7d0..f80c7e3f8d 100644
--- a/src/gt4py/storage/cartesian/layout_registry.py
+++ b/src/gt4py/storage/cartesian/layout_registry.py
@@ -66,6 +66,15 @@ def register(name: str, info: LayoutInfo) -> None:
         is_optimal_layout=layout_checker_factory(layout_maker_factory((2, 1, 0))),
     ),
 )
+register(
+    "dace:gpu_IJK",  # looked up by DaceGPUBackendIJK via from_name(name)
+    LayoutInfo(
+        alignment=32,
+        device="gpu",
+        layout_map=layout_maker_factory((0, 1, 2)),
+        is_optimal_layout=layout_checker_factory(layout_maker_factory((0, 1, 2))),
+    ),
+)
 register(
     "debug",
     LayoutInfo(