diff --git a/pyproject.toml b/pyproject.toml index a10488c7a9..b12335f0e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,7 @@ dependencies = [ 'click>=8.0.0', 'cmake>=3.22', 'cytoolz>=1.0.1', - 'dace>=2.0.0a3', + 'dace==2.3.7', 'deepdiff>=8.1.0', 'devtools>=0.6', 'factory-boy>=3.3.3', @@ -476,6 +476,9 @@ url = 'https://gridtools.github.io/pypi/' # dace = {index = "gridtools"} [tool.uv.sources] atlas4py = {index = "test.pypi"} +dace = [ + {git = "https://github.com/philip-paul-mueller/dace", branch = "phimuell__new-gpu-codegen-dev"} +] # -- versioningit -- [tool.versioningit] diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/common.py b/src/gt4py/next/program_processors/runners/dace/workflow/common.py index 6ef363d924..cabcdc2201 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/common.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/common.py @@ -109,6 +109,9 @@ def set_dace_config( # This setting allows to throw an exception if any implicit Copy-Map slips thorugh. dace.Config.set("compiler.cuda.allow_implicit_memlet_to_map", value=False) + # Use the new GPU code generator + dace.Config.set("compiler.cuda.implementation", value="experimental") + # In some stencils, for example `apply_diffusion_to_w`, the cuda codegen messes # up with the cuda streams, i.e. it allocates N streams but uses N+1. The first # idea was to use just one stream. However, even in that case the generator diff --git a/uv.lock b/uv.lock index 7027f7d4f5..f60aeac53b 100644 --- a/uv.lock +++ b/uv.lock @@ -1206,8 +1206,8 @@ wheels = [ [[package]] name = "dace" -version = "2.0.0a3" -source = { registry = "https://pypi.org/simple" } +version = "2.3.7" +source = { git = "https://github.com/philip-paul-mueller/dace?branch=phimuell__new-gpu-codegen-dev#896ec9295298d60e956f31834957b863b4fd9a7b" } dependencies = [ { name = "astunparse" }, { name = "dill" }, @@ -1223,7 +1223,6 @@ dependencies = [ { name = "sympy" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c3/f8/2401889078017475ce1293af212b76cfdb8a5771ece179851d441ef363f3/dace-2.0.0a3.tar.gz", hash = "sha256:94cbaac4b1f4ef312d24f4151b0905a0e6292a9cfdf9c8b643dabdb2e95b02fa", size = 6005767, upload-time = "2026-05-11T14:49:16.021Z" } [[package]] name = "debugpy" @@ -1858,7 +1857,7 @@ requires-dist = [ { name = "cupy-cuda13x", marker = "extra == 'cuda13'", specifier = ">=14.0" }, { name = "cupy-rocm-7-0", marker = "extra == 'rocm7'", specifier = ">=14.0" }, { name = "cytoolz", specifier = ">=1.0.1" }, - { name = "dace", specifier = ">=2.0.0a3" }, + { name = "dace", git = "https://github.com/philip-paul-mueller/dace?branch=phimuell__new-gpu-codegen-dev" }, { name = "deepdiff", specifier = ">=8.1.0" }, { name = "devtools", specifier = ">=0.6" }, { name = "factory-boy", specifier = ">=3.3.3" },