diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml
new file mode 100644
index 0000000000..55f1297879
--- /dev/null
+++ b/ci/cscs-ci-dace-determinism.yml
@@ -0,0 +1,179 @@
+#
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2024, ETH Zurich
+# All rights reserved.
+#
+# Please, refer to the LICENSE file in the root directory.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+# DaCe codegen determinism check — STANDALONE CSCS-CI PIPELINE
+# ============================================================
+#
+# How to trigger
+# --------------
+# Whitelisted users trigger it on any PR by posting the comment:
+#
+#     cscs-ci run dace-determinism
+#
+# What it does
+# ------------
+# Drives gt4py's `test_*_dace_determinism` nox sessions, each of which
+# runs pytest twice with isolated build caches and asserts the
+# DaCe-generated source files are byte-identical between the two runs.
+# A diff means the gt4py + dace toolchain is non-deterministic for that
+# test selection.
+#
+# Logic lives in:
+#   noxfile.py                              (test_*_dace_determinism sessions)
+#   scripts/dace_deterministic_codegen.py   (cache comparison lib + CLI)
+
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+  - local: 'ci/cscs-ci-ext-config.yml'
+
+variables:
+  CUDA_VERSION: '12.6.2'  # interpolated into the CUDA base-image tag (.build_extra_cuda)
+  ROCM_VERSION: '7.1.1'  # interpolated into the ROCm base-image tag (.build_extra_rocm)
+  UBUNTU_VERSION: '24.04'  # interpolated into every BASE_IMAGE below
+  UV_VERSION: '0.11.2'  # handed to the image build via DOCKER_BUILD_ARGS
+
+.test_python_versions: &test_python_versions ['3.10']  # aliased by every PY_VERSION matrix in this file
+
+stages:
+  - build
+  - test
+
+# -- Build stage --
+.build_common:
+  stage: build
+  extends:
+    - .dynamic-image-name
+  variables:
+    BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/ubuntu:${UBUNTU_VERSION}  # default; replaced by .build_extra_cuda / .build_extra_rocm
+    CSCS_REBUILD_POLICY: if-not-exists
+    DOCKERFILE: ci/Dockerfile
+    DOCKER_BUILD_ARGS: '["BASE_IMAGE", "CACHE_DIR", "EXTRA_APTGET", "EXTRA_UV_ENV_VARS", "EXTRA_UV_PIP_ARGS", "EXTRA_UV_SYNC_ARGS", "PY_VERSION", "UV_VERSION", "WORKDIR_PATH" ]'
+    PERSIST_IMAGE_NAME: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}  # same name the test jobs use in `image:`
+    WATCH_FILECHANGES: 'ci/Dockerfile ci/cscs-ci.yml ci/cscs-ci-ext-config.yml uv.lock'  # NOTE(review): omits this file; presumably to share the image with the main pipeline — confirm
+  parallel:
+    matrix:
+      - PY_VERSION: *test_python_versions
+
+.build_extra_cuda:
+  variables:
+    BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}  # replaces the plain Ubuntu base from .build_common
+    EXTRA_UV_SYNC_ARGS: "--extra cuda12"  # passed to the image build (listed in DOCKER_BUILD_ARGS)
+
+.build_extra_rocm:
+  variables:
+    BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete  # replaces the plain Ubuntu base from .build_common
+    EXTRA_UV_SYNC_ARGS: "--extra rocm7"  # passed to the image build (listed in DOCKER_BUILD_ARGS)
+    EXTRA_UV_ENV_VARS: "CUPY_INSTALL_USE_HIP=1 HCC_AMDGPU_TARGET=gfx942 ROCM_HOME=/opt/rocm"  # passed to the image build (listed in DOCKER_BUILD_ARGS)
+    KUBERNETES_MEMORY_REQUEST: "64Gi"  # NOTE(review): presumably sized for the ROCm image build — confirm
+    KUBERNETES_MEMORY_LIMIT: "64Gi"
+
+build_cscs_gh200:
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_common
+    - .build_extra_cuda  # listed last so its BASE_IMAGE overrides the one in .build_common
+  needs: []  # no upstream jobs: the build may start immediately
+
+build_cscs_amd_rocm:
+  extends:
+    - .container-builder-cscs-zen2
+    - .build_common
+    - .build_extra_rocm  # listed last so its BASE_IMAGE overrides the one in .build_common
+  needs: []  # no upstream jobs: the build may start immediately
+
+# -- Test stage --
+.dace_determinism_common:
+  stage: test
+  image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG}  # same name as PERSIST_IMAGE_NAME in .build_common
+  variables:
+    TEST_VARIANTS: 'cpu'  # space-separated SUBVARIANT allowlist checked by the rules below; runner jobs override it
+    USE_MPI: 0 # TODO(havogt): workaround for the libfabric hook injecting incompatible libraries
+    SLURM_JOB_NUM_NODES: 1
+    SLURM_TIMELIMIT: 60
+  allow_failure: true
+  artifacts:
+    when: always  # upload even when the job fails, so diffs/ and report.txt stay available
+    paths:
+      - _dace_deterministic_codegen/
+    expire_in: 1 month
+  parallel:
+    matrix:
+      - SUBPACKAGE: [cartesian]
+        SUBVARIANT: ['cuda12', 'rocm7', 'cpu']
+        PY_VERSION: *test_python_versions
+      - SUBPACKAGE: [next]
+        SUBVARIANT: ['cuda12', 'rocm7', 'cpu']
+        DETAIL: ["nomesh"]
+        PY_VERSION: *test_python_versions
+  rules: &exclude_variants_rules  # drop matrix entries whose SUBVARIANT is not listed in TEST_VARIANTS
+    - if: '$SUBVARIANT == "cpu" && ($TEST_VARIANTS !~ /(^|\s)cpu(\s|$)/)'
+      when: never
+    - if: '$SUBVARIANT == "cuda12" && ($TEST_VARIANTS !~ /(^|\s)cuda12(\s|$)/)'
+      when: never
+    - if: '$SUBVARIANT == "rocm7" && ($TEST_VARIANTS !~ /(^|\s)rocm7(\s|$)/)'
+      when: never
+
+  script:
+    - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py"
+    - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}"
+    - export NOX_SESSION_ARGS="(${SUBVARIANT}${DETAIL:+, $DETAIL})"
+    - |
+      cd "${WORKDIR}/gt4py"
+      nox_rc=0
+      ./noxfile.py -s "test_${SUBPACKAGE}_dace_determinism-${PY_VERSION}${NOX_SESSION_ARGS}" || nox_rc=$?
+      if [ -d _dace_deterministic_codegen ]; then
+        cp -r _dace_deterministic_codegen "${CI_PROJECT_DIR}/"
+      fi
+      exit $nox_rc
+
+dace_determinism_cscs_gh200:
+  extends:
+    - .container-runner-santis-gh200
+    - .dace_determinism_common
+  needs:
+    - build_cscs_gh200
+  variables:
+    TEST_VARIANTS: 'cuda12'  # only the cuda12 matrix entries survive the exclusion rules on this runner
+    SLURM_GPUS_PER_NODE: 1
+    SLURM_PARTITION: 'shared'
+    GT4PY_BUILD_JOBS: 8
+    PYTEST_XDIST_AUTO_NUM_WORKERS: 32  # worker count pytest-xdist uses for `-n auto` (the next session)
+  rules:
+    - *exclude_variants_rules
+    - if: $SUBPACKAGE == 'next' && $DETAIL == 'nomesh'
+      variables:
+        # TODO: investigate why the dace tests seem to hang with multiple build jobs
+        GT4PY_BUILD_JOBS: 1
+    - if: $SUBPACKAGE == 'cartesian'
+      variables:
+        GT4PY_CARTESIAN_DETERMINISM_XDIST: 4  # read by the cartesian nox session as the pytest `-n` value
+    - when: on_success  # fallthrough: run whatever the exclusion rules did not drop
+
+dace_determinism_cscs_amd_rocm:
+  extends:
+    - .tds-container-runner-beverin-mi200
+    - .dace_determinism_common
+  needs:
+    - build_cscs_amd_rocm
+  variables:
+    TEST_VARIANTS: 'cpu rocm7'  # this runner covers both the cpu and the rocm7 matrix entries
+    SLURM_GPUS_PER_NODE: 4
+    GT4PY_BUILD_JOBS: 8
+    PYTEST_XDIST_AUTO_NUM_WORKERS: 32  # worker count pytest-xdist uses for `-n auto` (the next session)
+    SLURM_PARTITION: mi300  # NOTE(review): runner template above is named ...-mi200 — confirm the partition
+    CMAKE_PREFIX_PATH: /opt/rocm
+    CUDA_HOME: /opt/rocm  # NOTE(review): presumably points CUDA-style tooling at the ROCm install — confirm
+    CXX: /opt/rocm/bin/hipcc
+  rules:
+    - *exclude_variants_rules
+    - if: $SUBPACKAGE == 'cartesian'
+      variables:
+        GT4PY_CARTESIAN_DETERMINISM_XDIST: 4  # read by the cartesian nox session as the pytest `-n` value
+    - when: on_success  # fallthrough: run whatever the exclusion rules did not drop
diff --git a/noxfile.py b/noxfile.py
index 8abee16251..7f79613b21 100755
--- a/noxfile.py
+++ b/noxfile.py
@@ -21,6 +21,8 @@
 
 import os
 import pathlib
+import shutil
+import sys
 from collections.abc import Sequence
 from typing import Final, Literal, TypeAlias
 
@@ -345,5 +347,284 @@ def test_typing_exports(session: nox.Session) -> None:
     )
 
 
+# -- DaCe codegen determinism check --
+#
+# The two `test_*_dace_determinism` sessions below each run gt4py's pytest
+# selection twice with an isolated build cache per run, then verify the
+# DaCe-generated source files are byte-identical between the two runs.
+# A diff is a determinism bug somewhere in the gt4py + dace toolchain
+# for that test selection.
+#
+# Comparison logic (snapshot, hash, diff, report) lives in
+# `scripts/dace_deterministic_codegen.py`; the helper below just
+# wires gt4py's existing pytest invocation pattern into a "run
+# twice + compare" loop.
+#
+# Workdir at REPO_ROOT/_dace_deterministic_codegen/ (wiped before
+# each session invocation; run1/ and run2/ are removed again afterwards):
+#   run1/{.gt4py_cache,gt_cache}/...   (first run's cache: next / cartesian)
+#   run2/{.gt4py_cache,gt_cache}/...   (second run's cache: next / cartesian)
+#   diffs/<program>/<file>.diff        (only on mismatch)
+#   report.txt                         (human-readable summary)
+#
+# Only `dace` codegen is checked (`internal` doesn't go through dace),
+# so the codegen parameter is dropped from these sessions' signatures.
+
+DACE_DETERMINISM_WORKDIR_NAME: Final = "_dace_deterministic_codegen"
+
+
+def _run_dace_determinism_check(
+    session: nox.Session,
+    pytest_args: Sequence[str],
+    *,
+    layout: Literal["next", "cartesian"],
+) -> None:
+    """Run pytest twice with an isolated cache per run, then verify the
+    DaCe-generated source files are byte-identical between the two runs.
+
+    The ``layout`` parameter selects which cache mechanism gt4py is using:
+
+    * ``"next"`` — sets ``GT4PY_BUILD_CACHE_DIR=<run_dir>`` so the cache
+      lands at ``<run_dir>/.gt4py_cache/``, where the comparator walks
+      ``<program>_<sha256>/src/{cpu,cuda}/...``.
+    * ``"cartesian"`` — sets ``GT_CACHE_ROOT=<run_dir>`` plus
+      ``GT_CACHE_PYTEST_DIR=<run_dir>/gt_cache`` AND passes
+      ``--keep-gtcache`` to pytest. The conftest in
+      ``tests/cartesian_tests/conftest.py`` unconditionally
+      ``shutil.rmtree``\\ s its cache directory at ``pytest_sessionfinish``
+      unless that CLI flag is present — that gating is independent of the
+      env vars, so we need both knobs. The comparator then walks
+      ``<run_dir>/gt_cache/py<pyver>_<cachever>/<backend>/
+      <test.module.path>/<Class>_<backend>_<id>/`` and compares
+      ``m_*.py`` + ``bindings.{cpp,cu}`` + ``computation.hpp``.
+
+    On mismatch, calls ``session.error(...)`` pointing at diffs/ and report.txt;
+    other comparator errors also end in ``session.error``. Run caches are removed.
+    """
+    workdir = REPO_ROOT / DACE_DETERMINISM_WORKDIR_NAME
+    if workdir.exists():
+        shutil.rmtree(workdir)
+
+    run1_dir = workdir / "run1"
+    run2_dir = workdir / "run2"
+    run1_dir.mkdir(parents=True)
+    run2_dir.mkdir(parents=True)
+
+    # Per-layout knobs:
+    #   - cache_subdir:       the subdirectory of run_dir where the cache lands
+    #   - extra_pytest_args:  additional pytest CLI args (cartesian needs
+    #                         --keep-gtcache; see conftest in
+    #                         tests/cartesian_tests/conftest.py:pytest_sessionfinish)
+    #   - env_for_run:        env-var overrides for the pytest subprocess
+    #
+    # Setting DACE_compiler_build_folder_mode to `development` is REQUIRED for
+    # both layouts. gt4py configures dace to `production` mode by default,
+    # which cleans up the dace build folder after compilation — leaving only
+    # the compiled .so and stripping the codegen sources we need to diff.
+    # Forcing `development` keeps `src/...` (next) and `bindings.{cpp,cu}` +
+    # `computation.hpp` (cartesian) around so the checker has codegen to
+    # compare. (See src/gt4py/next/program_processors/runners/dace/workflow/
+    # common.py for the upstream next-side config this overrides; the
+    # comment there explicitly documents this env var as the escape hatch.)
+    if layout == "cartesian":
+        cache_subdir = "gt_cache"
+        extra_pytest_args: list[str] = ["--keep-gtcache"]
+
+        def env_for_run(run_dir: pathlib.Path) -> dict[str, str]:
+            # gt4py.cartesian and gt4py.next have entirely separate caching
+            # subsystems with separate env vars. cartesian uses
+            # GT_CACHE_ROOT (the `root_path` for cache_settings) and
+            # GT_CACHE_PYTEST_DIR (which the conftest writes into
+            # cache_settings["dir_name"]). Both required to isolate the
+            # cache per run; --keep-gtcache is required for it to survive
+            # pytest_sessionfinish.
+            return {
+                "GT_CACHE_ROOT": str(run_dir),
+                "GT_CACHE_PYTEST_DIR": str(run_dir / cache_subdir),
+                "DACE_compiler_build_folder_mode": "development",
+            }
+    else:
+        cache_subdir = ".gt4py_cache"
+        extra_pytest_args = []
+
+        def env_for_run(run_dir: pathlib.Path) -> dict[str, str]:
+            # gt4py.next appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, so
+            # we pass the parent directory and the cache lands at
+            # .gt4py_cache/ underneath. Setting GT4PY_BUILD_CACHE_LIFETIME
+            # to `persistent` keeps the cache around long enough for the
+            # snapshot pass to read it.
+            return {
+                "GT4PY_BUILD_CACHE_DIR": str(run_dir),
+                "GT4PY_BUILD_CACHE_LIFETIME": "persistent",
+                "DACE_compiler_build_folder_mode": "development",
+            }
+
+    for run_dir in (run1_dir, run2_dir):
+        session.run(
+            *pytest_args,
+            *extra_pytest_args,
+            *session.posargs,
+            env=session.env | env_for_run(run_dir),  # per-run overrides take precedence
+            # The determinism check cares only about whether the DaCe
+            # codegen lands deterministically in the cache; individual
+            # test outcomes are irrelevant. Failed tests (exit code 1)
+            # often reflect runtime issues that have nothing to do with
+            # codegen — e.g., GPU contention from pytest-xdist workers
+            # racing for a single CUDA context on Santis, producing
+            # spurious cupy OutOfMemoryErrors. As long as SOME programs
+            # got cached, the comparator (called below with
+            # tolerate_missing=True) extracts the determinism signal from
+            # whatever overlap is present.
+            success_codes=[0, 1, NO_TESTS_COLLECTED_EXIT_CODE],
+        )
+
+    # Import the comparison library from scripts/. It uses only stdlib,
+    # so it runs fine in nox's runtime python (no session venv needed).
+    if str(REPO_ROOT) not in sys.path:
+        sys.path.insert(0, str(REPO_ROOT))
+    from scripts.dace_deterministic_codegen import (
+        DeterminismError,
+        NoProgramsObservedError,
+        NoSourceFilesObservedError,
+        UnsupportedBackendError,
+        check_determinism,
+    )
+
+    try:
+        check_determinism(
+            run1_dir / cache_subdir,
+            run2_dir / cache_subdir,
+            diffs_dir=workdir / "diffs",
+            report_path=workdir / "report.txt",
+            # Programs cached in only one run are reported but not
+            # counted as determinism failures — see the success_codes
+            # note above for why this is the right policy here.
+            tolerate_missing=True,
+            layout=layout,
+        )
+    except DeterminismError as e:
+        session.error(f"{e}\nSee {workdir / 'report.txt'} and {workdir / 'diffs'}/")
+    except NoProgramsObservedError as e:
+        session.error(f"{e}\nLikely the pytest selection collected no tests.")
+    except NoSourceFilesObservedError as e:
+        session.error(str(e))
+    except UnsupportedBackendError as e:
+        session.error(str(e))
+    finally:
+        # Reclaim disk after the comparison. The two per-run caches are
+        # ~hundreds of MB each in development mode, and dace's own
+        # `.dacecache/` at the repo root (used for SDFGs not routed
+        # through gt4py's build_folder override) is comparably bulky.
+        # We always keep `workdir/diffs/` and `workdir/report.txt` —
+        # those are the artifacts a maintainer actually needs to debug
+        # a determinism failure; the raw caches are reproducible by
+        # rerunning the session.
+        for tbd in (
+            run1_dir,
+            run2_dir,
+            REPO_ROOT / ".dacecache",
+        ):
+            if tbd.exists():
+                session.log(f"cleanup: removing {tbd}")
+                shutil.rmtree(tbd, ignore_errors=True)
+
+
+@nox.session(python=PYTHON_VERSIONS, tags=["cartesian", "dace", "determinism"])
+@nox.parametrize("device", [*DeviceNoxParam.values()])
+def test_cartesian_dace_determinism(
+    session: nox.Session,
+    device: DeviceOption,
+) -> None:
+    """Run the 'gt4py.cartesian' DaCe tests for ``device`` twice and verify
+    the generated code is byte-identical between the two runs."""
+
+    codegen_settings = CodeGenDaceTestSettings["dace"]  # only the dace codegen is checked
+    device_settings = DeviceTestSettings[device]
+    extras = [
+        "standard",
+        "testing",
+        *codegen_settings.get("extras", []),
+        *device_settings.get("extras", []),
+    ]
+    groups = ["test", *codegen_settings.get("groups", []), *device_settings.get("groups", [])]
+
+    install_session_venv(session, extras=extras, groups=groups)
+
+    markers = " and ".join(codegen_settings["markers"] + device_settings["markers"])
+
+    xdist_workers = os.environ.get(  # xdist worker count: env override, else per-device default
+        "GT4PY_CARTESIAN_DETERMINISM_XDIST",
+        "2" if device == "cpu" else "4",
+    )
+
+    _run_dace_determinism_check(
+        session,
+        pytest_args=[
+            "pytest",
+            "--cache-clear",
+            "-sv",
+            "-n",
+            xdist_workers,
+            "--dist",
+            "loadgroup",
+            "-m",
+            f"{markers}",
+            str(pathlib.Path("tests") / "cartesian_tests"),
+        ],
+        layout="cartesian",  # selects the GT_CACHE_* isolation and --keep-gtcache in the helper
+    )
+
+
+@nox.session(python=PYTHON_VERSIONS, tags=["next", "dace", "determinism"])
+@nox.parametrize(
+    "meshlib",
+    [
+        nox.param("nomesh", id="nomesh", tags=["nomesh"]),
+        nox.param("atlas", id="atlas", tags=["atlas"]),
+    ],
+)
+@nox.parametrize("device", [*DeviceNoxParam.values()])
+def test_next_dace_determinism(
+    session: nox.Session,
+    device: DeviceOption,
+    meshlib: Literal["nomesh", "atlas"],
+) -> None:
+    """Run the 'gt4py.next' DaCe tests for ``device``/``meshlib`` twice and
+    verify the generated code is byte-identical between the two runs."""
+
+    codegen_settings = CodeGenDaceTestSettings["dace"]  # only the dace codegen is checked
+    device_settings = DeviceTestSettings[device]
+    extras = [
+        "standard",
+        "testing",
+        *codegen_settings.get("extras", []),
+        *device_settings.get("extras", []),
+    ]
+    groups = ["test", *codegen_settings.get("groups", []), *device_settings.get("groups", [])]
+    mesh_markers: list[str] = []
+
+    match meshlib:
+        case "nomesh":
+            mesh_markers.append("not requires_atlas")
+        case "atlas":
+            mesh_markers.append("requires_atlas")
+            groups.append("frameworks")  # presumably the dependency group providing atlas — confirm
+
+    install_session_venv(session, extras=extras, groups=groups)
+
+    markers = " and ".join(codegen_settings["markers"] + device_settings["markers"] + mesh_markers)
+
+    _run_dace_determinism_check(
+        session,
+        pytest_args=[
+            *"pytest --cache-clear -sv -n auto --dist loadgroup".split(),
+            "-m",
+            f"{markers}",
+            str(pathlib.Path("tests") / "next_tests"),
+        ],
+        layout="next",  # selects the GT4PY_BUILD_CACHE_DIR isolation in the helper
+    )
+
+
 if __name__ == "__main__":
     nox.main()
diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py
new file mode 100644
index 0000000000..1f547959d0
--- /dev/null
+++ b/scripts/dace_deterministic_codegen.py
@@ -0,0 +1,833 @@
+#!/usr/bin/env python3
+# GT4Py - GridTools Framework
+#
+# Copyright (c) 2014-2024, ETH Zurich
+# All rights reserved.
+#
+# Please, refer to the LICENSE file in the root directory.
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""GT4Py / DaCe codegen determinism check.
+
+Library + CLI for verifying that gt4py's DaCe backend produces
+byte-identical generated source files across two runs of the same
+test selection. Used by the ``test_*_dace_determinism`` nox sessions in
+``noxfile.py``; also runnable standalone for ad-hoc comparison of
+two existing caches.
+
+Supports both gt4py cache layouts:
+
+* ``layout="next"`` (default) — the ``gt4py.next`` cache, a flat
+  ``<cache_root>/<name>_<sha256>/src/{cpu,cuda}/...`` structure
+  written via ``GT4PY_BUILD_CACHE_DIR``. Compares everything dace
+  writes as generated source under each program's ``src/``.
+  Unknown top-level backends (anything other than cpu/cuda, with
+  HIP nesting under cuda/hip) raise :class:`UnsupportedBackendError`.
+
+* ``layout="cartesian"`` — the ``gt4py.cartesian`` cache, a deeply
+  nested ``<cache_root>/py<pyver>_<cachever>/<backend>/<test.module
+  .path>/<Class>_<backend>_<id>/...`` structure written via
+  ``GT_CACHE_ROOT`` + ``GT_CACHE_PYTEST_DIR`` (with the conftest's
+  ``--keep-gtcache`` flag needed to survive ``pytest_sessionfinish``).
+  Compares the top-level ``m_*.py`` loader plus ``bindings.{cpp,cu}``
+  and ``computation.hpp`` under ``m_*_pyext_BUILD/``. Skips compiled
+  artifacts (``*.so``, ``*.o``, ``__pycache__/``), gzipped SDFG
+  archives (``*.sdfgz`` — gzip headers carry timestamps), the
+  metadata file (``*.cacheinfo``), and the recursive build mirror
+  directories (``_GT_/``, ``tmp/``) inside ``_pyext_BUILD/``.
+
+As a library
+------------
+
+::
+
+    from scripts.dace_deterministic_codegen import check_determinism
+
+    check_determinism(
+        cache1=Path(".../run1/.gt4py_cache"),
+        cache2=Path(".../run2/.gt4py_cache"),
+        layout="next",                       # or "cartesian"
+        diffs_dir=Path(".../diffs"),         # optional
+        report_path=Path(".../report.txt"),  # optional
+    )
+
+Raises ``DeterminismError`` on mismatch, ``NoProgramsObservedError``
+if both caches are empty, ``NoSourceFilesObservedError`` if programs
+were cached but contain no source files (typically a missing
+``DACE_compiler_build_folder_mode=development``), or
+``UnsupportedBackendError`` if the next-layout codegen produced an
+unfamiliar top-level backend.
+
+As a CLI
+--------
+
+::
+
+    python scripts/dace_deterministic_codegen.py \\
+        --run1 path/to/cache1 \\
+        --run2 path/to/cache2 \\
+        --layout {next,cartesian} \\
+        [--diffs-dir DIR] [--report FILE]
+
+Exit codes:
+
+    0   codegen is deterministic
+    1   codegen differs (see diffs/ and report.txt)
+    2   bad arguments / unsupported backend / no source files captured
+    3   no programs observed in either cache
+"""
+
+from __future__ import annotations
+
+import argparse
+import dataclasses
+import difflib
+import hashlib
+import re
+import sys
+from pathlib import Path
+from typing import Literal
+
+
+#: Cache layout dispatch tag. ``"next"`` is the gt4py.next cache
+#: (flat ``<root>/<name>_<sha256>/src/...`` structure); ``"cartesian"``
+#: is the gt4py.cartesian cache (deeply nested ``<root>/py<pyver>_*/
+#: <backend>/<test.module.path>/<Class>_<backend>_<id>/...``).
+Layout = Literal["next", "cartesian"]
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+#: GT4Py names each cached program folder ``<program_name>_<sha256>``.
+PROGRAM_FOLDER_RE = re.compile(r"^(?P<name>.+)_(?P<digest>[0-9a-f]{64})$")
+
+#: The single directory under each program folder we compare. Only ``src/``,
+#: nothing else — by design. dace also writes ``include/``, ``sample/``,
+#: ``program.sdfg``, source maps under ``map/``, runtime metadata
+#: (``dace.conf``, ``*.csv``), and build artifacts under ``build/``. None of
+#: those are the codegen surface we care about for this check.
+CODEGEN_ROOT = "src"
+
+#: Backends recognized as direct children of ``src/``. dace lays out
+#: codegen as ``src/<target_name>/[<target_type>/]<file>``:
+#:
+#:   - CPU codegen  -> src/cpu/<file>.cpp
+#:   - CUDA codegen -> src/cuda/<file>.cu
+#:   - HIP  codegen -> src/cuda/hip/<file>.cpp   (NOTE: under cuda/)
+#:
+#: HIP is dispatched by dace's CUDA target with ``target_type="hip"``, so
+#: it lands as a *subdirectory* of ``src/cuda/``, not its own top-level
+#: backend folder. That means {cpu, cuda} as a top-level allowlist is
+#: enough to cover all three: cpu via ``cpu/``, cuda + hip both via
+#: ``cuda/`` (with ``rglob`` picking up the nested hip files).
+#:
+#: If a snapshot ever encounters another top-level backend (mpi, sve,
+#: mlir, snitch, ...), the checker fails loudly rather than silently
+#: ignoring — those would need explicit support added here.
+SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"})
+
+
+# Cartesian layout constants ------------------------------------------------
+
+#: Suffix that marks the per-stencil build directory inside a cartesian
+#: program folder, e.g. ``m_TestCopy_dacecpu_4__dacecpu_a8441f26b4_pyext_BUILD/``.
+#: Inside that directory we look at the TOP LEVEL only — its ``_GT_/`` and
+#: ``tmp/`` subdirectories contain recursive copies of the build path that
+#: setuptools spawns when building into an absolute prefix, and those are
+#: build artifacts, not codegen output.
+CARTESIAN_BUILD_DIR_SUFFIX = "_pyext_BUILD"
+
+#: Names of files inside ``m_*_pyext_BUILD/`` whose contents we byte-compare.
+#: ``bindings.{cpp,cu}`` is gt4py.cartesian's pybind11 wrapper around the
+#: dace SDFG; ``computation.hpp`` is dace's generated kernel implementation.
+#: Both reflect the codegen surface directly — a non-deterministic codegen
+#: pass will show up here.
+CARTESIAN_BUILD_SOURCE_NAMES: frozenset[str] = frozenset(
+    {"bindings.cpp", "bindings.cu", "computation.hpp"}
+)
+
+#: Exact directory names inside a program folder that we MUST NOT descend
+#: into when searching for ``m_*.py`` loader stubs (``__pycache__`` is
+#: Python's bytecode cache). Build dirs are excluded separately, by suffix.
+CARTESIAN_SKIP_DIRS: frozenset[str] = frozenset({"__pycache__"})
+
+#: The 10-hex codegen digest gt4py.cartesian embeds in filenames like
+#: ``m_<Class>_<backend>_<id>__<backend>_<DIGEST>.py`` and the build
+#: directory ``m_..._<DIGEST>_pyext_BUILD/``. We replace it with the
+#: literal ``<DIGEST>`` in the snapshot's relpath keys so that
+#: ``bindings.cpp`` from run1 (digest ``a8441f26b4``) and from run2
+#: (digest ``bbbbbbbbbb``) map to the same path, surfacing a real
+#: content diff rather than two "only-in-one-run" entries that look
+#: like flaky test selection.
+#:
+#: The pattern matches ``_`` + 10 lowercase hex + a boundary that is
+#: either a file extension (``.py``, ``.so``, ``.sdfgz``, ``.cacheinfo``)
+#: or the literal ``_pyext_BUILD`` suffix. Anchoring on those endings
+#: avoids false-positive matches inside arbitrary identifiers.
+CARTESIAN_DIGEST_RE = re.compile(r"_(?P<digest>[0-9a-f]{10})(?=(\.|_pyext_BUILD))")
+
+
+def _normalize_cartesian_relpath(relpath: str) -> str:
+    """Return ``relpath`` with each 10-hex cartesian codegen digest swapped
+    for the literal token ``<DIGEST>``. Paths without a digest come back
+    unchanged, and normalizing twice gives the same result as once."""
+    return re.sub(CARTESIAN_DIGEST_RE, "_<DIGEST>", relpath)
+
+
+# ---------------------------------------------------------------------------
+# Exceptions
+# ---------------------------------------------------------------------------
+
+
+class UnsupportedBackendError(RuntimeError):
+    """A next-layout ``src/`` held a backend directory outside :data:`SUPPORTED_BACKENDS`."""
+
+
+class NoProgramsObservedError(RuntimeError):
+    """Neither cache contained any cached program folders."""
+
+
+class NoSourceFilesObservedError(RuntimeError):
+    """Programs were observed in the caches but none contained any source files.
+
+    Almost always means dace's build folder mode was left at ``production``,
+    which strips the generated ``src/`` tree after compilation. Set
+    ``DACE_compiler_build_folder_mode=development`` (lowercase matters)
+    before running the tests so the codegen surface survives into the
+    cache and there's actually something to compare.
+    """
+
+
+class DeterminismError(RuntimeError):
+    """Two snapshots compared non-identical. ``.results`` carries the details."""
+
+    def __init__(self, message: str, results: list[ProgramResult]) -> None:
+        super().__init__(message)  # ``str(exc)`` stays the plain message
+        self.results = results  # kept on the exception so callers can inspect the comparison
+
+
+# ---------------------------------------------------------------------------
+# Snapshot
+# ---------------------------------------------------------------------------
+
+
+@dataclasses.dataclass(frozen=True)
+class FileEntry:
+    #: Logical relative path used as the comparison key. For ``layout="next"``
+    #: this is the on-disk path under the program folder verbatim. For
+    #: ``layout="cartesian"`` the 10-hex codegen digest in filenames is
+    #: replaced with the literal token ``<DIGEST>`` so equivalent files
+    #: across two runs (which carry different digests) still pair up.
+    relpath: str
+    sha256: str  # value returned by ``_sha256()`` for the file on disk
+    #: The actual filename on disk relative to the program folder. Equal to
+    #: ``relpath`` for next; un-normalized (real digest preserved) for
+    #: cartesian. Used by :func:`write_diffs` to read the file back.
+    disk_relpath: str
+
+
+@dataclasses.dataclass
+class ProgramSnapshot:
+    name: str  # program identity (next: the folder name minus its ``_<sha256>`` suffix)
+    folder: Path  # on-disk program folder the files were read from
+    files: dict[str, FileEntry]  # keyed by ``FileEntry.relpath``; empty when nothing was found
+
+
+def snapshot_run(cache_root: Path, *, layout: Layout = "next") -> dict[str, ProgramSnapshot]:
+    """Walk a gt4py build cache and snapshot every program's generated source.
+
+    Dispatches on ``layout`` to either :func:`_snapshot_run_next` (the flat
+    ``<root>/<name>_<sha256>/src/...`` structure of gt4py.next) or
+    :func:`_snapshot_run_cartesian` (the deeply nested
+    ``<root>/py<pyver>_*/<backend>/<test.module.path>/<Class>_<backend>_<id>/...``
+    structure of gt4py.cartesian).
+
+    Returns an empty dict (not an error) when the path doesn't exist or
+    holds no programs in the expected layout; :func:`_diagnose_empty_cache`
+    can explain why. Raises :class:`ValueError` for an unknown ``layout``
+    and propagates :class:`UnsupportedBackendError` from the next walker.
+    """
+    if layout == "next":
+        return _snapshot_run_next(cache_root)
+    if layout == "cartesian":
+        return _snapshot_run_cartesian(cache_root)
+    raise ValueError(f"unknown layout: {layout!r}, expected 'next' or 'cartesian'")
+
+
+def _snapshot_run_next(cache_root: Path) -> dict[str, ProgramSnapshot]:
+    """Snapshot a gt4py.next-layout cache.
+
+    The input directory's name is irrelevant — the function looks for
+    immediate subdirectories matching ``<name>_<64-char-hex-digest>``
+    and reads ``<program>/src/`` recursively under each one (HIP files
+    at ``src/cuda/hip/`` included). The result is keyed by ``<name>``
+    only, i.e. with the digest stripped.
+
+    Raises :class:`UnsupportedBackendError` if any program's ``src/``
+    contains a top-level backend not in :data:`SUPPORTED_BACKENDS`.
+    """
+    if not cache_root.is_dir():
+        return {}
+
+    out: dict[str, ProgramSnapshot] = {}
+    for folder in sorted(p for p in cache_root.iterdir() if p.is_dir()):
+        m = PROGRAM_FOLDER_RE.match(folder.name)
+        if not m:
+            continue
+        name = m.group("name")  # NOTE(review): key drops the digest; same-name folders collide
+
+        src_root = folder / CODEGEN_ROOT
+        if not src_root.is_dir():
+            # No src/ at all — record an empty snapshot. Pairing logic
+            # downstream will flag it if its counterpart has files.
+            out[name] = ProgramSnapshot(name=name, folder=folder, files={})
+            continue
+
+        # Backend check: every direct child of src/ must be a supported
+        # top-level backend. HIP lives nested under cuda/, so cuda is
+        # what matters here, not "hip".
+        for bd in sorted(d for d in src_root.iterdir() if d.is_dir()):
+            if bd.name not in SUPPORTED_BACKENDS:
+                raise UnsupportedBackendError(
+                    f"unsupported dace backend `{bd.name}/` found under "
+                    f"{src_root} — this checker currently supports "
+                    f"{sorted(SUPPORTED_BACKENDS)} as top-level backends "
+                    f"(HIP is handled under `cuda/hip/`). Add explicit "
+                    f"support in scripts/dace_deterministic_codegen.py "
+                    f"before running this selection."
+                )
+
+        # rglob recursively descends — picks up `cuda/hip/<file>` along
+        # with `cpu/<file>` and `cuda/<file>`, no special-casing needed.
+        files: dict[str, FileEntry] = {}
+        for fpath in sorted(src_root.rglob("*")):
+            if not fpath.is_file():
+                continue
+            rel = fpath.relative_to(folder).as_posix()  # program-relative, e.g. "src/cpu/<file>"
+            files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath), disk_relpath=rel)
+        out[name] = ProgramSnapshot(name=name, folder=folder, files=files)  # last folder per name wins
+    return out
+
+
+def _snapshot_run_cartesian(cache_root: Path) -> dict[str, ProgramSnapshot]:
+    """Snapshot a gt4py.cartesian-layout cache.
+
+    Program identity is the **relative path** from ``cache_root`` to the
+    ``<Class>_<backend>_<id>`` folder, e.g.
+    ``py310_1013/dacecpu/cartesian_tests/integration_tests/multi_feature_tests
+    /test_suites/TestCopy_dacecpu_4``. Two runs of the same parametrized
+    test should produce the same relative path, so this works as a stable
+    matching key across runs.
+
+    Files compared per program (everything else is skipped — see module
+    docstring for rationale):
+
+      * ``m_*.py`` at the top of the program folder — the gt4py loader
+        stub. Its filename embeds the 10-hex codegen digest, and the file
+        body references it; either changing is a determinism signal.
+      * Files exactly one level inside ``m_*_pyext_BUILD/`` whose basename
+        is in :data:`CARTESIAN_BUILD_SOURCE_NAMES` (``bindings.cpp``,
+        ``bindings.cu``, ``computation.hpp``). The ``_GT_/`` and ``tmp/``
+        subdirectories of the build dir are recursive build-path mirrors
+        that setuptools creates when targeting an absolute prefix — they
+        contain object files and duplicated outputs, not codegen.
+    """
+    if not cache_root.is_dir():
+        return {}
+
+    # Discover program folders by finding every top-level `m_*.py` loader.
+    # "Top-level" here means: not under __pycache__ and not under any
+    # *_pyext_BUILD directory (which contains its own copies of generated
+    # files we don't want).
+    program_dirs: set[Path] = set()
+    for py in cache_root.rglob("m_*.py"):
+        if not py.is_file():
+            continue
+        parts = py.relative_to(cache_root).parts
+        # Reject if any ancestor is __pycache__ or any *_pyext_BUILD dir
+        if any(
+            p in CARTESIAN_SKIP_DIRS or p.endswith(CARTESIAN_BUILD_DIR_SUFFIX) for p in parts[:-1]
+        ):
+            continue
+        program_dirs.add(py.parent)
+
+    out: dict[str, ProgramSnapshot] = {}
+    for prog_dir in sorted(program_dirs):
+        program_id = prog_dir.relative_to(cache_root).as_posix()
+
+        files: dict[str, FileEntry] = {}
+
+        # Top-level m_*.py file(s) — the gt4py loader stub(s).
+        for f in sorted(prog_dir.glob("m_*.py")):
+            if f.is_file():
+                rel = _normalize_cartesian_relpath(f.name)
+                files[rel] = FileEntry(relpath=rel, sha256=_sha256(f), disk_relpath=f.name)
+
+        # Files DIRECTLY under any m_*_pyext_BUILD/ — iterdir, not rglob,
+        # so we don't descend into _GT_/ or tmp/ which carry build artifacts.
+        for build_dir in sorted(prog_dir.glob(f"m_*{CARTESIAN_BUILD_DIR_SUFFIX}")):
+            if not build_dir.is_dir():
+                continue
+            for f in sorted(build_dir.iterdir()):
+                if f.is_file() and f.name in CARTESIAN_BUILD_SOURCE_NAMES:
+                    disk_rel = f"{build_dir.name}/{f.name}"
+                    rel = _normalize_cartesian_relpath(disk_rel)
+                    files[rel] = FileEntry(relpath=rel, sha256=_sha256(f), disk_relpath=disk_rel)
+
+        out[program_id] = ProgramSnapshot(name=program_id, folder=prog_dir, files=files)
+    return out
+
+
+def _sha256(path: Path) -> str:
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(1 << 16), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def _diagnose_empty_cache(cache_root: Path, *, layout: Layout = "next") -> str:
+    """Return a one-line explanation of why :func:`snapshot_run` found nothing."""
+    if not cache_root.exists():
+        return "path does not exist"
+    if not cache_root.is_dir():
+        return "path exists but is not a directory"
+
+    if layout == "cartesian":
+        # For cartesian we expect <root>/py<pyver>_<cachever>/<backend>/...
+        # If there's no py*_*/, the user likely passed the wrong path
+        # (e.g. the run_dir instead of run_dir/gt_cache).
+        subdirs = sorted(p for p in cache_root.iterdir() if p.is_dir())
+        if not subdirs:
+            return "directory has no subdirectories"
+        pyver_dirs = [p for p in subdirs if re.match(r"^py\d+_\d+$", p.name)]
+        if not pyver_dirs:
+            sample_names = [p.name for p in subdirs[:3]]
+            suffix = f" (and {len(subdirs) - 3} more)" if len(subdirs) > 3 else ""
+            return (
+                f"directory contains {len(subdirs)} subdirectory(ies) but none "
+                f"match cartesian's per-Python-version pattern `py<N>_<N>/` "
+                f"(saw: {sample_names}{suffix}). Did you pass the cache root, "
+                f"or its parent?"
+            )
+        # py*/ exists but no m_*.py loader stubs were found anywhere
+        return (
+            "cartesian cache structure present but contains no `m_*.py` loader "
+            "stubs at any depth — pytest probably collected zero tests, or the "
+            "conftest's `--keep-gtcache` flag wasn't passed and the cache was "
+            "wiped at session teardown."
+        )
+
+    # Fall through to the next-layout diagnostic
+    subdirs = sorted(p for p in cache_root.iterdir() if p.is_dir())
+    if not subdirs:
+        return "directory has no subdirectories"
+    matching = [p for p in subdirs if PROGRAM_FOLDER_RE.match(p.name)]
+    if not matching:
+        sample_names = [p.name for p in subdirs[:3]]
+        suffix = f" (and {len(subdirs) - 3} more)" if len(subdirs) > 3 else ""
+        return (
+            f"directory contains {len(subdirs)} subdirectory(ies) but none "
+            f"match the program-folder pattern `<name>_<64-char-hex-digest>/` "
+            f"(saw: {sample_names}{suffix}). If one of those names is "
+            f"`.gt4py_cache`, you've passed the parent of the cache by mistake."
+        )
+    return "directory has matching subdirectories but they were filtered out"
+
+
+# ---------------------------------------------------------------------------
+# Compare
+# ---------------------------------------------------------------------------
+
+
+@dataclasses.dataclass
+class ProgramResult:
+    name: str
+    match: bool
+    differing_files: list[str]
+    only_in_run1: list[str]
+    only_in_run2: list[str]
+
+    @property
+    def missing_on_one_side(self) -> bool:
+        """True iff the program was cached in only one of the two runs.
+
+        Distinguished from 'differs by content' (where the program is in
+        both runs but at least one file's bytes differ) — the latter is
+        always a determinism failure, the former is often a flaky-test
+        artifact and can be tolerated via ``tolerate_missing``.
+
+        The compare() loop populates only_in_runN exhaustively with the
+        absent side's file list ONLY when the whole program is missing;
+        when both programs are present but one happens to carry an extra
+        file, only_in_runN contains only that extra file. We distinguish
+        the two by requiring exactly one side to be wholly empty (which
+        is what compare() emits for the missing-program case).
+        """
+        return (
+            not self.match
+            and not self.differing_files
+            and (bool(self.only_in_run1) ^ bool(self.only_in_run2))
+        )
+
+
+def compare(
+    snap1: dict[str, ProgramSnapshot],
+    snap2: dict[str, ProgramSnapshot],
+) -> list[ProgramResult]:
+    """Pair programs by name across the two snapshots and diff their files."""
+    results: list[ProgramResult] = []
+    for name in sorted(set(snap1) | set(snap2)):
+        s1 = snap1.get(name)
+        s2 = snap2.get(name)
+
+        if s1 is None or s2 is None:
+            results.append(
+                ProgramResult(
+                    name=name,
+                    match=False,
+                    differing_files=[],
+                    only_in_run1=sorted((s1.files if s1 else {}).keys()),
+                    only_in_run2=sorted((s2.files if s2 else {}).keys()),
+                )
+            )
+            continue
+
+        keys1, keys2 = set(s1.files), set(s2.files)
+        only1 = sorted(keys1 - keys2)
+        only2 = sorted(keys2 - keys1)
+        differing = sorted(
+            rel for rel in keys1 & keys2 if s1.files[rel].sha256 != s2.files[rel].sha256
+        )
+        results.append(
+            ProgramResult(
+                name=name,
+                match=not (differing or only1 or only2),
+                differing_files=differing,
+                only_in_run1=only1,
+                only_in_run2=only2,
+            )
+        )
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Diff + report
+# ---------------------------------------------------------------------------
+
+
+def write_diffs(
+    results: list[ProgramResult],
+    snap1: dict[str, ProgramSnapshot],
+    snap2: dict[str, ProgramSnapshot],
+    diffs_dir: Path,
+) -> None:
+    """Emit a unified diff per differing file under ``diffs_dir/<program>/``."""
+    for r in results:
+        if r.match:
+            continue
+        s1, s2 = snap1.get(r.name), snap2.get(r.name)
+        prog_dir = diffs_dir / r.name
+        for rel in r.differing_files:
+            # `rel` is the canonical (normalized) key; the on-disk filename
+            # may differ from it (cartesian normalizes the 10-hex digest).
+            # Look up the per-side FileEntry to recover the real path.
+            e1 = s1.files.get(rel) if s1 else None
+            e2 = s2.files.get(rel) if s2 else None
+            f1 = (s1.folder / e1.disk_relpath) if (s1 and e1) else None
+            f2 = (s2.folder / e2.disk_relpath) if (s2 and e2) else None
+            if not (f1 and f2 and f1.exists() and f2.exists()):
+                continue
+            try:
+                t1 = f1.read_text().splitlines(keepends=True)
+                t2 = f2.read_text().splitlines(keepends=True)
+            except UnicodeDecodeError:
+                prog_dir.mkdir(parents=True, exist_ok=True)
+                (prog_dir / f"{rel.replace('/', '__')}.binary-differs").write_text(
+                    f"binary content differs:\n  run1: {f1}\n  run2: {f2}\n"
+                )
+                continue
+            udiff = "".join(
+                difflib.unified_diff(
+                    t1,
+                    t2,
+                    fromfile=f"run1/{rel}",
+                    tofile=f"run2/{rel}",
+                    n=3,
+                )
+            )
+            prog_dir.mkdir(parents=True, exist_ok=True)
+            (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff)
+
+
+def render_report(results: list[ProgramResult], *, tolerate_missing: bool = False) -> str:
+    n_total = len(results)
+    n_missing = sum(1 for r in results if r.missing_on_one_side)
+    n_diff_content = sum(
+        1 for r in results if r.differing_files or (not r.match and not r.missing_on_one_side)
+    )
+    n_match = n_total - n_missing - n_diff_content
+
+    header = (
+        f"Programs: {n_total}    matches: {n_match}    "
+        f"differs: {n_diff_content}    only-in-one-run: {n_missing}"
+    )
+    lines = [header, ""]
+    for r in results:
+        if r.match:
+            tag = "MATCH "
+        elif r.missing_on_one_side:
+            tag = "ONE-OF"
+        else:
+            tag = "DIFFER"
+        lines.append(f"  [{tag}] {r.name}")
+        if not r.match:
+            lines.extend(f"           differs: {rel}" for rel in r.differing_files)
+            lines.extend(f"           only in run1: {rel}" for rel in r.only_in_run1)
+            lines.extend(f"           only in run2: {rel}" for rel in r.only_in_run2)
+
+    lines.append("")
+    if n_total == 0:
+        lines.append("RESULT: no programs observed (nothing was cached).")
+    elif n_diff_content == 0 and n_missing == 0:
+        lines.append(f"RESULT: codegen deterministic — {n_match} program(s) match.")
+    elif n_diff_content == 0 and tolerate_missing:
+        lines.append(
+            f"RESULT: codegen deterministic across the {n_match} shared program(s); "
+            f"{n_missing} program(s) cached in only one run (tolerated)."
+        )
+    else:
+        suffix = f" (plus {n_missing} cached in only one run)" if n_missing else ""
+        lines.append(
+            f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff_content}/{n_total} "
+            f"program(s) differ by content{suffix}."
+        )
+    return "\n".join(lines) + "\n"
+
+
+# ---------------------------------------------------------------------------
+# Library entry point
+# ---------------------------------------------------------------------------
+
+
+def check_determinism(
+    cache1: Path,
+    cache2: Path,
+    *,
+    diffs_dir: Path | None = None,
+    report_path: Path | None = None,
+    tolerate_missing: bool = True,
+    layout: Layout = "next",
+) -> list[ProgramResult]:
+    """Compare two gt4py caches; write artifacts; raise on mismatch.
+
+    Snapshots both caches (using the ``layout``-specific walker) and
+    diffs them. Optionally writes per-file unified diffs to
+    ``diffs_dir/<program>/`` and a human-readable summary to
+    ``report_path``.
+
+    Returns the list of :class:`ProgramResult` on a successful match.
+
+    Parameters
+    ----------
+    cache1, cache2
+        Roots of the two caches to compare. For ``layout="next"``, this
+        is the ``.gt4py_cache/`` directory (i.e. the parent of all the
+        ``<name>_<sha256>/`` program folders). For ``layout="cartesian"``,
+        this is the directory pointed to by ``GT_CACHE_PYTEST_DIR`` (i.e.
+        the parent of ``py<pyver>_<cachever>/``).
+    layout
+        Which cache layout to expect. See module docstring for details.
+    tolerate_missing
+        See module docstring. Default ``True`` (lenient).
+    diffs_dir, report_path
+        If set, persist diagnostic artifacts.
+
+    Raises:
+        UnsupportedBackendError:
+            (next layout only) A snapshot contained a backend other than cpu/cuda.
+        NoProgramsObservedError:
+            Both caches were empty — likely zero tests collected or the
+            cache was wiped at teardown.
+        NoSourceFilesObservedError:
+            Programs were cached but no source files survived (usually a
+            missing ``DACE_compiler_build_folder_mode=development``).
+        DeterminismError:
+            One or more programs differed between the two runs. Under
+            ``tolerate_missing=True`` this requires at least one
+            *content* difference.
+    """
+    snap1 = snapshot_run(cache1, layout=layout)
+    snap2 = snapshot_run(cache2, layout=layout)
+    results = compare(snap1, snap2)
+
+    if diffs_dir is not None:
+        write_diffs(results, snap1, snap2, diffs_dir)
+    if report_path is not None:
+        report_path.parent.mkdir(parents=True, exist_ok=True)
+        report_path.write_text(render_report(results, tolerate_missing=tolerate_missing))
+
+    if not results:
+        diag1 = _diagnose_empty_cache(cache1, layout=layout)
+        diag2 = _diagnose_empty_cache(cache2, layout=layout)
+        raise NoProgramsObservedError(
+            "no programs observed in either cache:\n"
+            f"  run1 ({cache1}): {diag1}\n"
+            f"  run2 ({cache2}): {diag2}"
+        )
+
+    # Safety net for the silent-false-positive case where both runs cached
+    # programs but every program's source tree is empty — typically because
+    # dace's build_folder_mode is `production` (the gt4py default). Without
+    # this, the comparator would see {} == {} for every program and report
+    # `deterministic` despite there being nothing to compare.
+    total_files = sum(len(s.files) for s in snap1.values()) + sum(
+        len(s.files) for s in snap2.values()
+    )
+    if total_files == 0:
+        if layout == "cartesian":
+            hint = (
+                "programs were cached but contain none of `m_*.py`, "
+                "`bindings.{cpp,cu}`, or `computation.hpp`"
+            )
+        else:
+            hint = "none of them contain any source files under src/"
+        raise NoSourceFilesObservedError(
+            f"{len(results)} program(s) cached, but {hint}. This almost "
+            f"always means dace's build folder mode is `production` rather "
+            f"than `development`, which strips the codegen output after "
+            f"compilation. Set DACE_compiler_build_folder_mode=development "
+            f"(lowercase matters) before running the tests so the codegen "
+            f"survives into the cache."
+        )
+
+    # Count true differs (program in both runs, content differs) and missing
+    # (program only in one run). Under tolerate_missing, only true differs
+    # raise; under strict mode, both do.
+    n_true_differs = sum(
+        1 for r in results if r.differing_files or (not r.missing_on_one_side and not r.match)
+    )
+    n_missing = sum(1 for r in results if r.missing_on_one_side)
+    n_failed = n_true_differs if tolerate_missing else (n_true_differs + n_missing)
+
+    if n_failed > 0:
+        if tolerate_missing:
+            msg = (
+                f"DaCe codegen is non-deterministic: {n_true_differs}/{len(results)} "
+                f"program(s) differ by content (plus {n_missing} cached in only one "
+                f"run, ignored under tolerate_missing)"
+            )
+        else:
+            msg = f"DaCe codegen is non-deterministic: {n_failed}/{len(results)} program(s) differ"
+        raise DeterminismError(msg, results)
+    return results
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    p = argparse.ArgumentParser(
+        prog="dace_deterministic_codegen",
+        description=(
+            "Compare two gt4py build caches and check whether the DaCe "
+            "generated source files are byte-identical between them."
+        ),
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    p.add_argument(
+        "--run1",
+        required=True,
+        type=Path,
+        metavar="PATH",
+        help=(
+            "Path to the first cache root. For --layout next, this is the "
+            ".gt4py_cache/ directory. For --layout cartesian, this is the "
+            "directory that GT_CACHE_PYTEST_DIR pointed to (the parent of "
+            "py<ver>_<format>/)."
+        ),
+    )
+    p.add_argument(
+        "--run2",
+        required=True,
+        type=Path,
+        metavar="PATH",
+        help="Path to the second cache root. Same conventions as --run1.",
+    )
+    p.add_argument(
+        "--layout",
+        choices=["next", "cartesian"],
+        default="next",
+        help=(
+            "Cache layout. `next` is gt4py.next's flat "
+            "<root>/<name>_<sha256>/src/... structure (default). `cartesian` "
+            "is gt4py.cartesian's deeply nested "
+            "<root>/py<ver>_<format>/<backend>/<test.path>/<Class>_<bk>_<id>/ "
+            "structure."
+        ),
+    )
+    p.add_argument(
+        "--diffs-dir",
+        type=Path,
+        default=None,
+        metavar="PATH",
+        help="If set, write per-file unified diffs to this directory.",
+    )
+    p.add_argument(
+        "--report",
+        type=Path,
+        default=None,
+        metavar="PATH",
+        help="If set, write the human-readable summary report to this file.",
+    )
+    p.add_argument(
+        "--tolerate-missing",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "Whether to skip programs cached in only one of the two runs. "
+            "Default: lenient — only content differences in shared programs "
+            "raise. Pass --no-tolerate-missing for strict mode, where any "
+            "program absent from one cache also counts as a determinism "
+            "failure."
+        ),
+    )
+    return p.parse_args(argv)
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = parse_args(argv)
+
+    try:
+        results = check_determinism(
+            args.run1.expanduser().resolve(),
+            args.run2.expanduser().resolve(),
+            diffs_dir=args.diffs_dir.expanduser().resolve() if args.diffs_dir else None,
+            report_path=args.report.expanduser().resolve() if args.report else None,
+            tolerate_missing=args.tolerate_missing,
+            layout=args.layout,
+        )
+    except UnsupportedBackendError as e:
+        print(f"error: {e}", file=sys.stderr)
+        return 2
+    except NoProgramsObservedError as e:
+        print(f"error: {e}", file=sys.stderr)
+        return 3
+    except NoSourceFilesObservedError as e:
+        print(f"error: {e}", file=sys.stderr)
+        return 2
+    except DeterminismError as e:
+        print(render_report(e.results, tolerate_missing=args.tolerate_missing))
+        print(f"error: {e}", file=sys.stderr)
+        return 1
+
+    print(render_report(results, tolerate_missing=args.tolerate_missing))
+    return 0
+
+
+# Script entry point: run the CLI and use main()'s return value as the
+# process exit status.
+if __name__ == "__main__":
+    sys.exit(main())