From 427cac18f29a0d200132866ca50ba1ce5420b94d Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 30 Apr 2026 14:39:40 +0200 Subject: [PATCH 01/28] Infrastructure to test DaCe's codegen (in)deterministic behavior --- ci/dace_deterministic_codegen/README.md | 172 +++++ .../bootstrap_icon4py.py | 182 +++++ .../dace_deterministic_codegen.py | 626 ++++++++++++++++++ 3 files changed, 980 insertions(+) create mode 100644 ci/dace_deterministic_codegen/README.md create mode 100644 ci/dace_deterministic_codegen/bootstrap_icon4py.py create mode 100644 ci/dace_deterministic_codegen/dace_deterministic_codegen.py diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md new file mode 100644 index 0000000000..b47d4ee1cf --- /dev/null +++ b/ci/dace_deterministic_codegen/README.md @@ -0,0 +1,172 @@ +# dace_deterministic_codegen + +Determinism check for gt4py's DaCe backend. Runs an icon4py test selection +through `nox` **twice** with isolated gt4py build caches, then compares +the generated source code under each program's `src/` between the two +runs. Exit 0 = identical (deterministic), exit 1 = different. + +Currently supports the **cpu**, **cuda**, and **HIP** dace backends. +HIP is supported transparently: dace emits HIP code under `src/cuda/hip/` +(target_name="cuda", target_type="hip"), and the harness's recursive +sweep of `src/cuda/` picks it up automatically. If a run emits anything +else under `src/` (mpi, sve, mlir, snitch, …) the harness fails +immediately with a clear message — silently ignoring an unfamiliar +backend would mean reporting "deterministic" without actually checking +the relevant code. + +Valid `--selection` and `--component` values are read from icon4py's +own `noxfile.py` at runtime — no hardcoding here, so the harness +auto-tracks any future changes to icon4py's parametrization. 
+ +Mirrors icon4py's `ci/dace.yml`, with the session name configurable: + +```bash +nox -r -s "<session>-<python>(<selection>, <component>)" -- <posargs> +``` + +Default `<session>` is `test_model` — what `ci/dace.yml` itself uses. + +## A note on paths + +Every `--*` flag that takes a path (`--icon4py`, `--gt4py`, `--dace`) +accepts **both absolute and relative** paths. Relative paths are +resolved against the current working directory — i.e. wherever you +invoke the script from, not where the script lives. The script prints +the resolved absolute path on startup whenever you pass a relative one, +so you can confirm what it landed on. + +## Setup (one-time) + +Done once per machine, before any check is run. + +**1. Activate the gt4py venv** with editable gt4py (and dace, if on a +custom branch): + +```bash +source /path/to/gt4py-venv/bin/activate +uv pip install -e /path/to/gt4py +uv pip install -e /path/to/dace # optional, if custom dace branch +``` + +**2. Bootstrap icon4py into that same venv.** This patches icon4py's +`[tool.uv.sources]` so the editable gt4py / dace are what `uv sync` +installs into nox's session venv: + +```bash +uv pip install tomli_w +python /path/to/gt4py/ci/dace_deterministic_codegen/bootstrap_icon4py.py \ + --icon4py /path/to/icon4py \ + --gt4py /path/to/gt4py \ + --dace /path/to/dace # omit if upstream dace +``` + +**3. Sanity check:** + +```bash +python -c "import gt4py.next; print(gt4py.next.__file__)" +# must print a path inside your gt4py checkout, NOT site-packages/ +``` + +## Run the check + +With the venv from step 1 active: + +```bash +python /path/to/gt4py/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ + --icon4py /path/to/icon4py \ + --selection <selection> \ + --component <component> \ + --posarg=--backend=dace_cpu \ + --posarg=--grid=icon_regional +``` + +The valid values for `--selection` and `--component` are read directly +from icon4py's `noxfile.py` at runtime. 
As of icon4py main, that's: + +- `--selection`: `datatest`, `stencils`, `basic` +- `--component`: `advection`, `diffusion`, `dycore`, `microphysics`, + `muphys`, `common`, `driver`, `standalone_driver`, `testing` + +If icon4py adds or renames these, the harness picks it up automatically; +no update needed here. If you pass an invalid value, the error message +lists the actual valid set extracted from your icon4py checkout. + +## Examples + +**Stencils for muphys, CPU** — mirrors `ci/dace.yml`'s stencil pattern: + +```bash +python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ + --icon4py $ICON4PY \ + --selection stencils \ + --component muphys \ + --posarg=--backend=dace_cpu \ + --posarg=--grid=icon_regional +``` + +**Datatest for dycore, GPU** — mirrors the datatest pattern: + +```bash +python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ + --icon4py $ICON4PY \ + --selection datatest \ + --component dycore \ + --posarg=--backend=dace_gpu \ + --posarg=--level=integration +``` + +**Custom session** — say a future icon4py defines a `test_other` +session with the same parametrization shape: + +```bash +python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ + --icon4py $ICON4PY \ + --session test_other \ + --selection stencils \ + --component muphys \ + --posarg=--backend=dace_cpu +``` + +## Output + +By default, everything lands at `<icon4py>/_dace_deterministic_codegen/`. +Override with `--workdir PATH` (absolute or relative): + +``` +<workdir>/ +├── run1/.gt4py_cache/... run1/test.log +├── run2/.gt4py_cache/... run2/test.log +├── diffs/<program>/<file>.diff (only on mismatch) +└── report.txt (human-readable summary) +``` + +**Re-running wipes the workdir.** Whatever was there before — old logs, +old caches, an old `report.txt` from yesterday — is removed before the +new run starts. No merging, no appending. If you want to keep history +across invocations, copy the directory before re-running. 
+ +## Exit codes + +| Code | Meaning | +|------|---------| +| 0 | Codegen is deterministic. | +| 1 | Codegen differs (see `report.txt` and `diffs/`). | +| 2 | Bad arguments (path doesn't exist, missing noxfile, …). | +| 3 | No programs observed in either run (test selection collected nothing). | +| 4 | A `nox` invocation itself failed (see `run1/test.log` / `run2/test.log`). | + +## Flags + +``` +--icon4py PATH icon4py checkout, abs or rel (required) +--session NAME nox session name (default: test_model) +--selection NAME noxfile selection (required); validated against + icon4py's actual noxfile at runtime +--component NAME leaf subpackage name (required); validated + against icon4py's actual noxfile at runtime +--python X.Y python version for the nox session (default: 3.10) +--workdir PATH where run1/, run2/, diffs/, report.txt land, + abs or rel (default: /_dace_deterministic_codegen/). + Wiped before each run. +--posarg ARG forwarded to pytest. Repeatable. +``` diff --git a/ci/dace_deterministic_codegen/bootstrap_icon4py.py b/ci/dace_deterministic_codegen/bootstrap_icon4py.py new file mode 100644 index 0000000000..4ceb05e6db --- /dev/null +++ b/ci/dace_deterministic_codegen/bootstrap_icon4py.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2024, ETH Zurich +# All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause + +"""Bootstrap icon4py into the *currently activated* venv (the gt4py CI venv). + +Edits icon4py's `pyproject.toml` so that `[tool.uv.sources]` points +`gt4py` (and optionally `dace`) at local-path editable installs, regenerates +the lockfile, and runs `uv sync --active` to install icon4py + its other +dependencies into `$VIRTUAL_ENV`. 
+ +This is what makes the editable gt4py / dace branches survive everything +downstream — including the icon4py noxfile's own `uv sync` call when our +dace_deterministic_codegen harness runs `nox --no-venv`. + +Usage (run from anywhere): + + python ci/dace_deterministic_codegen/bootstrap_icon4py.py \\ + --icon4py /path/to/icon4py \\ + --gt4py /path/to/gt4py-dace_toolchain_deterministic \\ + --dace /path/to/dace # optional + +Idempotent: re-running it is safe; the [tool.uv.sources] entries are +overwritten in place. +""" + +from __future__ import annotations + +import argparse +import os +import shutil +import subprocess +import sys +from pathlib import Path + +try: + import tomllib # Python 3.11+ +except ModuleNotFoundError: + import tomli as tomllib # type: ignore[import-not-found] + +try: + import tomli_w +except ModuleNotFoundError: + print( + "error: this script needs `tomli_w`. install with:\n" + " uv pip install tomli_w # or pip install tomli_w", + file=sys.stderr, + ) + sys.exit(2) + + +def _is_python_project(path: Path) -> bool: + """A directory is installable by uv if it has any of these markers.""" + return any((path / m).is_file() for m in ("pyproject.toml", "setup.py", "setup.cfg")) + + +def patch_sources(pyproject: Path, overrides: dict[str, Path]) -> None: + """Set `[tool.uv.sources][] = {path = "...", editable = true}` for + every (pkg, path) in overrides. Other entries are preserved.""" + with pyproject.open("rb") as f: + doc = tomllib.load(f) + + sources = ( + doc.setdefault("tool", {}) + .setdefault("uv", {}) + .setdefault("sources", {}) + ) + for pkg, path in overrides.items(): + sources[pkg] = {"path": str(path), "editable": True} + + # Make a backup once. Idempotent: don't overwrite an existing backup, + # which would clobber the pristine original after a re-run. 
+ backup = pyproject.with_suffix(pyproject.suffix + ".dace_deterministic_codegen.bak") + if not backup.exists(): + shutil.copy2(pyproject, backup) + + with pyproject.open("wb") as f: + tomli_w.dump(doc, f) + print(f"patched {pyproject} (backup at {backup.name})") + + +def run(cmd: list[str], cwd: Path) -> None: + print(f"+ {' '.join(cmd)} (cwd={cwd})") + rc = subprocess.run(cmd, cwd=str(cwd)).returncode + if rc != 0: + sys.exit(rc) + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) + p.add_argument( + "--icon4py", required=True, type=Path, metavar="PATH", + help=( + "Path to icon4py checkout. Accepts BOTH absolute and relative " + "paths. Relative paths are resolved against the current working " + "directory." + ), + ) + p.add_argument( + "--gt4py", required=True, type=Path, metavar="PATH", + help=( + "Path to gt4py checkout to install editable. Accepts BOTH " + "absolute and relative paths (resolved against cwd)." + ), + ) + p.add_argument( + "--dace", type=Path, default=None, metavar="PATH", + help=( + "Optional path to dace checkout (absolute or relative). If " + "omitted, dace resolves through icon4py's existing source pin." + ), + ) + p.add_argument("--no-lock", action="store_true", + help="Skip `uv lock`. Useful if you already locked.") + p.add_argument("--no-sync", action="store_true", + help="Skip `uv sync`. Useful for CI steps that sync later.") + args = p.parse_args() + + # Resolve every path NOW. The script can be run from any cwd. 
+ icon4py = args.icon4py.expanduser().resolve() + gt4py = args.gt4py.expanduser().resolve() + dace = args.dace.expanduser().resolve() if args.dace else None + + pyproject = icon4py / "pyproject.toml" + if not pyproject.is_file(): + print(f"error: no pyproject.toml at {pyproject}", file=sys.stderr) + return 2 + if not _is_python_project(gt4py): + print( + f"error: --gt4py path is not a python project (no pyproject.toml, " + f"setup.py, or setup.cfg): {gt4py}", + file=sys.stderr, + ) + return 2 + if dace and not _is_python_project(dace): + print( + f"error: --dace path is not a python project (no pyproject.toml, " + f"setup.py, or setup.cfg): {dace}", + file=sys.stderr, + ) + return 2 + + # Loud warning if no venv is active — the whole point of this script + # is to install INTO the gt4py CI venv. Without VIRTUAL_ENV set, uv + # would create a new .venv and we'd get nowhere. + if not os.environ.get("VIRTUAL_ENV"): + print( + "warning: VIRTUAL_ENV is not set. This script is meant to install " + "icon4py into the *currently activated* venv (typically your " + "gt4py CI venv). Activate it first, then re-run.", + file=sys.stderr, + ) + + overrides: dict[str, Path] = {"gt4py": gt4py} + if dace: + overrides["dace"] = dace + patch_sources(pyproject, overrides) + + if not args.no_lock: + # Regenerate uv.lock so it matches the new [tool.uv.sources]. + run(["uv", "lock"], cwd=icon4py) + if not args.no_sync: + # --active = use $VIRTUAL_ENV (the gt4py venv) instead of ./venv/. + run(["uv", "sync", "--active"], cwd=icon4py) + + print() + print("done. 
quick sanity check:") + print(' python -c "import gt4py.next; print(gt4py.next.__file__)"') + print(f' # should print a path inside {gt4py}') + if dace: + print(' python -c "import dace; print(dace.__file__)"') + print(f' # should print a path inside {dace}') + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py new file mode 100644 index 0000000000..402dcce18f --- /dev/null +++ b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py @@ -0,0 +1,626 @@ +#!/usr/bin/env python3 +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2024, ETH Zurich +# All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause + +"""GT4Py / DaCe codegen determinism check. + +Drives an icon4py test selection through nox **twice** with isolated +gt4py build caches, then checks that the generated source files under +each program's `src/` are byte-identical between the two runs. A diff +is a determinism bug. + +Compares only the contents of `/src/` — the actual generated +backend code. Currently supports cpu, cuda, and hip (hip is emitted by +dace under `src/cuda/hip/`). Any other top-level backend under `src/` +(mpi, sve, mlir, snitch, …) causes the harness to fail with a clear +message rather than silently ignore it. + +Valid `--selection` and `--component` values are read from icon4py's +own `noxfile.py` at runtime (no hardcoding here), so the harness +tracks any future changes to icon4py's parametrization automatically. + +Mirrors icon4py's `ci/dace.yml` invocation pattern, with the session +name configurable: + + nox -r -s "-(, )" -- + +Defaults to `=test_model`, which is the icon4py main test +entry point and what `ci/dace.yml` uses. + +Outputs land at `/_dace_deterministic_codegen/`: + run1/.gt4py_cache/... run1/test.log + run2/.gt4py_cache/... 
run2/test.log + diffs//.diff (only on mismatch) + report.txt +""" + +from __future__ import annotations + +import argparse +import ast +import dataclasses +import difflib +import hashlib +import os +import re +import shutil +import subprocess +import sys +from pathlib import Path +from typing import Optional + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +#: GT4Py names each cached program folder `_`. +PROGRAM_FOLDER_RE = re.compile(r"^(?P.+)_(?P[0-9a-f]{64})$") + +#: The single directory under each program folder we compare. Only `src/`, +#: nothing else — by design. dace also writes `include/`, `sample/`, +#: `program.sdfg`, source maps under `map/`, runtime metadata +#: (`dace.conf`, `*.csv`), and build artifacts under `build/`. None of +#: those are the codegen surface we care about for this check. +CODEGEN_ROOT = "src" + +#: Backends recognized as direct children of `src/`. dace lays out +#: codegen as `src//[/]`: +#: +#: - CPU codegen → src/cpu/.cpp +#: - CUDA codegen → src/cuda/.cu +#: - HIP codegen → src/cuda/hip/.cpp (NOTE: under cuda/) +#: +#: HIP is dispatched by dace's CUDA target with `target_type="hip"`, so +#: it lands as a *subdirectory* of `src/cuda/`, not its own top-level +#: backend folder. That means {cpu, cuda} as a top-level allowlist is +#: enough to cover all three: cpu via `cpu/`, cuda + hip both via +#: `cuda/` (with `rglob` picking up the nested hip files). +#: +#: If a snapshot ever encounters another top-level backend (mpi, sve, +#: mlir, snitch, …), the harness fails loudly rather than silently +#: ignoring — those would need explicit support added here. +SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"}) + +#: Where outputs are written, relative to the icon4py checkout. 
+WORKDIR_NAME = "_dace_deterministic_codegen" + + +# --------------------------------------------------------------------------- +# icon4py noxfile introspection +# --------------------------------------------------------------------------- + +class NoxfileIntrospectionError(RuntimeError): + """Raised when we can't extract sessions/components from the noxfile.""" + + +def introspect_icon4py_noxfile( + noxfile: Path, +) -> tuple[frozenset[str], frozenset[str]]: + """Parse icon4py's noxfile.py and extract the valid `selection` and + `component` values. Returns `(selections, components)`. + + Reads the noxfile as AST — does not execute it. Two reasons: + importing would require `nox` in this script's environment, and + noxfile imports often have side effects (icon4py's pulls in a + handful of typing imports plus nox's own session machinery). + + Looks for two type-alias definitions matching icon4py main: + + ModelTestsSubset: TypeAlias = Literal["datatest", "stencils", "basic"] + ModelSubpackagePath: TypeAlias = Literal["atmosphere/advection", ...] + + Components are derived from the *leaf name* of each subpackage path + (`subpackage.split("/")[-1]`), matching the `id=...` icon4py uses + in nox.param. So `atmosphere/subgrid_scale_physics/muphys` becomes + the component `muphys`. + """ + if not noxfile.is_file(): + raise NoxfileIntrospectionError( + f"no noxfile.py at {noxfile} — is --icon4py the icon4py repo root?" + ) + + try: + tree = ast.parse(noxfile.read_text()) + except SyntaxError as e: + raise NoxfileIntrospectionError( + f"could not parse {noxfile} as Python: {e}" + ) from e + + selections = _extract_literal_strings(tree, "ModelTestsSubset") + subpackages = _extract_literal_strings(tree, "ModelSubpackagePath") + + if not selections: + raise NoxfileIntrospectionError( + f"could not find `ModelTestsSubset: TypeAlias = Literal[...]` " + f"in {noxfile}. icon4py's noxfile structure may have changed." 
+ ) + if not subpackages: + raise NoxfileIntrospectionError( + f"could not find `ModelSubpackagePath: TypeAlias = Literal[...]` " + f"in {noxfile}. icon4py's noxfile structure may have changed." + ) + + components = frozenset(p.rsplit("/", 1)[-1] for p in subpackages) + return frozenset(selections), components + + +def _extract_literal_strings(tree: ast.AST, alias_name: str) -> list[str]: + """Find `: TypeAlias = Literal["a", "b", ...]` in the AST + and return the string literals. Returns [] if not found or shape is + unexpected (caller decides whether that's fatal).""" + for node in ast.walk(tree): + if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)): + continue + if node.target.id != alias_name: + continue + # Expect: value = Subscript(value=Name('Literal'), slice=Tuple(elts=[Constant, ...])) + v = node.value + if not isinstance(v, ast.Subscript): + continue + elts: list[ast.expr] = [] + if isinstance(v.slice, ast.Tuple): + elts = list(v.slice.elts) + else: + # Single-arg Literal["foo"] + elts = [v.slice] + out: list[str] = [] + for e in elts: + if isinstance(e, ast.Constant) and isinstance(e.value, str): + out.append(e.value) + return out + return [] + + +# --------------------------------------------------------------------------- +# Snapshot +# --------------------------------------------------------------------------- + +@dataclasses.dataclass(frozen=True) +class FileEntry: + relpath: str + sha256: str + + +@dataclasses.dataclass +class ProgramSnapshot: + name: str + folder: Path + files: dict[str, FileEntry] + + +class UnsupportedBackendError(RuntimeError): + """A program's `src/` contained a top-level backend other than cpu/cuda.""" + + +def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: + """Walk a `.gt4py_cache` and snapshot every program's generated source. + + For each `_/` folder, we read everything under + `/src/` recursively. 
dace lays this out as + `src//[/]`: + + src/cpu/.cpp + src/cuda/.cu (CUDA — target_type="") + src/cuda/hip/.cpp (HIP — target_type="hip", under cuda/) + + Currently supports cpu and cuda as top-level backends. HIP is + handled implicitly because dace nests it inside `src/cuda/hip/`, + not as a separate top-level directory; the recursive walk picks + it up automatically. + + If we encounter any *other* top-level backend under `src/` (mpi, + sve, mlir, snitch, ...), raises UnsupportedBackendError so the + user knows immediately rather than silently skipping. + """ + if not cache_root.exists(): + return {} + + out: dict[str, ProgramSnapshot] = {} + for folder in sorted(p for p in cache_root.iterdir() if p.is_dir()): + m = PROGRAM_FOLDER_RE.match(folder.name) + if not m: + continue + name = m.group("name") + + src_root = folder / CODEGEN_ROOT + if not src_root.is_dir(): + # No src/ at all — record an empty snapshot. Pairing logic + # downstream will flag it if its counterpart in the other run + # has files. + out[name] = ProgramSnapshot(name=name, folder=folder, files={}) + continue + + # Backend check: every direct child of src/ must be a supported + # top-level backend. HIP lives nested under cuda/, so cuda is + # what matters here, not "hip". + backend_dirs = sorted(d for d in src_root.iterdir() if d.is_dir()) + for bd in backend_dirs: + if bd.name not in SUPPORTED_BACKENDS: + raise UnsupportedBackendError( + f"unsupported dace backend `{bd.name}/` found under " + f"{src_root} — this harness currently supports " + f"{sorted(SUPPORTED_BACKENDS)} as top-level backends " + f"(HIP is handled under `cuda/hip/`). Add explicit " + f"support in dace_deterministic_codegen.py before " + f"running this selection." + ) + + # rglob recursively descends — picks up `cuda/hip/` along + # with `cpu/` and `cuda/`, no special-casing needed. 
+ files: dict[str, FileEntry] = {} + for fpath in sorted(src_root.rglob("*")): + if not fpath.is_file(): + continue + rel = fpath.relative_to(folder).as_posix() + files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath)) + out[name] = ProgramSnapshot(name=name, folder=folder, files=files) + return out + + +def _sha256(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1 << 16), b""): + h.update(chunk) + return h.hexdigest() + + +# --------------------------------------------------------------------------- +# Compare +# --------------------------------------------------------------------------- + +@dataclasses.dataclass +class ProgramResult: + name: str + match: bool + differing_files: list[str] + only_in_run1: list[str] + only_in_run2: list[str] + + +def compare( + snap1: dict[str, ProgramSnapshot], + snap2: dict[str, ProgramSnapshot], +) -> list[ProgramResult]: + results: list[ProgramResult] = [] + for name in sorted(set(snap1) | set(snap2)): + s1 = snap1.get(name) + s2 = snap2.get(name) + + if s1 is None or s2 is None: + results.append(ProgramResult( + name=name, match=False, differing_files=[], + only_in_run1=sorted((s1.files if s1 else {}).keys()), + only_in_run2=sorted((s2.files if s2 else {}).keys()), + )) + continue + + keys1, keys2 = set(s1.files), set(s2.files) + only1 = sorted(keys1 - keys2) + only2 = sorted(keys2 - keys1) + differing = sorted( + rel for rel in keys1 & keys2 + if s1.files[rel].sha256 != s2.files[rel].sha256 + ) + results.append(ProgramResult( + name=name, + match=not (differing or only1 or only2), + differing_files=differing, + only_in_run1=only1, + only_in_run2=only2, + )) + return results + + +# --------------------------------------------------------------------------- +# Diff + report +# --------------------------------------------------------------------------- + +def write_diffs( + results: list[ProgramResult], + snap1: dict[str, ProgramSnapshot], + snap2: dict[str, 
ProgramSnapshot], + diffs_dir: Path, +) -> None: + for r in results: + if r.match: + continue + s1, s2 = snap1.get(r.name), snap2.get(r.name) + prog_dir = diffs_dir / r.name + for rel in r.differing_files: + f1 = (s1.folder / rel) if s1 else None + f2 = (s2.folder / rel) if s2 else None + if not (f1 and f2 and f1.exists() and f2.exists()): + continue + try: + t1 = f1.read_text().splitlines(keepends=True) + t2 = f2.read_text().splitlines(keepends=True) + except UnicodeDecodeError: + prog_dir.mkdir(parents=True, exist_ok=True) + (prog_dir / f"{rel.replace('/', '__')}.binary-differs").write_text( + f"binary content differs:\n run1: {f1}\n run2: {f2}\n" + ) + continue + udiff = "".join(difflib.unified_diff( + t1, t2, fromfile=f"run1/{rel}", tofile=f"run2/{rel}", n=3, + )) + prog_dir.mkdir(parents=True, exist_ok=True) + (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff) + + +def render_report(results: list[ProgramResult]) -> str: + n_total = len(results) + n_match = sum(1 for r in results if r.match) + n_diff = n_total - n_match + + lines = [f"Programs: {n_total} matches: {n_match} mismatches: {n_diff}", ""] + for r in results: + lines.append(f" [{'MATCH ' if r.match else 'DIFFER'}] {r.name}") + if not r.match: + for rel in r.differing_files: + lines.append(f" differs: {rel}") + for rel in r.only_in_run1: + lines.append(f" only in run1: {rel}") + for rel in r.only_in_run2: + lines.append(f" only in run2: {rel}") + + lines.append("") + if n_total == 0: + lines.append("RESULT: no programs observed (nothing was cached).") + elif n_diff == 0: + lines.append(f"RESULT: codegen deterministic — {n_match} program(s) match.") + else: + lines.append(f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff}/{n_total} program(s) differ.") + return "\n".join(lines) + "\n" + + +# --------------------------------------------------------------------------- +# Nox runner +# --------------------------------------------------------------------------- + +def run_nox( + icon4py: Path, 
run_dir: Path, log_path: Path, + session: str, selection: str, component: str, python: str, posargs: list[str], +) -> int: + """Run nox once with `GT4PY_BUILD_CACHE_DIR=run_dir`. Returns the exit code. + + Mirrors `ci/dace.yml`: positional session ID, `-r` to reuse the venv + between runs (so run1 and run2 see identical venv state — important + for the determinism check). + + NOTE: gt4py's config appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, + so `run_dir` is the *parent*: gt4py creates + `run_dir/.gt4py_cache/_/` inside it. + """ + session_id = f"{session}-{python}({selection}, {component})" + argv = ["nox", "-r", "-s", session_id] + if posargs: + argv.append("--") + argv.extend(posargs) + + env = dict(os.environ.items()) + env["GT4PY_BUILD_CACHE_DIR"] = str(run_dir) + env["GT4PY_BUILD_CACHE_LIFETIME"] = "persistent" + + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("w") as logf: + logf.write( + f"# cwd: {icon4py}\n" + "# command:\n " + "\n ".join(repr(a) for a in argv) + "\n" + f"# GT4PY_BUILD_CACHE_DIR={run_dir}\n" + f"# (gt4py appends .gt4py_cache; cache lands at {run_dir}/.gt4py_cache/)\n" + "# ---\n" + ) + logf.flush() + proc = subprocess.run(argv, cwd=str(icon4py), env=env, + stdout=logf, stderr=subprocess.STDOUT) + return proc.returncode + + +# --------------------------------------------------------------------------- +# Workdir +# --------------------------------------------------------------------------- + +@dataclasses.dataclass +class Workdir: + """Two parent dirs for gt4py's cache + a place for logs/diffs/report.""" + + root: Path + + @property + def run1_dir(self) -> Path: return self.root / "run1" + @property + def run2_dir(self) -> Path: return self.root / "run2" + @property + def cache1(self) -> Path: return self.run1_dir / ".gt4py_cache" + @property + def cache2(self) -> Path: return self.run2_dir / ".gt4py_cache" + @property + def log1(self) -> Path: return self.run1_dir / "test.log" + @property + def log2(self) -> 
Path: return self.run2_dir / "test.log" + @property + def diffs(self) -> Path: return self.root / "diffs" + @property + def report(self) -> Path: return self.root / "report.txt" + + def prepare(self) -> None: + """Wipe stale state from previous invocations.""" + for d in (self.run1_dir, self.run2_dir, self.diffs): + if d.exists(): + shutil.rmtree(d) + for d in (self.run1_dir, self.run2_dir): + d.mkdir(parents=True, exist_ok=True) + if self.report.exists(): + self.report.unlink() + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: + p = argparse.ArgumentParser( + prog="dace_deterministic_codegen", + description=( + "Run an icon4py test selection twice via nox with isolated gt4py " + "caches and check that the generated source code is byte-identical." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + p.add_argument( + "--icon4py", required=True, type=Path, metavar="PATH", + help=( + "Path to icon4py checkout. Accepts BOTH absolute and relative " + "paths. Relative paths are resolved against the current working " + "directory (i.e. wherever you invoke this script from)." + ), + ) + p.add_argument( + "--session", default="test_model", metavar="NAME", + help=( + "Nox session name. Composed with --python/--selection/--component " + "into the final session ID `-(, " + ")`. Default matches icon4py's main test entry point." + ), + ) + p.add_argument( + "--selection", required=True, metavar="NAME", + help=( + "icon4py noxfile selection (e.g. stencils, datatest, basic). " + "Validated at runtime against icon4py's actual noxfile." + ), + ) + p.add_argument( + "--component", required=True, metavar="NAME", + help=( + "icon4py noxfile subpackage leaf name (e.g. muphys, dycore). " + "Validated at runtime against icon4py's actual noxfile." 
+ ), + ) + p.add_argument( + "--python", default="3.10", metavar="X.Y", + help="Python version for the nox session.", + ) + p.add_argument( + "--workdir", type=Path, default=None, metavar="PATH", + help=( + "Where run1/, run2/, diffs/, and report.txt are written. " + "Accepts absolute or relative paths (resolved against cwd). " + "If the directory already exists from a prior run, its contents " + "are wiped before this run starts — no merging or appending. " + "Default: /_dace_deterministic_codegen/" + ), + ) + p.add_argument( + "--posarg", action="append", default=[], dest="posargs", metavar="ARG", + help=( + "Forwarded to pytest via `nox -- ARG`. Repeatable. " + "Example: --posarg=--backend=dace_cpu --posarg=--grid=icon_regional" + ), + ) + return p.parse_args(argv) + + +def main(argv: Optional[list[str]] = None) -> int: + args = parse_args(argv) + + # Resolve every path to absolute up-front, so the harness can be run + # from any cwd. We print what the path resolved to — `--icon4py ../foo` + # behaves intuitively but it's nice to confirm what it landed on. + icon4py = args.icon4py.expanduser().resolve() + if not args.icon4py.is_absolute(): + print(f"--icon4py resolved to: {icon4py}") + if not icon4py.is_dir(): + print(f"error: --icon4py path is not a directory: {icon4py}", file=sys.stderr) + return 2 + noxfile_path = icon4py / "noxfile.py" + if not noxfile_path.is_file(): + print(f"error: no noxfile.py at {noxfile_path} — is --icon4py " + f"the icon4py repo root?", file=sys.stderr) + return 2 + + # Introspect icon4py's noxfile to discover the legal selection / + # component values. This avoids hardcoding the lists, so the harness + # auto-tracks any future changes to icon4py's noxfile structure. 
+ try: + valid_selections, valid_components = introspect_icon4py_noxfile(noxfile_path) + except NoxfileIntrospectionError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + + if args.selection not in valid_selections: + print( + f"error: --selection {args.selection!r} is not one of " + f"{sorted(valid_selections)} (extracted from {noxfile_path})", + file=sys.stderr, + ) + return 2 + if args.component not in valid_components: + print( + f"error: --component {args.component!r} is not one of " + f"{sorted(valid_components)} (extracted from {noxfile_path})", + file=sys.stderr, + ) + return 2 + + workdir_root = ( + args.workdir.expanduser().resolve() + if args.workdir is not None + else icon4py / WORKDIR_NAME + ) + if args.workdir is not None and not args.workdir.is_absolute(): + print(f"--workdir resolved to: {workdir_root}") + workdir = Workdir(root=workdir_root) + workdir.prepare() # wipes run1/, run2/, diffs/, report.txt — see prepare() + + session_id = f"{args.session}-{args.python}({args.selection}, {args.component})" + + # ----- Run 1 + print(f"[1/2] nox -s '{session_id}' (cache: {workdir.run1_dir})", flush=True) + rc1 = run_nox(icon4py, workdir.run1_dir, workdir.log1, + args.session, args.selection, args.component, args.python, args.posargs) + if rc1 != 0: + print(f"error: run 1 failed (exit {rc1}). See log: {workdir.log1}", + file=sys.stderr) + return 4 + + # ----- Run 2 + print(f"[2/2] nox -s '{session_id}' (cache: {workdir.run2_dir})", flush=True) + rc2 = run_nox(icon4py, workdir.run2_dir, workdir.log2, + args.session, args.selection, args.component, args.python, args.posargs) + if rc2 != 0: + print(f"error: run 2 failed (exit {rc2}). 
See log: {workdir.log2}", + file=sys.stderr) + return 4 + + # ----- Snapshot + compare + report + try: + snap1 = snapshot_run(workdir.cache1) + snap2 = snapshot_run(workdir.cache2) + except UnsupportedBackendError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + results = compare(snap1, snap2) + write_diffs(results, snap1, snap2, workdir.diffs) + report = render_report(results) + workdir.report.write_text(report) + print(report) + print(f"workdir: {workdir.root}") + + if not results: + print(f"error: no programs observed in either run — check the logs:\n" + f" {workdir.log1}\n {workdir.log2}", file=sys.stderr) + return 3 + + return 1 if any(not r.match for r in results) else 0 + + +if __name__ == "__main__": + sys.exit(main()) From 655fa5f30e1da7d56b19a85b64c55654fde43479 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 4 May 2026 08:00:26 +0200 Subject: [PATCH 02/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: CI/CD integration [WIP] --- ci/cscs-ci-dace-determinism.yml | 136 +++++++++++ ci/cscs-ci.yml | 1 + ci/dace_deterministic_codegen/README.md | 38 +++ .../bootstrap_icon4py.py | 44 ++-- .../dace_deterministic_codegen.py | 185 +++++++++----- ci/dace_deterministic_codegen/run_in_ci.sh | 231 ++++++++++++++++++ 6 files changed, 563 insertions(+), 72 deletions(-) create mode 100644 ci/cscs-ci-dace-determinism.yml create mode 100644 ci/dace_deterministic_codegen/run_in_ci.sh diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml new file mode 100644 index 0000000000..b71e209211 --- /dev/null +++ b/ci/cscs-ci-dace-determinism.yml @@ -0,0 +1,136 @@ +# +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2024, ETH Zurich +# All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. 
+# SPDX-License-Identifier: BSD-3-Clause +# + +# DaCe codegen determinism check (icon4py-driven) +# =============================================== +# +# Drives an icon4py test selection through nox twice with isolated +# GT4PY_BUILD_CACHE_DIR per run, then asserts the generated source +# under each <program>/src/ is byte-identical between the two runs. +# A diff means the gt4py + dace toolchain is non-deterministic for +# that selection. +# +# The actual logic lives in: +# ci/dace_deterministic_codegen/dace_deterministic_codegen.py (harness) +# ci/dace_deterministic_codegen/bootstrap_icon4py.py (venv prep) +# ci/dace_deterministic_codegen/run_in_ci.sh (CI driver) +# +# This file just wires those into GitLab CI: when to run, on which +# runners, with which selection × component matrix. +# +# Failure semantics +# ----------------- +# `allow_failure: true` while the determinism work stabilizes — +# surface regressions on the dashboard without gating merges. Drop +# `allow_failure` once we have a sustained green stretch on `main`. +# +# Custom dace branch +# ------------------ +# To test a dace fork/branch (e.g. unmerged determinism work), set +# DACE_REPO and DACE_REF in the variables block below. When unset, +# dace resolves through icon4py's existing source pin (currently the +# GridTools/pypi published wheel). See run_in_ci.sh for the details. + +stages: + - dace-determinism + +variables: + ICON4PY_REPO: 'https://github.com/C2SM/icon4py.git' + ICON4PY_REF: 'main' + + # Custom dace fork/branch under test. Leave both empty to resolve + # dace through icon4py's existing source pin. Set both together to + # install editable dace from a clone of DACE_REPO @ DACE_REF. 
+ # Example for the deterministic codegen branch: + # DACE_REPO: 'https://github.com/GridTools/dace.git' + # DACE_REF: 'dace_toolchain_deterministic' + DACE_REPO: '' + DACE_REF: '' + +# Shared template for all dace-determinism jobs +.dace_determinism_common: + stage: dace-determinism + image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG} + variables: + PY_VERSION: '3.10' + DACE_DETERMINISM_PYTHON: '${PY_VERSION}' + DACE_DETERMINISM_SELECTION: 'stencils' + DACE_DETERMINISM_GRID: 'icon_regional' + SLURM_JOB_NUM_NODES: 1 + SLURM_TIMELIMIT: 30 + # Don't block merges on determinism regressions — yet. Flip to + # `false` once the toolchain is reliably converged. + allow_failure: true + artifacts: + when: always + paths: + - _dace_deterministic_codegen/${DACE_DETERMINISM_BACKEND}/${DACE_DETERMINISM_COMPONENT}/ + expire_in: 1 month + script: + - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py" + - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" + # The gt4py CI Docker image already sets VIRTUAL_ENV and prepends + # ${VIRTUAL_ENV}/bin to PATH (see ci/Dockerfile), so + # `python`, `uv`, `nox` etc. resolve to the venv binaries without + # sourcing activate. Matches the .test_common pattern. + - export GT4PY_PATH="${WORKDIR}/gt4py" + - export ICON4PY_PATH="${WORKDIR}/icon4py" + # Custom dace branch: only set DACE_PATH if DACE_REPO is non-empty. + # run_in_ci.sh treats DACE_PATH (along with DACE_REPO/DACE_REF) as + # the trigger to clone + install dace editable. + - if [ -n "${DACE_REPO}" ]; then export DACE_PATH="${WORKDIR}/dace"; fi + # Per-cell artifact subdirectory (matches artifacts.paths above). 
+ - export DACE_DETERMINISM_ARTIFACT_DIR="${CI_PROJECT_DIR}/_dace_deterministic_codegen/${DACE_DETERMINISM_BACKEND}/${DACE_DETERMINISM_COMPONENT}" + - bash "${WORKDIR}/gt4py/ci/dace_deterministic_codegen/run_in_ci.sh" + +dace_determinism_cscs_gh200_cuda: + extends: + - .container-runner-santis-gh200 + - .dace_determinism_common + needs: + - job: build_cscs_gh200 + parallel: + matrix: + - PY_VERSION: '3.10' + variables: + DACE_DETERMINISM_BACKEND: 'dace_gpu' + SLURM_GPUS_PER_NODE: 1 + SLURM_PARTITION: 'shared' + GT4PY_BUILD_JOBS: 8 + PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + parallel: + matrix: + - DACE_DETERMINISM_COMPONENT: + - dycore + - advection + - diffusion + - muphys + +dace_determinism_cscs_gh200_cpu: + extends: + - .container-runner-santis-gh200 + - .dace_determinism_common + needs: + - job: build_cscs_gh200 + parallel: + matrix: + - PY_VERSION: '3.10' + variables: + DACE_DETERMINISM_BACKEND: 'dace_cpu' + SLURM_PARTITION: 'shared' + GT4PY_BUILD_JOBS: 8 + PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + parallel: + matrix: + - DACE_DETERMINISM_COMPONENT: + - dycore + - advection + - diffusion + - muphys diff --git a/ci/cscs-ci.yml b/ci/cscs-ci.yml index 0f4475ad61..6ec2e1e96c 100644 --- a/ci/cscs-ci.yml +++ b/ci/cscs-ci.yml @@ -11,6 +11,7 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' - local: 'ci/cscs-ci-ext-config.yml' + - local: 'ci/cscs-ci-dace-determinism.yml' # Note: # block-name-with-dashes -> defined in remote cscs-ci ext include diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md index b47d4ee1cf..371ce446ba 100644 --- a/ci/dace_deterministic_codegen/README.md +++ b/ci/dace_deterministic_codegen/README.md @@ -170,3 +170,41 @@ across invocations, copy the directory before re-running. Wiped before each run. --posarg ARG forwarded to pytest. Repeatable. 
``` + +## CI integration + +The harness runs in CSCS CI as a separate `dace-determinism` stage, +defined in `ci/cscs-ci-dace-determinism.yml` and wired into the +pipeline via `ci/cscs-ci.yml`. A small driver script, +`ci/dace_deterministic_codegen/run_in_ci.sh`, encapsulates the +clone + bootstrap + harness invocation so the YAML stays minimal and +the same flow can be reproduced locally. + +### Reproducing a CI run locally + +The driver script reads only env vars, so a green or red CI run can +be reproduced one-to-one by exporting the same variables and invoking +`run_in_ci.sh`: + +```bash +# gt4py CI venv with editable gt4py already there +source /path/to/gt4py-venv/bin/activate + +export GT4PY_PATH=/path/to/gt4py +export ICON4PY_REPO=https://github.com/C2SM/icon4py.git +export ICON4PY_REF=main # or the SHA from the failing run +export ICON4PY_PATH=/tmp/icon4py + +# Optional: custom dace branch +export DACE_REPO=https://github.com/GridTools/dace.git +export DACE_REF=dace_toolchain_deterministic +export DACE_PATH=/tmp/dace + +export DACE_DETERMINISM_SELECTION=stencils +export DACE_DETERMINISM_COMPONENT=muphys +export DACE_DETERMINISM_PYTHON=3.10 +export DACE_DETERMINISM_BACKEND=dace_cpu +export DACE_DETERMINISM_GRID=icon_regional + +bash $GT4PY_PATH/ci/dace_deterministic_codegen/run_in_ci.sh +``` diff --git a/ci/dace_deterministic_codegen/bootstrap_icon4py.py b/ci/dace_deterministic_codegen/bootstrap_icon4py.py index 4ceb05e6db..87a8fd125a 100644 --- a/ci/dace_deterministic_codegen/bootstrap_icon4py.py +++ b/ci/dace_deterministic_codegen/bootstrap_icon4py.py @@ -38,8 +38,9 @@ import sys from pathlib import Path + try: - import tomllib # Python 3.11+ + import tomllib # Python 3.11+ except ModuleNotFoundError: import tomli as tomllib # type: ignore[import-not-found] @@ -65,11 +66,7 @@ def patch_sources(pyproject: Path, overrides: dict[str, Path]) -> None: with pyproject.open("rb") as f: doc = tomllib.load(f) - sources = ( - doc.setdefault("tool", {}) - 
.setdefault("uv", {}) - .setdefault("sources", {}) - ) + sources = doc.setdefault("tool", {}).setdefault("uv", {}).setdefault("sources", {}) for pkg, path in overrides.items(): sources[pkg] = {"path": str(path), "editable": True} @@ -94,7 +91,10 @@ def run(cmd: list[str], cwd: Path) -> None: def main() -> int: p = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) p.add_argument( - "--icon4py", required=True, type=Path, metavar="PATH", + "--icon4py", + required=True, + type=Path, + metavar="PATH", help=( "Path to icon4py checkout. Accepts BOTH absolute and relative " "paths. Relative paths are resolved against the current working " @@ -102,29 +102,39 @@ def main() -> int: ), ) p.add_argument( - "--gt4py", required=True, type=Path, metavar="PATH", + "--gt4py", + required=True, + type=Path, + metavar="PATH", help=( "Path to gt4py checkout to install editable. Accepts BOTH " "absolute and relative paths (resolved against cwd)." ), ) p.add_argument( - "--dace", type=Path, default=None, metavar="PATH", + "--dace", + type=Path, + default=None, + metavar="PATH", help=( "Optional path to dace checkout (absolute or relative). If " "omitted, dace resolves through icon4py's existing source pin." ), ) - p.add_argument("--no-lock", action="store_true", - help="Skip `uv lock`. Useful if you already locked.") - p.add_argument("--no-sync", action="store_true", - help="Skip `uv sync`. Useful for CI steps that sync later.") + p.add_argument( + "--no-lock", action="store_true", help="Skip `uv lock`. Useful if you already locked." + ) + p.add_argument( + "--no-sync", + action="store_true", + help="Skip `uv sync`. Useful for CI steps that sync later.", + ) args = p.parse_args() # Resolve every path NOW. The script can be run from any cwd. 
icon4py = args.icon4py.expanduser().resolve() - gt4py = args.gt4py.expanduser().resolve() - dace = args.dace.expanduser().resolve() if args.dace else None + gt4py = args.gt4py.expanduser().resolve() + dace = args.dace.expanduser().resolve() if args.dace else None pyproject = icon4py / "pyproject.toml" if not pyproject.is_file(): @@ -171,10 +181,10 @@ def main() -> int: print() print("done. quick sanity check:") print(' python -c "import gt4py.next; print(gt4py.next.__file__)"') - print(f' # should print a path inside {gt4py}') + print(f" # should print a path inside {gt4py}") if dace: print(' python -c "import dace; print(dace.__file__)"') - print(f' # should print a path inside {dace}') + print(f" # should print a path inside {dace}") return 0 diff --git a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py index 402dcce18f..67b9923b46 100644 --- a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py +++ b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py @@ -95,6 +95,7 @@ # icon4py noxfile introspection # --------------------------------------------------------------------------- + class NoxfileIntrospectionError(RuntimeError): """Raised when we can't extract sessions/components from the noxfile.""" @@ -128,9 +129,7 @@ def introspect_icon4py_noxfile( try: tree = ast.parse(noxfile.read_text()) except SyntaxError as e: - raise NoxfileIntrospectionError( - f"could not parse {noxfile} as Python: {e}" - ) from e + raise NoxfileIntrospectionError(f"could not parse {noxfile} as Python: {e}") from e selections = _extract_literal_strings(tree, "ModelTestsSubset") subpackages = _extract_literal_strings(tree, "ModelSubpackagePath") @@ -159,7 +158,9 @@ def _extract_literal_strings(tree: ast.AST, alias_name: str) -> list[str]: continue if node.target.id != alias_name: continue - # Expect: value = Subscript(value=Name('Literal'), slice=Tuple(elts=[Constant, ...])) + # Match the AST pattern 
for Literal["a", "b", ...]: + # a Subscript whose value is the Name "Literal" and whose slice is + # either a Tuple of string Constants or a single string Constant. v = node.value if not isinstance(v, ast.Subscript): continue @@ -181,6 +182,7 @@ def _extract_literal_strings(tree: ast.AST, alias_name: str) -> list[str]: # Snapshot # --------------------------------------------------------------------------- + @dataclasses.dataclass(frozen=True) class FileEntry: relpath: str @@ -275,6 +277,7 @@ def _sha256(path: Path) -> str: # Compare # --------------------------------------------------------------------------- + @dataclasses.dataclass class ProgramResult: name: str @@ -294,27 +297,32 @@ def compare( s2 = snap2.get(name) if s1 is None or s2 is None: - results.append(ProgramResult( - name=name, match=False, differing_files=[], - only_in_run1=sorted((s1.files if s1 else {}).keys()), - only_in_run2=sorted((s2.files if s2 else {}).keys()), - )) + results.append( + ProgramResult( + name=name, + match=False, + differing_files=[], + only_in_run1=sorted((s1.files if s1 else {}).keys()), + only_in_run2=sorted((s2.files if s2 else {}).keys()), + ) + ) continue keys1, keys2 = set(s1.files), set(s2.files) only1 = sorted(keys1 - keys2) only2 = sorted(keys2 - keys1) differing = sorted( - rel for rel in keys1 & keys2 - if s1.files[rel].sha256 != s2.files[rel].sha256 + rel for rel in keys1 & keys2 if s1.files[rel].sha256 != s2.files[rel].sha256 + ) + results.append( + ProgramResult( + name=name, + match=not (differing or only1 or only2), + differing_files=differing, + only_in_run1=only1, + only_in_run2=only2, + ) ) - results.append(ProgramResult( - name=name, - match=not (differing or only1 or only2), - differing_files=differing, - only_in_run1=only1, - only_in_run2=only2, - )) return results @@ -322,6 +330,7 @@ def compare( # Diff + report # --------------------------------------------------------------------------- + def write_diffs( results: list[ProgramResult], snap1: dict[str, 
ProgramSnapshot], @@ -347,9 +356,15 @@ def write_diffs( f"binary content differs:\n run1: {f1}\n run2: {f2}\n" ) continue - udiff = "".join(difflib.unified_diff( - t1, t2, fromfile=f"run1/{rel}", tofile=f"run2/{rel}", n=3, - )) + udiff = "".join( + difflib.unified_diff( + t1, + t2, + fromfile=f"run1/{rel}", + tofile=f"run2/{rel}", + n=3, + ) + ) prog_dir.mkdir(parents=True, exist_ok=True) (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff) @@ -384,9 +399,16 @@ def render_report(results: list[ProgramResult]) -> str: # Nox runner # --------------------------------------------------------------------------- + def run_nox( - icon4py: Path, run_dir: Path, log_path: Path, - session: str, selection: str, component: str, python: str, posargs: list[str], + icon4py: Path, + run_dir: Path, + log_path: Path, + session: str, + selection: str, + component: str, + python: str, + posargs: list[str], ) -> int: """Run nox once with `GT4PY_BUILD_CACHE_DIR=run_dir`. Returns the exit code. @@ -418,8 +440,9 @@ def run_nox( "# ---\n" ) logf.flush() - proc = subprocess.run(argv, cwd=str(icon4py), env=env, - stdout=logf, stderr=subprocess.STDOUT) + proc = subprocess.run( + argv, cwd=str(icon4py), env=env, stdout=logf, stderr=subprocess.STDOUT + ) return proc.returncode @@ -427,6 +450,7 @@ def run_nox( # Workdir # --------------------------------------------------------------------------- + @dataclasses.dataclass class Workdir: """Two parent dirs for gt4py's cache + a place for logs/diffs/report.""" @@ -434,21 +458,36 @@ class Workdir: root: Path @property - def run1_dir(self) -> Path: return self.root / "run1" + def run1_dir(self) -> Path: + return self.root / "run1" + @property - def run2_dir(self) -> Path: return self.root / "run2" + def run2_dir(self) -> Path: + return self.root / "run2" + @property - def cache1(self) -> Path: return self.run1_dir / ".gt4py_cache" + def cache1(self) -> Path: + return self.run1_dir / ".gt4py_cache" + @property - def cache2(self) -> Path: 
return self.run2_dir / ".gt4py_cache" + def cache2(self) -> Path: + return self.run2_dir / ".gt4py_cache" + @property - def log1(self) -> Path: return self.run1_dir / "test.log" + def log1(self) -> Path: + return self.run1_dir / "test.log" + @property - def log2(self) -> Path: return self.run2_dir / "test.log" + def log2(self) -> Path: + return self.run2_dir / "test.log" + @property - def diffs(self) -> Path: return self.root / "diffs" + def diffs(self) -> Path: + return self.root / "diffs" + @property - def report(self) -> Path: return self.root / "report.txt" + def report(self) -> Path: + return self.root / "report.txt" def prepare(self) -> None: """Wipe stale state from previous invocations.""" @@ -465,6 +504,7 @@ def prepare(self) -> None: # CLI # --------------------------------------------------------------------------- + def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: p = argparse.ArgumentParser( prog="dace_deterministic_codegen", @@ -475,7 +515,10 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) p.add_argument( - "--icon4py", required=True, type=Path, metavar="PATH", + "--icon4py", + required=True, + type=Path, + metavar="PATH", help=( "Path to icon4py checkout. Accepts BOTH absolute and relative " "paths. Relative paths are resolved against the current working " @@ -483,7 +526,9 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: ), ) p.add_argument( - "--session", default="test_model", metavar="NAME", + "--session", + default="test_model", + metavar="NAME", help=( "Nox session name. Composed with --python/--selection/--component " "into the final session ID `<session>-<python>(<selection>, " @@ -491,25 +536,34 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: ), ) p.add_argument( - "--selection", required=True, metavar="NAME", + "--selection", + required=True, + metavar="NAME", help=( "icon4py noxfile selection (e.g. 
stencils, datatest, basic). " "Validated at runtime against icon4py's actual noxfile." ), ) p.add_argument( - "--component", required=True, metavar="NAME", + "--component", + required=True, + metavar="NAME", help=( "icon4py noxfile subpackage leaf name (e.g. muphys, dycore). " "Validated at runtime against icon4py's actual noxfile." ), ) p.add_argument( - "--python", default="3.10", metavar="X.Y", + "--python", + default="3.10", + metavar="X.Y", help="Python version for the nox session.", ) p.add_argument( - "--workdir", type=Path, default=None, metavar="PATH", + "--workdir", + type=Path, + default=None, + metavar="PATH", help=( "Where run1/, run2/, diffs/, and report.txt are written. " "Accepts absolute or relative paths (resolved against cwd). " @@ -519,7 +573,11 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: ), ) p.add_argument( - "--posarg", action="append", default=[], dest="posargs", metavar="ARG", + "--posarg", + action="append", + default=[], + dest="posargs", + metavar="ARG", help=( "Forwarded to pytest via `nox -- ARG`. Repeatable. 
" "Example: --posarg=--backend=dace_cpu --posarg=--grid=icon_regional" @@ -542,8 +600,10 @@ def main(argv: Optional[list[str]] = None) -> int: return 2 noxfile_path = icon4py / "noxfile.py" if not noxfile_path.is_file(): - print(f"error: no noxfile.py at {noxfile_path} — is --icon4py " - f"the icon4py repo root?", file=sys.stderr) + print( + f"error: no noxfile.py at {noxfile_path} — is --icon4py the icon4py repo root?", + file=sys.stderr, + ) return 2 # Introspect icon4py's noxfile to discover the legal selection / @@ -571,9 +631,7 @@ def main(argv: Optional[list[str]] = None) -> int: return 2 workdir_root = ( - args.workdir.expanduser().resolve() - if args.workdir is not None - else icon4py / WORKDIR_NAME + args.workdir.expanduser().resolve() if args.workdir is not None else icon4py / WORKDIR_NAME ) if args.workdir is not None and not args.workdir.is_absolute(): print(f"--workdir resolved to: {workdir_root}") @@ -584,20 +642,34 @@ def main(argv: Optional[list[str]] = None) -> int: # ----- Run 1 print(f"[1/2] nox -s '{session_id}' (cache: {workdir.run1_dir})", flush=True) - rc1 = run_nox(icon4py, workdir.run1_dir, workdir.log1, - args.session, args.selection, args.component, args.python, args.posargs) + rc1 = run_nox( + icon4py, + workdir.run1_dir, + workdir.log1, + args.session, + args.selection, + args.component, + args.python, + args.posargs, + ) if rc1 != 0: - print(f"error: run 1 failed (exit {rc1}). See log: {workdir.log1}", - file=sys.stderr) + print(f"error: run 1 failed (exit {rc1}). 
See log: {workdir.log1}", file=sys.stderr) return 4 # ----- Run 2 print(f"[2/2] nox -s '{session_id}' (cache: {workdir.run2_dir})", flush=True) - rc2 = run_nox(icon4py, workdir.run2_dir, workdir.log2, - args.session, args.selection, args.component, args.python, args.posargs) + rc2 = run_nox( + icon4py, + workdir.run2_dir, + workdir.log2, + args.session, + args.selection, + args.component, + args.python, + args.posargs, + ) if rc2 != 0: - print(f"error: run 2 failed (exit {rc2}). See log: {workdir.log2}", - file=sys.stderr) + print(f"error: run 2 failed (exit {rc2}). See log: {workdir.log2}", file=sys.stderr) return 4 # ----- Snapshot + compare + report @@ -615,8 +687,11 @@ def main(argv: Optional[list[str]] = None) -> int: print(f"workdir: {workdir.root}") if not results: - print(f"error: no programs observed in either run — check the logs:\n" - f" {workdir.log1}\n {workdir.log2}", file=sys.stderr) + print( + f"error: no programs observed in either run — check the logs:\n" + f" {workdir.log1}\n {workdir.log2}", + file=sys.stderr, + ) return 3 return 1 if any(not r.match for r in results) else 0 diff --git a/ci/dace_deterministic_codegen/run_in_ci.sh b/ci/dace_deterministic_codegen/run_in_ci.sh new file mode 100644 index 0000000000..b04cd1d366 --- /dev/null +++ b/ci/dace_deterministic_codegen/run_in_ci.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2024, ETH Zurich +# All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause + +# Driver for running the dace_deterministic_codegen harness in CI. +# +# Encapsulates the clone + bootstrap + harness invocation so the YAML +# stays minimal and the logic is easy to reproduce locally (just set +# the env vars and run the script). +# +# Required environment variables (CI sets all of these via job vars): +# GT4PY_PATH Existing gt4py checkout (the commit under test). 
+# ICON4PY_REPO Git URL to clone icon4py from. +# ICON4PY_REF Git ref (branch, tag, or SHA) to checkout. +# ICON4PY_PATH Where to clone icon4py to (created if missing). +# DACE_DETERMINISM_SELECTION icon4py noxfile selection: stencils|datatest|basic. +# DACE_DETERMINISM_COMPONENT icon4py subpackage leaf: muphys|dycore|... +# DACE_DETERMINISM_PYTHON Python version for the nox session: 3.10, 3.14, ... +# DACE_DETERMINISM_BACKEND dace_cpu | dace_gpu (passed to pytest as --backend=...) +# DACE_DETERMINISM_GRID Grid name passed to pytest as --grid=... +# +# Optional environment variables: +# DACE_REPO Git URL for a custom dace fork. When set, +# DACE_REF and DACE_PATH must also be set. +# DACE_REF Git ref of the custom dace branch. +# DACE_PATH Where to clone dace to (created if missing). +# DACE_DETERMINISM_WORKDIR Where run1/, run2/, diffs/, report.txt land. +# Default: ${ICON4PY_PATH}/_dace_deterministic_codegen +# DACE_DETERMINISM_ARTIFACT_DIR If set, the workdir is copied here at the end +# (success or failure). Set to a path under +# ${CI_PROJECT_DIR} for GitLab CI artifact upload. +# +# Custom dace branch behaviour: +# - If DACE_REPO is unset, dace lands in the nox session venv via +# icon4py's existing [tool.uv.sources] pin (currently the +# GridTools/pypi published wheel). The parent venv may have its +# own dace from the gt4py CI venv setup, but that's separate — +# nox creates a fresh isolated venv and uv sync's into it from +# icon4py's pyproject.toml. +# - If DACE_REPO is set, the dace repo is cloned at DACE_REF and +# icon4py's [tool.uv.sources] is patched to point at the clone. +# Both the parent venv (Step 2) and the nox session venv (Step 3 +# onwards, via the patched source pin) end up with editable dace +# from the same local path. +# +# Exit codes: passed through from dace_deterministic_codegen.py. +# 0 = deterministic, 1 = differs, 2/3/4 = harness errors. +# See the harness README for the full table. 
+ +set -euo pipefail + +# --- Validate required env vars ------------------------------------------- +required=( + GT4PY_PATH + ICON4PY_REPO + ICON4PY_REF + ICON4PY_PATH + DACE_DETERMINISM_SELECTION + DACE_DETERMINISM_COMPONENT + DACE_DETERMINISM_PYTHON + DACE_DETERMINISM_BACKEND + DACE_DETERMINISM_GRID +) +missing=() +for v in "${required[@]}"; do + if [[ -z "${!v:-}" ]]; then + missing+=("$v") + fi +done +if (( ${#missing[@]} > 0 )); then + echo "error: missing required env vars: ${missing[*]}" >&2 + exit 2 +fi + +# Custom dace branch is all-or-nothing: setting one of the three +# DACE_* vars without the others would leave us in a half-configured +# state where it's unclear whether the local dace is supposed to win +# over icon4py's source pin. +if [[ -n "${DACE_REPO:-}" ]]; then + if [[ -z "${DACE_REF:-}" || -z "${DACE_PATH:-}" ]]; then + echo "error: DACE_REPO is set but DACE_REF and/or DACE_PATH are not." >&2 + echo " To use a custom dace branch, set all three together:" >&2 + echo " DACE_REPO - git URL of the dace fork" >&2 + echo " DACE_REF - branch, tag, or SHA to check out" >&2 + echo " DACE_PATH - where to clone dace (typically \${WORKDIR}/dace)" >&2 + exit 2 + fi +fi + +# Active venv check. The Docker image sets VIRTUAL_ENV; bare local runs +# might not. We don't auto-activate — that's the caller's responsibility — +# but warn if it's missing, since installing into the system Python is +# almost never what's wanted. +if [[ -z "${VIRTUAL_ENV:-}" ]]; then + echo "warning: VIRTUAL_ENV is not set. Activate the gt4py CI venv first" >&2 + echo " (the gt4py CI Docker image sets this automatically; only" >&2 + echo " relevant when running this script outside the CI image)." 
>&2 +fi + +DACE_DETERMINISM_WORKDIR_DEFAULT="${ICON4PY_PATH}/_dace_deterministic_codegen" +DACE_DETERMINISM_WORKDIR="${DACE_DETERMINISM_WORKDIR:-${DACE_DETERMINISM_WORKDIR_DEFAULT}}" + +HARNESS_DIR="${GT4PY_PATH}/ci/dace_deterministic_codegen" +HARNESS="${HARNESS_DIR}/dace_deterministic_codegen.py" +BOOTSTRAP="${HARNESS_DIR}/bootstrap_icon4py.py" + +if [[ ! -f "$HARNESS" ]]; then + echo "error: harness not found at $HARNESS" >&2 + echo " (is GT4PY_PATH=$GT4PY_PATH the gt4py repo root?)" >&2 + exit 2 +fi +if [[ ! -f "$BOOTSTRAP" ]]; then + echo "error: bootstrap not found at $BOOTSTRAP" >&2 + exit 2 +fi + +# --- Helper: shallow-clone a repo at a ref, with SHA fallback ------------ +# Some git versions can't combine --depth 1 with arbitrary commit SHAs in +# `clone -b`. If -b fails, fall back to a full clone + explicit checkout. +clone_at_ref() { + local repo="$1" ref="$2" dest="$3" label="$4" + if [[ -d "${dest}/.git" ]]; then + echo " (${label} already cloned at ${dest}; fetching ${ref})" + git -C "${dest}" fetch --depth 1 origin "${ref}" + git -C "${dest}" checkout FETCH_HEAD + return + fi + if ! git clone --depth 1 -b "${ref}" "${repo}" "${dest}" 2>/dev/null; then + echo " (-b ${ref} failed; ${ref} may be a SHA — doing full clone + checkout)" + git clone "${repo}" "${dest}" + git -C "${dest}" checkout "${ref}" + fi +} + +# --- Step 1: clone icon4py at the pinned ref ----------------------------- +echo "==> [1/4] cloning icon4py @ ${ICON4PY_REF} from ${ICON4PY_REPO}" +clone_at_ref "${ICON4PY_REPO}" "${ICON4PY_REF}" "${ICON4PY_PATH}" "icon4py" + +# --- Step 1b (optional): clone custom dace ------------------------------- +if [[ -n "${DACE_REPO:-}" ]]; then + echo "==> [1b/4] cloning dace @ ${DACE_REF} from ${DACE_REPO}" + clone_at_ref "${DACE_REPO}" "${DACE_REF}" "${DACE_PATH}" "dace" +fi + +# --- Step 2: install editable gt4py (+ dace) + tomli_w into the venv ----- +# The gt4py CI Docker image already has gt4py's deps installed via uv +# sync --no-install-project. 
We add gt4py itself (editable, pointing at +# our checkout), tomli_w (which bootstrap_icon4py.py imports), and +# optionally dace (editable, when a custom branch is being tested). +# --no-deps skips re-resolving heavy transitive deps; the icon4py +# bootstrap below will handle anything missing via uv sync --active. +if [[ -n "${DACE_PATH:-}" ]]; then + echo "==> [2/4] installing editable gt4py + dace + tomli_w into ${VIRTUAL_ENV:-system Python}" +else + echo "==> [2/4] installing editable gt4py + tomli_w into ${VIRTUAL_ENV:-system Python}" +fi +uv pip install --no-deps -e "${GT4PY_PATH}" +if [[ -n "${DACE_PATH:-}" ]]; then + uv pip install --no-deps -e "${DACE_PATH}" +fi +uv pip install tomli_w + +# --- Step 3: bootstrap icon4py into the active venv ---------------------- +# Patches icon4py's [tool.uv.sources] so gt4py (and optionally dace) +# resolve to our local checkouts, then `uv lock` + `uv sync --active`. +# This is what makes the editable installs survive when icon4py's noxfile +# creates its session venv and runs `uv sync` inside it — that uv sync +# sees the patched source pins and installs editable from the same paths. +echo "==> [3/4] bootstrapping icon4py into the active venv" +bootstrap_args=( --icon4py "${ICON4PY_PATH}" --gt4py "${GT4PY_PATH}" ) +if [[ -n "${DACE_PATH:-}" ]]; then + bootstrap_args+=( --dace "${DACE_PATH}" ) +fi +python "${BOOTSTRAP}" "${bootstrap_args[@]}" + +# --- Step 4: run the determinism harness --------------------------------- +echo "==> [4/4] running the determinism harness" +echo " selection=${DACE_DETERMINISM_SELECTION} component=${DACE_DETERMINISM_COMPONENT}" +echo " python=${DACE_DETERMINISM_PYTHON} backend=${DACE_DETERMINISM_BACKEND} grid=${DACE_DETERMINISM_GRID}" +echo " workdir=${DACE_DETERMINISM_WORKDIR}" + +# Run with `set +e` and capture the exit code so the artifact-copy step +# below runs whether the harness reported determinism, non-determinism, +# or a tooling error. 
The harness is the source of truth on the exit +# code; we just defer reacting to it. +set +e +python "${HARNESS}" \ + --icon4py "${ICON4PY_PATH}" \ + --selection "${DACE_DETERMINISM_SELECTION}" \ + --component "${DACE_DETERMINISM_COMPONENT}" \ + --python "${DACE_DETERMINISM_PYTHON}" \ + --workdir "${DACE_DETERMINISM_WORKDIR}" \ + --posarg=--backend="${DACE_DETERMINISM_BACKEND}" \ + --posarg=--grid="${DACE_DETERMINISM_GRID}" +harness_rc=$? +set -e + +# --- Step 5 (optional): publish artifacts -------------------------------- +# If DACE_DETERMINISM_ARTIFACT_DIR is set (typically in CI to a path +# under ${CI_PROJECT_DIR}), copy the workdir there so GitLab can pick +# it up as a build artifact. We do this whether the harness passed or +# failed — both outcomes have a useful report.txt. +if [[ -n "${DACE_DETERMINISM_ARTIFACT_DIR:-}" ]]; then + echo "==> publishing artifacts to ${DACE_DETERMINISM_ARTIFACT_DIR}" + rm -rf "${DACE_DETERMINISM_ARTIFACT_DIR}" + mkdir -p "$(dirname "${DACE_DETERMINISM_ARTIFACT_DIR}")" + if [[ -d "${DACE_DETERMINISM_WORKDIR}" ]]; then + cp -r "${DACE_DETERMINISM_WORKDIR}" "${DACE_DETERMINISM_ARTIFACT_DIR}" + else + # Harness errored before creating the workdir — leave a note so + # the artifact upload still has something for diagnosis from the + # GitLab UI without ssh'ing to the runner. 
+ mkdir -p "${DACE_DETERMINISM_ARTIFACT_DIR}" + cat > "${DACE_DETERMINISM_ARTIFACT_DIR}/MISSING_WORKDIR.txt" < Date: Mon, 4 May 2026 08:05:48 +0200 Subject: [PATCH 03/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: CI/CD integration [WIP] --- ci/cscs-ci-dace-determinism.yml | 2 +- ci/dace_deterministic_codegen/README.md | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index b71e209211..6742fbf82f 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -52,7 +52,7 @@ variables: # DACE_REPO: 'https://github.com/GridTools/dace.git' # DACE_REF: 'dace_toolchain_deterministic' DACE_REPO: '' - DACE_REF: '' + DACE_REF: '' # Shared template for all dace-determinism jobs .dace_determinism_common: diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md index 371ce446ba..9dababdf57 100644 --- a/ci/dace_deterministic_codegen/README.md +++ b/ci/dace_deterministic_codegen/README.md @@ -147,12 +147,12 @@ across invocations, copy the directory before re-running. ## Exit codes -| Code | Meaning | -|------|---------| -| 0 | Codegen is deterministic. | -| 1 | Codegen differs (see `report.txt` and `diffs/`). | -| 2 | Bad arguments (path doesn't exist, missing noxfile, …). | -| 3 | No programs observed in either run (test selection collected nothing). | +| Code | Meaning | +| ---- | ------------------------------------------------------------------------- | +| 0 | Codegen is deterministic. | +| 1 | Codegen differs (see `report.txt` and `diffs/`). | +| 2 | Bad arguments (path doesn't exist, missing noxfile, …). | +| 3 | No programs observed in either run (test selection collected nothing). | | 4 | A `nox` invocation itself failed (see `run1/test.log` / `run2/test.log`). 
| ## Flags From 09842978fab24684ee95880f1607b5667898e6c8 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Mon, 18 May 2026 14:37:38 +0200 Subject: [PATCH 04/28] WIP: wording --- ci/cscs-ci-dace-determinism.yml | 2 +- ci/dace_deterministic_codegen/README.md | 12 +++--- .../bootstrap_icon4py.py | 2 +- .../dace_deterministic_codegen.py | 12 +++--- ci/dace_deterministic_codegen/run_in_ci.sh | 38 +++++++++---------- 5 files changed, 33 insertions(+), 33 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 6742fbf82f..f04b6fa17e 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -18,7 +18,7 @@ # that selection. # # The actual logic lives in: -# ci/dace_deterministic_codegen/dace_deterministic_codegen.py (harness) +# ci/dace_deterministic_codegen/dace_deterministic_codegen.py (checker) # ci/dace_deterministic_codegen/bootstrap_icon4py.py (venv prep) # ci/dace_deterministic_codegen/run_in_ci.sh (CI driver) # diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md index 9dababdf57..c5073cb352 100644 --- a/ci/dace_deterministic_codegen/README.md +++ b/ci/dace_deterministic_codegen/README.md @@ -7,15 +7,15 @@ runs. Exit 0 = identical (deterministic), exit 1 = different. Currently supports the **cpu**, **cuda**, and **HIP** dace backends. HIP is supported transparently: dace emits HIP code under `src/cuda/hip/` -(target_name="cuda", target_type="hip"), and the harness's recursive +(target_name="cuda", target_type="hip"), and the checker's recursive sweep of `src/cuda/` picks it up automatically. If a run emits anything -else under `src/` (mpi, sve, mlir, snitch, …) the harness fails +else under `src/` (mpi, sve, mlir, snitch, …) the checker fails immediately with a clear message — silently ignoring an unfamiliar backend would mean reporting "deterministic" without actually checking the relevant code. 
Valid `--selection` and `--component` values are read from icon4py's -own `noxfile.py` at runtime — no hardcoding here, so the harness +own `noxfile.py` at runtime — no hardcoding here, so the checker auto-tracks any future changes to icon4py's parametrization. Mirrors icon4py's `ci/dace.yml`, with the session name configurable: @@ -87,7 +87,7 @@ from icon4py's `noxfile.py` at runtime. As of icon4py main, that's: - `--component`: `advection`, `diffusion`, `dycore`, `microphysics`, `muphys`, `common`, `driver`, `standalone_driver`, `testing` -If icon4py adds or renames these, the harness picks it up automatically; +If icon4py adds or renames these, the checker picks it up automatically; no update needed here. If you pass an invalid value, the error message lists the actual valid set extracted from your icon4py checkout. @@ -173,11 +173,11 @@ across invocations, copy the directory before re-running. ## CI integration -The harness runs in CSCS CI as a separate `dace-determinism` stage, +The checker runs in CSCS CI as a separate `dace-determinism` stage, defined in `ci/cscs-ci-dace-determinism.yml` and wired into the pipeline via `ci/cscs-ci.yml`. A small driver script, `ci/dace_deterministic_codegen/run_in_ci.sh`, encapsulates the -clone + bootstrap + harness invocation so the YAML stays minimal and +clone + bootstrap + checker invocation so the YAML stays minimal and the same flow can be reproduced locally. ### Reproducing a CI run locally diff --git a/ci/dace_deterministic_codegen/bootstrap_icon4py.py b/ci/dace_deterministic_codegen/bootstrap_icon4py.py index 87a8fd125a..6a743c5c35 100644 --- a/ci/dace_deterministic_codegen/bootstrap_icon4py.py +++ b/ci/dace_deterministic_codegen/bootstrap_icon4py.py @@ -16,7 +16,7 @@ This is what makes the editable gt4py / dace branches survive everything downstream — including the icon4py noxfile's own `uv sync` call when our -dace_deterministic_codegen harness runs `nox --no-venv`. 
+dace_deterministic_codegen checker runs `nox --no-venv`. Usage (run from anywhere): diff --git a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py index 67b9923b46..5fd908b3b2 100644 --- a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py +++ b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py @@ -17,11 +17,11 @@ Compares only the contents of `/src/` — the actual generated backend code. Currently supports cpu, cuda, and hip (hip is emitted by dace under `src/cuda/hip/`). Any other top-level backend under `src/` -(mpi, sve, mlir, snitch, …) causes the harness to fail with a clear +(mpi, sve, mlir, snitch, …) causes the checker to fail with a clear message rather than silently ignore it. Valid `--selection` and `--component` values are read from icon4py's -own `noxfile.py` at runtime (no hardcoding here), so the harness +own `noxfile.py` at runtime (no hardcoding here), so the checker tracks any future changes to icon4py's parametrization automatically. Mirrors icon4py's `ci/dace.yml` invocation pattern, with the session @@ -83,7 +83,7 @@ #: `cuda/` (with `rglob` picking up the nested hip files). #: #: If a snapshot ever encounters another top-level backend (mpi, sve, -#: mlir, snitch, …), the harness fails loudly rather than silently +#: mlir, snitch, …), the checker fails loudly rather than silently #: ignoring — those would need explicit support added here. SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"}) @@ -246,7 +246,7 @@ def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: if bd.name not in SUPPORTED_BACKENDS: raise UnsupportedBackendError( f"unsupported dace backend `{bd.name}/` found under " - f"{src_root} — this harness currently supports " + f"{src_root} — this checker currently supports " f"{sorted(SUPPORTED_BACKENDS)} as top-level backends " f"(HIP is handled under `cuda/hip/`). 
Add explicit " f"support in dace_deterministic_codegen.py before " @@ -589,7 +589,7 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: def main(argv: Optional[list[str]] = None) -> int: args = parse_args(argv) - # Resolve every path to absolute up-front, so the harness can be run + # Resolve every path to absolute up-front, so the checker can be run # from any cwd. We print what the path resolved to — `--icon4py ../foo` # behaves intuitively but it's nice to confirm what it landed on. icon4py = args.icon4py.expanduser().resolve() @@ -607,7 +607,7 @@ def main(argv: Optional[list[str]] = None) -> int: return 2 # Introspect icon4py's noxfile to discover the legal selection / - # component values. This avoids hardcoding the lists, so the harness + # component values. This avoids hardcoding the lists, so the checker # auto-tracks any future changes to icon4py's noxfile structure. try: valid_selections, valid_components = introspect_icon4py_noxfile(noxfile_path) diff --git a/ci/dace_deterministic_codegen/run_in_ci.sh b/ci/dace_deterministic_codegen/run_in_ci.sh index b04cd1d366..6d9e3ec4c0 100644 --- a/ci/dace_deterministic_codegen/run_in_ci.sh +++ b/ci/dace_deterministic_codegen/run_in_ci.sh @@ -7,9 +7,9 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause -# Driver for running the dace_deterministic_codegen harness in CI. +# Driver for running the dace_deterministic_codegen checker in CI. # -# Encapsulates the clone + bootstrap + harness invocation so the YAML +# Encapsulates the clone + bootstrap + checker invocation so the YAML # stays minimal and the logic is easy to reproduce locally (just set # the env vars and run the script). # @@ -49,8 +49,8 @@ # from the same local path. # # Exit codes: passed through from dace_deterministic_codegen.py. -# 0 = deterministic, 1 = differs, 2/3/4 = harness errors. -# See the harness README for the full table. 
+# 0 = deterministic, 1 = differs, 2/3/4 = checker errors. +# See the checker README for the full table. set -euo pipefail @@ -105,12 +105,12 @@ fi DACE_DETERMINISM_WORKDIR_DEFAULT="${ICON4PY_PATH}/_dace_deterministic_codegen" DACE_DETERMINISM_WORKDIR="${DACE_DETERMINISM_WORKDIR:-${DACE_DETERMINISM_WORKDIR_DEFAULT}}" -HARNESS_DIR="${GT4PY_PATH}/ci/dace_deterministic_codegen" -HARNESS="${HARNESS_DIR}/dace_deterministic_codegen.py" -BOOTSTRAP="${HARNESS_DIR}/bootstrap_icon4py.py" +CHECKER_DIR="${GT4PY_PATH}/ci/dace_deterministic_codegen" +CHECKER="${CHECKER_DIR}/dace_deterministic_codegen.py" +BOOTSTRAP="${CHECKER_DIR}/bootstrap_icon4py.py" -if [[ ! -f "$HARNESS" ]]; then - echo "error: harness not found at $HARNESS" >&2 +if [[ ! -f "$CHECKER" ]]; then + echo "error: checker not found at $CHECKER" >&2 echo " (is GT4PY_PATH=$GT4PY_PATH the gt4py repo root?)" >&2 exit 2 fi @@ -178,18 +178,18 @@ if [[ -n "${DACE_PATH:-}" ]]; then fi python "${BOOTSTRAP}" "${bootstrap_args[@]}" -# --- Step 4: run the determinism harness --------------------------------- -echo "==> [4/4] running the determinism harness" +# --- Step 4: run the determinism checker --------------------------------- +echo "==> [4/4] running the determinism checker" echo " selection=${DACE_DETERMINISM_SELECTION} component=${DACE_DETERMINISM_COMPONENT}" echo " python=${DACE_DETERMINISM_PYTHON} backend=${DACE_DETERMINISM_BACKEND} grid=${DACE_DETERMINISM_GRID}" echo " workdir=${DACE_DETERMINISM_WORKDIR}" # Run with `set +e` and capture the exit code so the artifact-copy step -# below runs whether the harness reported determinism, non-determinism, -# or a tooling error. The harness is the source of truth on the exit +# below runs whether the checker reported determinism, non-determinism, +# or a tooling error. The checker is the source of truth on the exit # code; we just defer reacting to it. 
set +e -python "${HARNESS}" \ +python "${CHECKER}" \ --icon4py "${ICON4PY_PATH}" \ --selection "${DACE_DETERMINISM_SELECTION}" \ --component "${DACE_DETERMINISM_COMPONENT}" \ @@ -197,13 +197,13 @@ python "${HARNESS}" \ --workdir "${DACE_DETERMINISM_WORKDIR}" \ --posarg=--backend="${DACE_DETERMINISM_BACKEND}" \ --posarg=--grid="${DACE_DETERMINISM_GRID}" -harness_rc=$? +checker_rc=$? set -e # --- Step 5 (optional): publish artifacts -------------------------------- # If DACE_DETERMINISM_ARTIFACT_DIR is set (typically in CI to a path # under ${CI_PROJECT_DIR}), copy the workdir there so GitLab can pick -# it up as a build artifact. We do this whether the harness passed or +# it up as a build artifact. We do this whether the checker passed or # failed — both outcomes have a useful report.txt. if [[ -n "${DACE_DETERMINISM_ARTIFACT_DIR:-}" ]]; then echo "==> publishing artifacts to ${DACE_DETERMINISM_ARTIFACT_DIR}" @@ -212,12 +212,12 @@ if [[ -n "${DACE_DETERMINISM_ARTIFACT_DIR:-}" ]]; then if [[ -d "${DACE_DETERMINISM_WORKDIR}" ]]; then cp -r "${DACE_DETERMINISM_WORKDIR}" "${DACE_DETERMINISM_ARTIFACT_DIR}" else - # Harness errored before creating the workdir — leave a note so + # Checker errored before creating the workdir — leave a note so # the artifact upload still has something for diagnosis from the # GitLab UI without ssh'ing to the runner. 
mkdir -p "${DACE_DETERMINISM_ARTIFACT_DIR}" cat > "${DACE_DETERMINISM_ARTIFACT_DIR}/MISSING_WORKDIR.txt" < Date: Tue, 19 May 2026 14:14:34 +0200 Subject: [PATCH 05/28] WIP: wording --- ci/dace_deterministic_codegen/README.md | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md index c5073cb352..f71afeb841 100644 --- a/ci/dace_deterministic_codegen/README.md +++ b/ci/dace_deterministic_codegen/README.md @@ -5,14 +5,19 @@ through `nox` **twice** with isolated gt4py build caches, then compares the generated source code under each program's `src/` between the two runs. Exit 0 = identical (deterministic), exit 1 = different. -Currently supports the **cpu**, **cuda**, and **HIP** dace backends. -HIP is supported transparently: dace emits HIP code under `src/cuda/hip/` -(target_name="cuda", target_type="hip"), and the checker's recursive -sweep of `src/cuda/` picks it up automatically. If a run emits anything -else under `src/` (mpi, sve, mlir, snitch, …) the checker fails -immediately with a clear message — silently ignoring an unfamiliar -backend would mean reporting "deterministic" without actually checking -the relevant code. +The check compares **everything** generated under `/src/`, +recursively. On a typical run that includes both the CPU host glue +(`cpu/.cpp`) and the device kernels (`cuda/.cu` for CUDA, +`cuda/hip/.cpp` for HIP — dace nests HIP one level inside `cuda/` +via `target_name="cuda"`, `target_type="hip"`). All of it gets hashed +and diffed: a regression in the host glue is just as much a determinism +failure as one in the device kernel. + +The top-level layout under `src/` is gated to the dace backends we've +verified: `cpu/` and `cuda/` (with HIP picked up automatically inside +`cuda/`). 
If a run emits anything else there the checker fails immediately +with a clear message rather than silently ignoring the unfamiliar directory +and reporting a false "deterministic". Valid `--selection` and `--component` values are read from icon4py's own `noxfile.py` at runtime — no hardcoding here, so the checker From 02cacd1046de30b9e7e31c35147fde5b509cb010 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 19 May 2026 15:50:26 +0200 Subject: [PATCH 06/28] Testing infrastructure in Santis/Beverin: WIP --- ci/cscs-ci-dace-determinism.yml | 6 ++++++ ci/dace_deterministic_codegen/README.md | 9 +++++++++ .../bootstrap_icon4py.py | 7 ++++++- ci/dace_deterministic_codegen/run_in_ci.sh | 16 ++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index f04b6fa17e..b93017aa1b 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -101,6 +101,12 @@ dace_determinism_cscs_gh200_cuda: - PY_VERSION: '3.10' variables: DACE_DETERMINISM_BACKEND: 'dace_gpu' + # Inject icon4py's `cuda12` extra into the nox session venv. Without + # this, cupy isn't installed in the session, `gtx.CUPY_DEVICE_TYPE` + # resolves to None, and the pytest fixture explodes at + # `get_allocator(None)`. Mirrors icon4py's own ci/base.yml:107 for + # the Santis GH200 (aarch64) test template. + DACE_DETERMINISM_NOX_EXTRAS: 'cuda12' SLURM_GPUS_PER_NODE: 1 SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md index f71afeb841..9c3c2976f8 100644 --- a/ci/dace_deterministic_codegen/README.md +++ b/ci/dace_deterministic_codegen/README.md @@ -211,5 +211,14 @@ export DACE_DETERMINISM_PYTHON=3.10 export DACE_DETERMINISM_BACKEND=dace_cpu export DACE_DETERMINISM_GRID=icon_regional +# GPU runs only: inject icon4py's GPU extra into the nox session venv. 
+# Without this, cupy isn't installed in the session venv, so +# gt4py's `CUPY_DEVICE_TYPE` is None and icon4py's `GPU` constant +# (model_backends.py:19) is None, and the pytest fixture fails at +# get_allocator(None). cuda12/13 on Santis GH200, rocm6/7 on AMD. +# The driver script forwards this to icon4py's +# ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS (noxfile.py). +# export DACE_DETERMINISM_NOX_EXTRAS=... + bash $GT4PY_PATH/ci/dace_deterministic_codegen/run_in_ci.sh ``` diff --git a/ci/dace_deterministic_codegen/bootstrap_icon4py.py b/ci/dace_deterministic_codegen/bootstrap_icon4py.py index 6a743c5c35..b4e5f29689 100644 --- a/ci/dace_deterministic_codegen/bootstrap_icon4py.py +++ b/ci/dace_deterministic_codegen/bootstrap_icon4py.py @@ -89,7 +89,12 @@ def run(cmd: list[str], cwd: Path) -> None: def main() -> int: - p = argparse.ArgumentParser(description=__doc__.split("\n\n", 1)[0]) + description = ( + __doc__.split("\n\n", 1)[0] + if __doc__ + else "Bootstrap icon4py into the currently activated venv (the gt4py CI venv)." + ) + p = argparse.ArgumentParser(description=description) p.add_argument( "--icon4py", required=True, diff --git a/ci/dace_deterministic_codegen/run_in_ci.sh b/ci/dace_deterministic_codegen/run_in_ci.sh index 6d9e3ec4c0..22462f4b94 100644 --- a/ci/dace_deterministic_codegen/run_in_ci.sh +++ b/ci/dace_deterministic_codegen/run_in_ci.sh @@ -34,6 +34,17 @@ # DACE_DETERMINISM_ARTIFACT_DIR If set, the workdir is copied here at the end # (success or failure). Set to a path under # ${CI_PROJECT_DIR} for GitLab CI artifact upload. +# DACE_DETERMINISM_NOX_EXTRAS Extra icon4py optional-dependency groups to +# inject into the nox session venv's `uv sync` +# (e.g. "cuda12/13" on Santis GH200, "rocm6/7" on +# AMD). Required for dace_gpu / gtfn_gpu runs: +# icon4py's `model_backends.py` reads +# `gtx.CUPY_DEVICE_TYPE`, which is `None` +# unless cupy is installed in the nox venv, +# which only happens via the cuda12/13/ +# rocm6/7 extra. 
Forwarded verbatim to icon4py's +# `ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS` +# (noxfile.py). Leave unset for dace_cpu. # # Custom dace branch behaviour: # - If DACE_REPO is unset, dace lands in the nox session venv via @@ -184,6 +195,11 @@ echo " selection=${DACE_DETERMINISM_SELECTION} component=${DACE_DETERMINISM_C echo " python=${DACE_DETERMINISM_PYTHON} backend=${DACE_DETERMINISM_BACKEND} grid=${DACE_DETERMINISM_GRID}" echo " workdir=${DACE_DETERMINISM_WORKDIR}" +if [[ -n "${DACE_DETERMINISM_NOX_EXTRAS:-}" ]]; then + echo " nox_extras=${DACE_DETERMINISM_NOX_EXTRAS}" + export ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS="${DACE_DETERMINISM_NOX_EXTRAS}" +fi + # Run with `set +e` and capture the exit code so the artifact-copy step # below runs whether the checker reported determinism, non-determinism, # or a tooling error. The checker is the source of truth on the exit From 801fc345898d7e075853d67db85448efdf06edb2 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 20 May 2026 16:15:03 +0200 Subject: [PATCH 07/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Test ONLY GT4Py tests (next & cartesian) --- ci/cscs-ci-dace-determinism.yml | 110 +-- ci/dace_deterministic_codegen/README.md | 224 ------ .../bootstrap_icon4py.py | 197 ----- .../dace_deterministic_codegen.py | 701 ------------------ ci/dace_deterministic_codegen/run_in_ci.sh | 247 ------ noxfile.py | 204 +++++ scripts/dace_deterministic_codegen.py | 533 +++++++++++++ 7 files changed, 778 insertions(+), 1438 deletions(-) delete mode 100644 ci/dace_deterministic_codegen/README.md delete mode 100644 ci/dace_deterministic_codegen/bootstrap_icon4py.py delete mode 100644 ci/dace_deterministic_codegen/dace_deterministic_codegen.py delete mode 100644 ci/dace_deterministic_codegen/run_in_ci.sh create mode 100644 scripts/dace_deterministic_codegen.py diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index b93017aa1b..aa3b482440 100644 --- 
a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -8,22 +8,30 @@ # SPDX-License-Identifier: BSD-3-Clause # -# DaCe codegen determinism check (icon4py-driven) -# =============================================== +# DaCe codegen determinism check +# ============================== +# +# Drives gt4py's `test_*_determinism` nox sessions, each of which +# runs pytest twice with isolated GT4PY_BUILD_CACHE_DIR and asserts +# the DaCe-generated source files under /src/ are +# byte-identical between the two runs. # -# Drives an icon4py test selection through nox twice with isolated -# GT4PY_BUILD_CACHE_DIR per run, then asserts the generated source -# under each /src/ is byte-identical between the two runs. # A diff means the gt4py + dace toolchain is non-deterministic for -# that selection. +# that test selection. +# +# Logic lives in: +# noxfile.py (test_*_determinism sessions) +# scripts/dace_deterministic_codegen.py (cache comparison lib + CLI) # -# The actual logic lives in: -# ci/dace_deterministic_codegen/dace_deterministic_codegen.py (checker) -# ci/dace_deterministic_codegen/bootstrap_icon4py.py (venv prep) -# ci/dace_deterministic_codegen/run_in_ci.sh (CI driver) +# This file just wires those sessions into GitLab CI: when to run, on +# which runners, and with which (session, device) cells. # -# This file just wires those into GitLab CI: when to run, on which -# runners, with which selection × component matrix. +# Run policy +# ---------- +# - Scheduled (nightly) on `main`. +# - Manual via "Run Pipeline" from the GitLab UI on any branch. +# - NOT on every commit. Each cell is two end-to-end pytest runs; +# the cost vs. signal for "every PR" is bad. # # Failure semantics # ----------------- @@ -31,65 +39,40 @@ # surface regressions on the dashboard without gating merges. Drop # `allow_failure` once we have a sustained green stretch on `main`. # -# Custom dace branch -# ------------------ -# To test a dace fork/branch (e.g. 
unmerged determinism work), set -# DACE_REPO and DACE_REF in the variables block below. When unset, -# dace resolves through icon4py's existing source pin (currently the -# GridTools/pypi published wheel). See run_in_ci.sh for the details. +# Adding new matrix cells +# ----------------------- +# The (subpackage, device, meshlib) dimension uses GitLab's +# `parallel.matrix` via NOX_SESSION. Adding cells is a one-line +# change to the relevant matrix block. stages: - dace-determinism -variables: - ICON4PY_REPO: 'https://github.com/C2SM/icon4py.git' - ICON4PY_REF: 'main' - - # Custom dace fork/branch under test. Leave both empty to resolve - # dace through icon4py's existing source pin. Set both together to - # install editable dace from a clone of DACE_REPO @ DACE_REF. - # Example for the deterministic codegen branch: - # DACE_REPO: 'https://github.com/GridTools/dace.git' - # DACE_REF: 'dace_toolchain_deterministic' - DACE_REPO: '' - DACE_REF: '' - -# Shared template for all dace-determinism jobs +# Shared template for all dace-determinism jobs. .dace_determinism_common: stage: dace-determinism image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG} variables: PY_VERSION: '3.10' - DACE_DETERMINISM_PYTHON: '${PY_VERSION}' - DACE_DETERMINISM_SELECTION: 'stencils' - DACE_DETERMINISM_GRID: 'icon_regional' SLURM_JOB_NUM_NODES: 1 SLURM_TIMELIMIT: 30 - # Don't block merges on determinism regressions — yet. Flip to - # `false` once the toolchain is reliably converged. + rules: + # Schedule + manual web trigger only. No on-push, no on-MR. 
+ - if: $CI_PIPELINE_SOURCE == "schedule" + - if: $CI_PIPELINE_SOURCE == "web" + - when: never allow_failure: true artifacts: when: always paths: - - _dace_deterministic_codegen/${DACE_DETERMINISM_BACKEND}/${DACE_DETERMINISM_COMPONENT}/ + - _dace_deterministic_codegen/ expire_in: 1 month script: - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py" - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" - # The gt4py CI Docker image already sets VIRTUAL_ENV and prepends - # ${VIRTUAL_ENV}/bin to PATH (see ci/Dockerfile), so - # `python`, `uv`, `nox` etc. resolve to the venv binaries without - # sourcing activate. Matches the .test_common pattern. - - export GT4PY_PATH="${WORKDIR}/gt4py" - - export ICON4PY_PATH="${WORKDIR}/icon4py" - # Custom dace branch: only set DACE_PATH if DACE_REPO is non-empty. - # run_in_ci.sh treats DACE_PATH (along with DACE_REPO/DACE_REF) as - # the trigger to clone + install dace editable. - - if [ -n "${DACE_REPO}" ]; then export DACE_PATH="${WORKDIR}/dace"; fi - # Per-cell artifact subdirectory (matches artifacts.paths above). - - export DACE_DETERMINISM_ARTIFACT_DIR="${CI_PROJECT_DIR}/_dace_deterministic_codegen/${DACE_DETERMINISM_BACKEND}/${DACE_DETERMINISM_COMPONENT}" - - bash "${WORKDIR}/gt4py/ci/dace_deterministic_codegen/run_in_ci.sh" + - cd "${WORKDIR}/gt4py" && ./noxfile.py -s "${NOX_SESSION}" +# GPU determinism on Santis GH200 (CUDA 12). dace_determinism_cscs_gh200_cuda: extends: - .container-runner-santis-gh200 @@ -100,25 +83,17 @@ dace_determinism_cscs_gh200_cuda: matrix: - PY_VERSION: '3.10' variables: - DACE_DETERMINISM_BACKEND: 'dace_gpu' - # Inject icon4py's `cuda12` extra into the nox session venv. Without - # this, cupy isn't installed in the session, `gtx.CUPY_DEVICE_TYPE` - # resolves to None, and the pytest fixture explodes at - # `get_allocator(None)`. 
Mirrors icon4py's own ci/base.yml:107 for - # the Santis GH200 (aarch64) test template. - DACE_DETERMINISM_NOX_EXTRAS: 'cuda12' SLURM_GPUS_PER_NODE: 1 SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 parallel: matrix: - - DACE_DETERMINISM_COMPONENT: - - dycore - - advection - - diffusion - - muphys + - NOX_SESSION: + - "test_next_determinism-3.10(cuda12, nomesh)" + - "test_cartesian_determinism-3.10(cuda12)" +# CPU determinism on Santis GH200. dace_determinism_cscs_gh200_cpu: extends: - .container-runner-santis-gh200 @@ -129,14 +104,11 @@ dace_determinism_cscs_gh200_cpu: matrix: - PY_VERSION: '3.10' variables: - DACE_DETERMINISM_BACKEND: 'dace_cpu' SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 parallel: matrix: - - DACE_DETERMINISM_COMPONENT: - - dycore - - advection - - diffusion - - muphys + - NOX_SESSION: + - "test_next_determinism-3.10(cpu, nomesh)" + - "test_cartesian_determinism-3.10(cpu)" diff --git a/ci/dace_deterministic_codegen/README.md b/ci/dace_deterministic_codegen/README.md deleted file mode 100644 index 9c3c2976f8..0000000000 --- a/ci/dace_deterministic_codegen/README.md +++ /dev/null @@ -1,224 +0,0 @@ -# dace_deterministic_codegen - -Determinism check for gt4py's DaCe backend. Runs an icon4py test selection -through `nox` **twice** with isolated gt4py build caches, then compares -the generated source code under each program's `src/` between the two -runs. Exit 0 = identical (deterministic), exit 1 = different. - -The check compares **everything** generated under `/src/`, -recursively. On a typical run that includes both the CPU host glue -(`cpu/.cpp`) and the device kernels (`cuda/.cu` for CUDA, -`cuda/hip/.cpp` for HIP — dace nests HIP one level inside `cuda/` -via `target_name="cuda"`, `target_type="hip"`). All of it gets hashed -and diffed: a regression in the host glue is just as much a determinism -failure as one in the device kernel. 
- -The top-level layout under `src/` is gated to the dace backends we've -verified: `cpu/` and `cuda/` (with HIP picked up automatically inside -`cuda/`). If a run emits anything else there the checker fails immediately -with a clear message rather than silently ignoring the unfamiliar directory -and reporting a false "deterministic". - -Valid `--selection` and `--component` values are read from icon4py's -own `noxfile.py` at runtime — no hardcoding here, so the checker -auto-tracks any future changes to icon4py's parametrization. - -Mirrors icon4py's `ci/dace.yml`, with the session name configurable: - -```bash -nox -r -s "<session>-<python>(<selection>, <component>)" -- <pytest-args> -``` - -Default `<session>` is `test_model` — what `ci/dace.yml` itself uses. - -## A note on paths - -Every `--*` flag that takes a path (`--icon4py`, `--gt4py`, `--dace`) -accepts **both absolute and relative** paths. Relative paths are -resolved against the current working directory — i.e. wherever you -invoke the script from, not where the script lives. The script prints -the resolved absolute path on startup whenever you pass a relative one, -so you can confirm what it landed on. - -## Setup (one-time) - -Done once per machine, before any check is run. - -**1. Activate the gt4py venv** with editable gt4py (and dace, if on a -custom branch): - -```bash -source /path/to/gt4py-venv/bin/activate -uv pip install -e /path/to/gt4py -uv pip install -e /path/to/dace # optional, if custom dace branch -``` - -**2. Bootstrap icon4py into that same venv.** This patches icon4py's -`[tool.uv.sources]` so the editable gt4py / dace are what `uv sync` -installs into nox's session venv: - -```bash -uv pip install tomli_w -python /path/to/gt4py/ci/dace_deterministic_codegen/bootstrap_icon4py.py \ - --icon4py /path/to/icon4py \ - --gt4py /path/to/gt4py \ - --dace /path/to/dace # omit if upstream dace -``` - -**3.
Sanity check:** - -```bash -python -c "import gt4py.next; print(gt4py.next.__file__)" -# must print a path inside your gt4py checkout, NOT site-packages/ -``` - -## Run the check - -With the venv from step 1 active: - -```bash -python /path/to/gt4py/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ - --icon4py /path/to/icon4py \ - --selection \ - --component \ - --posarg=--backend=dace_cpu \ - --posarg=--grid=icon_regional -``` - -The valid values for `--selection` and `--component` are read directly -from icon4py's `noxfile.py` at runtime. As of icon4py main, that's: - -- `--selection`: `datatest`, `stencils`, `basic` -- `--component`: `advection`, `diffusion`, `dycore`, `microphysics`, - `muphys`, `common`, `driver`, `standalone_driver`, `testing` - -If icon4py adds or renames these, the checker picks it up automatically; -no update needed here. If you pass an invalid value, the error message -lists the actual valid set extracted from your icon4py checkout. - -## Examples - -**Stencils for muphys, CPU** — mirrors `ci/dace.yml`'s stencil pattern: - -```bash -python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ - --icon4py $ICON4PY \ - --selection stencils \ - --component muphys \ - --posarg=--backend=dace_cpu \ - --posarg=--grid=icon_regional -``` - -**Datatest for dycore, GPU** — mirrors the datatest pattern: - -```bash -python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ - --icon4py $ICON4PY \ - --selection datatest \ - --component dycore \ - --posarg=--backend=dace_gpu \ - --posarg=--level=integration -``` - -**Custom session** — say a future icon4py defines a `test_other` -session with the same parametrization shape: - -```bash -python $GT4PY/ci/dace_deterministic_codegen/dace_deterministic_codegen.py \ - --icon4py $ICON4PY \ - --session test_other \ - --selection stencils \ - --component muphys \ - --posarg=--backend=dace_cpu -``` - -## Output - -By default, everything lands at 
`<icon4py>/_dace_deterministic_codegen/`. -Override with `--workdir PATH` (absolute or relative): - -``` -<workdir>/ -├── run1/.gt4py_cache/... run1/test.log -├── run2/.gt4py_cache/... run2/test.log -├── diffs/<program>/<file>.diff (only on mismatch) -└── report.txt (human-readable summary) -``` - -**Re-running wipes the workdir.** Whatever was there before — old logs, -old caches, an old `report.txt` from yesterday — is removed before the -new run starts. No merging, no appending. If you want to keep history -across invocations, copy the directory before re-running. - -## Exit codes - -| Code | Meaning | -| ---- | ------------------------------------------------------------------------- | -| 0 | Codegen is deterministic. | -| 1 | Codegen differs (see `report.txt` and `diffs/`). | -| 2 | Bad arguments (path doesn't exist, missing noxfile, …). | -| 3 | No programs observed in either run (test selection collected nothing). | -| 4 | A `nox` invocation itself failed (see `run1/test.log` / `run2/test.log`). | - -## Flags - -``` ---icon4py PATH icon4py checkout, abs or rel (required) ---session NAME nox session name (default: test_model) ---selection NAME noxfile selection (required); validated against - icon4py's actual noxfile at runtime ---component NAME leaf subpackage name (required); validated - against icon4py's actual noxfile at runtime ---python X.Y python version for the nox session (default: 3.10) ---workdir PATH where run1/, run2/, diffs/, report.txt land, - abs or rel (default: <icon4py>/_dace_deterministic_codegen/). - Wiped before each run. ---posarg ARG forwarded to pytest. Repeatable. -``` - -## CI integration - -The checker runs in CSCS CI as a separate `dace-determinism` stage, -defined in `ci/cscs-ci-dace-determinism.yml` and wired into the -pipeline via `ci/cscs-ci.yml`. A small driver script, -`ci/dace_deterministic_codegen/run_in_ci.sh`, encapsulates the -clone + bootstrap + checker invocation so the YAML stays minimal and -the same flow can be reproduced locally.
- -### Reproducing a CI run locally - -The driver script reads only env vars, so a green or red CI run can -be reproduced one-to-one by exporting the same variables and invoking -`run_in_ci.sh`: - -```bash -# gt4py CI venv with editable gt4py already there -source /path/to/gt4py-venv/bin/activate - -export GT4PY_PATH=/path/to/gt4py -export ICON4PY_REPO=https://github.com/C2SM/icon4py.git -export ICON4PY_REF=main # or the SHA from the failing run -export ICON4PY_PATH=/tmp/icon4py - -# Optional: custom dace branch -export DACE_REPO=https://github.com/GridTools/dace.git -export DACE_REF=dace_toolchain_deterministic -export DACE_PATH=/tmp/dace - -export DACE_DETERMINISM_SELECTION=stencils -export DACE_DETERMINISM_COMPONENT=muphys -export DACE_DETERMINISM_PYTHON=3.10 -export DACE_DETERMINISM_BACKEND=dace_cpu -export DACE_DETERMINISM_GRID=icon_regional - -# GPU runs only: inject icon4py's GPU extra into the nox session venv. -# Without this, cupy isn't installed in the session venv, so -# gt4py's `CUPY_DEVICE_TYPE` is None and icon4py's `GPU` constant -# (model_backends.py:19) is None, and the pytest fixture fails at -# get_allocator(None). cuda12/13 on Santis GH200, rocm6/7 on AMD. -# The driver script forwards this to icon4py's -# ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS (noxfile.py). -# export DACE_DETERMINISM_NOX_EXTRAS=... - -bash $GT4PY_PATH/ci/dace_deterministic_codegen/run_in_ci.sh -``` diff --git a/ci/dace_deterministic_codegen/bootstrap_icon4py.py b/ci/dace_deterministic_codegen/bootstrap_icon4py.py deleted file mode 100644 index b4e5f29689..0000000000 --- a/ci/dace_deterministic_codegen/bootstrap_icon4py.py +++ /dev/null @@ -1,197 +0,0 @@ -#!/usr/bin/env python3 -# GT4Py - GridTools Framework -# -# Copyright (c) 2014-2024, ETH Zurich -# All rights reserved. -# -# Please, refer to the LICENSE file in the root directory. -# SPDX-License-Identifier: BSD-3-Clause - -"""Bootstrap icon4py into the *currently activated* venv (the gt4py CI venv). 
- -Edits icon4py's `pyproject.toml` so that `[tool.uv.sources]` points -`gt4py` (and optionally `dace`) at local-path editable installs, regenerates -the lockfile, and runs `uv sync --active` to install icon4py + its other -dependencies into `$VIRTUAL_ENV`. - -This is what makes the editable gt4py / dace branches survive everything -downstream — including the icon4py noxfile's own `uv sync` call when our -dace_deterministic_codegen checker runs `nox --no-venv`. - -Usage (run from anywhere): - - python ci/dace_deterministic_codegen/bootstrap_icon4py.py \\ - --icon4py /path/to/icon4py \\ - --gt4py /path/to/gt4py-dace_toolchain_deterministic \\ - --dace /path/to/dace # optional - -Idempotent: re-running it is safe; the [tool.uv.sources] entries are -overwritten in place. -""" - -from __future__ import annotations - -import argparse -import os -import shutil -import subprocess -import sys -from pathlib import Path - - -try: - import tomllib # Python 3.11+ -except ModuleNotFoundError: - import tomli as tomllib # type: ignore[import-not-found] - -try: - import tomli_w -except ModuleNotFoundError: - print( - "error: this script needs `tomli_w`. install with:\n" - " uv pip install tomli_w # or pip install tomli_w", - file=sys.stderr, - ) - sys.exit(2) - - -def _is_python_project(path: Path) -> bool: - """A directory is installable by uv if it has any of these markers.""" - return any((path / m).is_file() for m in ("pyproject.toml", "setup.py", "setup.cfg")) - - -def patch_sources(pyproject: Path, overrides: dict[str, Path]) -> None: - """Set `[tool.uv.sources][] = {path = "...", editable = true}` for - every (pkg, path) in overrides. Other entries are preserved.""" - with pyproject.open("rb") as f: - doc = tomllib.load(f) - - sources = doc.setdefault("tool", {}).setdefault("uv", {}).setdefault("sources", {}) - for pkg, path in overrides.items(): - sources[pkg] = {"path": str(path), "editable": True} - - # Make a backup once. 
Idempotent: don't overwrite an existing backup, - # which would clobber the pristine original after a re-run. - backup = pyproject.with_suffix(pyproject.suffix + ".dace_deterministic_codegen.bak") - if not backup.exists(): - shutil.copy2(pyproject, backup) - - with pyproject.open("wb") as f: - tomli_w.dump(doc, f) - print(f"patched {pyproject} (backup at {backup.name})") - - -def run(cmd: list[str], cwd: Path) -> None: - print(f"+ {' '.join(cmd)} (cwd={cwd})") - rc = subprocess.run(cmd, cwd=str(cwd)).returncode - if rc != 0: - sys.exit(rc) - - -def main() -> int: - description = ( - __doc__.split("\n\n", 1)[0] - if __doc__ - else "Bootstrap icon4py into the currently activated venv (the gt4py CI venv)." - ) - p = argparse.ArgumentParser(description=description) - p.add_argument( - "--icon4py", - required=True, - type=Path, - metavar="PATH", - help=( - "Path to icon4py checkout. Accepts BOTH absolute and relative " - "paths. Relative paths are resolved against the current working " - "directory." - ), - ) - p.add_argument( - "--gt4py", - required=True, - type=Path, - metavar="PATH", - help=( - "Path to gt4py checkout to install editable. Accepts BOTH " - "absolute and relative paths (resolved against cwd)." - ), - ) - p.add_argument( - "--dace", - type=Path, - default=None, - metavar="PATH", - help=( - "Optional path to dace checkout (absolute or relative). If " - "omitted, dace resolves through icon4py's existing source pin." - ), - ) - p.add_argument( - "--no-lock", action="store_true", help="Skip `uv lock`. Useful if you already locked." - ) - p.add_argument( - "--no-sync", - action="store_true", - help="Skip `uv sync`. Useful for CI steps that sync later.", - ) - args = p.parse_args() - - # Resolve every path NOW. The script can be run from any cwd. 
- icon4py = args.icon4py.expanduser().resolve() - gt4py = args.gt4py.expanduser().resolve() - dace = args.dace.expanduser().resolve() if args.dace else None - - pyproject = icon4py / "pyproject.toml" - if not pyproject.is_file(): - print(f"error: no pyproject.toml at {pyproject}", file=sys.stderr) - return 2 - if not _is_python_project(gt4py): - print( - f"error: --gt4py path is not a python project (no pyproject.toml, " - f"setup.py, or setup.cfg): {gt4py}", - file=sys.stderr, - ) - return 2 - if dace and not _is_python_project(dace): - print( - f"error: --dace path is not a python project (no pyproject.toml, " - f"setup.py, or setup.cfg): {dace}", - file=sys.stderr, - ) - return 2 - - # Loud warning if no venv is active — the whole point of this script - # is to install INTO the gt4py CI venv. Without VIRTUAL_ENV set, uv - # would create a new .venv and we'd get nowhere. - if not os.environ.get("VIRTUAL_ENV"): - print( - "warning: VIRTUAL_ENV is not set. This script is meant to install " - "icon4py into the *currently activated* venv (typically your " - "gt4py CI venv). Activate it first, then re-run.", - file=sys.stderr, - ) - - overrides: dict[str, Path] = {"gt4py": gt4py} - if dace: - overrides["dace"] = dace - patch_sources(pyproject, overrides) - - if not args.no_lock: - # Regenerate uv.lock so it matches the new [tool.uv.sources]. - run(["uv", "lock"], cwd=icon4py) - if not args.no_sync: - # --active = use $VIRTUAL_ENV (the gt4py venv) instead of ./venv/. - run(["uv", "sync", "--active"], cwd=icon4py) - - print() - print("done. 
quick sanity check:") - print(' python -c "import gt4py.next; print(gt4py.next.__file__)"') - print(f" # should print a path inside {gt4py}") - if dace: - print(' python -c "import dace; print(dace.__file__)"') - print(f" # should print a path inside {dace}") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py b/ci/dace_deterministic_codegen/dace_deterministic_codegen.py deleted file mode 100644 index 5fd908b3b2..0000000000 --- a/ci/dace_deterministic_codegen/dace_deterministic_codegen.py +++ /dev/null @@ -1,701 +0,0 @@ -#!/usr/bin/env python3 -# GT4Py - GridTools Framework -# -# Copyright (c) 2014-2024, ETH Zurich -# All rights reserved. -# -# Please, refer to the LICENSE file in the root directory. -# SPDX-License-Identifier: BSD-3-Clause - -"""GT4Py / DaCe codegen determinism check. - -Drives an icon4py test selection through nox **twice** with isolated -gt4py build caches, then checks that the generated source files under -each program's `src/` are byte-identical between the two runs. A diff -is a determinism bug. - -Compares only the contents of `/src/` — the actual generated -backend code. Currently supports cpu, cuda, and hip (hip is emitted by -dace under `src/cuda/hip/`). Any other top-level backend under `src/` -(mpi, sve, mlir, snitch, …) causes the checker to fail with a clear -message rather than silently ignore it. - -Valid `--selection` and `--component` values are read from icon4py's -own `noxfile.py` at runtime (no hardcoding here), so the checker -tracks any future changes to icon4py's parametrization automatically. - -Mirrors icon4py's `ci/dace.yml` invocation pattern, with the session -name configurable: - - nox -r -s "-(, )" -- - -Defaults to `=test_model`, which is the icon4py main test -entry point and what `ci/dace.yml` uses. - -Outputs land at `/_dace_deterministic_codegen/`: - run1/.gt4py_cache/... run1/test.log - run2/.gt4py_cache/... 
run2/test.log - diffs//.diff (only on mismatch) - report.txt -""" - -from __future__ import annotations - -import argparse -import ast -import dataclasses -import difflib -import hashlib -import os -import re -import shutil -import subprocess -import sys -from pathlib import Path -from typing import Optional - - -# --------------------------------------------------------------------------- -# Constants -# --------------------------------------------------------------------------- - -#: GT4Py names each cached program folder `_`. -PROGRAM_FOLDER_RE = re.compile(r"^(?P.+)_(?P[0-9a-f]{64})$") - -#: The single directory under each program folder we compare. Only `src/`, -#: nothing else — by design. dace also writes `include/`, `sample/`, -#: `program.sdfg`, source maps under `map/`, runtime metadata -#: (`dace.conf`, `*.csv`), and build artifacts under `build/`. None of -#: those are the codegen surface we care about for this check. -CODEGEN_ROOT = "src" - -#: Backends recognized as direct children of `src/`. dace lays out -#: codegen as `src//[/]`: -#: -#: - CPU codegen → src/cpu/.cpp -#: - CUDA codegen → src/cuda/.cu -#: - HIP codegen → src/cuda/hip/.cpp (NOTE: under cuda/) -#: -#: HIP is dispatched by dace's CUDA target with `target_type="hip"`, so -#: it lands as a *subdirectory* of `src/cuda/`, not its own top-level -#: backend folder. That means {cpu, cuda} as a top-level allowlist is -#: enough to cover all three: cpu via `cpu/`, cuda + hip both via -#: `cuda/` (with `rglob` picking up the nested hip files). -#: -#: If a snapshot ever encounters another top-level backend (mpi, sve, -#: mlir, snitch, …), the checker fails loudly rather than silently -#: ignoring — those would need explicit support added here. -SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"}) - -#: Where outputs are written, relative to the icon4py checkout. 
-WORKDIR_NAME = "_dace_deterministic_codegen" - - -# --------------------------------------------------------------------------- -# icon4py noxfile introspection -# --------------------------------------------------------------------------- - - -class NoxfileIntrospectionError(RuntimeError): - """Raised when we can't extract sessions/components from the noxfile.""" - - -def introspect_icon4py_noxfile( - noxfile: Path, -) -> tuple[frozenset[str], frozenset[str]]: - """Parse icon4py's noxfile.py and extract the valid `selection` and - `component` values. Returns `(selections, components)`. - - Reads the noxfile as AST — does not execute it. Two reasons: - importing would require `nox` in this script's environment, and - noxfile imports often have side effects (icon4py's pulls in a - handful of typing imports plus nox's own session machinery). - - Looks for two type-alias definitions matching icon4py main: - - ModelTestsSubset: TypeAlias = Literal["datatest", "stencils", "basic"] - ModelSubpackagePath: TypeAlias = Literal["atmosphere/advection", ...] - - Components are derived from the *leaf name* of each subpackage path - (`subpackage.split("/")[-1]`), matching the `id=...` icon4py uses - in nox.param. So `atmosphere/subgrid_scale_physics/muphys` becomes - the component `muphys`. - """ - if not noxfile.is_file(): - raise NoxfileIntrospectionError( - f"no noxfile.py at {noxfile} — is --icon4py the icon4py repo root?" - ) - - try: - tree = ast.parse(noxfile.read_text()) - except SyntaxError as e: - raise NoxfileIntrospectionError(f"could not parse {noxfile} as Python: {e}") from e - - selections = _extract_literal_strings(tree, "ModelTestsSubset") - subpackages = _extract_literal_strings(tree, "ModelSubpackagePath") - - if not selections: - raise NoxfileIntrospectionError( - f"could not find `ModelTestsSubset: TypeAlias = Literal[...]` " - f"in {noxfile}. icon4py's noxfile structure may have changed." 
- ) - if not subpackages: - raise NoxfileIntrospectionError( - f"could not find `ModelSubpackagePath: TypeAlias = Literal[...]` " - f"in {noxfile}. icon4py's noxfile structure may have changed." - ) - - components = frozenset(p.rsplit("/", 1)[-1] for p in subpackages) - return frozenset(selections), components - - -def _extract_literal_strings(tree: ast.AST, alias_name: str) -> list[str]: - """Find `: TypeAlias = Literal["a", "b", ...]` in the AST - and return the string literals. Returns [] if not found or shape is - unexpected (caller decides whether that's fatal).""" - for node in ast.walk(tree): - if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)): - continue - if node.target.id != alias_name: - continue - # Match the AST pattern for Literal["a", "b", ...]: - # a Subscript whose value is the Name "Literal" and whose slice is - # either a Tuple of string Constants or a single string Constant. - v = node.value - if not isinstance(v, ast.Subscript): - continue - elts: list[ast.expr] = [] - if isinstance(v.slice, ast.Tuple): - elts = list(v.slice.elts) - else: - # Single-arg Literal["foo"] - elts = [v.slice] - out: list[str] = [] - for e in elts: - if isinstance(e, ast.Constant) and isinstance(e.value, str): - out.append(e.value) - return out - return [] - - -# --------------------------------------------------------------------------- -# Snapshot -# --------------------------------------------------------------------------- - - -@dataclasses.dataclass(frozen=True) -class FileEntry: - relpath: str - sha256: str - - -@dataclasses.dataclass -class ProgramSnapshot: - name: str - folder: Path - files: dict[str, FileEntry] - - -class UnsupportedBackendError(RuntimeError): - """A program's `src/` contained a top-level backend other than cpu/cuda.""" - - -def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: - """Walk a `.gt4py_cache` and snapshot every program's generated source. 
- - For each `_/` folder, we read everything under - `/src/` recursively. dace lays this out as - `src//[/]`: - - src/cpu/.cpp - src/cuda/.cu (CUDA — target_type="") - src/cuda/hip/.cpp (HIP — target_type="hip", under cuda/) - - Currently supports cpu and cuda as top-level backends. HIP is - handled implicitly because dace nests it inside `src/cuda/hip/`, - not as a separate top-level directory; the recursive walk picks - it up automatically. - - If we encounter any *other* top-level backend under `src/` (mpi, - sve, mlir, snitch, ...), raises UnsupportedBackendError so the - user knows immediately rather than silently skipping. - """ - if not cache_root.exists(): - return {} - - out: dict[str, ProgramSnapshot] = {} - for folder in sorted(p for p in cache_root.iterdir() if p.is_dir()): - m = PROGRAM_FOLDER_RE.match(folder.name) - if not m: - continue - name = m.group("name") - - src_root = folder / CODEGEN_ROOT - if not src_root.is_dir(): - # No src/ at all — record an empty snapshot. Pairing logic - # downstream will flag it if its counterpart in the other run - # has files. - out[name] = ProgramSnapshot(name=name, folder=folder, files={}) - continue - - # Backend check: every direct child of src/ must be a supported - # top-level backend. HIP lives nested under cuda/, so cuda is - # what matters here, not "hip". - backend_dirs = sorted(d for d in src_root.iterdir() if d.is_dir()) - for bd in backend_dirs: - if bd.name not in SUPPORTED_BACKENDS: - raise UnsupportedBackendError( - f"unsupported dace backend `{bd.name}/` found under " - f"{src_root} — this checker currently supports " - f"{sorted(SUPPORTED_BACKENDS)} as top-level backends " - f"(HIP is handled under `cuda/hip/`). Add explicit " - f"support in dace_deterministic_codegen.py before " - f"running this selection." - ) - - # rglob recursively descends — picks up `cuda/hip/` along - # with `cpu/` and `cuda/`, no special-casing needed. 
- files: dict[str, FileEntry] = {} - for fpath in sorted(src_root.rglob("*")): - if not fpath.is_file(): - continue - rel = fpath.relative_to(folder).as_posix() - files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath)) - out[name] = ProgramSnapshot(name=name, folder=folder, files=files) - return out - - -def _sha256(path: Path) -> str: - h = hashlib.sha256() - with path.open("rb") as f: - for chunk in iter(lambda: f.read(1 << 16), b""): - h.update(chunk) - return h.hexdigest() - - -# --------------------------------------------------------------------------- -# Compare -# --------------------------------------------------------------------------- - - -@dataclasses.dataclass -class ProgramResult: - name: str - match: bool - differing_files: list[str] - only_in_run1: list[str] - only_in_run2: list[str] - - -def compare( - snap1: dict[str, ProgramSnapshot], - snap2: dict[str, ProgramSnapshot], -) -> list[ProgramResult]: - results: list[ProgramResult] = [] - for name in sorted(set(snap1) | set(snap2)): - s1 = snap1.get(name) - s2 = snap2.get(name) - - if s1 is None or s2 is None: - results.append( - ProgramResult( - name=name, - match=False, - differing_files=[], - only_in_run1=sorted((s1.files if s1 else {}).keys()), - only_in_run2=sorted((s2.files if s2 else {}).keys()), - ) - ) - continue - - keys1, keys2 = set(s1.files), set(s2.files) - only1 = sorted(keys1 - keys2) - only2 = sorted(keys2 - keys1) - differing = sorted( - rel for rel in keys1 & keys2 if s1.files[rel].sha256 != s2.files[rel].sha256 - ) - results.append( - ProgramResult( - name=name, - match=not (differing or only1 or only2), - differing_files=differing, - only_in_run1=only1, - only_in_run2=only2, - ) - ) - return results - - -# --------------------------------------------------------------------------- -# Diff + report -# --------------------------------------------------------------------------- - - -def write_diffs( - results: list[ProgramResult], - snap1: dict[str, ProgramSnapshot], - snap2: 
dict[str, ProgramSnapshot], - diffs_dir: Path, -) -> None: - for r in results: - if r.match: - continue - s1, s2 = snap1.get(r.name), snap2.get(r.name) - prog_dir = diffs_dir / r.name - for rel in r.differing_files: - f1 = (s1.folder / rel) if s1 else None - f2 = (s2.folder / rel) if s2 else None - if not (f1 and f2 and f1.exists() and f2.exists()): - continue - try: - t1 = f1.read_text().splitlines(keepends=True) - t2 = f2.read_text().splitlines(keepends=True) - except UnicodeDecodeError: - prog_dir.mkdir(parents=True, exist_ok=True) - (prog_dir / f"{rel.replace('/', '__')}.binary-differs").write_text( - f"binary content differs:\n run1: {f1}\n run2: {f2}\n" - ) - continue - udiff = "".join( - difflib.unified_diff( - t1, - t2, - fromfile=f"run1/{rel}", - tofile=f"run2/{rel}", - n=3, - ) - ) - prog_dir.mkdir(parents=True, exist_ok=True) - (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff) - - -def render_report(results: list[ProgramResult]) -> str: - n_total = len(results) - n_match = sum(1 for r in results if r.match) - n_diff = n_total - n_match - - lines = [f"Programs: {n_total} matches: {n_match} mismatches: {n_diff}", ""] - for r in results: - lines.append(f" [{'MATCH ' if r.match else 'DIFFER'}] {r.name}") - if not r.match: - for rel in r.differing_files: - lines.append(f" differs: {rel}") - for rel in r.only_in_run1: - lines.append(f" only in run1: {rel}") - for rel in r.only_in_run2: - lines.append(f" only in run2: {rel}") - - lines.append("") - if n_total == 0: - lines.append("RESULT: no programs observed (nothing was cached).") - elif n_diff == 0: - lines.append(f"RESULT: codegen deterministic — {n_match} program(s) match.") - else: - lines.append(f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff}/{n_total} program(s) differ.") - return "\n".join(lines) + "\n" - - -# --------------------------------------------------------------------------- -# Nox runner -# --------------------------------------------------------------------------- - - -def 
run_nox( - icon4py: Path, - run_dir: Path, - log_path: Path, - session: str, - selection: str, - component: str, - python: str, - posargs: list[str], -) -> int: - """Run nox once with `GT4PY_BUILD_CACHE_DIR=run_dir`. Returns the exit code. - - Mirrors `ci/dace.yml`: positional session ID, `-r` to reuse the venv - between runs (so run1 and run2 see identical venv state — important - for the determinism check). - - NOTE: gt4py's config appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, - so `run_dir` is the *parent*: gt4py creates - `run_dir/.gt4py_cache/_/` inside it. - """ - session_id = f"{session}-{python}({selection}, {component})" - argv = ["nox", "-r", "-s", session_id] - if posargs: - argv.append("--") - argv.extend(posargs) - - env = dict(os.environ.items()) - env["GT4PY_BUILD_CACHE_DIR"] = str(run_dir) - env["GT4PY_BUILD_CACHE_LIFETIME"] = "persistent" - - log_path.parent.mkdir(parents=True, exist_ok=True) - with log_path.open("w") as logf: - logf.write( - f"# cwd: {icon4py}\n" - "# command:\n " + "\n ".join(repr(a) for a in argv) + "\n" - f"# GT4PY_BUILD_CACHE_DIR={run_dir}\n" - f"# (gt4py appends .gt4py_cache; cache lands at {run_dir}/.gt4py_cache/)\n" - "# ---\n" - ) - logf.flush() - proc = subprocess.run( - argv, cwd=str(icon4py), env=env, stdout=logf, stderr=subprocess.STDOUT - ) - return proc.returncode - - -# --------------------------------------------------------------------------- -# Workdir -# --------------------------------------------------------------------------- - - -@dataclasses.dataclass -class Workdir: - """Two parent dirs for gt4py's cache + a place for logs/diffs/report.""" - - root: Path - - @property - def run1_dir(self) -> Path: - return self.root / "run1" - - @property - def run2_dir(self) -> Path: - return self.root / "run2" - - @property - def cache1(self) -> Path: - return self.run1_dir / ".gt4py_cache" - - @property - def cache2(self) -> Path: - return self.run2_dir / ".gt4py_cache" - - @property - def log1(self) -> Path: - 
return self.run1_dir / "test.log" - - @property - def log2(self) -> Path: - return self.run2_dir / "test.log" - - @property - def diffs(self) -> Path: - return self.root / "diffs" - - @property - def report(self) -> Path: - return self.root / "report.txt" - - def prepare(self) -> None: - """Wipe stale state from previous invocations.""" - for d in (self.run1_dir, self.run2_dir, self.diffs): - if d.exists(): - shutil.rmtree(d) - for d in (self.run1_dir, self.run2_dir): - d.mkdir(parents=True, exist_ok=True) - if self.report.exists(): - self.report.unlink() - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - - -def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: - p = argparse.ArgumentParser( - prog="dace_deterministic_codegen", - description=( - "Run an icon4py test selection twice via nox with isolated gt4py " - "caches and check that the generated source code is byte-identical." - ), - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - p.add_argument( - "--icon4py", - required=True, - type=Path, - metavar="PATH", - help=( - "Path to icon4py checkout. Accepts BOTH absolute and relative " - "paths. Relative paths are resolved against the current working " - "directory (i.e. wherever you invoke this script from)." - ), - ) - p.add_argument( - "--session", - default="test_model", - metavar="NAME", - help=( - "Nox session name. Composed with --python/--selection/--component " - "into the final session ID `-(, " - ")`. Default matches icon4py's main test entry point." - ), - ) - p.add_argument( - "--selection", - required=True, - metavar="NAME", - help=( - "icon4py noxfile selection (e.g. stencils, datatest, basic). " - "Validated at runtime against icon4py's actual noxfile." - ), - ) - p.add_argument( - "--component", - required=True, - metavar="NAME", - help=( - "icon4py noxfile subpackage leaf name (e.g. 
muphys, dycore). " - "Validated at runtime against icon4py's actual noxfile." - ), - ) - p.add_argument( - "--python", - default="3.10", - metavar="X.Y", - help="Python version for the nox session.", - ) - p.add_argument( - "--workdir", - type=Path, - default=None, - metavar="PATH", - help=( - "Where run1/, run2/, diffs/, and report.txt are written. " - "Accepts absolute or relative paths (resolved against cwd). " - "If the directory already exists from a prior run, its contents " - "are wiped before this run starts — no merging or appending. " - "Default: /_dace_deterministic_codegen/" - ), - ) - p.add_argument( - "--posarg", - action="append", - default=[], - dest="posargs", - metavar="ARG", - help=( - "Forwarded to pytest via `nox -- ARG`. Repeatable. " - "Example: --posarg=--backend=dace_cpu --posarg=--grid=icon_regional" - ), - ) - return p.parse_args(argv) - - -def main(argv: Optional[list[str]] = None) -> int: - args = parse_args(argv) - - # Resolve every path to absolute up-front, so the checker can be run - # from any cwd. We print what the path resolved to — `--icon4py ../foo` - # behaves intuitively but it's nice to confirm what it landed on. - icon4py = args.icon4py.expanduser().resolve() - if not args.icon4py.is_absolute(): - print(f"--icon4py resolved to: {icon4py}") - if not icon4py.is_dir(): - print(f"error: --icon4py path is not a directory: {icon4py}", file=sys.stderr) - return 2 - noxfile_path = icon4py / "noxfile.py" - if not noxfile_path.is_file(): - print( - f"error: no noxfile.py at {noxfile_path} — is --icon4py the icon4py repo root?", - file=sys.stderr, - ) - return 2 - - # Introspect icon4py's noxfile to discover the legal selection / - # component values. This avoids hardcoding the lists, so the checker - # auto-tracks any future changes to icon4py's noxfile structure. 
- try: - valid_selections, valid_components = introspect_icon4py_noxfile(noxfile_path) - except NoxfileIntrospectionError as e: - print(f"error: {e}", file=sys.stderr) - return 2 - - if args.selection not in valid_selections: - print( - f"error: --selection {args.selection!r} is not one of " - f"{sorted(valid_selections)} (extracted from {noxfile_path})", - file=sys.stderr, - ) - return 2 - if args.component not in valid_components: - print( - f"error: --component {args.component!r} is not one of " - f"{sorted(valid_components)} (extracted from {noxfile_path})", - file=sys.stderr, - ) - return 2 - - workdir_root = ( - args.workdir.expanduser().resolve() if args.workdir is not None else icon4py / WORKDIR_NAME - ) - if args.workdir is not None and not args.workdir.is_absolute(): - print(f"--workdir resolved to: {workdir_root}") - workdir = Workdir(root=workdir_root) - workdir.prepare() # wipes run1/, run2/, diffs/, report.txt — see prepare() - - session_id = f"{args.session}-{args.python}({args.selection}, {args.component})" - - # ----- Run 1 - print(f"[1/2] nox -s '{session_id}' (cache: {workdir.run1_dir})", flush=True) - rc1 = run_nox( - icon4py, - workdir.run1_dir, - workdir.log1, - args.session, - args.selection, - args.component, - args.python, - args.posargs, - ) - if rc1 != 0: - print(f"error: run 1 failed (exit {rc1}). See log: {workdir.log1}", file=sys.stderr) - return 4 - - # ----- Run 2 - print(f"[2/2] nox -s '{session_id}' (cache: {workdir.run2_dir})", flush=True) - rc2 = run_nox( - icon4py, - workdir.run2_dir, - workdir.log2, - args.session, - args.selection, - args.component, - args.python, - args.posargs, - ) - if rc2 != 0: - print(f"error: run 2 failed (exit {rc2}). 
See log: {workdir.log2}", file=sys.stderr) - return 4 - - # ----- Snapshot + compare + report - try: - snap1 = snapshot_run(workdir.cache1) - snap2 = snapshot_run(workdir.cache2) - except UnsupportedBackendError as e: - print(f"error: {e}", file=sys.stderr) - return 2 - results = compare(snap1, snap2) - write_diffs(results, snap1, snap2, workdir.diffs) - report = render_report(results) - workdir.report.write_text(report) - print(report) - print(f"workdir: {workdir.root}") - - if not results: - print( - f"error: no programs observed in either run — check the logs:\n" - f" {workdir.log1}\n {workdir.log2}", - file=sys.stderr, - ) - return 3 - - return 1 if any(not r.match for r in results) else 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/ci/dace_deterministic_codegen/run_in_ci.sh b/ci/dace_deterministic_codegen/run_in_ci.sh deleted file mode 100644 index 22462f4b94..0000000000 --- a/ci/dace_deterministic_codegen/run_in_ci.sh +++ /dev/null @@ -1,247 +0,0 @@ -#!/usr/bin/env bash -# GT4Py - GridTools Framework -# -# Copyright (c) 2014-2024, ETH Zurich -# All rights reserved. -# -# Please, refer to the LICENSE file in the root directory. -# SPDX-License-Identifier: BSD-3-Clause - -# Driver for running the dace_deterministic_codegen checker in CI. -# -# Encapsulates the clone + bootstrap + checker invocation so the YAML -# stays minimal and the logic is easy to reproduce locally (just set -# the env vars and run the script). -# -# Required environment variables (CI sets all of these via job vars): -# GT4PY_PATH Existing gt4py checkout (the commit under test). -# ICON4PY_REPO Git URL to clone icon4py from. -# ICON4PY_REF Git ref (branch, tag, or SHA) to checkout. -# ICON4PY_PATH Where to clone icon4py to (created if missing). -# DACE_DETERMINISM_SELECTION icon4py noxfile selection: stencils|datatest|basic. -# DACE_DETERMINISM_COMPONENT icon4py subpackage leaf: muphys|dycore|... 
-# DACE_DETERMINISM_PYTHON Python version for the nox session: 3.10, 3.14, ... -# DACE_DETERMINISM_BACKEND dace_cpu | dace_gpu (passed to pytest as --backend=...) -# DACE_DETERMINISM_GRID Grid name passed to pytest as --grid=... -# -# Optional environment variables: -# DACE_REPO Git URL for a custom dace fork. When set, -# DACE_REF and DACE_PATH must also be set. -# DACE_REF Git ref of the custom dace branch. -# DACE_PATH Where to clone dace to (created if missing). -# DACE_DETERMINISM_WORKDIR Where run1/, run2/, diffs/, report.txt land. -# Default: ${ICON4PY_PATH}/_dace_deterministic_codegen -# DACE_DETERMINISM_ARTIFACT_DIR If set, the workdir is copied here at the end -# (success or failure). Set to a path under -# ${CI_PROJECT_DIR} for GitLab CI artifact upload. -# DACE_DETERMINISM_NOX_EXTRAS Extra icon4py optional-dependency groups to -# inject into the nox session venv's `uv sync` -# (e.g. "cuda12/13" on Santis GH200, "rocm6/7" on -# AMD). Required for dace_gpu / gtfn_gpu runs: -# icon4py's `model_backends.py` reads -# `gtx.CUPY_DEVICE_TYPE`, which is `None` -# unless cupy is installed in the nox venv, -# which only happens via the cuda12/13/ -# rocm6/7 extra. Forwarded verbatim to icon4py's -# `ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS` -# (noxfile.py). Leave unset for dace_cpu. -# -# Custom dace branch behaviour: -# - If DACE_REPO is unset, dace lands in the nox session venv via -# icon4py's existing [tool.uv.sources] pin (currently the -# GridTools/pypi published wheel). The parent venv may have its -# own dace from the gt4py CI venv setup, but that's separate — -# nox creates a fresh isolated venv and uv sync's into it from -# icon4py's pyproject.toml. -# - If DACE_REPO is set, the dace repo is cloned at DACE_REF and -# icon4py's [tool.uv.sources] is patched to point at the clone. -# Both the parent venv (Step 2) and the nox session venv (Step 3 -# onwards, via the patched source pin) end up with editable dace -# from the same local path. 
-# -# Exit codes: passed through from dace_deterministic_codegen.py. -# 0 = deterministic, 1 = differs, 2/3/4 = checker errors. -# See the checker README for the full table. - -set -euo pipefail - -# --- Validate required env vars ------------------------------------------- -required=( - GT4PY_PATH - ICON4PY_REPO - ICON4PY_REF - ICON4PY_PATH - DACE_DETERMINISM_SELECTION - DACE_DETERMINISM_COMPONENT - DACE_DETERMINISM_PYTHON - DACE_DETERMINISM_BACKEND - DACE_DETERMINISM_GRID -) -missing=() -for v in "${required[@]}"; do - if [[ -z "${!v:-}" ]]; then - missing+=("$v") - fi -done -if (( ${#missing[@]} > 0 )); then - echo "error: missing required env vars: ${missing[*]}" >&2 - exit 2 -fi - -# Custom dace branch is all-or-nothing: setting one of the three -# DACE_* vars without the others would leave us in a half-configured -# state where it's unclear whether the local dace is supposed to win -# over icon4py's source pin. -if [[ -n "${DACE_REPO:-}" ]]; then - if [[ -z "${DACE_REF:-}" || -z "${DACE_PATH:-}" ]]; then - echo "error: DACE_REPO is set but DACE_REF and/or DACE_PATH are not." >&2 - echo " To use a custom dace branch, set all three together:" >&2 - echo " DACE_REPO - git URL of the dace fork" >&2 - echo " DACE_REF - branch, tag, or SHA to check out" >&2 - echo " DACE_PATH - where to clone dace (typically \${WORKDIR}/dace)" >&2 - exit 2 - fi -fi - -# Active venv check. The Docker image sets VIRTUAL_ENV; bare local runs -# might not. We don't auto-activate — that's the caller's responsibility — -# but warn if it's missing, since installing into the system Python is -# almost never what's wanted. -if [[ -z "${VIRTUAL_ENV:-}" ]]; then - echo "warning: VIRTUAL_ENV is not set. Activate the gt4py CI venv first" >&2 - echo " (the gt4py CI Docker image sets this automatically; only" >&2 - echo " relevant when running this script outside the CI image)." 
>&2 -fi - -DACE_DETERMINISM_WORKDIR_DEFAULT="${ICON4PY_PATH}/_dace_deterministic_codegen" -DACE_DETERMINISM_WORKDIR="${DACE_DETERMINISM_WORKDIR:-${DACE_DETERMINISM_WORKDIR_DEFAULT}}" - -CHECKER_DIR="${GT4PY_PATH}/ci/dace_deterministic_codegen" -CHECKER="${CHECKER_DIR}/dace_deterministic_codegen.py" -BOOTSTRAP="${CHECKER_DIR}/bootstrap_icon4py.py" - -if [[ ! -f "$CHECKER" ]]; then - echo "error: checker not found at $CHECKER" >&2 - echo " (is GT4PY_PATH=$GT4PY_PATH the gt4py repo root?)" >&2 - exit 2 -fi -if [[ ! -f "$BOOTSTRAP" ]]; then - echo "error: bootstrap not found at $BOOTSTRAP" >&2 - exit 2 -fi - -# --- Helper: shallow-clone a repo at a ref, with SHA fallback ------------ -# Some git versions can't combine --depth 1 with arbitrary commit SHAs in -# `clone -b`. If -b fails, fall back to a full clone + explicit checkout. -clone_at_ref() { - local repo="$1" ref="$2" dest="$3" label="$4" - if [[ -d "${dest}/.git" ]]; then - echo " (${label} already cloned at ${dest}; fetching ${ref})" - git -C "${dest}" fetch --depth 1 origin "${ref}" - git -C "${dest}" checkout FETCH_HEAD - return - fi - if ! git clone --depth 1 -b "${ref}" "${repo}" "${dest}" 2>/dev/null; then - echo " (-b ${ref} failed; ${ref} may be a SHA — doing full clone + checkout)" - git clone "${repo}" "${dest}" - git -C "${dest}" checkout "${ref}" - fi -} - -# --- Step 1: clone icon4py at the pinned ref ----------------------------- -echo "==> [1/4] cloning icon4py @ ${ICON4PY_REF} from ${ICON4PY_REPO}" -clone_at_ref "${ICON4PY_REPO}" "${ICON4PY_REF}" "${ICON4PY_PATH}" "icon4py" - -# --- Step 1b (optional): clone custom dace ------------------------------- -if [[ -n "${DACE_REPO:-}" ]]; then - echo "==> [1b/4] cloning dace @ ${DACE_REF} from ${DACE_REPO}" - clone_at_ref "${DACE_REPO}" "${DACE_REF}" "${DACE_PATH}" "dace" -fi - -# --- Step 2: install editable gt4py (+ dace) + tomli_w into the venv ----- -# The gt4py CI Docker image already has gt4py's deps installed via uv -# sync --no-install-project. 
We add gt4py itself (editable, pointing at -# our checkout), tomli_w (which bootstrap_icon4py.py imports), and -# optionally dace (editable, when a custom branch is being tested). -# --no-deps skips re-resolving heavy transitive deps; the icon4py -# bootstrap below will handle anything missing via uv sync --active. -if [[ -n "${DACE_PATH:-}" ]]; then - echo "==> [2/4] installing editable gt4py + dace + tomli_w into ${VIRTUAL_ENV:-system Python}" -else - echo "==> [2/4] installing editable gt4py + tomli_w into ${VIRTUAL_ENV:-system Python}" -fi -uv pip install --no-deps -e "${GT4PY_PATH}" -if [[ -n "${DACE_PATH:-}" ]]; then - uv pip install --no-deps -e "${DACE_PATH}" -fi -uv pip install tomli_w - -# --- Step 3: bootstrap icon4py into the active venv ---------------------- -# Patches icon4py's [tool.uv.sources] so gt4py (and optionally dace) -# resolve to our local checkouts, then `uv lock` + `uv sync --active`. -# This is what makes the editable installs survive when icon4py's noxfile -# creates its session venv and runs `uv sync` inside it — that uv sync -# sees the patched source pins and installs editable from the same paths. 
-echo "==> [3/4] bootstrapping icon4py into the active venv" -bootstrap_args=( --icon4py "${ICON4PY_PATH}" --gt4py "${GT4PY_PATH}" ) -if [[ -n "${DACE_PATH:-}" ]]; then - bootstrap_args+=( --dace "${DACE_PATH}" ) -fi -python "${BOOTSTRAP}" "${bootstrap_args[@]}" - -# --- Step 4: run the determinism checker --------------------------------- -echo "==> [4/4] running the determinism checker" -echo " selection=${DACE_DETERMINISM_SELECTION} component=${DACE_DETERMINISM_COMPONENT}" -echo " python=${DACE_DETERMINISM_PYTHON} backend=${DACE_DETERMINISM_BACKEND} grid=${DACE_DETERMINISM_GRID}" -echo " workdir=${DACE_DETERMINISM_WORKDIR}" - -if [[ -n "${DACE_DETERMINISM_NOX_EXTRAS:-}" ]]; then - echo " nox_extras=${DACE_DETERMINISM_NOX_EXTRAS}" - export ICON4PY_NOX_UV_CUSTOM_SESSION_EXTRAS="${DACE_DETERMINISM_NOX_EXTRAS}" -fi - -# Run with `set +e` and capture the exit code so the artifact-copy step -# below runs whether the checker reported determinism, non-determinism, -# or a tooling error. The checker is the source of truth on the exit -# code; we just defer reacting to it. -set +e -python "${CHECKER}" \ - --icon4py "${ICON4PY_PATH}" \ - --selection "${DACE_DETERMINISM_SELECTION}" \ - --component "${DACE_DETERMINISM_COMPONENT}" \ - --python "${DACE_DETERMINISM_PYTHON}" \ - --workdir "${DACE_DETERMINISM_WORKDIR}" \ - --posarg=--backend="${DACE_DETERMINISM_BACKEND}" \ - --posarg=--grid="${DACE_DETERMINISM_GRID}" -checker_rc=$? -set -e - -# --- Step 5 (optional): publish artifacts -------------------------------- -# If DACE_DETERMINISM_ARTIFACT_DIR is set (typically in CI to a path -# under ${CI_PROJECT_DIR}), copy the workdir there so GitLab can pick -# it up as a build artifact. We do this whether the checker passed or -# failed — both outcomes have a useful report.txt. 
-if [[ -n "${DACE_DETERMINISM_ARTIFACT_DIR:-}" ]]; then - echo "==> publishing artifacts to ${DACE_DETERMINISM_ARTIFACT_DIR}" - rm -rf "${DACE_DETERMINISM_ARTIFACT_DIR}" - mkdir -p "$(dirname "${DACE_DETERMINISM_ARTIFACT_DIR}")" - if [[ -d "${DACE_DETERMINISM_WORKDIR}" ]]; then - cp -r "${DACE_DETERMINISM_WORKDIR}" "${DACE_DETERMINISM_ARTIFACT_DIR}" - else - # Checker errored before creating the workdir — leave a note so - # the artifact upload still has something for diagnosis from the - # GitLab UI without ssh'ing to the runner. - mkdir -p "${DACE_DETERMINISM_ARTIFACT_DIR}" - cat > "${DACE_DETERMINISM_ARTIFACT_DIR}/MISSING_WORKDIR.txt" < None: ) +# -- DaCe codegen determinism check -- +# +# The two `test_*_determinism` sessions below each run gt4py's pytest +# selection twice with isolated GT4PY_BUILD_CACHE_DIR per run, then +# verify the DaCe-generated source files under /src/ are +# byte-identical between the two runs. A diff is a determinism bug +# somewhere in the gt4py + dace toolchain for that test selection. +# +# Comparison logic (snapshot, hash, diff, report) lives in +# `scripts/dace_deterministic_codegen.py`; the helper below just +# wires gt4py's existing pytest invocation pattern into a "run +# twice + compare" loop. +# +# Workdir at REPO_ROOT/_dace_deterministic_codegen/ (wiped before +# each session invocation): +# run1/.gt4py_cache/... (first run's cached programs) +# run2/.gt4py_cache/... (second run's cached programs) +# diffs//.diff (only on mismatch) +# report.txt (human-readable summary) +# +# Only `dace` codegen is checked (`internal` doesn't go through dace), +# so the codegen parameter is dropped from these sessions' signatures. 
+ +DACE_DETERMINISM_WORKDIR_NAME: Final = "_dace_deterministic_codegen" + + +def _run_dace_determinism_check( + session: nox.Session, + pytest_args: Sequence[str], +) -> None: + """Run pytest twice with isolated GT4PY_BUILD_CACHE_DIR and verify the + DaCe-generated source files are byte-identical between the two runs. + + On mismatch, calls ``session.error(...)`` with a pointer to the + diffs/ directory and report.txt so the failure is actionable. + """ + workdir = REPO_ROOT / DACE_DETERMINISM_WORKDIR_NAME + if workdir.exists(): + shutil.rmtree(workdir) + + run1_dir = workdir / "run1" + run2_dir = workdir / "run2" + run1_dir.mkdir(parents=True) + run2_dir.mkdir(parents=True) + + # gt4py appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, so we pass + # the parent directory and the cache lands at .gt4py_cache/ underneath. + # Setting GT4PY_BUILD_CACHE_LIFETIME to `persistent` keeps the cache + # around long enough for the snapshot pass to read it. + # + # Setting DACE_compiler_build_folder_mode to `development` is REQUIRED. + # gt4py configures dace to `production` mode by default, which cleans + # up the dace build folder after compilation — leaving only the + # compiled .so/.dylib and stripping the src/ tree we need to + # diff. Forcing `development` keeps src/cpu/*.cpp and + # src/cuda/*.cu around so the checker has codegen to compare. + # (See src/gt4py/next/program_processors/runners/dace/workflow/ + # common.py:138-144 for the upstream config this overrides; the + # comment there explicitly documents this env var as the escape + # hatch.) + for run_dir in (run1_dir, run2_dir): + session.run( + *pytest_args, + *session.posargs, + env=session.env + | { + "GT4PY_BUILD_CACHE_DIR": str(run_dir), + "GT4PY_BUILD_CACHE_LIFETIME": "persistent", + "DACE_compiler_build_folder_mode": "development", + }, + ) + + # Import the comparison library from scripts/. It uses only stdlib, + # so it runs fine in nox's runtime python (no session venv needed). 
+ if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + from scripts.dace_deterministic_codegen import ( + DeterminismError, + NoProgramsObservedError, + NoSourceFilesObservedError, + UnsupportedBackendError, + check_determinism, + ) + + try: + check_determinism( + run1_dir / ".gt4py_cache", + run2_dir / ".gt4py_cache", + diffs_dir=workdir / "diffs", + report_path=workdir / "report.txt", + ) + except DeterminismError as e: + session.error(f"{e}\nSee {workdir / 'report.txt'} and {workdir / 'diffs'}/") + except NoProgramsObservedError as e: + session.error(f"{e}\nLikely the pytest selection collected no tests.") + except NoSourceFilesObservedError as e: + session.error(str(e)) + except UnsupportedBackendError as e: + session.error(str(e)) + finally: + # Reclaim disk after the comparison. The two per-run gt4py caches + # are ~hundreds of MB each in development mode, and dace's own + # `.dacecache/` at the repo root (used for SDFGs not routed + # through gt4py's build_folder override) is comparably bulky. + # We always keep `workdir/diffs/` and `workdir/report.txt` — + # those are the artifacts a maintainer actually needs to debug + # a determinism failure; the raw caches are reproducible by + # rerunning the session. 
+ for victim in ( + run1_dir, + run2_dir, + REPO_ROOT / ".dacecache", + ): + if victim.exists(): + session.log(f"cleanup: removing {victim}") + shutil.rmtree(victim, ignore_errors=True) + + +@nox.session(python=PYTHON_VERSIONS, tags=["cartesian", "dace", "determinism"]) +@nox.parametrize("device", [*DeviceNoxParam.values()]) +def test_cartesian_determinism( + session: nox.Session, + device: DeviceOption, +) -> None: + """Run selected 'gt4py.cartesian' DaCe tests twice and verify codegen + is byte-identical between the two runs.""" + + codegen_settings = CodeGenDaceTestSettings["dace"] + device_settings = DeviceTestSettings[device] + extras = [ + "standard", + "testing", + *codegen_settings.get("extras", []), + *device_settings.get("extras", []), + ] + groups = ["test", *codegen_settings.get("groups", []), *device_settings.get("groups", [])] + + install_session_venv(session, extras=extras, groups=groups) + + markers = " and ".join(codegen_settings["markers"] + device_settings["markers"]) + + _run_dace_determinism_check( + session, + pytest_args=[ + *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), + "-m", + f"{markers}", + str(pathlib.Path("tests") / "cartesian_tests"), + ], + ) + + +@nox.session(python=PYTHON_VERSIONS, tags=["next", "dace", "determinism"]) +@nox.parametrize( + "meshlib", + [ + nox.param("nomesh", id="nomesh", tags=["nomesh"]), + nox.param("atlas", id="atlas", tags=["atlas"]), + ], +) +@nox.parametrize("device", [*DeviceNoxParam.values()]) +def test_next_determinism( + session: nox.Session, + device: DeviceOption, + meshlib: Literal["nomesh", "atlas"], +) -> None: + """Run selected 'gt4py.next' DaCe tests twice and verify codegen + is byte-identical between the two runs.""" + + codegen_settings = CodeGenDaceTestSettings["dace"] + device_settings = DeviceTestSettings[device] + extras = [ + "standard", + "testing", + *codegen_settings.get("extras", []), + *device_settings.get("extras", []), + ] + groups = ["test", 
*codegen_settings.get("groups", []), *device_settings.get("groups", [])] + mesh_markers: list[str] = [] + + match meshlib: + case "nomesh": + mesh_markers.append("not requires_atlas") + case "atlas": + mesh_markers.append("requires_atlas") + groups.append("frameworks") + + install_session_venv(session, extras=extras, groups=groups) + + markers = " and ".join(codegen_settings["markers"] + device_settings["markers"] + mesh_markers) + + _run_dace_determinism_check( + session, + pytest_args=[ + *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), + "-m", + f"{markers}", + str(pathlib.Path("tests") / "next_tests"), + ], + ) + + if __name__ == "__main__": nox.main() diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py new file mode 100644 index 0000000000..b0d643b4c2 --- /dev/null +++ b/scripts/dace_deterministic_codegen.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +# GT4Py - GridTools Framework +# +# Copyright (c) 2014-2024, ETH Zurich +# All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause + +"""GT4Py / DaCe codegen determinism check. + +Library + CLI for verifying that gt4py's DaCe backend produces +byte-identical generated source files across two runs of the same +test selection. Used by the ``test_*_determinism`` nox sessions in +``noxfile.py``; also runnable standalone for ad-hoc comparison of +two existing caches. + +The check compares everything dace writes as generated source under +each cached program's ``src/`` — ``cpu/`` and ``cuda/`` (with HIP +picked up automatically under ``cuda/hip/``). It deliberately +ignores SDFGs, build artifacts, source maps, and runtime metadata. + +If a snapshot ever encounters a top-level backend other than cpu or +cuda (mpi, sve, mlir, snitch, ...), it errors with a clear message +rather than silently skipping. 
+ +As a library +------------ + +:: + + from scripts.dace_deterministic_codegen import check_determinism + + check_determinism( + cache1=Path(".../run1/.gt4py_cache"), + cache2=Path(".../run2/.gt4py_cache"), + diffs_dir=Path(".../diffs"), # optional + report_path=Path(".../report.txt"), # optional + ) + +Raises ``DeterminismError`` on mismatch, ``NoProgramsObservedError`` +if both caches are empty, ``NoSourceFilesObservedError`` if programs +were cached but contain no source files (typically a missing +``DACE_compiler_build_folder_mode=development``), or +``UnsupportedBackendError`` if the codegen produced an unfamiliar +backend layout. + +As a CLI +-------- + +:: + + python scripts/dace_deterministic_codegen.py \\ + --run1 path/to/cache1/.gt4py_cache \\ + --run2 path/to/cache2/.gt4py_cache \\ + [--diffs-dir DIR] [--report FILE] + +Exit codes: + + 0 codegen is deterministic + 1 codegen differs (see diffs/ and report.txt) + 2 bad arguments / unsupported backend / no source files captured + 3 no programs observed in either cache +""" + +from __future__ import annotations + +import argparse +import dataclasses +import difflib +import hashlib +import re +import sys +from pathlib import Path +from typing import Optional + + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +#: GT4Py names each cached program folder ``_``. +PROGRAM_FOLDER_RE = re.compile(r"^(?P.+)_(?P[0-9a-f]{64})$") + +#: The single directory under each program folder we compare. Only ``src/``, +#: nothing else — by design. dace also writes ``include/``, ``sample/``, +#: ``program.sdfg``, source maps under ``map/``, runtime metadata +#: (``dace.conf``, ``*.csv``), and build artifacts under ``build/``. None of +#: those are the codegen surface we care about for this check. +CODEGEN_ROOT = "src" + +#: Backends recognized as direct children of ``src/``. 
dace lays out +#: codegen as ``src//[/]``: +#: +#: - CPU codegen -> src/cpu/.cpp +#: - CUDA codegen -> src/cuda/.cu +#: - HIP codegen -> src/cuda/hip/.cpp (NOTE: under cuda/) +#: +#: HIP is dispatched by dace's CUDA target with ``target_type="hip"``, so +#: it lands as a *subdirectory* of ``src/cuda/``, not its own top-level +#: backend folder. That means {cpu, cuda} as a top-level allowlist is +#: enough to cover all three: cpu via ``cpu/``, cuda + hip both via +#: ``cuda/`` (with ``rglob`` picking up the nested hip files). +#: +#: If a snapshot ever encounters another top-level backend (mpi, sve, +#: mlir, snitch, ...), the checker fails loudly rather than silently +#: ignoring — those would need explicit support added here. +SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"}) + + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + + +class UnsupportedBackendError(RuntimeError): + """A program's ``src/`` contained a top-level backend other than cpu/cuda.""" + + +class NoProgramsObservedError(RuntimeError): + """Neither cache contained any cached program folders.""" + + +class NoSourceFilesObservedError(RuntimeError): + """Programs were observed in the caches but none contained any source files. + + Almost always means dace's build folder mode was left at ``production``, + which strips the generated ``src/`` tree after compilation. Set + ``DACE_compiler_build_folder_mode=development`` (lowercase matters) + before running the tests so the codegen surface survives into the + cache and there's actually something to compare. + """ + + +class DeterminismError(RuntimeError): + """Two snapshots compared non-identical. 
``.results`` carries the details.""" + + def __init__(self, message: str, results: list["ProgramResult"]) -> None: + super().__init__(message) + self.results = results + + +# --------------------------------------------------------------------------- +# Snapshot +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass(frozen=True) +class FileEntry: + relpath: str + sha256: str + + +@dataclasses.dataclass +class ProgramSnapshot: + name: str + folder: Path + files: dict[str, FileEntry] + + +def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: + """Walk a gt4py build cache and snapshot every program's generated source. + + The input directory's name is irrelevant — the function looks for + immediate subdirectories matching ``_<64-char-hex-digest>`` + (gt4py's program-folder naming) and reads ``/src/`` + recursively under each one. HIP files at ``src/cuda/hip/`` are + picked up automatically by the recursive walk. + + Returns an empty dict (rather than raising) when the path does not + exist, is not a directory, has no subdirectories, or has only + subdirectories whose names don't match the program-folder pattern. + Callers can use :func:`_diagnose_empty_cache` to get a human + description of which of those it was. + + Raises :class:`UnsupportedBackendError` if any program's ``src/`` + contains a top-level backend not in :data:`SUPPORTED_BACKENDS`. + """ + if not cache_root.is_dir(): + return {} + + out: dict[str, ProgramSnapshot] = {} + for folder in sorted(p for p in cache_root.iterdir() if p.is_dir()): + m = PROGRAM_FOLDER_RE.match(folder.name) + if not m: + continue + name = m.group("name") + + src_root = folder / CODEGEN_ROOT + if not src_root.is_dir(): + # No src/ at all — record an empty snapshot. Pairing logic + # downstream will flag it if its counterpart has files. 
+ out[name] = ProgramSnapshot(name=name, folder=folder, files={}) + continue + + # Backend check: every direct child of src/ must be a supported + # top-level backend. HIP lives nested under cuda/, so cuda is + # what matters here, not "hip". + for bd in sorted(d for d in src_root.iterdir() if d.is_dir()): + if bd.name not in SUPPORTED_BACKENDS: + raise UnsupportedBackendError( + f"unsupported dace backend `{bd.name}/` found under " + f"{src_root} — this checker currently supports " + f"{sorted(SUPPORTED_BACKENDS)} as top-level backends " + f"(HIP is handled under `cuda/hip/`). Add explicit " + f"support in scripts/dace_deterministic_codegen.py " + f"before running this selection." + ) + + # rglob recursively descends — picks up `cuda/hip/` along + # with `cpu/` and `cuda/`, no special-casing needed. + files: dict[str, FileEntry] = {} + for fpath in sorted(src_root.rglob("*")): + if not fpath.is_file(): + continue + rel = fpath.relative_to(folder).as_posix() + files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath)) + out[name] = ProgramSnapshot(name=name, folder=folder, files=files) + return out + + +def _sha256(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1 << 16), b""): + h.update(chunk) + return h.hexdigest() + + +def _diagnose_empty_cache(cache_root: Path) -> str: + """Return a one-line explanation of why :func:`snapshot_run` found nothing. + + Distinguishes "path didn't exist", "path is a file", "directory has + no subdirectories", and "directory has subdirectories but none match + the program-folder pattern" (with a sample of the names found, so a + wrong-path mistake — e.g. passing the parent of ``.gt4py_cache`` — + is immediately visible). 
+ """ + if not cache_root.exists(): + return "path does not exist" + if not cache_root.is_dir(): + return "path exists but is not a directory" + subdirs = sorted(p for p in cache_root.iterdir() if p.is_dir()) + if not subdirs: + return "directory has no subdirectories" + matching = [p for p in subdirs if PROGRAM_FOLDER_RE.match(p.name)] + if not matching: + sample_names = [p.name for p in subdirs[:3]] + suffix = f" (and {len(subdirs) - 3} more)" if len(subdirs) > 3 else "" + return ( + f"directory contains {len(subdirs)} subdirectory(ies) but none " + f"match the program-folder pattern `_<64-char-hex-digest>/` " + f"(saw: {sample_names}{suffix}). If one of those names is " + f"`.gt4py_cache`, you've passed the parent of the cache by mistake." + ) + return "directory has matching subdirectories but they were filtered out" + + +# --------------------------------------------------------------------------- +# Compare +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass +class ProgramResult: + name: str + match: bool + differing_files: list[str] + only_in_run1: list[str] + only_in_run2: list[str] + + +def compare( + snap1: dict[str, ProgramSnapshot], + snap2: dict[str, ProgramSnapshot], +) -> list[ProgramResult]: + """Pair programs by name across the two snapshots and diff their files.""" + results: list[ProgramResult] = [] + for name in sorted(set(snap1) | set(snap2)): + s1 = snap1.get(name) + s2 = snap2.get(name) + + if s1 is None or s2 is None: + results.append( + ProgramResult( + name=name, + match=False, + differing_files=[], + only_in_run1=sorted((s1.files if s1 else {}).keys()), + only_in_run2=sorted((s2.files if s2 else {}).keys()), + ) + ) + continue + + keys1, keys2 = set(s1.files), set(s2.files) + only1 = sorted(keys1 - keys2) + only2 = sorted(keys2 - keys1) + differing = sorted( + rel for rel in keys1 & keys2 if s1.files[rel].sha256 != s2.files[rel].sha256 + ) + results.append( + ProgramResult( + 
name=name, + match=not (differing or only1 or only2), + differing_files=differing, + only_in_run1=only1, + only_in_run2=only2, + ) + ) + return results + + +# --------------------------------------------------------------------------- +# Diff + report +# --------------------------------------------------------------------------- + + +def write_diffs( + results: list[ProgramResult], + snap1: dict[str, ProgramSnapshot], + snap2: dict[str, ProgramSnapshot], + diffs_dir: Path, +) -> None: + """Emit a unified diff per differing file under ``diffs_dir//``.""" + for r in results: + if r.match: + continue + s1, s2 = snap1.get(r.name), snap2.get(r.name) + prog_dir = diffs_dir / r.name + for rel in r.differing_files: + f1 = (s1.folder / rel) if s1 else None + f2 = (s2.folder / rel) if s2 else None + if not (f1 and f2 and f1.exists() and f2.exists()): + continue + try: + t1 = f1.read_text().splitlines(keepends=True) + t2 = f2.read_text().splitlines(keepends=True) + except UnicodeDecodeError: + prog_dir.mkdir(parents=True, exist_ok=True) + (prog_dir / f"{rel.replace('/', '__')}.binary-differs").write_text( + f"binary content differs:\n run1: {f1}\n run2: {f2}\n" + ) + continue + udiff = "".join( + difflib.unified_diff( + t1, + t2, + fromfile=f"run1/{rel}", + tofile=f"run2/{rel}", + n=3, + ) + ) + prog_dir.mkdir(parents=True, exist_ok=True) + (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff) + + +def render_report(results: list[ProgramResult]) -> str: + n_total = len(results) + n_match = sum(1 for r in results if r.match) + n_diff = n_total - n_match + + lines = [f"Programs: {n_total} matches: {n_match} mismatches: {n_diff}", ""] + for r in results: + lines.append(f" [{'MATCH ' if r.match else 'DIFFER'}] {r.name}") + if not r.match: + for rel in r.differing_files: + lines.append(f" differs: {rel}") + for rel in r.only_in_run1: + lines.append(f" only in run1: {rel}") + for rel in r.only_in_run2: + lines.append(f" only in run2: {rel}") + + lines.append("") + if 
n_total == 0: + lines.append("RESULT: no programs observed (nothing was cached).") + elif n_diff == 0: + lines.append(f"RESULT: codegen deterministic — {n_match} program(s) match.") + else: + lines.append(f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff}/{n_total} program(s) differ.") + return "\n".join(lines) + "\n" + + +# --------------------------------------------------------------------------- +# Library entry point +# --------------------------------------------------------------------------- + + +def check_determinism( + cache1: Path, + cache2: Path, + *, + diffs_dir: Optional[Path] = None, + report_path: Optional[Path] = None, +) -> list[ProgramResult]: + """Compare two gt4py caches; write artifacts; raise on mismatch. + + Snapshots both caches (under ``/src/`` recursively) and + diffs them. Optionally writes per-file unified diffs to + ``diffs_dir//`` and a human-readable summary to + ``report_path``. + + Returns the list of :class:`ProgramResult` on a successful match. + + Raises: + UnsupportedBackendError: + A snapshot contained a backend other than cpu/cuda. + NoProgramsObservedError: + Both caches were empty — likely zero tests collected. + DeterminismError: + One or more programs differed between the two runs. 
+ """ + snap1 = snapshot_run(cache1) + snap2 = snapshot_run(cache2) + results = compare(snap1, snap2) + + if diffs_dir is not None: + write_diffs(results, snap1, snap2, diffs_dir) + if report_path is not None: + report_path.parent.mkdir(parents=True, exist_ok=True) + report_path.write_text(render_report(results)) + + if not results: + diag1 = _diagnose_empty_cache(cache1) + diag2 = _diagnose_empty_cache(cache2) + raise NoProgramsObservedError( + "no programs observed in either cache:\n" + f" run1 ({cache1}): {diag1}\n" + f" run2 ({cache2}): {diag2}" + ) + + # Safety net for the silent-false-positive case where both runs cached + # programs but every program's src/ tree is empty — typically because + # dace's build_folder_mode is `production` (the gt4py default). Without + # this, the comparator would see {} == {} for every program and report + # `deterministic` despite there being nothing to compare. + total_files = sum(len(s.files) for s in snap1.values()) + sum( + len(s.files) for s in snap2.values() + ) + if total_files == 0: + raise NoSourceFilesObservedError( + f"{len(results)} program(s) cached, but none of them contain any " + f"source files under src/. This almost always means dace's build " + f"folder mode is `production` rather than `development`, which " + f"strips the codegen output after compilation. Set " + f"DACE_compiler_build_folder_mode=development (lowercase matters) " + f"before running the tests so src/cpu/*.cpp and src/cuda/*.cu " + f"survive into the cache." 
+ ) + + n_diff = sum(1 for r in results if not r.match) + if n_diff > 0: + raise DeterminismError( + f"DaCe codegen is non-deterministic: {n_diff}/{len(results)} program(s) differ", + results, + ) + return results + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: + p = argparse.ArgumentParser( + prog="dace_deterministic_codegen", + description=( + "Compare two gt4py build caches and check whether the DaCe " + "generated source files are byte-identical between them." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + p.add_argument( + "--run1", + required=True, + type=Path, + metavar="PATH", + help="Path to the first .gt4py_cache directory.", + ) + p.add_argument( + "--run2", + required=True, + type=Path, + metavar="PATH", + help="Path to the second .gt4py_cache directory.", + ) + p.add_argument( + "--diffs-dir", + type=Path, + default=None, + metavar="PATH", + help="If set, write per-file unified diffs to this directory.", + ) + p.add_argument( + "--report", + type=Path, + default=None, + metavar="PATH", + help="If set, write the human-readable summary report to this file.", + ) + return p.parse_args(argv) + + +def main(argv: Optional[list[str]] = None) -> int: + args = parse_args(argv) + + try: + results = check_determinism( + args.run1.expanduser().resolve(), + args.run2.expanduser().resolve(), + diffs_dir=args.diffs_dir.expanduser().resolve() if args.diffs_dir else None, + report_path=args.report.expanduser().resolve() if args.report else None, + ) + except UnsupportedBackendError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + except NoProgramsObservedError as e: + print(f"error: {e}", file=sys.stderr) + return 3 + except NoSourceFilesObservedError as e: + print(f"error: {e}", file=sys.stderr) + return 2 + except DeterminismError as e: + 
print(render_report(e.results)) + print(f"error: {e}", file=sys.stderr) + return 1 + + print(render_report(results)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 0af9045fc4392cbf5d572038f9a609db4a3d7d9f Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 21 May 2026 09:47:39 +0200 Subject: [PATCH 08/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Test ONLY GT4Py tests (next & cartesian) --- ci/cscs-ci-dace-determinism.yml | 12 ++++++------ noxfile.py | 6 +++--- scripts/dace_deterministic_codegen.py | 20 ++++++++------------ 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index aa3b482440..76e480ccdb 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -11,7 +11,7 @@ # DaCe codegen determinism check # ============================== # -# Drives gt4py's `test_*_determinism` nox sessions, each of which +# Drives gt4py's `test_*_dace_determinism` nox sessions, each of which # runs pytest twice with isolated GT4PY_BUILD_CACHE_DIR and asserts # the DaCe-generated source files under /src/ are # byte-identical between the two runs. @@ -20,7 +20,7 @@ # that test selection. # # Logic lives in: -# noxfile.py (test_*_determinism sessions) +# noxfile.py (test_*_dace_determinism sessions) # scripts/dace_deterministic_codegen.py (cache comparison lib + CLI) # # This file just wires those sessions into GitLab CI: when to run, on @@ -90,8 +90,8 @@ dace_determinism_cscs_gh200_cuda: parallel: matrix: - NOX_SESSION: - - "test_next_determinism-3.10(cuda12, nomesh)" - - "test_cartesian_determinism-3.10(cuda12)" + - "test_next_dace_determinism-3.10(cuda12, nomesh)" + - "test_cartesian_dace_determinism-3.10(cuda12)" # CPU determinism on Santis GH200. 
dace_determinism_cscs_gh200_cpu: @@ -110,5 +110,5 @@ dace_determinism_cscs_gh200_cpu: parallel: matrix: - NOX_SESSION: - - "test_next_determinism-3.10(cpu, nomesh)" - - "test_cartesian_determinism-3.10(cpu)" + - "test_next_dace_determinism-3.10(cpu, nomesh)" + - "test_cartesian_dace_determinism-3.10(cpu)" diff --git a/noxfile.py b/noxfile.py index ad5ec36909..1aaebb3f12 100755 --- a/noxfile.py +++ b/noxfile.py @@ -349,7 +349,7 @@ def test_typing_exports(session: nox.Session) -> None: # -- DaCe codegen determinism check -- # -# The two `test_*_determinism` sessions below each run gt4py's pytest +# The two `test_*_dace_determinism` sessions below each run gt4py's pytest # selection twice with isolated GT4PY_BUILD_CACHE_DIR per run, then # verify the DaCe-generated source files under /src/ are # byte-identical between the two runs. A diff is a determinism bug @@ -467,7 +467,7 @@ def _run_dace_determinism_check( @nox.session(python=PYTHON_VERSIONS, tags=["cartesian", "dace", "determinism"]) @nox.parametrize("device", [*DeviceNoxParam.values()]) -def test_cartesian_determinism( +def test_cartesian_dace_determinism( session: nox.Session, device: DeviceOption, ) -> None: @@ -508,7 +508,7 @@ def test_cartesian_determinism( ], ) @nox.parametrize("device", [*DeviceNoxParam.values()]) -def test_next_determinism( +def test_next_dace_determinism( session: nox.Session, device: DeviceOption, meshlib: Literal["nomesh", "atlas"], diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py index b0d643b4c2..a6049d872d 100644 --- a/scripts/dace_deterministic_codegen.py +++ b/scripts/dace_deterministic_codegen.py @@ -72,7 +72,6 @@ import re import sys from pathlib import Path -from typing import Optional # --------------------------------------------------------------------------- @@ -135,7 +134,7 @@ class NoSourceFilesObservedError(RuntimeError): class DeterminismError(RuntimeError): """Two snapshots compared non-identical. 
``.results`` carries the details.""" - def __init__(self, message: str, results: list["ProgramResult"]) -> None: + def __init__(self, message: str, results: list[ProgramResult]) -> None: super().__init__(message) self.results = results @@ -363,12 +362,9 @@ def render_report(results: list[ProgramResult]) -> str: for r in results: lines.append(f" [{'MATCH ' if r.match else 'DIFFER'}] {r.name}") if not r.match: - for rel in r.differing_files: - lines.append(f" differs: {rel}") - for rel in r.only_in_run1: - lines.append(f" only in run1: {rel}") - for rel in r.only_in_run2: - lines.append(f" only in run2: {rel}") + lines.extend(f" differs: {rel}" for rel in r.differing_files) + lines.extend(f" only in run1: {rel}" for rel in r.only_in_run1) + lines.extend(f" only in run2: {rel}" for rel in r.only_in_run2) lines.append("") if n_total == 0: @@ -389,8 +385,8 @@ def check_determinism( cache1: Path, cache2: Path, *, - diffs_dir: Optional[Path] = None, - report_path: Optional[Path] = None, + diffs_dir: Path | None = None, + report_path: Path | None = None, ) -> list[ProgramResult]: """Compare two gt4py caches; write artifacts; raise on mismatch. 
@@ -461,7 +457,7 @@ def check_determinism( # --------------------------------------------------------------------------- -def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: p = argparse.ArgumentParser( prog="dace_deterministic_codegen", description=( @@ -501,7 +497,7 @@ def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace: return p.parse_args(argv) -def main(argv: Optional[list[str]] = None) -> int: +def main(argv: list[str] | None = None) -> int: args = parse_args(argv) try: From a6ba257123019762757f65d41a773fed7dd09d91 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 21 May 2026 09:59:12 +0200 Subject: [PATCH 09/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Test ONLY GT4Py tests (next & cartesian) --- ci/cscs-ci-dace-determinism.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 76e480ccdb..2cbe778a0d 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -26,13 +26,6 @@ # This file just wires those sessions into GitLab CI: when to run, on # which runners, and with which (session, device) cells. # -# Run policy -# ---------- -# - Scheduled (nightly) on `main`. -# - Manual via "Run Pipeline" from the GitLab UI on any branch. -# - NOT on every commit. Each cell is two end-to-end pytest runs; -# the cost vs. signal for "every PR" is bad. 
-# # Failure semantics # ----------------- # `allow_failure: true` while the determinism work stabilizes — From 247768f5fdc9fdfe50d0b1691bf37dc4cc3daa09 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 21 May 2026 10:48:56 +0200 Subject: [PATCH 10/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Test ONLY GT4Py tests (next & cartesian) --- ci/cscs-ci-dace-determinism.yml | 2 -- noxfile.py | 10 +++++----- scripts/dace_deterministic_codegen.py | 3 +-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 2cbe778a0d..f6f6c115f9 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -65,7 +65,6 @@ stages: - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" - cd "${WORKDIR}/gt4py" && ./noxfile.py -s "${NOX_SESSION}" -# GPU determinism on Santis GH200 (CUDA 12). dace_determinism_cscs_gh200_cuda: extends: - .container-runner-santis-gh200 @@ -86,7 +85,6 @@ dace_determinism_cscs_gh200_cuda: - "test_next_dace_determinism-3.10(cuda12, nomesh)" - "test_cartesian_dace_determinism-3.10(cuda12)" -# CPU determinism on Santis GH200. dace_determinism_cscs_gh200_cpu: extends: - .container-runner-santis-gh200 diff --git a/noxfile.py b/noxfile.py index 1aaebb3f12..56a0f1f917 100755 --- a/noxfile.py +++ b/noxfile.py @@ -404,7 +404,7 @@ def _run_dace_determinism_check( # diff. Forcing `development` keeps src/cpu/*.cpp and # src/cuda/*.cu around so the checker has codegen to compare. # (See src/gt4py/next/program_processors/runners/dace/workflow/ - # common.py:138-144 for the upstream config this overrides; the + # common.py for the upstream config this overrides; the # comment there explicitly documents this env var as the escape # hatch.) 
for run_dir in (run1_dir, run2_dir): @@ -455,14 +455,14 @@ def _run_dace_determinism_check( # those are the artifacts a maintainer actually needs to debug # a determinism failure; the raw caches are reproducible by # rerunning the session. - for victim in ( + for tbd in ( run1_dir, run2_dir, REPO_ROOT / ".dacecache", ): - if victim.exists(): - session.log(f"cleanup: removing {victim}") - shutil.rmtree(victim, ignore_errors=True) + if tbd.exists(): + session.log(f"cleanup: removing {tbd}") + shutil.rmtree(tbd, ignore_errors=True) @nox.session(python=PYTHON_VERSIONS, tags=["cartesian", "dace", "determinism"]) diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py index a6049d872d..3e2140f311 100644 --- a/scripts/dace_deterministic_codegen.py +++ b/scripts/dace_deterministic_codegen.py @@ -21,8 +21,7 @@ ignores SDFGs, build artifacts, source maps, and runtime metadata. If a snapshot ever encounters a top-level backend other than cpu or -cuda (mpi, sve, mlir, snitch, ...), it errors with a clear message -rather than silently skipping. +cuda, it errors with a clear message rather than silently skipping. 
As a library ------------ From 2a266d7d670bcdabffe1a1bf7c7db7601fa0dbd6 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 21 May 2026 15:58:15 +0200 Subject: [PATCH 11/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: CI/CD pipeline --- ci/cscs-ci-dace-determinism.yml | 134 ++++++++++++++++++++++++-------- ci/cscs-ci.yml | 1 - 2 files changed, 101 insertions(+), 34 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index f6f6c115f9..86a89f374c 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -8,52 +8,96 @@ # SPDX-License-Identifier: BSD-3-Clause # -# DaCe codegen determinism check -# ============================== +# DaCe codegen determinism check — STANDALONE CSCS-CI PIPELINE +# ============================================================ # -# Drives gt4py's `test_*_dace_determinism` nox sessions, each of which -# runs pytest twice with isolated GT4PY_BUILD_CACHE_DIR and asserts -# the DaCe-generated source files under /src/ are -# byte-identical between the two runs. -# -# A diff means the gt4py + dace toolchain is non-deterministic for -# that test selection. +# How to trigger +# -------------- +# Whitelisted users trigger it on any PR by posting the comment: # -# Logic lives in: -# noxfile.py (test_*_dace_determinism sessions) -# scripts/dace_deterministic_codegen.py (cache comparison lib + CLI) +# cscs-ci run dace-determinism # -# This file just wires those sessions into GitLab CI: when to run, on -# which runners, and with which (session, device) cells. -# -# Failure semantics -# ----------------- -# `allow_failure: true` while the determinism work stabilizes — -# surface regressions on the dashboard without gating merges. Drop -# `allow_failure` once we have a sustained green stretch on `main`. 
+# What it does +# ------------ +# Drives gt4py's `test_*_dace_determinism` nox sessions, each of which +# runs pytest twice with isolated GT4PY_BUILD_CACHE_DIR and asserts the +# DaCe-generated source files under /src/ are byte-identical +# between the two runs. A diff means the gt4py + dace toolchain is +# non-deterministic for that test selection. # -# Adding new matrix cells -# ----------------------- -# The (subpackage, device, meshlib) dimension uses GitLab's -# `parallel.matrix` via NOX_SESSION. Adding cells is a one-line -# change to the relevant matrix block. +# Logic lives in: +# noxfile.py (test_*_dace_determinism sessions) +# scripts/dace_deterministic_codegen.py (cache comparison lib + CLI) + +include: + - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' + - local: 'ci/cscs-ci-ext-config.yml' + +variables: + CUDA_VERSION: '12.6.2' + ROCM_VERSION: '7.1.1' + UBUNTU_VERSION: '24.04' + UV_VERSION: '0.11.2' stages: - - dace-determinism + - build + - test + +# -- Build stage -------------------------------------------------------------- +# Mirrors `.build_common` in `cscs-ci.yml` and produces the same image tags +# (so the cached image from the default pipeline is reused when available). +# Both CUDA (for GH200) and ROCm (for AMD MI300) variants are built — the +# determinism matrix below covers cpu+cuda on GH200 and rocm7 on AMD. 
+.build_common: + stage: build + extends: + - .dynamic-image-name + variables: + BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/ubuntu:${UBUNTU_VERSION} + CSCS_REBUILD_POLICY: if-not-exists + DOCKERFILE: ci/Dockerfile + DOCKER_BUILD_ARGS: '["BASE_IMAGE", "CACHE_DIR", "EXTRA_APTGET", "EXTRA_UV_ENV_VARS", "EXTRA_UV_PIP_ARGS", "EXTRA_UV_SYNC_ARGS", "PY_VERSION", "UV_VERSION", "WORKDIR_PATH" ]' + PERSIST_IMAGE_NAME: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION} + WATCH_FILECHANGES: 'ci/Dockerfile ci/cscs-ci.yml ci/cscs-ci-ext-config.yml uv.lock' + parallel: + matrix: + - PY_VERSION: ['3.10'] + +.build_extra_cuda: + variables: + BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + EXTRA_UV_SYNC_ARGS: "--extra cuda12" + +.build_extra_rocm: + variables: + BASE_IMAGE: jfrog.svc.cscs.ch/dockerhub/rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + EXTRA_UV_SYNC_ARGS: "--extra rocm7" + EXTRA_UV_ENV_VARS: "CUPY_INSTALL_USE_HIP=1 HCC_AMDGPU_TARGET=gfx942 ROCM_HOME=/opt/rocm" + KUBERNETES_MEMORY_REQUEST: "64Gi" + KUBERNETES_MEMORY_LIMIT: "64Gi" -# Shared template for all dace-determinism jobs. +build_cscs_gh200: + extends: + - .container-builder-cscs-gh200 + - .build_common + - .build_extra_cuda + needs: [] + +build_cscs_amd_rocm: + extends: + - .container-builder-cscs-zen2 + - .build_common + - .build_extra_rocm + needs: [] + +# -- Test stage: the determinism check ---------------------------------------- .dace_determinism_common: - stage: dace-determinism + stage: test image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG} variables: PY_VERSION: '3.10' SLURM_JOB_NUM_NODES: 1 SLURM_TIMELIMIT: 30 - rules: - # Schedule + manual web trigger only. No on-push, no on-MR. 
- - if: $CI_PIPELINE_SOURCE == "schedule" - - if: $CI_PIPELINE_SOURCE == "web" - - when: never allow_failure: true artifacts: when: always @@ -103,3 +147,27 @@ dace_determinism_cscs_gh200_cpu: - NOX_SESSION: - "test_next_dace_determinism-3.10(cpu, nomesh)" - "test_cartesian_dace_determinism-3.10(cpu)" + +dace_determinism_cscs_amd_rocm: + extends: + - .tds-container-runner-beverin-mi200 + - .dace_determinism_common + needs: + - job: build_cscs_amd_rocm + parallel: + matrix: + - PY_VERSION: '3.10' + variables: + SLURM_GPUS_PER_NODE: 4 + SLURM_PARTITION: mi300 + GT4PY_BUILD_JOBS: 8 + PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + CMAKE_PREFIX_PATH: /opt/rocm + CUDA_HOME: /opt/rocm + CXX: /opt/rocm/bin/hipcc + SLURM_TIMELIMIT: 40 + parallel: + matrix: + - NOX_SESSION: + - "test_next_dace_determinism-3.10(rocm7, nomesh)" + - "test_cartesian_dace_determinism-3.10(rocm7)" diff --git a/ci/cscs-ci.yml b/ci/cscs-ci.yml index 6ec2e1e96c..0f4475ad61 100644 --- a/ci/cscs-ci.yml +++ b/ci/cscs-ci.yml @@ -11,7 +11,6 @@ include: - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml' - local: 'ci/cscs-ci-ext-config.yml' - - local: 'ci/cscs-ci-dace-determinism.yml' # Note: # block-name-with-dashes -> defined in remote cscs-ci ext include From 9c2b44d97ae9ec7f39cec584840d5a953d0a8fa7 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Fri, 22 May 2026 13:50:57 +0200 Subject: [PATCH 12/28] WIP --- noxfile.py | 2 + scripts/dace_deterministic_codegen.py | 123 ++++++++++++++++++++++---- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/noxfile.py b/noxfile.py index 56a0f1f917..0ada065013 100755 --- a/noxfile.py +++ b/noxfile.py @@ -417,6 +417,7 @@ def _run_dace_determinism_check( "GT4PY_BUILD_CACHE_LIFETIME": "persistent", "DACE_compiler_build_folder_mode": "development", }, + success_codes=[0, 1, NO_TESTS_COLLECTED_EXIT_CODE], ) # Import the comparison library from scripts/. 
It uses only stdlib, @@ -437,6 +438,7 @@ def _run_dace_determinism_check( run2_dir / ".gt4py_cache", diffs_dir=workdir / "diffs", report_path=workdir / "report.txt", + tolerate_missing=True, ) except DeterminismError as e: session.error(f"{e}\nSee {workdir / 'report.txt'} and {workdir / 'diffs'}/") diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py index 3e2140f311..31a4f60ddf 100644 --- a/scripts/dace_deterministic_codegen.py +++ b/scripts/dace_deterministic_codegen.py @@ -267,6 +267,28 @@ class ProgramResult: only_in_run1: list[str] only_in_run2: list[str] + @property + def missing_on_one_side(self) -> bool: + """True iff the program was cached in only one of the two runs. + + Distinguished from 'differs by content' (where the program is in + both runs but at least one file's bytes differ) — the latter is + always a determinism failure, the former is often a flaky-test + artifact and can be tolerated via ``tolerate_missing``. + + The compare() loop populates only_in_runN exhaustively with the + absent side's file list ONLY when the whole program is missing; + when both programs are present but one happens to carry an extra + file, only_in_runN contains only that extra file. We distinguish + the two by requiring exactly one side to be wholly empty (which + is what compare() emits for the missing-program case). 
+ """ + return ( + not self.match + and not self.differing_files + and (bool(self.only_in_run1) ^ bool(self.only_in_run2)) + ) + def compare( snap1: dict[str, ProgramSnapshot], @@ -352,14 +374,27 @@ def write_diffs( (prog_dir / f"{rel.replace('/', '__')}.diff").write_text(udiff) -def render_report(results: list[ProgramResult]) -> str: +def render_report(results: list[ProgramResult], *, tolerate_missing: bool = False) -> str: n_total = len(results) - n_match = sum(1 for r in results if r.match) - n_diff = n_total - n_match + n_missing = sum(1 for r in results if r.missing_on_one_side) + n_diff_content = sum( + 1 for r in results if r.differing_files or (not r.match and not r.missing_on_one_side) + ) + n_match = n_total - n_missing - n_diff_content - lines = [f"Programs: {n_total} matches: {n_match} mismatches: {n_diff}", ""] + header = ( + f"Programs: {n_total} matches: {n_match} " + f"differs: {n_diff_content} only-in-one-run: {n_missing}" + ) + lines = [header, ""] for r in results: - lines.append(f" [{'MATCH ' if r.match else 'DIFFER'}] {r.name}") + if r.match: + tag = "MATCH " + elif r.missing_on_one_side: + tag = "ONE-OF" + else: + tag = "DIFFER" + lines.append(f" [{tag}] {r.name}") if not r.match: lines.extend(f" differs: {rel}" for rel in r.differing_files) lines.extend(f" only in run1: {rel}" for rel in r.only_in_run1) @@ -368,10 +403,19 @@ def render_report(results: list[ProgramResult]) -> str: lines.append("") if n_total == 0: lines.append("RESULT: no programs observed (nothing was cached).") - elif n_diff == 0: + elif n_diff_content == 0 and n_missing == 0: lines.append(f"RESULT: codegen deterministic — {n_match} program(s) match.") + elif n_diff_content == 0 and tolerate_missing: + lines.append( + f"RESULT: codegen deterministic across the {n_match} shared program(s); " + f"{n_missing} program(s) cached in only one run (tolerated)." 
+ ) else: - lines.append(f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff}/{n_total} program(s) differ.") + suffix = f" (plus {n_missing} cached in only one run)" if n_missing else "" + lines.append( + f"RESULT: NON-DETERMINISTIC CODEGEN — {n_diff_content}/{n_total} " + f"program(s) differ by content{suffix}." + ) return "\n".join(lines) + "\n" @@ -386,6 +430,7 @@ def check_determinism( *, diffs_dir: Path | None = None, report_path: Path | None = None, + tolerate_missing: bool = True, ) -> list[ProgramResult]: """Compare two gt4py caches; write artifacts; raise on mismatch. @@ -396,13 +441,31 @@ def check_determinism( Returns the list of :class:`ProgramResult` on a successful match. + The ``tolerate_missing`` parameter controls how programs that ended + up cached in only one of the two runs are treated: + + * ``True`` (default, lenient) — only programs that are in BOTH caches but + whose source files differ byte-for-byte trigger + :class:`DeterminismError`. Programs cached on only one side are + still itemized in the report (so you can investigate) but do not + cause the check to fail. + * ``False`` (strict) — any program missing from one side + is a determinism failure, on the theory that test selection + should be deterministic and a missing program signals real + non-determinism somewhere in the toolchain. + Raises: UnsupportedBackendError: A snapshot contained a backend other than cpu/cuda. NoProgramsObservedError: Both caches were empty — likely zero tests collected. + NoSourceFilesObservedError: + Programs were cached but no source files survived + (usually a missing ``DACE_compiler_build_folder_mode=development``). DeterminismError: - One or more programs differed between the two runs. + One or more programs differed between the two runs. Under + ``tolerate_missing=True`` this requires at least one + *content* difference. 
""" snap1 = snapshot_run(cache1) snap2 = snapshot_run(cache2) @@ -412,7 +475,7 @@ def check_determinism( write_diffs(results, snap1, snap2, diffs_dir) if report_path is not None: report_path.parent.mkdir(parents=True, exist_ok=True) - report_path.write_text(render_report(results)) + report_path.write_text(render_report(results, tolerate_missing=tolerate_missing)) if not results: diag1 = _diagnose_empty_cache(cache1) @@ -442,12 +505,25 @@ def check_determinism( f"survive into the cache." ) - n_diff = sum(1 for r in results if not r.match) - if n_diff > 0: - raise DeterminismError( - f"DaCe codegen is non-deterministic: {n_diff}/{len(results)} program(s) differ", - results, - ) + # Count true differs (program in both runs, content differs) and missing + # (program only in one run). Under tolerate_missing, only true differs + # raise; under strict mode, both do. + n_true_differs = sum( + 1 for r in results if r.differing_files or (not r.missing_on_one_side and not r.match) + ) + n_missing = sum(1 for r in results if r.missing_on_one_side) + n_failed = n_true_differs if tolerate_missing else (n_true_differs + n_missing) + + if n_failed > 0: + if tolerate_missing: + msg = ( + f"DaCe codegen is non-deterministic: {n_true_differs}/{len(results)} " + f"program(s) differ by content (plus {n_missing} cached in only one " + f"run, ignored under tolerate_missing)" + ) + else: + msg = f"DaCe codegen is non-deterministic: {n_failed}/{len(results)} program(s) differ" + raise DeterminismError(msg, results) return results @@ -493,6 +569,18 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: metavar="PATH", help="If set, write the human-readable summary report to this file.", ) + p.add_argument( + "--tolerate-missing", + action=argparse.BooleanOptionalAction, + default=True, + help=( + "Whether to skip programs cached in only one of the two runs. " + "Default: lenient — only content differences in shared programs " + "raise. 
Pass --no-tolerate-missing for strict mode, where any " + "program absent from one cache also counts as a determinism " + "failure." + ), + ) return p.parse_args(argv) @@ -505,6 +593,7 @@ def main(argv: list[str] | None = None) -> int: args.run2.expanduser().resolve(), diffs_dir=args.diffs_dir.expanduser().resolve() if args.diffs_dir else None, report_path=args.report.expanduser().resolve() if args.report else None, + tolerate_missing=args.tolerate_missing, ) except UnsupportedBackendError as e: print(f"error: {e}", file=sys.stderr) @@ -516,11 +605,11 @@ def main(argv: list[str] | None = None) -> int: print(f"error: {e}", file=sys.stderr) return 2 except DeterminismError as e: - print(render_report(e.results)) + print(render_report(e.results, tolerate_missing=args.tolerate_missing)) print(f"error: {e}", file=sys.stderr) return 1 - print(render_report(results)) + print(render_report(results, tolerate_missing=args.tolerate_missing)) return 0 From c6632a1986739de9a1de418e58fb5b66d255dd29 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Fri, 22 May 2026 14:23:46 +0200 Subject: [PATCH 13/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing cartesian(cpu) --- noxfile.py | 114 +++++++-- scripts/dace_deterministic_codegen.py | 342 +++++++++++++++++++++----- 2 files changed, 368 insertions(+), 88 deletions(-) diff --git a/noxfile.py b/noxfile.py index 0ada065013..3d4408b649 100755 --- a/noxfile.py +++ b/noxfile.py @@ -376,10 +376,28 @@ def test_typing_exports(session: nox.Session) -> None: def _run_dace_determinism_check( session: nox.Session, pytest_args: Sequence[str], + *, + layout: Literal["next", "cartesian"], ) -> None: - """Run pytest twice with isolated GT4PY_BUILD_CACHE_DIR and verify the + """Run pytest twice with an isolated cache per run, then verify the DaCe-generated source files are byte-identical between the two runs. 
+ The ``layout`` parameter selects which cache mechanism gt4py is using: + + * ``"next"`` — sets ``GT4PY_BUILD_CACHE_DIR=`` so the cache + lands at ``/.gt4py_cache/``, where the comparator walks + ``_/src/{cpu,cuda}/...``. + * ``"cartesian"`` — sets ``GT_CACHE_ROOT=`` plus + ``GT_CACHE_PYTEST_DIR=/gt_cache`` AND passes + ``--keep-gtcache`` to pytest. The conftest in + ``tests/cartesian_tests/conftest.py`` unconditionally + ``shutil.rmtree``\\ s its cache directory at ``pytest_sessionfinish`` + unless that CLI flag is present — that gating is independent of the + env vars, so we need both knobs. The comparator then walks + ``/gt_cache/py_// + /__/`` and compares + ``m_*.py`` + ``bindings.{cpp,cu}`` + ``computation.hpp``. + On mismatch, calls ``session.error(...)`` with a pointer to the diffs/ directory and report.txt so the failure is actionable. """ @@ -392,31 +410,71 @@ def _run_dace_determinism_check( run1_dir.mkdir(parents=True) run2_dir.mkdir(parents=True) - # gt4py appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, so we pass - # the parent directory and the cache lands at .gt4py_cache/ underneath. - # Setting GT4PY_BUILD_CACHE_LIFETIME to `persistent` keeps the cache - # around long enough for the snapshot pass to read it. + # Per-layout knobs: + # - cache_subdir: the subdirectory of run_dir where the cache lands + # - extra_pytest: additional pytest CLI args (cartesian needs + # --keep-gtcache; see conftest in + # tests/cartesian_tests/conftest.py:pytest_sessionfinish) + # - env_for_run: env-var overrides for the pytest subprocess # - # Setting DACE_compiler_build_folder_mode to `development` is REQUIRED. - # gt4py configures dace to `production` mode by default, which cleans - # up the dace build folder after compilation — leaving only the - # compiled .so/.dylib and stripping the src/ tree we need to - # diff. Forcing `development` keeps src/cpu/*.cpp and - # src/cuda/*.cu around so the checker has codegen to compare. 
- # (See src/gt4py/next/program_processors/runners/dace/workflow/ - # common.py for the upstream config this overrides; the - # comment there explicitly documents this env var as the escape - # hatch.) + # Setting DACE_compiler_build_folder_mode to `development` is REQUIRED for + # both layouts. gt4py configures dace to `production` mode by default, + # which cleans up the dace build folder after compilation — leaving only + # the compiled .so and stripping the codegen sources we need to diff. + # Forcing `development` keeps `src/...` (next) and `bindings.{cpp,cu}` + + # `computation.hpp` (cartesian) around so the checker has codegen to + # compare. (See src/gt4py/next/program_processors/runners/dace/workflow/ + # common.py for the upstream next-side config this overrides; the + # comment there explicitly documents this env var as the escape hatch.) + if layout == "cartesian": + cache_subdir = "gt_cache" + extra_pytest_args: list[str] = ["--keep-gtcache"] + + def env_for_run(run_dir: pathlib.Path) -> dict[str, str]: + # gt4py.cartesian and gt4py.next have entirely separate caching + # subsystems with separate env vars. cartesian uses + # GT_CACHE_ROOT (the `root_path` for cache_settings) and + # GT_CACHE_PYTEST_DIR (which the conftest writes into + # cache_settings["dir_name"]). Both required to isolate the + # cache per run; --keep-gtcache is required for it to survive + # pytest_sessionfinish. + return { + "GT_CACHE_ROOT": str(run_dir), + "GT_CACHE_PYTEST_DIR": str(run_dir / cache_subdir), + "DACE_compiler_build_folder_mode": "development", + } + else: + cache_subdir = ".gt4py_cache" + extra_pytest_args = [] + + def env_for_run(run_dir: pathlib.Path) -> dict[str, str]: + # gt4py.next appends `.gt4py_cache` to GT4PY_BUILD_CACHE_DIR, so + # we pass the parent directory and the cache lands at + # .gt4py_cache/ underneath. Setting GT4PY_BUILD_CACHE_LIFETIME + # to `persistent` keeps the cache around long enough for the + # snapshot pass to read it. 
+ return { + "GT4PY_BUILD_CACHE_DIR": str(run_dir), + "GT4PY_BUILD_CACHE_LIFETIME": "persistent", + "DACE_compiler_build_folder_mode": "development", + } + for run_dir in (run1_dir, run2_dir): session.run( *pytest_args, + *extra_pytest_args, *session.posargs, - env=session.env - | { - "GT4PY_BUILD_CACHE_DIR": str(run_dir), - "GT4PY_BUILD_CACHE_LIFETIME": "persistent", - "DACE_compiler_build_folder_mode": "development", - }, + env=session.env | env_for_run(run_dir), + # The determinism check cares only about whether the DaCe + # codegen lands deterministically in the cache; individual + # test outcomes are irrelevant. Failed tests (exit code 1) + # often reflect runtime issues that have nothing to do with + # codegen — e.g., GPU contention from pytest-xdist workers + # racing for a single CUDA context on Santis, producing + # spurious cupy OutOfMemoryErrors. As long as SOME programs + # got cached, the comparator (called below with + # tolerate_missing=True) extracts the determinism signal from + # whatever overlap is present. success_codes=[0, 1, NO_TESTS_COLLECTED_EXIT_CODE], ) @@ -434,11 +492,15 @@ def _run_dace_determinism_check( try: check_determinism( - run1_dir / ".gt4py_cache", - run2_dir / ".gt4py_cache", + run1_dir / cache_subdir, + run2_dir / cache_subdir, diffs_dir=workdir / "diffs", report_path=workdir / "report.txt", + # Programs cached in only one run are reported but not + # counted as determinism failures — see the success_codes + # note above for why this is the right policy here. tolerate_missing=True, + layout=layout, ) except DeterminismError as e: session.error(f"{e}\nSee {workdir / 'report.txt'} and {workdir / 'diffs'}/") @@ -449,8 +511,8 @@ def _run_dace_determinism_check( except UnsupportedBackendError as e: session.error(str(e)) finally: - # Reclaim disk after the comparison. The two per-run gt4py caches - # are ~hundreds of MB each in development mode, and dace's own + # Reclaim disk after the comparison. 
The two per-run caches are + # ~hundreds of MB each in development mode, and dace's own # `.dacecache/` at the repo root (used for SDFGs not routed # through gt4py's build_folder override) is comparably bulky. # We always keep `workdir/diffs/` and `workdir/report.txt` — @@ -498,6 +560,7 @@ def test_cartesian_dace_determinism( f"{markers}", str(pathlib.Path("tests") / "cartesian_tests"), ], + layout="cartesian", ) @@ -548,6 +611,7 @@ def test_next_dace_determinism( f"{markers}", str(pathlib.Path("tests") / "next_tests"), ], + layout="next", ) diff --git a/scripts/dace_deterministic_codegen.py b/scripts/dace_deterministic_codegen.py index 31a4f60ddf..1f547959d0 100644 --- a/scripts/dace_deterministic_codegen.py +++ b/scripts/dace_deterministic_codegen.py @@ -15,13 +15,26 @@ ``noxfile.py``; also runnable standalone for ad-hoc comparison of two existing caches. -The check compares everything dace writes as generated source under -each cached program's ``src/`` — ``cpu/`` and ``cuda/`` (with HIP -picked up automatically under ``cuda/hip/``). It deliberately -ignores SDFGs, build artifacts, source maps, and runtime metadata. - -If a snapshot ever encounters a top-level backend other than cpu or -cuda, it errors with a clear message rather than silently skipping. +Supports both gt4py cache layouts: + +* ``layout="next"`` (default) — the ``gt4py.next`` cache, a flat + ``/_/src/{cpu,cuda}/...`` structure + written via ``GT4PY_BUILD_CACHE_DIR``. Compares everything dace + writes as generated source under each program's ``src/``. + Unknown top-level backends (anything other than cpu/cuda, with + HIP nesting under cuda/hip) raise :class:`UnsupportedBackendError`. + +* ``layout="cartesian"`` — the ``gt4py.cartesian`` cache, a deeply + nested ``/py_///__/...`` structure written via + ``GT_CACHE_ROOT`` + ``GT_CACHE_PYTEST_DIR`` (with the conftest's + ``--keep-gtcache`` flag needed to survive ``pytest_sessionfinish``). 
+ Compares the top-level ``m_*.py`` loader plus ``bindings.{cpp,cu}`` + and ``computation.hpp`` under ``m_*_pyext_BUILD/``. Skips compiled + artifacts (``*.so``, ``*.o``, ``__pycache__/``), gzipped SDFG + archives (``*.sdfgz`` — gzip headers carry timestamps), the + metadata file (``*.cacheinfo``), and the recursive build mirror + directories (``_GT_/``, ``tmp/``) inside ``_pyext_BUILD/``. As a library ------------ @@ -33,7 +46,8 @@ check_determinism( cache1=Path(".../run1/.gt4py_cache"), cache2=Path(".../run2/.gt4py_cache"), - diffs_dir=Path(".../diffs"), # optional + layout="next", # or "cartesian" + diffs_dir=Path(".../diffs"), # optional report_path=Path(".../report.txt"), # optional ) @@ -41,8 +55,8 @@ if both caches are empty, ``NoSourceFilesObservedError`` if programs were cached but contain no source files (typically a missing ``DACE_compiler_build_folder_mode=development``), or -``UnsupportedBackendError`` if the codegen produced an unfamiliar -backend layout. +``UnsupportedBackendError`` if the next-layout codegen produced an +unfamiliar top-level backend. As a CLI -------- @@ -50,8 +64,9 @@ :: python scripts/dace_deterministic_codegen.py \\ - --run1 path/to/cache1/.gt4py_cache \\ - --run2 path/to/cache2/.gt4py_cache \\ + --run1 path/to/cache1 \\ + --run2 path/to/cache2 \\ + --layout {next,cartesian} \\ [--diffs-dir DIR] [--report FILE] Exit codes: @@ -71,6 +86,14 @@ import re import sys from pathlib import Path +from typing import Literal + + +#: Cache layout dispatch tag. ``"next"`` is the gt4py.next cache +#: (flat ``/_/src/...`` structure); ``"cartesian"`` +#: is the gt4py.cartesian cache (deeply nested ``/py_*/ +#: //__/...``). 
+Layout = Literal["next", "cartesian"] # --------------------------------------------------------------------------- @@ -106,6 +129,53 @@ SUPPORTED_BACKENDS: frozenset[str] = frozenset({"cpu", "cuda"}) +# Cartesian layout constants ------------------------------------------------ + +#: Suffix that marks the per-stencil build directory inside a cartesian +#: program folder, e.g. ``m_TestCopy_dacecpu_4__dacecpu_a8441f26b4_pyext_BUILD/``. +#: Inside that directory we look at the TOP LEVEL only — its ``_GT_/`` and +#: ``tmp/`` subdirectories contain recursive copies of the build path that +#: setuptools spawns when building into an absolute prefix, and those are +#: build artifacts, not codegen output. +CARTESIAN_BUILD_DIR_SUFFIX = "_pyext_BUILD" + +#: Names of files inside ``m_*_pyext_BUILD/`` whose contents we byte-compare. +#: ``bindings.{cpp,cu}`` is gt4py.cartesian's pybind11 wrapper around the +#: dace SDFG; ``computation.hpp`` is dace's generated kernel implementation. +#: Both reflect the codegen surface directly — a non-deterministic codegen +#: pass will show up here. +CARTESIAN_BUILD_SOURCE_NAMES: frozenset[str] = frozenset( + {"bindings.cpp", "bindings.cu", "computation.hpp"} +) + +#: Directory-name prefixes inside a program folder that we MUST NOT descend +#: into when searching for ``m_*.py`` loader stubs. ``__pycache__`` is +#: Python's bytecode cache; the build dir holds compiler-generated artifacts. +CARTESIAN_SKIP_DIRS: frozenset[str] = frozenset({"__pycache__"}) + +#: The 10-hex codegen digest gt4py.cartesian embeds in filenames like +#: ``m______.py`` and the build +#: directory ``m_...__pyext_BUILD/``. We replace it with the +#: literal ```` in the snapshot's relpath keys so that +#: ``bindings.cpp`` from run1 (digest ``a8441f26b4``) and from run2 +#: (digest ``bbbbbbbbbb``) map to the same path, surfacing a real +#: content diff rather than two "only-in-one-run" entries that look +#: like flaky test selection. 
+#: +#: The pattern matches ``_`` + 10 lowercase hex + a boundary that is +#: either a file extension (``.py``, ``.so``, ``.sdfgz``, ``.cacheinfo``) +#: or the literal ``_pyext_BUILD`` suffix. Anchoring on those endings +#: avoids false-positive matches inside arbitrary identifiers. +CARTESIAN_DIGEST_RE = re.compile(r"_(?P[0-9a-f]{10})(?=(\.|_pyext_BUILD))") + + +def _normalize_cartesian_relpath(relpath: str) -> str: + """Replace the 10-hex codegen digest in a cartesian relpath with the + literal token ````. Idempotent. Leaves non-matching paths + unchanged.""" + return CARTESIAN_DIGEST_RE.sub("_", relpath) + + # --------------------------------------------------------------------------- # Exceptions # --------------------------------------------------------------------------- @@ -145,8 +215,17 @@ def __init__(self, message: str, results: list[ProgramResult]) -> None: @dataclasses.dataclass(frozen=True) class FileEntry: + #: Logical relative path used as the comparison key. For ``layout="next"`` + #: this is the on-disk path under the program folder verbatim. For + #: ``layout="cartesian"`` the 10-hex codegen digest in filenames is + #: replaced with the literal token ```` so equivalent files + #: across two runs (which carry different digests) still pair up. relpath: str sha256: str + #: The actual filename on disk relative to the program folder. Equal to + #: ``relpath`` for next; un-normalized (real digest preserved) for + #: cartesian. Used by :func:`write_diffs` to read the file back. + disk_relpath: str @dataclasses.dataclass @@ -156,21 +235,36 @@ class ProgramSnapshot: files: dict[str, FileEntry] -def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: +def snapshot_run(cache_root: Path, *, layout: Layout = "next") -> dict[str, ProgramSnapshot]: """Walk a gt4py build cache and snapshot every program's generated source. 
+ Dispatches on ``layout`` to either :func:`_snapshot_run_next` (the flat + ``/_/src/...`` structure of gt4py.next) or + :func:`_snapshot_run_cartesian` (the deeply nested + ``/py_*///__/...`` + structure of gt4py.cartesian). + + Returns an empty dict (rather than raising) when the path doesn't + exist or contains no programs in the expected layout; callers can + pair the empty result with :func:`_diagnose_empty_cache` for a + human-readable explanation of why. + """ + if layout == "next": + return _snapshot_run_next(cache_root) + if layout == "cartesian": + return _snapshot_run_cartesian(cache_root) + raise ValueError(f"unknown layout: {layout!r}, expected 'next' or 'cartesian'") + + +def _snapshot_run_next(cache_root: Path) -> dict[str, ProgramSnapshot]: + """Snapshot a gt4py.next-layout cache. + The input directory's name is irrelevant — the function looks for immediate subdirectories matching ``_<64-char-hex-digest>`` - (gt4py's program-folder naming) and reads ``/src/`` + (gt4py.next's program-folder naming) and reads ``/src/`` recursively under each one. HIP files at ``src/cuda/hip/`` are picked up automatically by the recursive walk. - Returns an empty dict (rather than raising) when the path does not - exist, is not a directory, has no subdirectories, or has only - subdirectories whose names don't match the program-folder pattern. - Callers can use :func:`_diagnose_empty_cache` to get a human - description of which of those it was. - Raises :class:`UnsupportedBackendError` if any program's ``src/`` contains a top-level backend not in :data:`SUPPORTED_BACKENDS`. 
""" @@ -212,11 +306,80 @@ def snapshot_run(cache_root: Path) -> dict[str, ProgramSnapshot]: if not fpath.is_file(): continue rel = fpath.relative_to(folder).as_posix() - files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath)) + files[rel] = FileEntry(relpath=rel, sha256=_sha256(fpath), disk_relpath=rel) out[name] = ProgramSnapshot(name=name, folder=folder, files=files) return out +def _snapshot_run_cartesian(cache_root: Path) -> dict[str, ProgramSnapshot]: + """Snapshot a gt4py.cartesian-layout cache. + + Program identity is the **relative path** from ``cache_root`` to the + ``__`` folder, e.g. + ``py310_1013/dacecpu/cartesian_tests/integration_tests/multi_feature_tests + /test_suites/TestCopy_dacecpu_4``. Two runs of the same parametrized + test should produce the same relative path, so this works as a stable + matching key across runs. + + Files compared per program (everything else is skipped — see module + docstring for rationale): + + * ``m_*.py`` at the top of the program folder — the gt4py loader + stub. Its filename embeds the 10-hex codegen digest, and the file + body references it; either changing is a determinism signal. + * Files exactly one level inside ``m_*_pyext_BUILD/`` whose basename + is in :data:`CARTESIAN_BUILD_SOURCE_NAMES` (``bindings.cpp``, + ``bindings.cu``, ``computation.hpp``). The ``_GT_/`` and ``tmp/`` + subdirectories of the build dir are recursive build-path mirrors + that setuptools creates when targeting an absolute prefix — they + contain object files and duplicated outputs, not codegen. + """ + if not cache_root.is_dir(): + return {} + + # Discover program folders by finding every top-level `m_*.py` loader. + # "Top-level" here means: not under __pycache__ and not under any + # *_pyext_BUILD directory (which contains its own copies of generated + # files we don't want). 
+ program_dirs: set[Path] = set() + for py in cache_root.rglob("m_*.py"): + if not py.is_file(): + continue + parts = py.relative_to(cache_root).parts + # Reject if any ancestor is __pycache__ or any *_pyext_BUILD dir + if any( + p in CARTESIAN_SKIP_DIRS or p.endswith(CARTESIAN_BUILD_DIR_SUFFIX) for p in parts[:-1] + ): + continue + program_dirs.add(py.parent) + + out: dict[str, ProgramSnapshot] = {} + for prog_dir in sorted(program_dirs): + program_id = prog_dir.relative_to(cache_root).as_posix() + + files: dict[str, FileEntry] = {} + + # Top-level m_*.py file(s) — the gt4py loader stub(s). + for f in sorted(prog_dir.glob("m_*.py")): + if f.is_file(): + rel = _normalize_cartesian_relpath(f.name) + files[rel] = FileEntry(relpath=rel, sha256=_sha256(f), disk_relpath=f.name) + + # Files DIRECTLY under any m_*_pyext_BUILD/ — iterdir, not rglob, + # so we don't descend into _GT_/ or tmp/ which carry build artifacts. + for build_dir in sorted(prog_dir.glob(f"m_*{CARTESIAN_BUILD_DIR_SUFFIX}")): + if not build_dir.is_dir(): + continue + for f in sorted(build_dir.iterdir()): + if f.is_file() and f.name in CARTESIAN_BUILD_SOURCE_NAMES: + disk_rel = f"{build_dir.name}/{f.name}" + rel = _normalize_cartesian_relpath(disk_rel) + files[rel] = FileEntry(relpath=rel, sha256=_sha256(f), disk_relpath=disk_rel) + + out[program_id] = ProgramSnapshot(name=program_id, folder=prog_dir, files=files) + return out + + def _sha256(path: Path) -> str: h = hashlib.sha256() with path.open("rb") as f: @@ -225,19 +388,39 @@ def _sha256(path: Path) -> str: return h.hexdigest() -def _diagnose_empty_cache(cache_root: Path) -> str: - """Return a one-line explanation of why :func:`snapshot_run` found nothing. - - Distinguishes "path didn't exist", "path is a file", "directory has - no subdirectories", and "directory has subdirectories but none match - the program-folder pattern" (with a sample of the names found, so a - wrong-path mistake — e.g. 
passing the parent of ``.gt4py_cache`` — - is immediately visible). - """ +def _diagnose_empty_cache(cache_root: Path, *, layout: Layout = "next") -> str: + """Return a one-line explanation of why :func:`snapshot_run` found nothing.""" if not cache_root.exists(): return "path does not exist" if not cache_root.is_dir(): return "path exists but is not a directory" + + if layout == "cartesian": + # For cartesian we expect /py_//... + # If there's no py*_*/, the user likely passed the wrong path + # (e.g. the run_dir instead of run_dir/gt_cache). + subdirs = sorted(p for p in cache_root.iterdir() if p.is_dir()) + if not subdirs: + return "directory has no subdirectories" + pyver_dirs = [p for p in subdirs if re.match(r"^py\d+_\d+$", p.name)] + if not pyver_dirs: + sample_names = [p.name for p in subdirs[:3]] + suffix = f" (and {len(subdirs) - 3} more)" if len(subdirs) > 3 else "" + return ( + f"directory contains {len(subdirs)} subdirectory(ies) but none " + f"match cartesian's per-Python-version pattern `py_/` " + f"(saw: {sample_names}{suffix}). Did you pass the cache root, " + f"or its parent?" + ) + # py*/ exists but no m_*.py loader stubs were found anywhere + return ( + "cartesian cache structure present but contains no `m_*.py` loader " + "stubs at any depth — pytest probably collected zero tests, or the " + "conftest's `--keep-gtcache` flag wasn't passed and the cache was " + "wiped at session teardown." + ) + + # Fall through to the next-layout diagnostic subdirs = sorted(p for p in cache_root.iterdir() if p.is_dir()) if not subdirs: return "directory has no subdirectories" @@ -348,8 +531,13 @@ def write_diffs( s1, s2 = snap1.get(r.name), snap2.get(r.name) prog_dir = diffs_dir / r.name for rel in r.differing_files: - f1 = (s1.folder / rel) if s1 else None - f2 = (s2.folder / rel) if s2 else None + # `rel` is the canonical (normalized) key; the on-disk filename + # may differ from it (cartesian normalizes the 10-hex digest). 
+ # Look up the per-side FileEntry to recover the real path. + e1 = s1.files.get(rel) if s1 else None + e2 = s2.files.get(rel) if s2 else None + f1 = (s1.folder / e1.disk_relpath) if (s1 and e1) else None + f2 = (s2.folder / e2.disk_relpath) if (s2 and e2) else None if not (f1 and f2 and f1.exists() and f2.exists()): continue try: @@ -431,44 +619,48 @@ def check_determinism( diffs_dir: Path | None = None, report_path: Path | None = None, tolerate_missing: bool = True, + layout: Layout = "next", ) -> list[ProgramResult]: """Compare two gt4py caches; write artifacts; raise on mismatch. - Snapshots both caches (under ``/src/`` recursively) and + Snapshots both caches (using the ``layout``-specific walker) and diffs them. Optionally writes per-file unified diffs to ``diffs_dir//`` and a human-readable summary to ``report_path``. Returns the list of :class:`ProgramResult` on a successful match. - The ``tolerate_missing`` parameter controls how programs that ended - up cached in only one of the two runs are treated: - - * ``True`` (default, lenient) — only programs that are in BOTH caches but - whose source files differ byte-for-byte trigger - :class:`DeterminismError`. Programs cached on only one side are - still itemized in the report (so you can investigate) but do not - cause the check to fail. - * ``False`` (strict) — any program missing from one side - is a determinism failure, on the theory that test selection - should be deterministic and a missing program signals real - non-determinism somewhere in the toolchain. + Parameters + ---------- + cache1, cache2 + Roots of the two caches to compare. For ``layout="next"``, this + is the ``.gt4py_cache/`` directory (i.e. the parent of all the + ``_/`` program folders). For ``layout="cartesian"``, + this is the directory pointed to by ``GT_CACHE_PYTEST_DIR`` (i.e. + the parent of ``py_/``). + layout + Which cache layout to expect. See module docstring for details. + tolerate_missing + See module docstring. 
Default ``True`` (lenient). + diffs_dir, report_path + If set, persist diagnostic artifacts. Raises: UnsupportedBackendError: - A snapshot contained a backend other than cpu/cuda. + (next layout only) A snapshot contained a backend other than cpu/cuda. NoProgramsObservedError: - Both caches were empty — likely zero tests collected. + Both caches were empty — likely zero tests collected or the + cache was wiped at teardown. NoSourceFilesObservedError: - Programs were cached but no source files survived - (usually a missing ``DACE_compiler_build_folder_mode=development``). + Programs were cached but no source files survived (usually a + missing ``DACE_compiler_build_folder_mode=development``). DeterminismError: One or more programs differed between the two runs. Under ``tolerate_missing=True`` this requires at least one *content* difference. """ - snap1 = snapshot_run(cache1) - snap2 = snapshot_run(cache2) + snap1 = snapshot_run(cache1, layout=layout) + snap2 = snapshot_run(cache2, layout=layout) results = compare(snap1, snap2) if diffs_dir is not None: @@ -478,8 +670,8 @@ def check_determinism( report_path.write_text(render_report(results, tolerate_missing=tolerate_missing)) if not results: - diag1 = _diagnose_empty_cache(cache1) - diag2 = _diagnose_empty_cache(cache2) + diag1 = _diagnose_empty_cache(cache1, layout=layout) + diag2 = _diagnose_empty_cache(cache2, layout=layout) raise NoProgramsObservedError( "no programs observed in either cache:\n" f" run1 ({cache1}): {diag1}\n" @@ -487,7 +679,7 @@ def check_determinism( ) # Safety net for the silent-false-positive case where both runs cached - # programs but every program's src/ tree is empty — typically because + # programs but every program's source tree is empty — typically because # dace's build_folder_mode is `production` (the gt4py default). Without # this, the comparator would see {} == {} for every program and report # `deterministic` despite there being nothing to compare. 
@@ -495,14 +687,20 @@ def check_determinism( len(s.files) for s in snap2.values() ) if total_files == 0: + if layout == "cartesian": + hint = ( + "programs were cached but contain none of `m_*.py`, " + "`bindings.{cpp,cu}`, or `computation.hpp`" + ) + else: + hint = "none of them contain any source files under src/" raise NoSourceFilesObservedError( - f"{len(results)} program(s) cached, but none of them contain any " - f"source files under src/. This almost always means dace's build " - f"folder mode is `production` rather than `development`, which " - f"strips the codegen output after compilation. Set " - f"DACE_compiler_build_folder_mode=development (lowercase matters) " - f"before running the tests so src/cpu/*.cpp and src/cuda/*.cu " - f"survive into the cache." + f"{len(results)} program(s) cached, but {hint}. This almost " + f"always means dace's build folder mode is `production` rather " + f"than `development`, which strips the codegen output after " + f"compilation. Set DACE_compiler_build_folder_mode=development " + f"(lowercase matters) before running the tests so the codegen " + f"survives into the cache." ) # Count true differs (program in both runs, content differs) and missing @@ -546,14 +744,31 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: required=True, type=Path, metavar="PATH", - help="Path to the first .gt4py_cache directory.", + help=( + "Path to the first cache root. For --layout next, this is the " + ".gt4py_cache/ directory. For --layout cartesian, this is the " + "directory that GT_CACHE_PYTEST_DIR pointed to (the parent of " + "py_/)." + ), ) p.add_argument( "--run2", required=True, type=Path, metavar="PATH", - help="Path to the second .gt4py_cache directory.", + help="Path to the second cache root. Same conventions as --run1.", + ) + p.add_argument( + "--layout", + choices=["next", "cartesian"], + default="next", + help=( + "Cache layout. `next` is gt4py.next's flat " + "/_/src/... structure (default). 
`cartesian` " + "is gt4py.cartesian's deeply nested " + "/py_///__/ " + "structure." + ), ) p.add_argument( "--diffs-dir", @@ -594,6 +809,7 @@ def main(argv: list[str] | None = None) -> int: diffs_dir=args.diffs_dir.expanduser().resolve() if args.diffs_dir else None, report_path=args.report.expanduser().resolve() if args.report else None, tolerate_missing=args.tolerate_missing, + layout=args.layout, ) except UnsupportedBackendError as e: print(f"error: {e}", file=sys.stderr) From 5f818a30eafc3e9b50d951f44dc3cf458578f094 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Fri, 22 May 2026 15:24:39 +0200 Subject: [PATCH 14/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (artifacts) --- ci/cscs-ci-dace-determinism.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 86a89f374c..21382ad8e2 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -108,6 +108,8 @@ build_cscs_amd_rocm: - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py" - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" - cd "${WORKDIR}/gt4py" && ./noxfile.py -s "${NOX_SESSION}" + after_script: + - cp -r "${WORKDIR}/gt4py/_dace_deterministic_codegen" "${CI_PROJECT_DIR}/" 2>/dev/null || true dace_determinism_cscs_gh200_cuda: extends: From f7a4c44ccc52be7db03033d18306792122daddb3 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 05:41:15 +0200 Subject: [PATCH 15/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (artifacts) --- ci/cscs-ci-dace-determinism.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 21382ad8e2..152f0a42a2 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml 
@@ -107,9 +107,14 @@ build_cscs_amd_rocm: script: - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py" - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" - - cd "${WORKDIR}/gt4py" && ./noxfile.py -s "${NOX_SESSION}" - after_script: - - cp -r "${WORKDIR}/gt4py/_dace_deterministic_codegen" "${CI_PROJECT_DIR}/" 2>/dev/null || true + - | + cd "${WORKDIR}/gt4py" + ./noxfile.py -s "${NOX_SESSION}" + nox_rc=$? + if [ -d _dace_deterministic_codegen ]; then + cp -r _dace_deterministic_codegen "${CI_PROJECT_DIR}/" + fi + exit $nox_rc dace_determinism_cscs_gh200_cuda: extends: @@ -167,7 +172,7 @@ dace_determinism_cscs_amd_rocm: CMAKE_PREFIX_PATH: /opt/rocm CUDA_HOME: /opt/rocm CXX: /opt/rocm/bin/hipcc - SLURM_TIMELIMIT: 40 + SLURM_TIMELIMIT: 60 parallel: matrix: - NOX_SESSION: From 5e8dedbc092e3f3cf4513058257dc9020f8f1fce Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 06:31:34 +0200 Subject: [PATCH 16/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (artifacts) --- ci/cscs-ci-dace-determinism.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 152f0a42a2..6cac476af4 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -97,7 +97,7 @@ build_cscs_amd_rocm: variables: PY_VERSION: '3.10' SLURM_JOB_NUM_NODES: 1 - SLURM_TIMELIMIT: 30 + SLURM_TIMELIMIT: 60 allow_failure: true artifacts: when: always @@ -109,8 +109,8 @@ build_cscs_amd_rocm: - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" - | cd "${WORKDIR}/gt4py" - ./noxfile.py -s "${NOX_SESSION}" - nox_rc=$? + nox_rc=0 + ./noxfile.py -s "${NOX_SESSION}" || nox_rc=$? 
if [ -d _dace_deterministic_codegen ]; then cp -r _dace_deterministic_codegen "${CI_PROJECT_DIR}/" fi @@ -172,7 +172,6 @@ dace_determinism_cscs_amd_rocm: CMAKE_PREFIX_PATH: /opt/rocm CUDA_HOME: /opt/rocm CXX: /opt/rocm/bin/hipcc - SLURM_TIMELIMIT: 60 parallel: matrix: - NOX_SESSION: From 9cb5b5f44d221a4f1f2a4b2baab42497596427df Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 10:49:20 +0200 Subject: [PATCH 17/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (artifacts) --- ci/cscs-ci-dace-determinism.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 6cac476af4..cc3da00df8 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -96,6 +96,7 @@ build_cscs_amd_rocm: image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG} variables: PY_VERSION: '3.10' + USE_MPI: 0 SLURM_JOB_NUM_NODES: 1 SLURM_TIMELIMIT: 60 allow_failure: true @@ -130,6 +131,7 @@ dace_determinism_cscs_gh200_cuda: SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + SLURM_TIMELIMIT: 90 parallel: matrix: - NOX_SESSION: @@ -149,6 +151,7 @@ dace_determinism_cscs_gh200_cpu: SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + SLURM_TIMELIMIT: 90 parallel: matrix: - NOX_SESSION: From e119731017e8cd07621c867d33708568e4428d32 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 13:38:12 +0200 Subject: [PATCH 18/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (time limit: WIP) --- ci/cscs-ci-dace-determinism.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index cc3da00df8..5e4c05d896 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -131,7 +131,6 @@ dace_determinism_cscs_gh200_cuda: SLURM_PARTITION: 
'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 - SLURM_TIMELIMIT: 90 parallel: matrix: - NOX_SESSION: @@ -151,7 +150,6 @@ dace_determinism_cscs_gh200_cpu: SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 - SLURM_TIMELIMIT: 90 parallel: matrix: - NOX_SESSION: From aa21bb38132cba18755f1cd726eead6a9c436e34 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 14:09:05 +0200 Subject: [PATCH 19/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (time limit: WIP) --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 3d4408b649..a44fec94c2 100755 --- a/noxfile.py +++ b/noxfile.py @@ -558,6 +558,7 @@ def test_cartesian_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", + "--ignore=tests/cartesian_tests/unit_tests/test_gtc/dace", str(pathlib.Path("tests") / "cartesian_tests"), ], layout="cartesian", @@ -609,6 +610,7 @@ def test_next_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/transformation_tests", str(pathlib.Path("tests") / "next_tests"), ], layout="next", From 714d383c3faa57fe2a86a53094db18a2aa1a8781 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 15:18:45 +0200 Subject: [PATCH 20/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (time limit: WIP) --- noxfile.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/noxfile.py b/noxfile.py index a44fec94c2..2a00724653 100755 --- a/noxfile.py +++ b/noxfile.py @@ -559,6 +559,7 @@ def test_cartesian_dace_determinism( "-m", f"{markers}", "--ignore=tests/cartesian_tests/unit_tests/test_gtc/dace", + "--ignore=tests/cartesian_tests/unit_tests/backend_tests/test_dace_backend.py", str(pathlib.Path("tests") / "cartesian_tests"), ], 
layout="cartesian", @@ -611,6 +612,11 @@ def test_next_dace_determinism( "-m", f"{markers}", "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/transformation_tests", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_translation.py", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_domain.py", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_sdfg_callable.py", + "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_utils.py", str(pathlib.Path("tests") / "next_tests"), ], layout="next", From 83aa993eda1aaaa6151d5ebf23970a29ab76ee81 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Tue, 26 May 2026 17:09:06 +0200 Subject: [PATCH 21/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (time limit: WIP) --- noxfile.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/noxfile.py b/noxfile.py index 2a00724653..d14aab05c7 100755 --- a/noxfile.py +++ b/noxfile.py @@ -558,9 +558,12 @@ def test_cartesian_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", - "--ignore=tests/cartesian_tests/unit_tests/test_gtc/dace", - "--ignore=tests/cartesian_tests/unit_tests/backend_tests/test_dace_backend.py", - str(pathlib.Path("tests") / "cartesian_tests"), + # Restrict to integration tests only. The dace-marked + # tests under unit_tests/ either build SDFGs by hand or + # exercise sub-stages of the dace backend in isolation, + # neither of which populates gt_cache/ in a way the + # determinism comparator can see. 
+ str(pathlib.Path("tests") / "cartesian_tests" / "integration_tests"), ], layout="cartesian", ) @@ -611,13 +614,12 @@ def test_next_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/transformation_tests", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_gtir_to_sdfg.py", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_translation.py", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_domain.py", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_sdfg_callable.py", - "--ignore=tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_utils.py", - str(pathlib.Path("tests") / "next_tests"), + # Restrict to integration tests only. The dace-marked + # tests under unit_tests/ either build SDFGs by hand or + # exercise sub-stages of the dace backend in isolation, + # neither of which populates .gt4py_cache/ in a way the + # determinism comparator can see. + str(pathlib.Path("tests") / "next_tests" / "integration_tests"), ], layout="next", ) From 61e9d1f10661238afad37597b0e8fe2edd291b9e Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 05:37:42 +0200 Subject: [PATCH 22/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD (time limit: WIP) --- noxfile.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/noxfile.py b/noxfile.py index d14aab05c7..a3f4cac209 100755 --- a/noxfile.py +++ b/noxfile.py @@ -511,6 +511,20 @@ def env_for_run(run_dir: pathlib.Path) -> dict[str, str]: except UnsupportedBackendError as e: session.error(str(e)) finally: + # Dump the determinism report to stdout so it lands in the CI + # log. 
report.txt itself only lives on the runner filesystem + # and isn't easy to retrieve after the job ends, but the CI + # log is. Printed on both success and failure so we always + # have a baseline of what programs were observed and which + # (if any) differed. The report is small — one line per + # program plus a short header — bounded by the test selection. + report_path = workdir / "report.txt" + if report_path.exists(): + banner = "=" * 70 + print(f"\n{banner}\ndeterminism report ({report_path}):\n{banner}") + print(report_path.read_text(), end="") + print(f"{banner}\n", flush=True) + # Reclaim disk after the comparison. The two per-run caches are # ~hundreds of MB each in development mode, and dace's own # `.dacecache/` at the repo root (used for SDFGs not routed @@ -558,12 +572,7 @@ def test_cartesian_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", - # Restrict to integration tests only. The dace-marked - # tests under unit_tests/ either build SDFGs by hand or - # exercise sub-stages of the dace backend in isolation, - # neither of which populates gt_cache/ in a way the - # determinism comparator can see. - str(pathlib.Path("tests") / "cartesian_tests" / "integration_tests"), + str(pathlib.Path("tests") / "cartesian_tests"), ], layout="cartesian", ) @@ -614,12 +623,7 @@ def test_next_dace_determinism( *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), "-m", f"{markers}", - # Restrict to integration tests only. The dace-marked - # tests under unit_tests/ either build SDFGs by hand or - # exercise sub-stages of the dace backend in isolation, - # neither of which populates .gt4py_cache/ in a way the - # determinism comparator can see. 
- str(pathlib.Path("tests") / "next_tests" / "integration_tests"), + str(pathlib.Path("tests") / "next_tests"), ], layout="next", ) From d7ba1282bad757a468e576cd49a0123f515e2a3b Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 14:57:48 +0200 Subject: [PATCH 23/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- ci/cscs-ci-dace-determinism.yml | 104 ++++++++++++++++---------------- noxfile.py | 14 ----- 2 files changed, 51 insertions(+), 67 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 5e4c05d896..61865176d1 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -20,10 +20,10 @@ # What it does # ------------ # Drives gt4py's `test_*_dace_determinism` nox sessions, each of which -# runs pytest twice with isolated GT4PY_BUILD_CACHE_DIR and asserts the -# DaCe-generated source files under /src/ are byte-identical -# between the two runs. A diff means the gt4py + dace toolchain is -# non-deterministic for that test selection. +# runs pytest twice with isolated build caches and asserts the +# DaCe-generated source files are byte-identical between the two runs. +# A diff means the gt4py + dace toolchain is non-deterministic for that +# test selection. # # Logic lives in: # noxfile.py (test_*_dace_determinism sessions) @@ -39,15 +39,13 @@ variables: UBUNTU_VERSION: '24.04' UV_VERSION: '0.11.2' +.test_python_versions: &test_python_versions ['3.10'] + stages: - build - test -# -- Build stage -------------------------------------------------------------- -# Mirrors `.build_common` in `cscs-ci.yml` and produces the same image tags -# (so the cached image from the default pipeline is reused when available). -# Both CUDA (for GH200) and ROCm (for AMD MI300) variants are built — the -# determinism matrix below covers cpu+cuda on GH200 and rocm7 on AMD. 
+# -- Build stage -- .build_common: stage: build extends: @@ -58,10 +56,11 @@ stages: DOCKERFILE: ci/Dockerfile DOCKER_BUILD_ARGS: '["BASE_IMAGE", "CACHE_DIR", "EXTRA_APTGET", "EXTRA_UV_ENV_VARS", "EXTRA_UV_PIP_ARGS", "EXTRA_UV_SYNC_ARGS", "PY_VERSION", "UV_VERSION", "WORKDIR_PATH" ]' PERSIST_IMAGE_NAME: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION} + WATCH_FILECHANGES: 'ci/Dockerfile ci/cscs-ci.yml ci/cscs-ci-ext-config.yml uv.lock' parallel: matrix: - - PY_VERSION: ['3.10'] + - PY_VERSION: *test_python_versions .build_extra_cuda: variables: @@ -90,13 +89,13 @@ build_cscs_amd_rocm: - .build_extra_rocm needs: [] -# -- Test stage: the determinism check ---------------------------------------- +# -- Test stage -- .dace_determinism_common: stage: test image: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION}:${DOCKER_TAG} variables: - PY_VERSION: '3.10' - USE_MPI: 0 + TEST_VARIANTS: 'cpu' + USE_MPI: 0 # TODO(havogt): to workaround the libfabric hook injecting incompatible libraries SLURM_JOB_NUM_NODES: 1 SLURM_TIMELIMIT: 60 allow_failure: true @@ -105,76 +104,75 @@ build_cscs_amd_rocm: paths: - _dace_deterministic_codegen/ expire_in: 1 month + parallel: + matrix: + - SUBPACKAGE: [cartesian] + SUBVARIANT: ['cuda12', 'rocm7', 'cpu'] + PY_VERSION: *test_python_versions + - SUBPACKAGE: [next] + SUBVARIANT: ['cuda12', 'rocm7', 'cpu'] + DETAIL: ["nomesh"] + PY_VERSION: *test_python_versions + rules: &exclude_variants_rules + - if: '$SUBVARIANT == "cpu" && ($TEST_VARIANTS !~ /(^|\s)cpu(\s|$)/)' + when: never + - if: '$SUBVARIANT == "cuda12" && ($TEST_VARIANTS !~ /(^|\s)cuda12(\s|$)/)' + when: never + - if: '$SUBVARIANT == "rocm7" && ($TEST_VARIANTS !~ /(^|\s)rocm7(\s|$)/)' + when: never + script: - mkdir -p "${WORKDIR}/gt4py" && git clone --depth 1 "${CSCS_CI_ORIG_CLONE_URL}" "${WORKDIR}/gt4py" - cd "${WORKDIR}/gt4py" && git fetch --depth 1 origin "${CI_COMMIT_SHA}" && git checkout "${CI_COMMIT_SHA}" + - export 
NOX_SESSION_ARGS="(${SUBVARIANT}${DETAIL:+, $DETAIL})" - | cd "${WORKDIR}/gt4py" nox_rc=0 - ./noxfile.py -s "${NOX_SESSION}" || nox_rc=$? + ./noxfile.py -s "test_${SUBPACKAGE}_dace_determinism-${PY_VERSION}${NOX_SESSION_ARGS}" || nox_rc=$? if [ -d _dace_deterministic_codegen ]; then cp -r _dace_deterministic_codegen "${CI_PROJECT_DIR}/" fi exit $nox_rc -dace_determinism_cscs_gh200_cuda: +dace_determinism_cscs_gh200: extends: - .container-runner-santis-gh200 - .dace_determinism_common needs: - - job: build_cscs_gh200 - parallel: - matrix: - - PY_VERSION: '3.10' + - build_cscs_gh200 variables: + TEST_VARIANTS: 'cpu cuda12' SLURM_GPUS_PER_NODE: 1 SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 - parallel: - matrix: - - NOX_SESSION: - - "test_next_dace_determinism-3.10(cuda12, nomesh)" - - "test_cartesian_dace_determinism-3.10(cuda12)" - -dace_determinism_cscs_gh200_cpu: - extends: - - .container-runner-santis-gh200 - - .dace_determinism_common - needs: - - job: build_cscs_gh200 - parallel: - matrix: - - PY_VERSION: '3.10' - variables: - SLURM_PARTITION: 'shared' - GT4PY_BUILD_JOBS: 8 - PYTEST_XDIST_AUTO_NUM_WORKERS: 32 - parallel: - matrix: - - NOX_SESSION: - - "test_next_dace_determinism-3.10(cpu, nomesh)" - - "test_cartesian_dace_determinism-3.10(cpu)" + rules: + - *exclude_variants_rules + - if: $SUBPACKAGE == 'next' && $DETAIL == 'nomesh' + variables: + # TODO: investigate why the dace tests seem to hang with multiple jobs + GT4PY_BUILD_JOBS: 1 + - if: $SUBPACKAGE == 'cartesian' && $SUBVARIANT == 'cuda12' + variables: + GT4PY_BUILD_JOBS: 1 + - when: on_success dace_determinism_cscs_amd_rocm: extends: - .tds-container-runner-beverin-mi200 - .dace_determinism_common needs: - - job: build_cscs_amd_rocm - parallel: - matrix: - - PY_VERSION: '3.10' + - build_cscs_amd_rocm variables: + TEST_VARIANTS: 'rocm7' SLURM_GPUS_PER_NODE: 4 - SLURM_PARTITION: mi300 GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 + SLURM_PARTITION: mi300 
CMAKE_PREFIX_PATH: /opt/rocm CUDA_HOME: /opt/rocm CXX: /opt/rocm/bin/hipcc - parallel: - matrix: - - NOX_SESSION: - - "test_next_dace_determinism-3.10(rocm7, nomesh)" - - "test_cartesian_dace_determinism-3.10(rocm7)" + rules: + - *exclude_variants_rules + - when: on_success + diff --git a/noxfile.py b/noxfile.py index a3f4cac209..3d4408b649 100755 --- a/noxfile.py +++ b/noxfile.py @@ -511,20 +511,6 @@ def env_for_run(run_dir: pathlib.Path) -> dict[str, str]: except UnsupportedBackendError as e: session.error(str(e)) finally: - # Dump the determinism report to stdout so it lands in the CI - # log. report.txt itself only lives on the runner filesystem - # and isn't easy to retrieve after the job ends, but the CI - # log is. Printed on both success and failure so we always - # have a baseline of what programs were observed and which - # (if any) differed. The report is small — one line per - # program plus a short header — bounded by the test selection. - report_path = workdir / "report.txt" - if report_path.exists(): - banner = "=" * 70 - print(f"\n{banner}\ndeterminism report ({report_path}):\n{banner}") - print(report_path.read_text(), end="") - print(f"{banner}\n", flush=True) - # Reclaim disk after the comparison. 
The two per-run caches are # ~hundreds of MB each in development mode, and dace's own # `.dacecache/` at the repo root (used for SDFGs not routed From 2154dd03d13f25257a202d897dd89e37cdc33d35 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 16:34:23 +0200 Subject: [PATCH 24/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- noxfile.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 3d4408b649..d95e7b12a6 100755 --- a/noxfile.py +++ b/noxfile.py @@ -552,10 +552,34 @@ def test_cartesian_dace_determinism( markers = " and ".join(codegen_settings["markers"] + device_settings["markers"]) + # gt4py.cartesian's setuptools-based compile path has been observed to + # deadlock under pytest-xdist's `-n auto` (=32 on a typical SLURM + # allocation) when used with the determinism check's double-pytest-pass + # workflow: pytest hangs silently inside `multi_feature_tests/ + # test_code_generation.py` or `test_dace_parsing.py` with no further + # output until SLURM SIGTERMs the job at the time limit. Symptom is + # consistent across cpu / cuda12 / rocm7 — i.e. it's compile-contention, + # not host-compiler-specific. + # + # Unlike gt4py.next, cartesian has no env var that bounds compile + # parallelism (`GT4PY_BUILD_JOBS` is only read in + # `src/gt4py/next/config.py`), so the only knob we have here is the + # xdist worker count itself. Capping at 4 by default keeps a healthy + # margin from the contention threshold. Raise via + # `GT4PY_CARTESIAN_DETERMINISM_XDIST` (e.g. set "8" or "auto" in CI) + # if a higher count is known-safe in the target environment. 
+ xdist_workers = os.environ.get("GT4PY_CARTESIAN_DETERMINISM_XDIST", "4") + _run_dace_determinism_check( session, pytest_args=[ - *"pytest --cache-clear -sv -n auto --dist loadgroup".split(), + "pytest", + "--cache-clear", + "-sv", + "-n", + xdist_workers, + "--dist", + "loadgroup", "-m", f"{markers}", str(pathlib.Path("tests") / "cartesian_tests"), From 1f8a86831bd43a03562cfad6db759d59846d504d Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 17:52:54 +0200 Subject: [PATCH 25/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- ci/cscs-ci-dace-determinism.yml | 12 ++++++++---- noxfile.py | 21 ++++----------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 61865176d1..655684a5da 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -56,7 +56,6 @@ stages: DOCKERFILE: ci/Dockerfile DOCKER_BUILD_ARGS: '["BASE_IMAGE", "CACHE_DIR", "EXTRA_APTGET", "EXTRA_UV_ENV_VARS", "EXTRA_UV_PIP_ARGS", "EXTRA_UV_SYNC_ARGS", "PY_VERSION", "UV_VERSION", "WORKDIR_PATH" ]' PERSIST_IMAGE_NAME: ${CSCS_REGISTRY_PATH}/public/${ARCH}/base/gt4py-ci-${PY_VERSION} - WATCH_FILECHANGES: 'ci/Dockerfile ci/cscs-ci.yml ci/cscs-ci-ext-config.yml uv.lock' parallel: matrix: @@ -152,9 +151,12 @@ dace_determinism_cscs_gh200: variables: # TODO: investigate why the dace tests seem to hang with multiple jobs GT4PY_BUILD_JOBS: 1 - - if: $SUBPACKAGE == 'cartesian' && $SUBVARIANT == 'cuda12' + - if: $SUBPACKAGE == 'cartesian' && $SUBVARIANT == 'cpu' variables: - GT4PY_BUILD_JOBS: 1 + GT4PY_CARTESIAN_DETERMINISM_XDIST: 2 + - if: $SUBPACKAGE == 'cartesian' + variables: + GT4PY_CARTESIAN_DETERMINISM_XDIST: 4 - when: on_success dace_determinism_cscs_amd_rocm: @@ -174,5 +176,7 @@ dace_determinism_cscs_amd_rocm: CXX: /opt/rocm/bin/hipcc rules: - *exclude_variants_rules + - if: $SUBPACKAGE == 'cartesian' + variables: + 
GT4PY_CARTESIAN_DETERMINISM_XDIST: 4 - when: on_success - diff --git a/noxfile.py b/noxfile.py index d95e7b12a6..7f79613b21 100755 --- a/noxfile.py +++ b/noxfile.py @@ -552,23 +552,10 @@ def test_cartesian_dace_determinism( markers = " and ".join(codegen_settings["markers"] + device_settings["markers"]) - # gt4py.cartesian's setuptools-based compile path has been observed to - # deadlock under pytest-xdist's `-n auto` (=32 on a typical SLURM - # allocation) when used with the determinism check's double-pytest-pass - # workflow: pytest hangs silently inside `multi_feature_tests/ - # test_code_generation.py` or `test_dace_parsing.py` with no further - # output until SLURM SIGTERMs the job at the time limit. Symptom is - # consistent across cpu / cuda12 / rocm7 — i.e. it's compile-contention, - # not host-compiler-specific. - # - # Unlike gt4py.next, cartesian has no env var that bounds compile - # parallelism (`GT4PY_BUILD_JOBS` is only read in - # `src/gt4py/next/config.py`), so the only knob we have here is the - # xdist worker count itself. Capping at 4 by default keeps a healthy - # margin from the contention threshold. Raise via - # `GT4PY_CARTESIAN_DETERMINISM_XDIST` (e.g. set "8" or "auto" in CI) - # if a higher count is known-safe in the target environment. 
- xdist_workers = os.environ.get("GT4PY_CARTESIAN_DETERMINISM_XDIST", "4") + xdist_workers = os.environ.get( + "GT4PY_CARTESIAN_DETERMINISM_XDIST", + "2" if device == "cpu" else "4", + ) _run_dace_determinism_check( session, From 424a0ddf04cc2f7183dd0c608bb7fd89316935b0 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 19:14:12 +0200 Subject: [PATCH 26/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- ci/cscs-ci-dace-determinism.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 655684a5da..2d44d718d8 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -153,7 +153,7 @@ dace_determinism_cscs_gh200: GT4PY_BUILD_JOBS: 1 - if: $SUBPACKAGE == 'cartesian' && $SUBVARIANT == 'cpu' variables: - GT4PY_CARTESIAN_DETERMINISM_XDIST: 2 + GT4PY_CARTESIAN_DETERMINISM_XDIST: 3 - if: $SUBPACKAGE == 'cartesian' variables: GT4PY_CARTESIAN_DETERMINISM_XDIST: 4 From 1b8465a6954259c11fe275db6b536df324aad361 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Wed, 27 May 2026 20:43:45 +0200 Subject: [PATCH 27/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- ci/cscs-ci-dace-determinism.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 2d44d718d8..55f1297879 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -140,7 +140,7 @@ dace_determinism_cscs_gh200: needs: - build_cscs_gh200 variables: - TEST_VARIANTS: 'cpu cuda12' + TEST_VARIANTS: 'cuda12' SLURM_GPUS_PER_NODE: 1 SLURM_PARTITION: 'shared' GT4PY_BUILD_JOBS: 8 @@ -151,9 +151,6 @@ dace_determinism_cscs_gh200: variables: # TODO: investigate why the dace tests seem to hang with multiple jobs GT4PY_BUILD_JOBS: 1 - - if: $SUBPACKAGE == 'cartesian' && $SUBVARIANT == 'cpu' - variables: - 
GT4PY_CARTESIAN_DETERMINISM_XDIST: 3 - if: $SUBPACKAGE == 'cartesian' variables: GT4PY_CARTESIAN_DETERMINISM_XDIST: 4 @@ -166,7 +163,7 @@ dace_determinism_cscs_amd_rocm: needs: - build_cscs_amd_rocm variables: - TEST_VARIANTS: 'rocm7' + TEST_VARIANTS: 'cpu rocm7' SLURM_GPUS_PER_NODE: 4 GT4PY_BUILD_JOBS: 8 PYTEST_XDIST_AUTO_NUM_WORKERS: 32 From f2be3408900ec1c6074be5637791dae5f88a8733 Mon Sep 17 00:00:00 2001 From: Christos Kotsalos Date: Thu, 28 May 2026 05:57:58 +0200 Subject: [PATCH 28/28] Infrastructure to test DaCe's codegen (in)deterministic behavior: Fixing CI/CD --- ci/cscs-ci-dace-determinism.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/cscs-ci-dace-determinism.yml b/ci/cscs-ci-dace-determinism.yml index 55f1297879..e0257d93c4 100644 --- a/ci/cscs-ci-dace-determinism.yml +++ b/ci/cscs-ci-dace-determinism.yml @@ -98,6 +98,7 @@ build_cscs_amd_rocm: SLURM_JOB_NUM_NODES: 1 SLURM_TIMELIMIT: 60 allow_failure: true + resource_group: ${SLURM_PARTITION}_${SUBPACKAGE} artifacts: when: always paths: