From 06b7c6a644029917df921bafb9a34297756f6456 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 07:49:06 -0700
Subject: [PATCH 01/66] [Test] Pin behaviour for @qd.data_oriented with raw
 qd.ndarray members

---
 tests/python/test_data_oriented_ndarray.py | 364 +++++++++++++++++++++
 1 file changed, 364 insertions(+)
 create mode 100644 tests/python/test_data_oriented_ndarray.py

diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
new file mode 100644
index 0000000000..f490518b28
--- /dev/null
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -0,0 +1,364 @@
+"""Tests for ``@qd.data_oriented`` classes whose members are raw ``qd.ndarray`` (not ``qd.field``, not
+``qd.Tensor`` wrappers).
+
+The user-guide doc ``docs/source/user_guide/compound_types.md`` claims this pattern is not supported
+("can contain ndarray? no" for ``@qd.data_oriented``). But the in-tree error message in
+``python/quadrants/lang/impl.py`` lists ``@qd.data_oriented / frozen-dataclass template`` as a
+*supported* route, and the ndarray-in-struct infrastructure added by ``#561 [Type] Tensor 24``
+(2026-04-28) — specifically ``_predeclare_struct_ndarrays`` in
+``python/quadrants/lang/ast/ast_transformers/function_def_transformer.py`` — explicitly walks both
+``dataclasses.is_dataclass(val)`` and ``hasattr(val, "__dict__")`` containers, the latter being the
+data_oriented case.
+
+This file pins what actually works, and documents the gaps. See
+``perso_hugh/doc/data_oriented_ndarray.md`` for the design analysis.
+"""
+
+import dataclasses
+
+import numpy as np
+import pytest
+
+import quadrants as qd
+
+from tests import test_utils
+
+
+# ---------------------------------------------------------------------------
+# 1. Single raw qd.ndarray attribute (scalar element type).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_single_ndarray():
+    N = 6
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x)
+
+    @qd.kernel
+    def fill(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i * 3
+
+    fill(state)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 3)
+
+
+# ---------------------------------------------------------------------------
+# 2. Vector ndarray attribute.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_vector_ndarray():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, v):
+            self.v = v
+
+    v = qd.Vector.ndarray(3, qd.f32, shape=(N,))
+    state = State(v=v)
+
+    @qd.kernel
+    def fill(s: qd.template()):
+        for i in range(N):
+            s.v[i] = qd.Vector([float(i), float(i) * 2.0, float(i) * 3.0])
+
+    fill(state)
+    out = v.to_numpy()
+    for i in range(N):
+        np.testing.assert_array_equal(out[i], np.array([i, i * 2, i * 3], dtype=np.float32))
+
+
+# ---------------------------------------------------------------------------
+# 3. Multiple ndarray attributes in the same class.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_multiple_ndarrays():
+    N = 5
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, a, b):
+            self.a = a
+            self.b = b
+
+    a = qd.ndarray(qd.i32, shape=(N,))
+    b = qd.ndarray(qd.f32, shape=(N,))
+    state = State(a=a, b=b)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.a[i] = i + 1
+            s.b[i] = float(i) * 0.5
+
+    run(state)
+    np.testing.assert_array_equal(a.to_numpy(), np.arange(1, N + 1))
+    np.testing.assert_array_equal(b.to_numpy(), np.arange(N, dtype=np.float32) * 0.5)
+
+
+# ---------------------------------------------------------------------------
+# 4. Mixed qd.field + qd.ndarray in the same data_oriented class.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_mixed_field_and_ndarray():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, f, n):
+            self.f = f
+            self.n = n
+
+    f = qd.field(qd.i32, shape=(N,))
+    n = qd.ndarray(qd.i32, shape=(N,))
+    state = State(f=f, n=n)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.f[i] = i + 1
+            s.n[i] = s.f[i] * 10
+
+    run(state)
+    np.testing.assert_array_equal(f.to_numpy(), np.arange(1, N + 1))
+    np.testing.assert_array_equal(n.to_numpy(), np.arange(1, N + 1) * 10)
+
+
+# ---------------------------------------------------------------------------
+# 5. Nested @qd.data_oriented (outer holds inner; inner holds ndarray).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_nested():
+    N = 4
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.data_oriented
+    class Outer:
+        def __init__(self, inner):
+            self.inner = inner
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    outer = Outer(inner=Inner(x=x))
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.inner.x[i] = i * 7
+
+    run(outer)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 7)
+
+
+# ---------------------------------------------------------------------------
+# 6. Mutation: same instance, reassign ndarray attribute to a *same-shape* ndarray between calls.
+#    The launch-time stale-cache guard (``_mutable_nd_cached_val`` in kernel.py) is supposed to fold the
+#    live ndarray id into args_hash so the launch context is not served stale. We pin that behaviour
+#    here for the data_oriented case.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_reassign_same_shape():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x1 = qd.ndarray(qd.i32, shape=(N,))
+    x2 = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x1)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i + 100
+
+    run(state)
+    np.testing.assert_array_equal(x1.to_numpy(), np.arange(100, 100 + N))
+
+    state.x = x2
+    run(state)
+    np.testing.assert_array_equal(x2.to_numpy(), np.arange(100, 100 + N))
+    np.testing.assert_array_equal(x1.to_numpy(), np.arange(100, 100 + N))  # x1 unchanged
+
+
+# ---------------------------------------------------------------------------
+# 7. Mutation cross-dtype: reassign ndarray attribute to a *different-dtype* ndarray.
+#    The template-mapper specialisation key (in ``_template_mapper_hotpath._extract_arg``) returns
+#    ``weakref.ref(arg)`` for ``is_data_oriented(arg)``; it does NOT descend into ndarray children to
+#    compute a dtype/ndim-dependent spec key. So if the data_oriented instance's id is unchanged but
+#    its ndarray attribute is reassigned to a different dtype, we expect either:
+#      - a graceful recompile/raise, or
+#      - silent miscompilation (the bug case — current expected outcome per static analysis).
+#    Mark xfail with strict=False so we record the actual outcome without breaking CI.
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.xfail(strict=False, reason="Gap A: data_oriented specialisation key does not include ndarray dtype/ndim")
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_reassign_different_dtype():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x_i32 = qd.ndarray(qd.i32, shape=(N,))
+    x_f32 = qd.ndarray(qd.f32, shape=(N,))
+    state = State(x=x_i32)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = s.x[i] + 1
+
+    run(state)
+    np.testing.assert_array_equal(x_i32.to_numpy(), np.array([1, 1, 1, 1], dtype=np.int32))
+
+    state.x = x_f32
+    run(state)
+    np.testing.assert_array_equal(x_f32.to_numpy(), np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32))
+
+
+# ---------------------------------------------------------------------------
+# 8. Distinct instances of same class -> spec-key behaviour. Documents that today each fresh instance
+#    triggers a recompile (because the spec key is ``weakref.ref(arg)`` identity). This is a perf
+#    concern, not a correctness one. We assert correctness here; the recompile count is documented as
+#    a perf note.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_distinct_instances():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    a_arr = qd.ndarray(qd.i32, shape=(N,))
+    b_arr = qd.ndarray(qd.i32, shape=(N,))
+    a = State(x=a_arr)
+    b = State(x=b_arr)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i + 1
+
+    run(a)
+    run(b)
+    np.testing.assert_array_equal(a_arr.to_numpy(), np.arange(1, N + 1))
+    np.testing.assert_array_equal(b_arr.to_numpy(), np.arange(1, N + 1))
+
+
+# ---------------------------------------------------------------------------
+# 9. Fastcache eligibility, cold call only. Per the fastcache doc (``user_guide/fastcache.md`` line 129),
+#    ``@qd.data_oriented`` objects are supported in the cache key. We don't assert the warm / cross-process
+#    path here (that requires a fresh interpreter), nor ``cache_stored``; we only assert that after the
+#    first call ``cache_key_generated`` is True (i.e. no PARAM_INVALID fallthrough due to the ndarray
+#    member).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_fastcache_eligible():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x)
+
+    @qd.kernel(fastcache=True)
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i * 2
+
+    run(state)
+    obs = run._primal.src_ll_cache_observations
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 2)
+    assert obs.cache_key_generated, "cache key should be generated for data_oriented + ndarray"
+
+
+# ---------------------------------------------------------------------------
+# 10. Pure validation: a @qd.pure @qd.kernel taking a data_oriented arg with an ndarray member should
+#     compile and run, mirroring the existing ``test_pure_validation_data_oriented_as_param`` test
+#     which only covers ``qd.field``.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_pure():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x)
+
+    @qd.pure
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i * 5
+
+    run(state)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 5)
+
+
+# ---------------------------------------------------------------------------
+# 11. Counter-test: confirm a dataclass-of-NDArray works (sanity check that the existing supported
+#     route still works; if this fails, the test environment itself is broken, not the data_oriented
+#     path).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_dataclass_ndarray_sanity():
+    N = 4
+
+    @dataclasses.dataclass
+    class State:
+        x: qd.types.NDArray[qd.i32, 1]
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x)
+
+    @qd.kernel
+    def run(s: State):
+        for i in range(N):
+            s.x[i] = i * 11
+
+    run(state)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 11)

From d4350efaae1641a0c668d401f743f97e36fe7730 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 07:59:13 -0700
Subject: [PATCH 02/66] [Fix] Recurse through nested data_oriented / dataclass
 children when pre-declaring struct ndarrays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_predeclare_struct_ndarrays._walk_obj`` only recursed into ``dataclasses.is_dataclass`` children of
a dataclass root; for non-dataclass roots (the ``@qd.data_oriented`` case) it didn't recurse at all.
That meant an ndarray held by a nested ``@qd.data_oriented`` (or a ``dataclasses.dataclass`` reached
through a ``@qd.data_oriented`` attribute, or vice versa) was never registered as a kernel arg, and
``state.inner.x[i] = ...`` raised ``QuadrantsCompilationError`` with "Ndarray ... used in kernel
scope but not registered as a kernel parameter".

Extend both branches to recurse on either a dataclass instance or an ``is_data_oriented(child)``
value. Pure superset of the prior walk — same shape, just more permissive on which children to
descend into.

Bug pinned by ``tests/python/test_data_oriented_ndarray.py::test_data_oriented_nested`` and the new
nesting / cross-container tests in the same file.
---
 .../lang/ast/ast_transformers/function_def_transformer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py b/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
index 101726d858..e2d199d243 100644
--- a/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
+++ b/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
@@ -34,7 +34,7 @@
 from quadrants.lang.matrix import MatrixType
 from quadrants.lang.stream import stream_parallel
 from quadrants.lang.struct import StructType
-from quadrants.lang.util import to_quadrants_type
+from quadrants.lang.util import is_data_oriented, to_quadrants_type
 from quadrants.types import annotations, buffer_view_type, ndarray_type, primitive_types
 
 
@@ -226,7 +226,7 @@ def _walk_obj(obj, arg_idx, path):
                         child = child._unwrap()
                     if isinstance(child, _ndarray.Ndarray):
                         _register_ndarray(child, arg_idx, (*path, field.name))
-                    elif dataclasses.is_dataclass(child) and not isinstance(child, type):
+                    elif (dataclasses.is_dataclass(child) and not isinstance(child, type)) or is_data_oriented(child):
                         _walk_obj(child, arg_idx, (*path, field.name))
             else:
                 for attr_name, attr_val in vars(obj).items():
@@ -234,6 +234,10 @@ def _walk_obj(obj, arg_idx, path):
                         attr_val = attr_val._unwrap()
                     if isinstance(attr_val, _ndarray.Ndarray):
                         _register_ndarray(attr_val, arg_idx, (*path, attr_name))
+                    elif (dataclasses.is_dataclass(attr_val) and not isinstance(attr_val, type)) or is_data_oriented(
+                        attr_val
+                    ):
+                        _walk_obj(attr_val, arg_idx, (*path, attr_name))
 
         def _register_ndarray(nd, arg_idx, attr_chain):
             key = id(nd)

From 97afa6d7bde542e99b4b4eb5a3fbd07b59ebb9cb Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 07:59:22 -0700
Subject: [PATCH 03/66] [Fix] Launch-context stale guard fires for
 @qd.data_oriented containers, not just non-frozen dataclasses

``launch_kernel`` folds the live id(s) of struct-held ndarrays into ``args_hash`` only when the host
container is "mutable", and used ``type(args[idx]).__hash__ is None`` as the predicate. Python sets
``__hash__ = None`` for non-frozen dataclasses (the common ``eq=True, frozen=False`` default), so
that arm fires correctly for them. But ``@qd.data_oriented`` classes inherit ``object.__hash__``,
which is never ``None``, so the guard missed them entirely. Consequence: reassigning ``state.x =
other_ndarray`` on the same data_oriented instance left ``args_hash`` unchanged, hit the
launch-context cache, and re-launched the kernel against the stale ndarray binding (the old ``x1``).

Extend the predicate with an explicit ``is_data_oriented(args[idx])`` arm. The launch-context cache
is a perf optimisation so widening its invalidation predicate is safe.

Bug pinned by ``tests/python/test_data_oriented_ndarray.py::test_data_oriented_ndarray_reassign_same_shape``
and ``::test_data_oriented_nested_ndarray_reassign``.
---
 python/quadrants/lang/kernel.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/python/quadrants/lang/kernel.py b/python/quadrants/lang/kernel.py
index 0b45a5816b..6b636e717d 100644
--- a/python/quadrants/lang/kernel.py
+++ b/python/quadrants/lang/kernel.py
@@ -51,7 +51,7 @@ def _kernel_coverage_enabled() -> bool:
 )
 from quadrants.lang.impl import Program
 from quadrants.lang.shell import _shell_pop_print
-from quadrants.lang.util import cook_dtype
+from quadrants.lang.util import cook_dtype, is_data_oriented
 from quadrants.types import (
     primitive_types,
     template,
@@ -465,12 +465,21 @@ def launch_kernel(
         # Stale-cache guard for mutable structs containing ndarrays. Frozen dataclass fields cannot be reassigned, so
         # id(struct) in args_hash is already sufficient. For mutable structs, ndarray attributes can change between
         # calls while the struct id stays the same, so we fold the live ndarray id(s) into the hash.
+        #
+        # The predicate must catch any "host container in which ndarray member references can be reassigned at runtime"
+        # case. Non-frozen dataclasses have ``__hash__ is None`` (Python sets it when ``eq=True, frozen=False``), so
+        # they hit the first arm. ``@qd.data_oriented`` classes inherit ``object.__hash__`` so the ``__hash__ is None``
+        # check is False for them — we need a separate arm. Without this arm, ``state.x = other_ndarray`` on the same
+        # data_oriented instance would not invalidate the launch-context cache and the kernel would re-launch against
+        # the stale binding.
         if key != self._mutable_nd_cached_key:
             if self._struct_ndarray_launch_info_by_key:
                 struct_nd_info = self._struct_ndarray_launch_info_by_key.get(key)
                 if struct_nd_info:
                     self._mutable_nd_cached_val = [
-                        (idx, chain) for _, idx, chain in struct_nd_info if type(args[idx]).__hash__ is None
+                        (idx, chain)
+                        for _, idx, chain in struct_nd_info
+                        if type(args[idx]).__hash__ is None or is_data_oriented(args[idx])
                     ]
                 else:
                     self._mutable_nd_cached_val = []

From 49a723bbe9772e5063dc2c10b06430d8b01366f2 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:01:50 -0700
Subject: [PATCH 04/66] [Test] Extend @qd.data_oriented + ndarray coverage:
 cross-container nesting, deep nesting, mutation through chain, multi-kernel,
 sub-func
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds tests 12-17 to the file added in 06b7c6a64:
- data_oriented holding (frozen) dataclass that holds ndarray
- dataclass holding data_oriented that holds ndarray (kernel-arg via qd.template())
- 3-level data_oriented nesting
- mutation through 2-level chain (outer.inner.x reassign)
- two kernels sharing the same data_oriented instance
- ndarray access via @qd.func sub-call

The dataclass-of-data_oriented case uses qd.template() rather than typed dataclass kernel arg
because the typed-dataclass-arg form goes through ``_transform_kernel_arg`` which does not currently
recurse on data_oriented field types — tracked as a separate follow-up.

Also tightens the xfail reason on test_data_oriented_ndarray_reassign_different_dtype to call out
that the remaining failure is the template-mapper spec-key gap, not the launch-cache gap (latter
fixed by the kernel.py change in this PR).
---
 tests/python/test_data_oriented_ndarray.py | 220 ++++++++++++++++++++-
 1 file changed, 219 insertions(+), 1 deletion(-)

diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index f490518b28..cca38fc274 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -216,7 +216,16 @@ def run(s: qd.template()):
 # ---------------------------------------------------------------------------
 
 
-@pytest.mark.xfail(strict=False, reason="Gap A: data_oriented specialisation key does not include ndarray dtype/ndim")
+@pytest.mark.xfail(
+    strict=False,
+    reason=(
+        "Gap A: ``_template_mapper_hotpath._extract_arg`` returns ``weakref.ref(arg)`` for "
+        "``is_data_oriented(arg)`` instead of descending into ``vars(arg)`` to emit per-field shape "
+        "descriptors. Same instance + reassign to different dtype reuses the compiled kernel for the "
+        "original dtype, so the second launch corrupts the new-dtype buffer. Separate from Bug 2; not "
+        "addressed in this PR."
+    ),
+)
 @test_utils.test(arch=qd.cpu)
 def test_data_oriented_ndarray_reassign_different_dtype():
     N = 4
@@ -362,3 +371,212 @@ def run(s: State):
 
     run(state)
     np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 11)
+
+
+# ---------------------------------------------------------------------------
+# 12. data_oriented holding a (frozen) dataclass that holds an ndarray.
+#     Exercises the ``else`` branch of ``_walk_obj`` recursing through a dataclass child — added by
+#     the Bug 1 fix (nested-container recursion in ``_predeclare_struct_ndarrays``).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_holding_dataclass_with_ndarray():
+    N = 4
+
+    @dataclasses.dataclass(frozen=True)
+    class Inner:
+        x: qd.types.NDArray[qd.i32, 1]
+
+    @qd.data_oriented
+    class Outer:
+        def __init__(self, inner):
+            self.inner = inner
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    outer = Outer(inner=Inner(x=x))
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.inner.x[i] = i + 1
+
+    run(outer)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(1, N + 1))
+
+
+# ---------------------------------------------------------------------------
+# 13. Frozen dataclass holding a data_oriented holding an ndarray, kernel-arg via ``qd.template()``.
+#     Exercises the dataclass branch of ``_walk_obj`` recursing through a data_oriented child — added
+#     by the Bug 1 fix. The outer dataclass must be frozen because non-frozen dataclasses are
+#     unhashable in Python (``__hash__ is None``) and the template-mapper key tuple needs the value
+#     to be hashable. It is passed via ``qd.template()`` because the typed-dataclass-arg form
+#     (``def run(s: Outer):``) goes through ``_transform_kernel_arg``, which does not currently
+#     recurse on data_oriented field *types* (as opposed to values) — that's a separate follow-up.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_dataclass_holding_data_oriented_with_ndarray():
+    N = 4
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, x):
+            self.x = x
+
+    @dataclasses.dataclass(frozen=True)
+    class Outer:
+        inner: Inner
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    outer = Outer(inner=Inner(x=x))
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.inner.x[i] = i + 5
+
+    run(outer)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(5, 5 + N))
+
+
+# ---------------------------------------------------------------------------
+# 14. Three-level nesting: data_oriented(data_oriented(data_oriented(ndarray))).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_three_level_nesting():
+    N = 4
+
+    @qd.data_oriented
+    class L3:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.data_oriented
+    class L2:
+        def __init__(self, l3):
+            self.l3 = l3
+
+    @qd.data_oriented
+    class L1:
+        def __init__(self, l2):
+            self.l2 = l2
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    root = L1(l2=L2(l3=L3(x=x)))
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.l2.l3.x[i] = i * 13
+
+    run(root)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 13)
+
+
+# ---------------------------------------------------------------------------
+# 15. Mutation on a nested ndarray: outer.inner.x reassigned between kernel calls. Verifies the
+#     Bug 2 stale-cache guard fires even when the ndarray lives several attribute hops deep.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_nested_ndarray_reassign():
+    N = 4
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.data_oriented
+    class Outer:
+        def __init__(self, inner):
+            self.inner = inner
+
+    x1 = qd.ndarray(qd.i32, shape=(N,))
+    x2 = qd.ndarray(qd.i32, shape=(N,))
+    outer = Outer(inner=Inner(x=x1))
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.inner.x[i] = i + 200
+
+    run(outer)
+    np.testing.assert_array_equal(x1.to_numpy(), np.arange(200, 200 + N))
+
+    outer.inner.x = x2
+    run(outer)
+    np.testing.assert_array_equal(x2.to_numpy(), np.arange(200, 200 + N))
+
+
+# ---------------------------------------------------------------------------
+# 16. Same data_oriented instance, two kernels sharing it. Verifies the launch-info per-kernel
+#     bookkeeping is independent (each kernel's compile sets up its own pre-declared ndarray args).
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_two_kernels_same_instance():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x, y):
+            self.x = x
+            self.y = y
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    y = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x, y=y)
+
+    @qd.kernel
+    def fill_x(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i + 1
+
+    @qd.kernel
+    def fill_y_from_x(s: qd.template()):
+        for i in range(N):
+            s.y[i] = s.x[i] * 100
+
+    fill_x(state)
+    fill_y_from_x(state)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(1, N + 1))
+    np.testing.assert_array_equal(y.to_numpy(), np.arange(1, N + 1) * 100)
+
+
+# ---------------------------------------------------------------------------
+# 17. data_oriented + ndarray + @qd.func sub-call. Pins that the AST-time attribute resolution in
+#     ``build_Attribute`` (which uses the predeclared AnyArray cache) works when the access happens
+#     inside a func, not just the top-level kernel.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_via_func():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x = qd.ndarray(qd.i32, shape=(N,))
+    state = State(x=x)
+
+    @qd.func
+    def write(s: qd.template(), i: qd.i32, v: qd.i32):
+        s.x[i] = v
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            write(s, i, i * 9)
+
+    run(state)
+    np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 9)

From 9bdeca54397f8140f856cda52f3bb47ad5086777 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:03:42 -0700
Subject: [PATCH 05/66] [Doc] @qd.data_oriented can contain ndarrays

Update compound_types.md to reflect what landed in #561 [Type] Tensor 24 (which added
``_predeclare_struct_ndarrays``) and what's fixed in this PR (the nested + mutation cases). The
old "no" cell predated the Tensor 24 infrastructure by ~6 weeks and was already inconsistent with
the in-tree error message in ``python/quadrants/lang/impl.py`` which lists "@qd.data_oriented /
frozen-dataclass template" as the supported route for ndarrays inside structs.

Add an ndarray-member example under the @qd.data_oriented section.
---
 docs/source/user_guide/compound_types.md | 26 +++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 79c007d7aa..6145227a9d 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -12,7 +12,7 @@ The following compound types are available:
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
 |------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|
 | `dataclasses.dataclass`            | yes                         | yes                       | yes                  | yes                | yes            | no [*1]                   |
-| `@qd.data_oriented`               | yes                         | yes                       | no                   | yes                | yes            | yes                       |
+| `@qd.data_oriented`               | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
 | `@qd.struct`, `@qd.dataclass`     | yes                         | yes                       | no                   | yes                | yes            | yes                       |
 
 ## Recommendation
@@ -148,6 +148,30 @@ sim.step()
 
 `@qd.data_oriented` objects can also be passed as `qd.Template` parameters to kernels defined outside the class, and they support nesting (one `@qd.data_oriented` struct containing another).
 
+### ndarray members
+
+`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members. Subscript access inside kernels works the same as for `dataclasses.dataclass`:
+
+```python
+@qd.data_oriented
+class State:
+    def __init__(self, n):
+        self.x = qd.ndarray(qd.f32, shape=(n,))
+        self.v = qd.ndarray(qd.f32, shape=(n,))
+
+@qd.kernel
+def step(s: qd.template()):
+    for i in range(s.x.shape[0]):
+        s.x[i] += s.v[i]
+
+state = State(100)
+step(state)
+```
+
+Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
+
+Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch.
+
 ## qd.struct / qd.dataclass
 
 `@qd.struct` (and its alias `@qd.dataclass`) is a Quadrants-native struct type. It can only contain fields and primitive types, not ndarrays.

From dc7997b628df648dc5fd1b12f6bce776ff63294d Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:32:08 -0700
Subject: [PATCH 06/66] [Fix] Gap A: template-mapper spec key descends into
 data_oriented ndarray members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_extract_arg`` returned ``weakref.ref(arg)`` for any ``is_data_oriented(arg)``, which over-shared
the compiled kernel when ``state.x`` was reassigned to an ndarray of a different dtype or ndim on
the same instance — the second launch re-used the kernel specialised for the original shape and
silently corrupted the new-shape buffer.

Walk the reachable ``Ndarray`` members (recursively through nested data_oriented and dataclass
children) and prepend their ``(path, element_type, ndim, needs_grad, layout)`` descriptors to the
spec key. Same memory-leak avoidance — the descriptors are values, no strong reference to the
ndarray itself, and the weakref to the container is preserved for the per-instance identity tail.

Containers with *no* ndarrays (the genesis field-backend ``@qd.data_oriented`` workload) take the
existing short path unchanged — ``_collect_struct_nd_descriptors`` returns an empty list and we
return ``weakref.ref(arg)`` as before. So this is a no-op for the existing hot path, and the
overhead is paid only by containers that actually hold ndarrays.

Pinned by ``test_data_oriented_ndarray_reassign_different_dtype`` (was xfail, now passes),
``::reassign_different_ndim``, ``::nested_ndarray_reassign_different_dtype``, and
``::field_only_no_speckey_change`` (no-regression case).
---
 .../lang/_template_mapper_hotpath.py          | 47 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/python/quadrants/lang/_template_mapper_hotpath.py b/python/quadrants/lang/_template_mapper_hotpath.py
index 55d505665f..96d7418307 100644
--- a/python/quadrants/lang/_template_mapper_hotpath.py
+++ b/python/quadrants/lang/_template_mapper_hotpath.py
@@ -25,6 +25,7 @@
 a consequence of inlining 'is_dataclass' and 'fields'.
 """
 
+import dataclasses
 import weakref
 from dataclasses import _FIELD, _FIELDS
 from typing import Any, Union
@@ -71,6 +72,35 @@
 _primitive_types = {int, float, bool}
 
 
+def _collect_struct_nd_descriptors(obj: Any, path: str, out: list) -> None:
+    """Walk a ``@qd.data_oriented`` (or dataclass) container's reachable ``Ndarray`` members and append a per-ndarray
+    shape descriptor ``(path, element_type, ndim, needs_grad, layout)`` to ``out``. Used by the template-mapper to
+    refine the specialisation key when the container holds ndarrays — see the data_oriented branch in
+    ``_extract_arg``.
+
+    Walks both ``dataclasses.is_dataclass(child)`` and ``is_data_oriented(child)`` children recursively. Mirrors the
+    walker used at compile time in ``_predeclare_struct_ndarrays``, so the compile-time pre-declaration and the
+    specialisation key see the same set of ndarrays.
+    """
+    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
+        children = ((f.name, getattr(obj, f.name)) for f in dataclasses.fields(obj))
+    else:
+        children = obj.__dict__.items()
+    for k, v in children:
+        full = f"{path}.{k}" if path else k
+        if type(v) in _TENSOR_WRAPPER_TYPES:
+            v = v._unwrap()
+        v_type = type(v)
+        if issubclass(v_type, Ndarray):
+            type_id = id(v.element_type)
+            element_type = type_id if type_id in primitive_types.type_ids else v.element_type
+            out.append((full, element_type, len(v.shape), v.grad is not None, v._qd_layout))
+        elif is_data_oriented(v):
+            _collect_struct_nd_descriptors(v, full, out)
+        elif dataclasses.is_dataclass(v) and not isinstance(v, type):
+            _collect_struct_nd_descriptors(v, full, out)
+
+
 def _extract_arg(raise_on_templated_floats: bool, arg: Any, annotation: AnnotationType, arg_name: str) -> Any:
     # ``qd.Tensor`` wrappers passed as struct fields. Top-level kernel-arg unwrap in ``Kernel.__call__`` covers direct
     # args, but the dataclass-field recursion at the bottom of this function walks struct attributes via raw
@@ -124,7 +154,7 @@ def _extract_arg(raise_on_templated_floats: bool, arg: Any, annotation: Annotati
             raise QuadrantsRuntimeTypeError(
                 "Ndarray shouldn't be passed in via `qd.template()`, please annotate your kernel using `qd.types.ndarray(...)` instead"
             )
-        if arg_type in _composite_mutable_types or is_data_oriented(arg):
+        if arg_type in _composite_mutable_types:
             # [Composite arguments] Return weak reference to the object
             # Quadrants kernel will cache the extracted arguments, thus we can't simply return the original argument.
             # Instead, a weak reference to the original value is returned to avoid memory leak.
@@ -134,6 +164,21 @@ def _extract_arg(raise_on_templated_floats: bool, arg: Any, annotation: Annotati
             # 1. Invalid weak-ref will leave a dead(dangling) entry in both caches: "self.mapping" and "self.compiled_functions"
             # 2. Different argument instances with same type and same value, will get templatized into separate kernels.
             return weakref.ref(arg)
+        if is_data_oriented(arg):
+            # Same memory-leak avoidance as above — keep ``weakref.ref(arg)`` so the spec key never holds a strong
+            # reference to user state. But for data_oriented containers that hold ``Ndarray`` members, the live
+            # ``weakref`` alone is too coarse: same instance with ``state.x = other_ndarray`` of a different dtype/ndim
+            # would re-use the previously-compiled kernel, which was specialised for the old shape. Walk the reachable
+            # ndarrays and prepend their shape descriptors so dtype/ndim changes trigger re-specialisation. Mirrors what
+            # the dataclass branch below does via ``annotation_fields``.
+            #
+            # Containers with no ndarrays keep the original short-path (one spec per instance via weakref) so this is
+            # a no-op for the existing data_oriented + qd.field workloads (genesis field-backend).
+            nd_descriptors: list = []
+            _collect_struct_nd_descriptors(arg, "", nd_descriptors)
+            if nd_descriptors:
+                return (id(type(arg)), tuple(nd_descriptors), weakref.ref(arg))
+            return weakref.ref(arg)
 
         # Return value directly for other types, i.e. primitive types and all qd.Field-derived classes
         if raise_on_templated_floats and arg_type is float:

From 906ce1905a96add80329d45613ab5e87c6fbf9b7 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:32:19 -0700
Subject: [PATCH 07/66] [Test] Gap A: spec-key descent into data_oriented
 ndarray members

- Unmark test_data_oriented_ndarray_reassign_different_dtype as xfail (passes now).
- Add ::reassign_different_ndim to cover the 1D->2D shape change case.
- Add ::nested_ndarray_reassign_different_dtype to confirm the recursive walker reaches a leaf
  ndarray through a nested @qd.data_oriented chain.
- Add ::field_only_no_speckey_change to pin the no-regression case (data_oriented with only field
  members still uses the original weakref short-path).
---
 tests/python/test_data_oriented_ndarray.py | 117 +++++++++++++++++++--
 1 file changed, 107 insertions(+), 10 deletions(-)

diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index cca38fc274..3cec588dd1 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -216,16 +216,6 @@ def run(s: qd.template()):
 # ---------------------------------------------------------------------------
 
 
-@pytest.mark.xfail(
-    strict=False,
-    reason=(
-        "Gap A: ``_template_mapper_hotpath._extract_arg`` returns ``weakref.ref(arg)`` for "
-        "``is_data_oriented(arg)`` instead of descending into ``vars(arg)`` to emit per-field shape "
-        "descriptors. Same instance + reassign to different dtype reuses the compiled kernel for the "
-        "original dtype, so the second launch corrupts the new-dtype buffer. Separate from Bug 2; not "
-        "addressed in this PR."
-    ),
-)
 @test_utils.test(arch=qd.cpu)
 def test_data_oriented_ndarray_reassign_different_dtype():
     N = 4
@@ -580,3 +570,110 @@ def run(s: qd.template()):
 
     run(state)
     np.testing.assert_array_equal(x.to_numpy(), np.arange(N) * 9)
+
+
+# ---------------------------------------------------------------------------
+# 18. Reassign ndarray to a *different ndim* on the same data_oriented instance.
+#     Complementary to test 7 (different-dtype). Spec key must change so a 1D-specialised kernel is
+#     not reused for a 2D ndarray. Pins the Gap A fix from the ndim side.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_reassign_different_ndim():
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    x_1d = qd.ndarray(qd.i32, shape=(4,))
+    x_2d = qd.ndarray(qd.i32, shape=(2, 3))
+    state = State(x=x_1d)
+
+    @qd.kernel
+    def fill_1d(s: qd.template()):
+        for i in range(4):
+            s.x[i] = i * 2
+
+    @qd.kernel
+    def fill_2d(s: qd.template()):
+        for i, j in qd.ndrange(2, 3):
+            s.x[i, j] = i * 10 + j
+
+    fill_1d(state)
+    np.testing.assert_array_equal(x_1d.to_numpy(), np.arange(4) * 2)
+
+    state.x = x_2d
+    fill_2d(state)
+    np.testing.assert_array_equal(x_2d.to_numpy(), np.array([[0, 1, 2], [10, 11, 12]], dtype=np.int32))
+
+
+# ---------------------------------------------------------------------------
+# 19. Spec-key descent for nested data_oriented + ndarray reassign at the leaf. Confirms the
+#     recursive walker in ``_collect_struct_nd_descriptors`` reaches through nested data_oriented.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_nested_ndarray_reassign_different_dtype():
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.data_oriented
+    class Outer:
+        def __init__(self, inner):
+            self.inner = inner
+
+    x_i32 = qd.ndarray(qd.i32, shape=(4,))
+    x_f32 = qd.ndarray(qd.f32, shape=(4,))
+    outer = Outer(inner=Inner(x=x_i32))
+
+    @qd.kernel
+    def run_i32(s: qd.template()):
+        for i in range(4):
+            s.inner.x[i] = i + 1
+
+    @qd.kernel
+    def run_f32(s: qd.template()):
+        for i in range(4):
+            s.inner.x[i] = float(i) + 0.5
+
+    run_i32(outer)
+    np.testing.assert_array_equal(x_i32.to_numpy(), np.arange(1, 5))
+
+    outer.inner.x = x_f32
+    run_f32(outer)
+    np.testing.assert_array_equal(x_f32.to_numpy(), np.arange(4, dtype=np.float32) + 0.5)
+
+
+# ---------------------------------------------------------------------------
+# 20. No spec-key regression for data_oriented containers WITHOUT ndarrays. The Gap A fix prepends
+#     ndarray descriptors only when ndarrays are present; otherwise the original ``weakref.ref(arg)``
+#     spec key is preserved (one spec per instance). This test pins the no-ndarray case.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_field_only_no_speckey_change():
+    N = 4
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, f):
+            self.f = f
+
+    f = qd.field(qd.i32, shape=(N,))
+    state = State(f=f)
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.f[i] = i + 1
+
+    run(state)
+    np.testing.assert_array_equal(f.to_numpy(), np.arange(1, N + 1))
+
+    # Run a second time on the same instance — should reuse the same compiled kernel.
+    run(state)

From a0db648b22cffe8e53659fa458f7d9771974671a Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:34:30 -0700
Subject: [PATCH 08/66] [Fix] Template-mapper args_hash invalidates when
 data_oriented ndarray member is reassigned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spec-key fix in dc7997b62 (``_extract_arg`` descends into ``is_data_oriented(arg)`` to emit
ndarray shape descriptors) was being silently bypassed for the same-instance case:
``TemplateMapper.lookup`` has a fast-path ``_mapping_cache_tracker`` keyed only on
``tuple(id(arg) for arg in args)``, which short-circuits ``extract()`` whenever the same instance is
passed again. So ``run(state)``-then-``state.x = other``-then-``run(state)`` re-used the cached spec
key from the first call and the kernel kept its original compile-time dtype/ndim.

Fold the ids of all ndarrays reachable through any ``is_data_oriented(arg)`` (recursively, via
nested data_oriented and dataclass children) into ``args_hash``. Reassigning a member ndarray
changes its id, which changes the hash, which forces ``extract()`` and (when warranted) a fresh
compilation. No-op for data_oriented containers with no ndarrays.

This mirrors, at this cache layer, the launch-context stale-guard fix from 97afa6d7b.

Pinned by ``test_data_oriented_ndarray_reassign_different_dtype`` — was failing under just the
``_extract_arg`` change because of this cache layer; now passes.
---
 python/quadrants/lang/_template_mapper.py | 34 +++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/python/quadrants/lang/_template_mapper.py b/python/quadrants/lang/_template_mapper.py
index c87b8baf84..1e9f17c6bf 100644
--- a/python/quadrants/lang/_template_mapper.py
+++ b/python/quadrants/lang/_template_mapper.py
@@ -1,15 +1,38 @@
+import dataclasses
 from functools import partial
 from typing import Any, TypeAlias
 from weakref import ReferenceType
 
 from quadrants.lang import impl
+from quadrants.lang._ndarray import Ndarray
 from quadrants.lang.impl import Program
 from quadrants.lang.kernel_arguments import ArgMetadata
+from quadrants.lang.util import is_data_oriented
 
 from .._test_tools import warnings_helper
 from ._kernel_types import ArgsHash
 from ._template_mapper_hotpath import _extract_arg, _primitive_types
 
+
+def _collect_data_oriented_nd_ids(obj: Any, out: list) -> None:
+    """Walk a ``@qd.data_oriented`` (or dataclass) container's reachable ``Ndarray`` members and append
+    ``id(ndarray)`` to ``out``. Mirrors ``_template_mapper_hotpath._collect_struct_nd_descriptors`` but emits identities
+    instead of shape descriptors. Used to refine ``args_hash`` so that reassigning a member ndarray on the same
+    data_oriented instance invalidates the ``_mapping_cache_tracker`` and re-runs ``extract()``.
+    """
+    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
+        children = ((f.name, getattr(obj, f.name)) for f in dataclasses.fields(obj))
+    else:
+        children = obj.__dict__.items()
+    for _, v in children:
+        v_type = type(v)
+        if issubclass(v_type, Ndarray):
+            out.append(id(v))
+        elif is_data_oriented(v):
+            _collect_data_oriented_nd_ids(v, out)
+        elif dataclasses.is_dataclass(v) and not isinstance(v, type):
+            _collect_data_oriented_nd_ids(v, out)
+
 Key: TypeAlias = tuple[Any, ...]
 
 
@@ -71,6 +94,17 @@ def lookup(self, raise_on_templated_floats: bool, args: tuple[Any, ...]) -> tupl
         # branching for primitive types dramatically improve performance of hash computation.
         mapping_cache_tracker: list[ReferenceType | None] | None = None
         args_hash: ArgsHash = tuple([id(arg) for arg in args])
+        # ``@qd.data_oriented`` containers can have their member ndarrays reassigned between calls on the same instance
+        # (``state.x = other_ndarray``). The id(arg) alone does not capture that, so the spec-key cache below would
+        # serve a stale entry and the new ndarray's dtype/ndim would be wrong. Fold the reachable ndarray ids into the
+        # hash. No-op for data_oriented containers that hold no ndarrays — the walker returns an empty list. See
+        # ``_collect_data_oriented_nd_ids``.
+        nd_ids: list = []
+        for arg in args:
+            if is_data_oriented(arg):
+                _collect_data_oriented_nd_ids(arg, nd_ids)
+        if nd_ids:
+            args_hash = args_hash + tuple(nd_ids)
         try:
             mapping_cache_tracker = self._mapping_cache_tracker[args_hash]
         except KeyError:

From c9598ad866ab1a7d50f3fd4786f8341965abb543 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:36:14 -0700
Subject: [PATCH 09/66] [Fix] Clear error for @qd.data_oriented field type
 inside typed-dataclass kernel arg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Annotating a kernel arg as a dataclass whose field type is a ``@qd.data_oriented`` class mixes two
incompatible kernel-arg patterns:

  - Typed-dataclass args are flattened into per-leaf kernel args using the field type annotations at
    compile time (``_transform_kernel_arg`` recurses on ``field.type``).
  - ``@qd.data_oriented`` containers don't carry per-attribute type annotations — their ndarray and
    field members are walked at kernel-compile time from the *value* (``vars(self)``) via
    ``_predeclare_struct_ndarrays``, which only fires for ``qd.template()`` / ``qd.Tensor`` outer
    annotations.

Before this commit, the data_oriented field type fell through ``_transform_kernel_arg``'s else
branch and bubbled up a confusing ``Invalid data type`` error from ``cook_dtype``. Now we raise a
``QuadrantsSyntaxError`` naming the offending field and pointing users at the recommended fix
(``s: qd.template()``).

Pinned by ``test_typed_dataclass_with_data_oriented_field_raises_clear_error``.
---
 .../function_def_transformer.py               | 15 +++++++++
 tests/python/test_data_oriented_ndarray.py    | 31 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py b/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
index e2d199d243..1bdd14dbd8 100644
--- a/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
+++ b/python/quadrants/lang/ast/ast_transformers/function_def_transformer.py
@@ -149,6 +149,21 @@ def _transform_kernel_arg(
                         field.type,
                         this_arg_features[field_idx],
                     )
+                elif isinstance(field.type, type) and getattr(field.type, "_data_oriented", False):
+                    # ``@qd.data_oriented`` field type inside a typed-dataclass kernel arg. The two patterns are
+                    # semantically incompatible at this layer: dataclass kernel-arg recursion uses annotations to
+                    # flatten leaf fields into per-leaf kernel args at compile time, but data_oriented containers don't
+                    # carry per-attribute type annotations — they need a value-driven walk
+                    # (``_predeclare_struct_ndarrays``), which only fires for ``qd.template()`` / ``qd.Tensor``
+                    # annotations. Rather than silently miscompile, raise a clear error pointing users to the
+                    # recommended pattern.
+                    raise QuadrantsSyntaxError(
+                        f"Kernel arg {argument_name!r}: field {field.name!r} has @qd.data_oriented type "
+                        f"{field.type.__name__!r}, which cannot be flattened into a typed-dataclass kernel arg. "
+                        f"Use ``{argument_name}: qd.template()`` for the outer kernel arg annotation instead; "
+                        f"data_oriented contents (including nested ndarrays) are walked at kernel-compile time via "
+                        f"the template path."
+                    )
                 else:
                     result, obj = FunctionDefTransformer._decl_and_create_variable(
                         ctx,
diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index 3cec588dd1..3ea6c88e70 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -655,6 +655,37 @@ def run_f32(s: qd.template()):
 # ---------------------------------------------------------------------------
 
 
+# ---------------------------------------------------------------------------
+# 21. Typed-dataclass kernel arg with a ``@qd.data_oriented`` field type — should error clearly
+#     pointing the user to ``qd.template()``. The two patterns are incompatible at the kernel-arg
+#     layer: dataclass kernel args are flattened using annotations, data_oriented containers need a
+#     value-driven walk. Pins the helpful error message.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_typed_dataclass_with_data_oriented_field_raises_clear_error():
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, x):
+            self.x = x
+
+    @dataclasses.dataclass
+    class Outer:
+        inner: Inner
+
+    x = qd.ndarray(qd.i32, shape=(4,))
+    outer = Outer(inner=Inner(x=x))
+
+    @qd.kernel
+    def run(s: Outer):
+        for i in range(4):
+            s.inner.x[i] = i + 1
+
+    with pytest.raises(Exception, match="data_oriented.*qd.template"):
+        run(outer)
+
+
 @test_utils.test(arch=qd.cpu)
 def test_data_oriented_field_only_no_speckey_change():
     N = 4

From 93893e5f2bebc99117098366e6b096f126e86730 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 08:48:38 -0700
Subject: [PATCH 10/66] [Perf] Per-class cache of data_oriented ndarray
 attribute paths for Gap A
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The unconditional ``vars(arg).items()`` recursion that the Gap A fix added to both ``_extract_arg``
and ``TemplateMapper.lookup`` was paid once per kernel call per data_oriented arg. For the genesis
field-backend, where the ``@qd.data_oriented`` Solver is passed as ``self`` to every kernel and
holds dozens of attributes, this cost ~150 FPS/env on anymal_c (B=4096) — measured ~14% regression
in paired runs.

Cache the attribute paths to ndarrays per class (``type(arg) -> list[tuple[str, ...]]``). First
call for a class walks once via ``_build_struct_nd_paths``; subsequent calls do a dict lookup +
``getattr`` chains for the (typically zero or one or two) cached paths. For solvers with no ndarray
members (genesis field backend), the cached list is empty and the per-call cost collapses to a
single dict lookup.

Trades freshness for speed: assumes the *set* of ndarray-holding attribute paths is stable across
instances of the same class. Genesis Solver and similar data_oriented containers declare members
in ``__init__`` and don't add new ones later, so this is safe. Documented in the docstring for
``_struct_nd_paths_for``.

Shared between ``_template_mapper.py`` (id collection for args_hash) and
``_template_mapper_hotpath.py`` (shape descriptors for spec key) — same paths, different payload.
---
 python/quadrants/lang/_template_mapper.py     | 31 +++------
 .../lang/_template_mapper_hotpath.py          | 69 ++++++++++++++-----
 2 files changed, 62 insertions(+), 38 deletions(-)

diff --git a/python/quadrants/lang/_template_mapper.py b/python/quadrants/lang/_template_mapper.py
index 1e9f17c6bf..e4ecf4d83e 100644
--- a/python/quadrants/lang/_template_mapper.py
+++ b/python/quadrants/lang/_template_mapper.py
@@ -1,37 +1,28 @@
-import dataclasses
 from functools import partial
 from typing import Any, TypeAlias
 from weakref import ReferenceType
 
 from quadrants.lang import impl
-from quadrants.lang._ndarray import Ndarray
 from quadrants.lang.impl import Program
 from quadrants.lang.kernel_arguments import ArgMetadata
 from quadrants.lang.util import is_data_oriented
 
 from .._test_tools import warnings_helper
 from ._kernel_types import ArgsHash
-from ._template_mapper_hotpath import _extract_arg, _primitive_types
+from ._template_mapper_hotpath import _extract_arg, _primitive_types, _struct_nd_paths_for
 
 
-def _collect_data_oriented_nd_ids(obj: Any, out: list) -> None:
-    """Walk a ``@qd.data_oriented`` (or dataclass) container's reachable ``Ndarray`` members and append
-    ``id(ndarray)`` to ``out``. Mirrors ``_template_mapper_hotpath._collect_struct_nd_descriptors`` but emits identities
-    instead of shape descriptors. Used to refine ``args_hash`` so that reassigning a member ndarray on the same
-    data_oriented instance invalidates the ``_mapping_cache_tracker`` and re-runs ``extract()``.
+def _collect_data_oriented_nd_ids(arg: Any, out: list) -> None:
+    """Append ``id(ndarray)`` for every ndarray reachable from ``arg``, using the per-class path cache in
+    ``_template_mapper_hotpath._struct_nd_paths_for`` so the first call walks ``vars(arg)`` once and subsequent calls
+    are just ``getattr`` chains. Empty path list short-circuits with zero work — critical for genesis's
+    ``@qd.data_oriented`` Solver passed as ``self`` to every kernel.
     """
-    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
-        children = ((f.name, getattr(obj, f.name)) for f in dataclasses.fields(obj))
-    else:
-        children = obj.__dict__.items()
-    for _, v in children:
-        v_type = type(v)
-        if issubclass(v_type, Ndarray):
-            out.append(id(v))
-        elif is_data_oriented(v):
-            _collect_data_oriented_nd_ids(v, out)
-        elif dataclasses.is_dataclass(v) and not isinstance(v, type):
-            _collect_data_oriented_nd_ids(v, out)
+    for chain in _struct_nd_paths_for(arg):
+        v = arg
+        for a in chain:
+            v = getattr(v, a)
+        out.append(id(v))
 
 Key: TypeAlias = tuple[Any, ...]
 
diff --git a/python/quadrants/lang/_template_mapper_hotpath.py b/python/quadrants/lang/_template_mapper_hotpath.py
index 96d7418307..20910c0d4e 100644
--- a/python/quadrants/lang/_template_mapper_hotpath.py
+++ b/python/quadrants/lang/_template_mapper_hotpath.py
@@ -72,33 +72,66 @@
 _primitive_types = {int, float, bool}
 
 
-def _collect_struct_nd_descriptors(obj: Any, path: str, out: list) -> None:
-    """Walk a ``@qd.data_oriented`` (or dataclass) container's reachable ``Ndarray`` members and append a per-ndarray
-    shape descriptor ``(path, element_type, ndim, needs_grad, layout)`` to ``out``. Used by the template-mapper to
-    refine the specialisation key when the container holds ndarrays — see the data_oriented branch in
-    ``_extract_arg``.
+# Per-class cache: ``type(arg) -> list[tuple[str, ...]]`` of attribute paths whose values are ``Ndarray`` instances at
+# first observation. Populated lazily by ``_struct_nd_paths_for`` on the first call with each new data_oriented (or
+# nested dataclass) class. Empty list means "this class holds no ndarrays anywhere", in which case subsequent calls
+# pay only a dict-lookup per arg. Non-empty list short-circuits the full ``vars()`` recursion and just resolves each
+# cached path via ``getattr`` chains. Critical for the genesis field-backend hot path: the ``@qd.data_oriented``
+# Solver is passed as ``self`` to most kernels and holds dozens of attributes, so a full per-call ``vars()`` walk
+# costs >100ns per kernel and trashed FPS until this cache was added.
+_struct_nd_paths_cache: dict[type, list[tuple]] = {}
 
-    Walks both ``dataclasses.is_dataclass(child)`` and ``is_data_oriented(child)`` children recursively. Mirrors the
-    walker used at compile time in ``_predeclare_struct_ndarrays``, so the compile-time pre-declaration and the
-    specialisation key see the same set of ndarrays.
-    """
+
+def _build_struct_nd_paths(obj: Any, prefix: tuple, out: list) -> None:
     if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
         children = ((f.name, getattr(obj, f.name)) for f in dataclasses.fields(obj))
     else:
         children = obj.__dict__.items()
     for k, v in children:
-        full = f"{path}.{k}" if path else k
+        chain = prefix + (k,)
         if type(v) in _TENSOR_WRAPPER_TYPES:
             v = v._unwrap()
         v_type = type(v)
         if issubclass(v_type, Ndarray):
-            type_id = id(v.element_type)
-            element_type = type_id if type_id in primitive_types.type_ids else v.element_type
-            out.append((full, element_type, len(v.shape), v.grad is not None, v._qd_layout))
-        elif is_data_oriented(v):
-            _collect_struct_nd_descriptors(v, full, out)
-        elif dataclasses.is_dataclass(v) and not isinstance(v, type):
-            _collect_struct_nd_descriptors(v, full, out)
+            out.append(chain)
+        elif is_data_oriented(v) or (dataclasses.is_dataclass(v) and not isinstance(v, type)):
+            _build_struct_nd_paths(v, chain, out)
+
+
+def _struct_nd_paths_for(arg: Any) -> list[tuple]:
+    """Return the cached attribute paths (each a tuple of attr-name strings) at which ``Ndarray`` instances are
+    reachable from ``arg`` of type ``type(arg)``. First call for a class walks ``arg`` once via
+    ``_build_struct_nd_paths``; subsequent calls are dict-lookups.
+
+    Trades freshness for speed: assumes the *set* of ndarray-holding attribute paths is stable across instances of
+    the same class. The genesis Solver and similar ``@qd.data_oriented`` containers satisfy this — their ndarray
+    members are declared in ``__init__`` and not added later. If you need to add an ndarray attribute after the first
+    kernel launch on an instance of a given class, the new attribute won't be tracked. Drop that class's entry from
+    ``_struct_nd_paths_cache`` to force a re-walk on the next call, or restart the program.
+    """
+    cls = type(arg)
+    paths = _struct_nd_paths_cache.get(cls)
+    if paths is None:
+        paths = []
+        _build_struct_nd_paths(arg, (), paths)
+        _struct_nd_paths_cache[cls] = paths
+    return paths
+
+
+def _collect_struct_nd_descriptors(arg: Any, out: list) -> None:
+    """Emit per-ndarray shape descriptors ``(joined-path, element_type, ndim, needs_grad, layout)`` for every ndarray
+    reachable from ``arg``. Used by the template-mapper to refine the spec key for ``@qd.data_oriented`` args holding
+    ndarrays — see the data_oriented branch in ``_extract_arg``.
+    """
+    for chain in _struct_nd_paths_for(arg):
+        v = arg
+        for a in chain:
+            v = getattr(v, a)
+        if type(v) in _TENSOR_WRAPPER_TYPES:
+            v = v._unwrap()
+        type_id = id(v.element_type)
+        element_type = type_id if type_id in primitive_types.type_ids else v.element_type
+        out.append((".".join(chain), element_type, len(v.shape), v.grad is not None, v._qd_layout))
 
 
 def _extract_arg(raise_on_templated_floats: bool, arg: Any, annotation: AnnotationType, arg_name: str) -> Any:
@@ -175,7 +208,7 @@ def _extract_arg(raise_on_templated_floats: bool, arg: Any, annotation: Annotati
             # Containers with no ndarrays keep the original short-path (one spec per instance via weakref) so this is
             # a no-op for the existing data_oriented + qd.field workloads (genesis field-backend).
             nd_descriptors: list = []
-            _collect_struct_nd_descriptors(arg, "", nd_descriptors)
+            _collect_struct_nd_descriptors(arg, nd_descriptors)
             if nd_descriptors:
                 return (id(type(arg)), tuple(nd_descriptors), weakref.ref(arg))
             return weakref.ref(arg)

From ce769a7bdbfe229e1f057f56c7af65b79764f370 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 09:19:21 -0700
Subject: [PATCH 11/66] [Doc] Nesting compatibility matrix for compound types +
 spot tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documents what combinations of `dataclasses.dataclass`, `@qd.data_oriented`, `@qd.struct`,
`qd.ndarray`, and `qd.field` work as nested members, after the data_oriented + ndarray fix series.

Three additions:

1. Per-container × per-member-type matrix replacing the previous text-only claim that
   ``@qd.data_oriented`` could not contain ndarrays.

2. Outer kernel-arg annotation rules: when to use ``qd.template()`` vs a typed-dataclass
   annotation, including the ``frozen=True`` requirement for a dataclass passed via
   ``qd.template()`` and the rejection of ``@qd.data_oriented`` field types inside a typed-dataclass
   kernel arg (matches the error from c9598ad86).

3. Reassignment + restrictions: documents that ndarray reassignment with different dtype/ndim is
   supported (Gap A), and that the ndarray-bearing attribute set on a data_oriented class is
   assumed stable across instances (path-cache caveat from 93893e5f2).

Plus three spot tests in ``test_data_oriented_mixed_combos.py`` that empirically pin the more
involved matrix claims:

- ``test_data_oriented_with_ndarray_field_and_nested_data_oriented``: single data_oriented holding
  ndarray + field + nested data_oriented + primitive simultaneously.
- ``test_dataclass_with_data_oriented_via_template``: frozen dataclass holding a data_oriented
  holding an ndarray, passed via ``qd.template()``.
- ``test_data_oriented_with_dataclass_and_ndarray_sibling``: data_oriented holding both a direct
  ndarray AND a dataclass-with-ndarray sibling.

All three pass on cluster.
---
 docs/source/user_guide/compound_types.md      | 46 ++++++++-
 .../python/test_data_oriented_mixed_combos.py | 99 +++++++++++++++++++
 2 files changed, 143 insertions(+), 2 deletions(-)
 create mode 100644 tests/python/test_data_oriented_mixed_combos.py

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 6145227a9d..9636a1ad63 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -6,7 +6,7 @@ It can be useful to combine multiple ndarrays or fields together into a single s
 
 The following compound types are available:
 - `dataclasses.dataclass` — **recommended**
-- `@qd.data_oriented` — for classes that define `@qd.kernel` methods, cannot contain ndarrays
+- `@qd.data_oriented` — for classes that define `@qd.kernel` methods
 - `@qd.struct` / `@qd.dataclass` — legacy, field-only
 
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
@@ -15,6 +15,8 @@ The following compound types are available:
 | `@qd.data_oriented`               | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
 | `@qd.struct`, `@qd.dataclass`     | yes                         | yes                       | no                   | yes                | yes            | yes                       |
 
+See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
+
 ## Recommendation
 
 **Use `dataclasses.dataclass` for new code.** It supports both fields and ndarrays, can be nested, and uses standard Python — no Quadrants-specific decorator needed.
@@ -170,7 +172,47 @@ step(state)
 
 Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
 
-Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch.
+Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
+
+## Nesting compatibility
+
+This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.
+
+| Container ↓ &nbsp;&nbsp;&nbsp; / &nbsp;&nbsp;&nbsp; Member → | `qd.ndarray` | `qd.field` | primitive | `dataclasses.dataclass` | `@qd.data_oriented` | `@qd.struct` / `@qd.dataclass` |
+|---|:---:|:---:|:---:|:---:|:---:|:---:|
+| `dataclasses.dataclass`         | yes | yes | yes | yes | yes [\*1] | yes |
+| `@qd.data_oriented`             | yes | yes | yes | yes | yes      | yes |
+| `@qd.struct` / `@qd.dataclass`  | no  | yes | yes | no  | no       | yes |
+
+[\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose field type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-attribute annotations — its members are walked from the live instance, which only happens on the template path.
+
+### Outer kernel-arg annotation
+
+The outermost annotation you put on the kernel parameter determines how the container is walked:
+
+| Annotation | Kernel-arg walker | Notes |
+|---|---|---|
+| `qd.types.NDArray[...]`           | ndarray slot                                       | leaf-level only |
+| `MyDataclass` (dataclass type)    | per-field flatten using annotations                | needs every field to have a quadrants-typed annotation |
+| `qd.template()`                   | value-driven walk of `vars(self)` / dataclass fields | supports the full nesting matrix above |
+
+Two practical consequences:
+
+- **Containers with `@qd.data_oriented` anywhere in the tree** must be passed via `qd.template()` (or be the `self` of a `@qd.kernel` method on a `@qd.data_oriented` class). Using a typed-dataclass annotation on the outermost arg errors.
+- **A non-frozen `dataclasses.dataclass`** can be passed via the typed-dataclass annotation, but cannot be the outer `qd.template()` arg — `qd.template()` uses the instance as a dict key inside the template-mapper and a non-frozen dataclass has `__hash__ = None`. Add `frozen=True` if you need to pass it as `qd.template()` (for example, when it holds `@qd.data_oriented` children).
+
+### Reassigning ndarray members
+
+For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `qd.template()`, reassigning an ndarray member between kernel launches is supported, including changes to `dtype`, `ndim`, or layout. A new specialised kernel is compiled and cached for the new `dtype`/`ndim`/layout combination; subsequent launches with the original combination continue to use the original cached kernel.
+
+### Restrictions
+
+A few combinations are still unsupported:
+
+- **`@qd.struct` / `@qd.dataclass` cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead.
+- **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` field type** (see [\*1] above) — errors clearly at compile time.
+- **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
+- **The set of ndarray-bearing attributes on a `@qd.data_oriented` class is assumed stable across instances.** Declare ndarray attributes in `__init__`, don't add new attributes after the first kernel launch on an instance of that class; the path cache is per-class and won't pick up attributes added later.
 
 ## qd.struct / qd.dataclass
 
diff --git a/tests/python/test_data_oriented_mixed_combos.py b/tests/python/test_data_oriented_mixed_combos.py
new file mode 100644
index 0000000000..2dc09c7a38
--- /dev/null
+++ b/tests/python/test_data_oriented_mixed_combos.py
@@ -0,0 +1,99 @@
+"""Spot tests for the nesting compatibility matrix in compound_types.md.
+
+These are not part of the main fix's test surface; they exist to empirically verify the table claims
+in the user-facing doc.
+"""
+
+import dataclasses
+
+import numpy as np
+
+import quadrants as qd
+from tests import test_utils
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_with_ndarray_field_and_nested_data_oriented():
+    """A single @qd.data_oriented holding all of: ndarray, field, nested @qd.data_oriented, and a primitive."""
+
+    N = 4
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self):
+            self.y = qd.ndarray(qd.i32, shape=(N,))
+
+    @qd.data_oriented
+    class State:
+        def __init__(self):
+            self.x = qd.ndarray(qd.i32, shape=(N,))
+            self.f = qd.field(qd.i32, shape=(N,))
+            self.inner = Inner()
+            self.scale = 7
+
+    state = State()
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i
+            s.f[i] = i * 2
+            s.inner.y[i] = i + s.scale
+
+    run(state)
+    np.testing.assert_array_equal(state.x.to_numpy(), np.arange(N))
+    np.testing.assert_array_equal(state.f.to_numpy(), np.arange(N) * 2)
+    np.testing.assert_array_equal(state.inner.y.to_numpy(), np.arange(N) + 7)
+
+
+@test_utils.test(arch=qd.cpu)
+def test_dataclass_with_data_oriented_via_template():
+    """A dataclass (frozen=True) holding a @qd.data_oriented holding an ndarray, passed via qd.template()."""
+    N = 4
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self):
+            self.y = qd.ndarray(qd.i32, shape=(N,))
+
+    @dataclasses.dataclass(frozen=True)
+    class Outer:
+        inner: Inner
+
+    outer = Outer(inner=Inner())
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.inner.y[i] = i + 11
+
+    run(outer)
+    np.testing.assert_array_equal(outer.inner.y.to_numpy(), np.arange(N) + 11)
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_with_dataclass_and_ndarray_sibling():
+    """@qd.data_oriented holding both a direct ndarray AND a dataclass-with-ndarray sibling."""
+    N = 4
+
+    @dataclasses.dataclass
+    class Inner:
+        z: qd.types.ndarray(dtype=qd.i32, ndim=1)
+
+    @qd.data_oriented
+    class State:
+        def __init__(self):
+            self.x = qd.ndarray(qd.i32, shape=(N,))
+            self.inner = Inner(z=qd.ndarray(qd.i32, shape=(N,)))
+
+    state = State()
+
+    @qd.kernel
+    def run(s: qd.template()):
+        for i in range(N):
+            s.x[i] = i + 1
+            s.inner.z[i] = i + 100
+
+    run(state)
+    np.testing.assert_array_equal(state.x.to_numpy(), np.arange(1, N + 1))
+    np.testing.assert_array_equal(state.inner.z.to_numpy(), np.arange(N) + 100)

From dd4de40197473ea207cbfc6177437c4e8d562bcd Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 10:33:28 -0700
Subject: [PATCH 12/66] [Doc] Fix @qd.struct ghost reference in compound_types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``@qd.struct`` does not exist as an exported symbol — ``dir(qd)`` has only ``Struct``,
``StructField``, and ``dataclass``. The original doc claimed ``@qd.struct`` / ``@qd.dataclass``
as a legacy decorator pair, but only ``@qd.dataclass`` exists. The function-form equivalent
``qd.types.struct(name1=type1, ...)`` produces the same ``StructType``.

Replace all ``@qd.struct`` references with ``@qd.dataclass`` (with a parenthetical note pointing
to the function-form factory ``qd.types.struct``). No semantic change — the row's "field-only,
no ndarrays" classification was already correct; only the name was wrong.
---
 docs/source/user_guide/compound_types.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 9636a1ad63..7506008eed 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -7,13 +7,13 @@ It can be useful to combine multiple ndarrays or fields together into a single s
 The following compound types are available:
 - `dataclasses.dataclass` — **recommended**
 - `@qd.data_oriented` — for classes that define `@qd.kernel` methods
-- `@qd.struct` / `@qd.dataclass` — legacy, field-only
+- `@qd.dataclass` (and its function-form equivalent `qd.types.struct(...)`) — legacy Quadrants `StructType`, field-only
 
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
 |------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|
 | `dataclasses.dataclass`            | yes                         | yes                       | yes                  | yes                | yes            | no [*1]                   |
 | `@qd.data_oriented`               | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
-| `@qd.struct`, `@qd.dataclass`     | yes                         | yes                       | no                   | yes                | yes            | yes                       |
+| `@qd.dataclass` / `qd.types.struct` | yes                       | yes                       | no                   | yes                | yes            | yes                       |
 
 See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
 
@@ -178,11 +178,11 @@ Note: as with `dataclasses.dataclass`, reassigning an ndarray member between ker
 
 This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.
 
-| Container ↓ &nbsp;&nbsp;&nbsp; / &nbsp;&nbsp;&nbsp; Member → | `qd.ndarray` | `qd.field` | primitive | `dataclasses.dataclass` | `@qd.data_oriented` | `@qd.struct` / `@qd.dataclass` |
+| Container ↓ &nbsp;&nbsp;&nbsp; / &nbsp;&nbsp;&nbsp; Member → | `qd.ndarray` | `qd.field` | primitive | `dataclasses.dataclass` | `@qd.data_oriented` | `@qd.dataclass` |
 |---|:---:|:---:|:---:|:---:|:---:|:---:|
 | `dataclasses.dataclass`         | yes | yes | yes | yes | yes [\*1] | yes |
 | `@qd.data_oriented`             | yes | yes | yes | yes | yes      | yes |
-| `@qd.struct` / `@qd.dataclass`  | no  | yes | yes | no  | no       | yes |
+| `@qd.dataclass`                 | no  | yes | yes | no  | no       | yes |
 
 [\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose field type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-attribute annotations — its members are walked from the live instance, which only happens on the template path.
 
@@ -209,14 +209,14 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 A few combinations are still unsupported:
 
-- **`@qd.struct` / `@qd.dataclass` cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead.
+- **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
 - **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` field type** (see [\*1] above) — errors clearly at compile time.
 - **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
 - **The set of ndarray-bearing attributes on a `@qd.data_oriented` class is assumed stable across instances.** Declare ndarray attributes in `__init__`, don't add new attributes after the first kernel launch on an instance of that class; the path cache is per-class and won't pick up attributes added later.
 
-## qd.struct / qd.dataclass
+## qd.dataclass / qd.types.struct
 
-`@qd.struct` (and its alias `@qd.dataclass`) is a Quadrants-native struct type. It can only contain fields and primitive types, not ndarrays.
+`@qd.dataclass` is a Quadrants-native `StructType` decorator. The function-form factory `qd.types.struct(name1=type1, ...)` produces the same `StructType`. Both can only contain fields and primitive types (and other `StructType` members), not ndarrays.
 
 ```python
 @qd.dataclass

From 46825ab92742926773517092b3a761df422bd693 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 10:35:43 -0700
Subject: [PATCH 13/66] [Test] Pin fastcache + @qd.data_oriented + ndarray
 end-to-end behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Belt-and-braces tests for the case the user explicitly requires: fastcache should work when a
@qd.data_oriented contains ndarrays (with or without primitives or nested data_oriented
children), and should *correctly fall back* (not error, not silently miscompile) when the
container holds a qd.field.

Pattern adapted from ``test_cache.test_fastcache``: call ``qd_init_same_arch`` twice with the
same ``offline_cache_file_path`` to simulate two processes. Monkeypatch ``launch_kernel`` to
capture ``compiled_kernel_data`` per call: ``None`` on the cold init (compile) and a non-None
``CompiledKernelData`` on the warm init (loaded from disk fastcache).

New tests:

- ``test_data_oriented_ndarray_fastcache_cross_init`` — single ndarray member, second init loads
  from disk.
- ``test_data_oriented_nested_ndarray_fastcache_cross_init`` — nested @qd.data_oriented + ndarray
  member, second init loads from disk. Exercises the args_hasher recursion.
- ``test_data_oriented_ndarray_fastcache_dtype_key_distinct`` — two different ndarray dtypes on
  the same data_oriented produce two distinct cache entries; both load from disk on warm init.
  Pins the ``[nd-{dtype}-{ndim}{layout}]`` repr in args_hasher.
- ``test_data_oriented_field_disables_fastcache_but_runs`` — data_oriented + qd.field documented
  fallback: ``cache_key_generated`` is False, but the kernel still runs correctly.

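The pre-existing ``test_data_oriented_ndarray_fastcache_eligible`` (kept) checks the in-process
``cache_key_generated`` flag; the first three tests above add cross-init disk-cache verification,
and the fourth pins the in-process ``qd.field`` fallback (``cache_key_generated`` is False).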
---
 tests/python/test_data_oriented_ndarray.py | 170 +++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index 3ea6c88e70..7bc55b123d 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -284,6 +284,176 @@ def run(s: qd.template()):
 # ---------------------------------------------------------------------------
 
 
+# ---------------------------------------------------------------------------
+# 9b. Fastcache end-to-end with ``@qd.data_oriented`` holding ndarrays. Pattern adapted from
+#     ``test_cache.test_fastcache``: call ``qd_init_same_arch`` twice with the same cache directory
+#     to simulate two processes, monkeypatch ``launch_kernel`` to capture whether
+#     ``compiled_kernel_data`` was loaded from disk. On the second init the data_oriented + ndarray
+#     kernel should be served from the on-disk fastcache.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_fastcache_cross_init(tmp_path, monkeypatch):
+    from quadrants._test_tools import qd_init_same_arch
+
+    launch_kernel_orig = qd.lang.kernel_impl.Kernel.launch_kernel
+    captured_compiled_kernel_data = []
+
+    def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
+        captured_compiled_kernel_data.append(compiled_kernel_data)
+        return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
+
+    monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.kernel(fastcache=True)
+    def run(s: qd.template()):
+        for i in range(4):
+            s.x[i] = i * 3
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    state = State(x=qd.ndarray(qd.i32, shape=(4,)))
+    run(state)
+    np.testing.assert_array_equal(state.x.to_numpy(), np.arange(4) * 3)
+    assert captured_compiled_kernel_data[-1] is None, "cold init should compile, not load"
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    state = State(x=qd.ndarray(qd.i32, shape=(4,)))
+    run(state)
+    np.testing.assert_array_equal(state.x.to_numpy(), np.arange(4) * 3)
+    assert captured_compiled_kernel_data[-1] is not None, "warm init should load from disk fastcache"
+
+
+# ---------------------------------------------------------------------------
+# 9c. Same as 9b but with a *nested* ``@qd.data_oriented`` holding an ndarray. Pins that the
+#     fastcache args_hasher recursion handles nested data_oriented containers correctly across
+#     processes.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_nested_ndarray_fastcache_cross_init(tmp_path, monkeypatch):
+    from quadrants._test_tools import qd_init_same_arch
+
+    launch_kernel_orig = qd.lang.kernel_impl.Kernel.launch_kernel
+    captured = []
+
+    def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
+        captured.append(compiled_kernel_data)
+        return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
+
+    monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)
+
+    @qd.data_oriented
+    class Inner:
+        def __init__(self, y):
+            self.y = y
+
+    @qd.data_oriented
+    class Outer:
+        def __init__(self, inner):
+            self.inner = inner
+
+    @qd.kernel(fastcache=True)
+    def run(s: qd.template()):
+        for i in range(4):
+            s.inner.y[i] = i + 11
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    outer = Outer(inner=Inner(y=qd.ndarray(qd.i32, shape=(4,))))
+    run(outer)
+    assert captured[-1] is None
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    outer = Outer(inner=Inner(y=qd.ndarray(qd.i32, shape=(4,))))
+    run(outer)
+    assert captured[-1] is not None, "nested data_oriented + ndarray should load from fastcache"
+
+
+# ---------------------------------------------------------------------------
+# 9d. Fastcache key is dtype-sensitive: same kernel source, different ndarray dtype in the
+#     data_oriented member -> two distinct disk cache entries. Pins the args_hasher's
+#     ``[nd-{dtype}-{ndim}{layout}]`` repr.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_ndarray_fastcache_dtype_key_distinct(tmp_path, monkeypatch):
+    from quadrants._test_tools import qd_init_same_arch
+
+    launch_kernel_orig = qd.lang.kernel_impl.Kernel.launch_kernel
+    captured = []
+
+    def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
+        captured.append(compiled_kernel_data)
+        return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
+
+    monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, x):
+            self.x = x
+
+    @qd.kernel(fastcache=True)
+    def run(s: qd.template()):
+        for i in range(4):
+            s.x[i] = s.x[i] + 1
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    state_i32 = State(x=qd.ndarray(qd.i32, shape=(4,)))
+    state_f32 = State(x=qd.ndarray(qd.f32, shape=(4,)))
+    run(state_i32)
+    run(state_f32)
+    assert captured[-2] is None and captured[-1] is None, "both dtypes cold-compile on first init"
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+    state_i32 = State(x=qd.ndarray(qd.i32, shape=(4,)))
+    state_f32 = State(x=qd.ndarray(qd.f32, shape=(4,)))
+    run(state_i32)
+    run(state_f32)
+    assert captured[-2] is not None and captured[-1] is not None, "both dtypes load from disk"
+    np.testing.assert_array_equal(state_i32.x.to_numpy(), [1, 1, 1, 1])
+    np.testing.assert_array_equal(state_f32.x.to_numpy(), np.array([1.0] * 4, dtype=np.float32))
+
+
+# ---------------------------------------------------------------------------
+# 9e. Documented fallback: a @qd.data_oriented containing a qd.field disables fastcache for the
+#     whole call (args_hasher returns None for ScalarField). The kernel still runs correctly via
+#     non-fastcache compilation. This test pins the documented fallback so a future "support
+#     fields in fastcache" change explicitly chooses to update this test.
+# ---------------------------------------------------------------------------
+
+
+@test_utils.test(arch=qd.cpu)
+def test_data_oriented_field_disables_fastcache_but_runs(tmp_path, monkeypatch):
+    from quadrants._test_tools import qd_init_same_arch
+
+    qd_init_same_arch(offline_cache_file_path=str(tmp_path), offline_cache=True)
+
+    @qd.data_oriented
+    class State:
+        def __init__(self, n):
+            self.f = qd.field(qd.i32, shape=(n,))
+
+    state = State(4)
+
+    @qd.kernel(fastcache=True)
+    def run(s: qd.template()):
+        for i in range(4):
+            s.f[i] = i + 7
+
+    run(state)
+    obs = run._primal.src_ll_cache_observations
+    assert obs.cache_key_generated is False, "field child should disable fastcache key generation"
+    np.testing.assert_array_equal(state.f.to_numpy(), np.arange(4) + 7)
+
+
 @test_utils.test(arch=qd.cpu)
 def test_data_oriented_ndarray_fastcache_eligible():
     N = 4

From ee5fbbbdae22045fe9c9b6d9734befe7aea07f16 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 10:47:19 -0700
Subject: [PATCH 14/66] [Doc] Fastcache with @qd.data_oriented: worked example,
 semantics, footguns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing fastcache.md mentions @qd.data_oriented in the constraint table and in a one-line
note next to the dataclass section, but doesn't give a worked example or spell out the
behavioural semantics. This commit adds a focused subsection covering:

- A worked Simulation example: __init__ allocates state once, @qd.kernel(fastcache=True) method
  consumes it via self.
- Primitive members of @qd.data_oriented are *implicitly templated* — their values are folded
  into the fastcache key without needing add_value_to_cache_key or qd.static(...). This is the
  property that lets the cache differentiate between Simulation(n=8) and Simulation(n=64).
- Tensor contents vs reassignment: a per-operation table showing which mutations share the cache
  entry (element writes, same-dtype/ndim reassignment) and which produce a new entry (dtype or
  ndim change).
- dataclasses.dataclass nesting works, but has the inverse default for primitives — types only,
  not values. Spell out the silent-miscompile risk if you put a qd.static-baked value in a
  dataclass field without FIELD_METADATA_CACHE_VALUE.
- What disables fastcache on a data_oriented arg: any qd.field child anywhere in the tree, with
  a pointer to the perso_hugh follow-up doc.

Also adds a short "Fastcache interaction" cross-reference in compound_types.md so a reader who
lands there is pointed at the fastcache subsection.

No code changes — purely user-facing documentation of behaviour that already exists on the
hp/data-oriented-ndarray-fix branch (data_oriented + ndarray + fastcache works end-to-end across
processes, verified in the investigation doc).
---
 docs/source/user_guide/compound_types.md |   2 +
 docs/source/user_guide/fastcache.md      | 114 +++++++++++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 7506008eed..1f5be5e211 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -174,6 +174,8 @@ Mixing `qd.field` and `qd.ndarray` members in the same class is also supported.
 
 Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
 
+For how `@qd.kernel(fastcache=True)` interacts with `@qd.data_oriented` containers (including which member types are supported, when primitive values trigger recompilation, and the `dataclasses.dataclass` footgun), see [Using fastcache with `@qd.data_oriented`](fastcache.md#using-fastcache-with-qddata_oriented) in the fastcache user guide.
+
 ## Nesting compatibility
 
 This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.
diff --git a/docs/source/user_guide/fastcache.md b/docs/source/user_guide/fastcache.md
index 016033a884..4e5c049036 100644
--- a/docs/source/user_guide/fastcache.md
+++ b/docs/source/user_guide/fastcache.md
@@ -80,6 +80,120 @@ With this annotation, changing `num_envs` from 100 to 200 produces a different c
 
 Note: `@qd.data_oriented` objects and `qd.Template` parameters already include primitive values in the cache key automatically — this annotation is only needed for `dataclasses.dataclass` fields.
 
+## Using fastcache with `@qd.data_oriented`
+
+A `@qd.data_oriented` class is the natural place to write simulation-style code: an `__init__` that allocates state once, and `@qd.kernel` methods that operate on that state. Fastcache works with this pattern when the container holds only fastcache-supported member types (ndarrays, primitives, nested data_oriented or dataclasses, enums).
+
+### Worked example
+
+```python
+import quadrants as qd
+
+qd.init(arch=qd.cpu)
+
+
+@qd.data_oriented
+class Simulation:
+    def __init__(self, n: int, dt: float):
+        # Primitives — folded into the fastcache key as values (see below)
+        self.n = n
+        self.dt = dt
+        self.gravity = -9.81
+
+        # Tensors — only their dtype/ndim/layout enter the cache key
+        self.x = qd.ndarray(qd.f32, shape=(n,))
+        self.v = qd.ndarray(qd.f32, shape=(n,))
+
+    @qd.kernel(fastcache=True)
+    def step(self):
+        for i in range(qd.static(self.n)):
+            self.v[i] = self.v[i] + qd.static(self.gravity * self.dt)
+            self.x[i] = self.x[i] + self.v[i] * qd.static(self.dt)
+
+
+sim = Simulation(n=8, dt=0.01)
+sim.step()
+```
+
+On the first call, the kernel compiles and the artifact is written to the fastcache. On a subsequent Python process the artifact is loaded directly — no AST parse, no compile.
+
+### Primitive members are implicitly templated
+
+For members of a `@qd.data_oriented`, the args hasher folds *values* of primitive types (`int`, `float`, `bool`, `enum.Enum`) into the cache key automatically — you do **not** need `add_value_to_cache_key`, and you do **not** need to wrap them in `qd.static(...)` for the cache to be correct (`qd.static(...)` is still useful inside the kernel body to make the intent explicit and to force loop unrolling).
+
+That means changing a primitive member triggers a new compilation:
+
+```python
+sim_a = Simulation(n=8, dt=0.01)   # cache key #1
+sim_b = Simulation(n=64, dt=0.01)  # cache key #2 — different n value
+sim_c = Simulation(n=8, dt=0.005)  # cache key #3 — different dt value
+```
+
+This is the same semantics as if you'd written `n` and `dt` as explicit `qd.template()` parameters. Two instances with the same `(n, dt, gravity)` and the same ndarray dtypes/ndims share a cache entry.
+
+### Tensor contents are not part of the cache key
+
+For ndarray members, only `(dtype, ndim, layout)` enter the key. The actual element values are not hashed. You can mutate `self.x[i] = ...` freely between calls — same compiled kernel, different data.
+
+Reassigning an ndarray member to a different shape or dtype produces a different cache key, which is the correct behaviour:
+
+| Operation                                              | Same cache key? |
+|--------------------------------------------------------|:---:|
+| Mutate elements: `sim.x[i] = 1.0`                      | yes |
+| Reassign same dtype/ndim: `sim.x = qd.ndarray(qd.f32, (n,))` | yes |
+| Reassign different dtype: `sim.x = qd.ndarray(qd.f64, (n,))` | no — different cache entry |
+| Reassign different ndim: `(n,)` → `(n, m)`             | no — different cache entry |
+
+### `dataclasses.dataclass` members work — with one footgun
+
+You can nest a `dataclasses.dataclass` inside a `@qd.data_oriented` (or vice versa) and the walker recurses correctly. **But there is an important asymmetry on primitives:**
+
+| Container         | Primitive child values folded into the cache key by default? |
+|-------------------|:---:|
+| `@qd.data_oriented` | **yes** — implicitly templated |
+| `dataclasses.dataclass` | **no** — type only; opt in per-field with `FIELD_METADATA_CACHE_VALUE` |
+
+If you put a `qd.static(...)`-baked value inside a dataclass without `FIELD_METADATA_CACHE_VALUE`, fastcache can load a kernel compiled for the *wrong* value:
+
+```python
+@dataclasses.dataclass
+class SimConfig:
+    num_layers: int  # WRONG for qd.static — type-only key
+    dt: float        # WRONG for qd.static — type-only key
+
+@qd.data_oriented
+class Simulation:
+    def __init__(self, cfg):
+        self.cfg = cfg
+        ...
+    @qd.kernel(fastcache=True)
+    def step(self):
+        for i in qd.static(range(self.cfg.num_layers)):  # baked-in value!
+            ...
+```
+
+`SimConfig(num_layers=8)` and `SimConfig(num_layers=16)` would hash to the **same** fastcache key, and the second instance could silently load a kernel compiled for 8 iterations. Fix by opting the fields into the key:
+
+```python
+from quadrants.lang._fast_caching import FIELD_METADATA_CACHE_VALUE
+
+@dataclasses.dataclass
+class SimConfig:
+    num_layers: int = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
+    dt: float = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
+```
+
+The asymmetry exists because `@qd.data_oriented` is intended as a "self" container — its primitives are treated as part of the type signature. `dataclasses.dataclass` is a general value container; defaulting primitive values into the key would over-specialise.
+
+### What disables fastcache on a `@qd.data_oriented` arg
+
+The args hasher walks every member and bails out if any single member is fastcache-unsupported. Most relevant:
+
+- A `qd.field` member anywhere in the tree (including nested) disables fastcache for the entire kernel call. A warn-level log line is emitted. The kernel still runs correctly via normal compilation, just without the fastcache speed-up.
+- Any captured external state in the kernel body (closures over `self`-bound names is fine; closures over enclosing-Python-scope names are not, with the same exemptions as for any other fastcache kernel — see [Constraints](#constraints) below).
+
+If you need fastcache for a class that currently uses `qd.field`, the migration path is to replace the fields with `qd.ndarray`s in `__init__`. Field members are tracked as a follow-up in `perso_hugh/doc/data_oriented_fastcache.md`.
+
 ## Constraints
 
 A kernel is eligible for fastcache only if all of the following hold:

From b132b81c050177de4d4f7af238a2bae145573860 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:01:46 -0700
Subject: [PATCH 15/66] [Doc] Restructure fastcache.md: simple main body,
 Advanced subsection for compound-type keying

- Main body now covers only: how to enable fastcache + the constraints
  for enabling it.
- Move all container-specific behaviour (data_oriented primitive value
  folding, dataclasses.dataclass FIELD_METADATA_CACHE_VALUE opt-in,
  qd.field disables fastcache) into a single tight
  "Advanced -> Compound-type cache keying" subsection.
- Drop @qd.data_oriented description from fastcache.md (lives in
  compound_types.md). Drop qd.static <-> fastcache conflation: the two
  mechanisms are orthogonal.
- compound_types.md retains a single cross-link to the new
  fastcache.md#compound-type-cache-keying anchor.
---
 docs/source/user_guide/compound_types.md |   2 +-
 docs/source/user_guide/fastcache.md      | 176 ++++-------------------
 2 files changed, 31 insertions(+), 147 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 1f5be5e211..a9e73785c2 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -174,7 +174,7 @@ Mixing `qd.field` and `qd.ndarray` members in the same class is also supported.
 
 Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
 
-For how `@qd.kernel(fastcache=True)` interacts with `@qd.data_oriented` containers (including which member types are supported, when primitive values trigger recompilation, and the `dataclasses.dataclass` footgun), see [Using fastcache with `@qd.data_oriented`](fastcache.md#using-fastcache-with-qddata_oriented) in the fastcache user guide.
+For how `@qd.kernel(fastcache=True)` interacts with compound types (which member types are supported, when primitive values fold into the cache key, the `dataclasses.dataclass` opt-in footgun, and the `qd.field`-disables behaviour), see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) in the fastcache user guide.
 
 ## Nesting compatibility
 
diff --git a/docs/source/user_guide/fastcache.md b/docs/source/user_guide/fastcache.md
index 4e5c049036..60403e4880 100644
--- a/docs/source/user_guide/fastcache.md
+++ b/docs/source/user_guide/fastcache.md
@@ -50,150 +50,6 @@ qd.init(arch=qd.gpu)
 # qd.init(arch=qd.gpu, print_non_pure=True)
 ```
 
-## Dataclass fields with cached values
-
-By default, for `dataclasses.dataclass` parameters, fastcache only includes the *types* of each field in the cache key, not their values. This is fine for fields like ndarrays, where the compiled kernel doesn't depend on the actual data, only the dtype and dimensionality.
-
-However, some dataclass fields hold configuration values that get baked into the compiled kernel — typically values used with `qd.static()`, such as loop bounds or feature flags:
-
-```python
-for i in qd.static(range(config.num_layers)):
-    ...
-```
-
-Here the value of `num_layers` is compiled into the kernel. Concretely the loop will be unrolled, at compile time. If `num_layers` changes, a different kernel must be compiled.
-
-Mark such fields with `add_value_to_cache_key` so their values are included in the cache key:
-
-```python
-import dataclasses
-from quadrants.lang._fast_caching import FIELD_METADATA_CACHE_VALUE
-
-@dataclasses.dataclass
-class SimConfig:
-    num_envs: int = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
-    dt: float = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
-    use_gravity: bool = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
-```
-
-With this annotation, changing `num_envs` from 100 to 200 produces a different cache key so the correct compiled kernel is looked up (or compiled if not yet cached). Without it, the wrong kernel could be loaded.
-
-Note: `@qd.data_oriented` objects and `qd.Template` parameters already include primitive values in the cache key automatically — this annotation is only needed for `dataclasses.dataclass` fields.
-
-## Using fastcache with `@qd.data_oriented`
-
-A `@qd.data_oriented` class is the natural place to write simulation-style code: an `__init__` that allocates state once, and `@qd.kernel` methods that operate on that state. Fastcache works with this pattern when the container holds only fastcache-supported member types (ndarrays, primitives, nested data_oriented or dataclasses, enums).
-
-### Worked example
-
-```python
-import quadrants as qd
-
-qd.init(arch=qd.cpu)
-
-
-@qd.data_oriented
-class Simulation:
-    def __init__(self, n: int, dt: float):
-        # Primitives — folded into the fastcache key as values (see below)
-        self.n = n
-        self.dt = dt
-        self.gravity = -9.81
-
-        # Tensors — only their dtype/ndim/layout enter the cache key
-        self.x = qd.ndarray(qd.f32, shape=(n,))
-        self.v = qd.ndarray(qd.f32, shape=(n,))
-
-    @qd.kernel(fastcache=True)
-    def step(self):
-        for i in range(qd.static(self.n)):
-            self.v[i] = self.v[i] + qd.static(self.gravity * self.dt)
-            self.x[i] = self.x[i] + self.v[i] * qd.static(self.dt)
-
-
-sim = Simulation(n=8, dt=0.01)
-sim.step()
-```
-
-On the first call, the kernel compiles and the artifact is written to the fastcache. On a subsequent Python process the artifact is loaded directly — no AST parse, no compile.
-
-### Primitive members are implicitly templated
-
-For members of a `@qd.data_oriented`, the args hasher folds *values* of primitive types (`int`, `float`, `bool`, `enum.Enum`) into the cache key automatically — you do **not** need `add_value_to_cache_key`, and you do **not** need to wrap them in `qd.static(...)` for the cache to be correct (`qd.static(...)` is still useful inside the kernel body to make the intent explicit and to force loop unrolling).
-
-That means changing a primitive member triggers a new compilation:
-
-```python
-sim_a = Simulation(n=8, dt=0.01)   # cache key #1
-sim_b = Simulation(n=64, dt=0.01)  # cache key #2 — different n value
-sim_c = Simulation(n=8, dt=0.005)  # cache key #3 — different dt value
-```
-
-This is the same semantics as if you'd written `n` and `dt` as explicit `qd.template()` parameters. Two instances with the same `(n, dt, gravity)` and the same ndarray dtypes/ndims share a cache entry.
-
-### Tensor contents are not part of the cache key
-
-For ndarray members, only `(dtype, ndim, layout)` enter the key. The actual element values are not hashed. You can mutate `self.x[i] = ...` freely between calls — same compiled kernel, different data.
-
-Reassigning an ndarray member to a different shape or dtype produces a different cache key, which is the correct behaviour:
-
-| Operation                                              | Same cache key? |
-|--------------------------------------------------------|:---:|
-| Mutate elements: `sim.x[i] = 1.0`                      | yes |
-| Reassign same dtype/ndim: `sim.x = qd.ndarray(qd.f32, (n,))` | yes |
-| Reassign different dtype: `sim.x = qd.ndarray(qd.f64, (n,))` | no — different cache entry |
-| Reassign different ndim: `(n,)` → `(n, m)`             | no — different cache entry |
-
-### `dataclasses.dataclass` members work — with one footgun
-
-You can nest a `dataclasses.dataclass` inside a `@qd.data_oriented` (or vice versa) and the walker recurses correctly. **But there is an important asymmetry on primitives:**
-
-| Container         | Primitive child values folded into the cache key by default? |
-|-------------------|:---:|
-| `@qd.data_oriented` | **yes** — implicitly templated |
-| `dataclasses.dataclass` | **no** — type only; opt in per-field with `FIELD_METADATA_CACHE_VALUE` |
-
-If you put a `qd.static(...)`-baked value inside a dataclass without `FIELD_METADATA_CACHE_VALUE`, fastcache can load a kernel compiled for the *wrong* value:
-
-```python
-@dataclasses.dataclass
-class SimConfig:
-    num_layers: int  # WRONG for qd.static — type-only key
-    dt: float        # WRONG for qd.static — type-only key
-
-@qd.data_oriented
-class Simulation:
-    def __init__(self, cfg):
-        self.cfg = cfg
-        ...
-    @qd.kernel(fastcache=True)
-    def step(self):
-        for i in qd.static(range(self.cfg.num_layers)):  # baked-in value!
-            ...
-```
-
-`SimConfig(num_layers=8)` and `SimConfig(num_layers=16)` would hash to the **same** fastcache key, and the second instance could silently load a kernel compiled for 8 iterations. Fix by opting the fields into the key:
-
-```python
-from quadrants.lang._fast_caching import FIELD_METADATA_CACHE_VALUE
-
-@dataclasses.dataclass
-class SimConfig:
-    num_layers: int = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
-    dt: float = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
-```
-
-The asymmetry exists because `@qd.data_oriented` is intended as a "self" container — its primitives are treated as part of the type signature. `dataclasses.dataclass` is a general value container; defaulting primitive values into the key would over-specialise.
-
-### What disables fastcache on a `@qd.data_oriented` arg
-
-The args hasher walks every member and bails out if any single member is fastcache-unsupported. Most relevant:
-
-- A `qd.field` member anywhere in the tree (including nested) disables fastcache for the entire kernel call. A warn-level log line is emitted. The kernel still runs correctly via normal compilation, just without the fastcache speed-up.
-- Any captured external state in the kernel body (closures over `self`-bound names is fine; closures over enclosing-Python-scope names are not, with the same exemptions as for any other fastcache kernel — see [Constraints](#constraints) below).
-
-If you need fastcache for a class that currently uses `qd.field`, the migration path is to replace the fields with `qd.ndarray`s in `__init__`. Field members are tracked as a follow-up in `perso_hugh/doc/data_oriented_fastcache.md`.
-
 ## Constraints
 
 A kernel is eligible for fastcache only if all of the following hold:
@@ -239,8 +95,8 @@ Fastcache supports the following parameter types:
 | `qd.types.NDArray` (scalar, vector, matrix) | Yes | dtype, ndim, layout |
 | `torch.Tensor` | Yes | dtype, ndim |
 | `numpy.ndarray` | Yes | dtype, ndim |
-| `dataclasses.dataclass` | Yes | field types recursively; field values if annotated with `add_value_to_cache_key` (see [above](#dataclass-fields-with-cached-values)) |
-| `@qd.data_oriented` objects | Yes | member types and primitive member values recursively |
+| `dataclasses.dataclass` | Yes | field types recursively; field values if annotated with `FIELD_METADATA_CACHE_VALUE` (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
+| `@qd.data_oriented` objects | Yes | member types recursively; primitive member values folded automatically (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
 | `qd.Template` primitives (int, float, bool) | Yes | type and value (baked into kernel) |
 | Non-template primitives (int, float, bool) | Yes | type only |
 | `enum.Enum` | Yes | name and value |
@@ -286,3 +142,31 @@ print(obs.cache_stored)         # True if the compiled kernel was stored to cach
 ```
 
 On the first run you'll see `cache_stored=True` but `cache_loaded=False`. On the second run (after `qd.init`), `cache_loaded=True`.
+
+### Compound-type cache keying
+
+The args hasher walks compound-type kernel parameters recursively. For each leaf member it decides what (if anything) to fold into the cache key. The headline rules:
+
+**`@qd.data_oriented`:** the walker descends into `vars(obj)`. For each child:
+
+- `qd.ndarray` member — `(dtype, ndim, layout)` folded into the key. Element values are not.
+- Primitive (`int` / `float` / `bool` / `enum.Enum`) member — *value* folded into the key. Two instances of the same class with different primitive member values get different cache entries.
+- Nested `@qd.data_oriented` member — recurses.
+- Nested `dataclasses.dataclass` member — recurses (with the dataclass rules below).
+- `qd.field` member — fastcache is disabled for the entire kernel call. The kernel still runs via normal compilation; a warn-level log line is emitted.
+
+**`dataclasses.dataclass`:** the walker descends into the declared fields. For each field, only the *type* is folded into the cache key by default — **not** the value. To include a field's value, annotate it:
+
+```python
+import dataclasses
+from quadrants.lang._fast_caching import FIELD_METADATA_CACHE_VALUE
+
+@dataclasses.dataclass
+class SimConfig:
+    num_layers: int = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
+    dt: float = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
+```
+
+This is necessary whenever the compiled kernel depends on the field's *value* rather than just its type (for example, when the value is used as a loop bound that the compiler bakes into the generated code). Without the annotation, two `SimConfig` instances with different `num_layers` values would share a fastcache key, and the second instance would silently load a kernel compiled for the wrong value.
+
+Note the asymmetry: `@qd.data_oriented` primitive members fold their *values* into the key automatically; `dataclasses.dataclass` fields fold only their *types* unless you opt in per-field.

From 6d1c820fa50494de0fb92aa1865336950edf2fe9 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:07:52 -0700
Subject: [PATCH 16/66] [Doc] Use 'member' consistently for compound-type
 members; drop ambiguous bare 'field'

In fastcache.md and compound_types.md, several places used the bare word
'field' to mean 'attribute of a dataclasses.dataclass / @qd.data_oriented
container'. Because qd.field is itself a documented Quadrants type
(listed in the same parameter-types table that disables fastcache when
it appears), bare 'field' was ambiguous. Standardise on 'member' for
compound-type members. Keep:

- 'qd.field' / 'ScalarField' / 'MatrixField' / 'qd.dataclass' /
  'StructType' references unchanged (these are the Quadrants types).
- 'dataclasses.field(...)' unchanged (Python stdlib API).
- 'attribute' only where it means Python attribute-access syntax
  (`s.foo`) or the `src_ll_cache_observations` Python instance
  attribute.

Also clean up the purity-constraint closure-list example to drop
'fields' (it was unrelated to the qd.field/dataclass-field distinction
and was just listing examples of external state).
---
 docs/source/user_guide/compound_types.md | 14 +++++++-------
 docs/source/user_guide/fastcache.md      | 10 +++++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index a9e73785c2..ea2e18e0ba 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -93,7 +93,7 @@ def k2(s: Outer) -> None:
 
 ### Passing nested sub-structs to a `qd.func`
 
-You can pass either a whole nested-dataclass argument or one of its sub-struct fields to a `qd.func`. The callee declares the sub-struct's type as the parameter annotation; the caller writes the attribute access at the call site:
+You can pass either a whole nested-dataclass argument or one of its sub-struct members to a `qd.func`. The callee declares the sub-struct's type as the parameter annotation; the caller writes the attribute access at the call site:
 
 ```python
 @dataclass
@@ -125,7 +125,7 @@ Sub-struct passing supports:
 - arbitrary nesting depth (`f(s.a.b.c)` where each level is a dataclass)
 - positional and keyword call sites (`f(s.inner)` and `f(inner=s.inner)`)
 - call sites both directly inside `@qd.kernel` bodies and inside other `@qd.func` bodies
-- pruning of the sub-struct's leaf fields that the callee never reads
+- pruning of the sub-struct's leaf members that the callee never reads
 
 Note: assigning a sub-struct to a local variable and then passing it (`t = s.inner; touch_inner(t)`) is **not** supported. Pass the attribute access directly at the call site.
 
@@ -186,7 +186,7 @@ This table summarises which member types are allowed inside which container type
 | `@qd.data_oriented`             | yes | yes | yes | yes | yes      | yes |
 | `@qd.dataclass`                 | no  | yes | yes | no  | no       | yes |
 
-[\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose field type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-attribute annotations — its members are walked from the live instance, which only happens on the template path.
+[\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose member type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-member annotations — its members are walked from the live instance, which only happens on the template path.
 
 ### Outer kernel-arg annotation
 
@@ -195,8 +195,8 @@ The outermost annotation you put on the kernel parameter determines how the cont
 | Annotation | Kernel-arg walker | Notes |
 |---|---|---|
 | `qd.types.NDArray[...]`           | ndarray slot                                       | leaf-level only |
-| `MyDataclass` (dataclass type)    | per-field flatten using annotations                | needs every field to have a quadrants-typed annotation |
-| `qd.template()`                   | value-driven walk of `vars(self)` / dataclass fields | supports the full nesting matrix above |
+| `MyDataclass` (dataclass type)    | per-member flatten using annotations               | needs every member to have a quadrants-typed annotation |
+| `qd.template()`                   | value-driven walk of `vars(self)` / dataclass members | supports the full nesting matrix above |
 
 Two practical consequences:
 
@@ -212,9 +212,9 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 A few combinations are still unsupported:
 
 - **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
-- **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` field type** (see [\*1] above) — errors clearly at compile time.
+- **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` member type** (see [\*1] above) — errors clearly at compile time.
 - **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
-- **The set of ndarray-bearing attributes on a `@qd.data_oriented` class is assumed stable across instances.** Declare ndarray attributes in `__init__`, don't add new attributes after the first kernel launch on an instance of that class; the path cache is per-class and won't pick up attributes added later.
+- **The set of ndarray-bearing members on a `@qd.data_oriented` class is assumed stable across instances.** Declare ndarray members in `__init__` and do not add new members after the first kernel launch on an instance of that class; the path cache is per-class and won't pick up members added later.
 
 ## qd.dataclass / qd.types.struct
 
diff --git a/docs/source/user_guide/fastcache.md b/docs/source/user_guide/fastcache.md
index 60403e4880..822eb4998d 100644
--- a/docs/source/user_guide/fastcache.md
+++ b/docs/source/user_guide/fastcache.md
@@ -56,7 +56,7 @@ A kernel is eligible for fastcache only if all of the following hold:
 
 ### 1. All data flows through parameters
 
-The kernel must receive every piece of data it operates on as an explicit parameter. It must **not** capture variables from the enclosing Python scope (closures over fields, ndarrays, or mutable globals). This is the core "purity" constraint — the compiled kernel's behavior must be fully determined by its arguments.
+The kernel must receive every piece of data it operates on as an explicit parameter. It must **not** capture variables from the enclosing Python scope (closures over ndarrays, mutable globals, or any other external state). This is the core "purity" constraint — the compiled kernel's behavior must be fully determined by its arguments.
 
 ```python
 a = qd.ndarray(qd.f32, (10,))
@@ -95,7 +95,7 @@ Fastcache supports the following parameter types:
 | `qd.types.NDArray` (scalar, vector, matrix) | Yes | dtype, ndim, layout |
 | `torch.Tensor` | Yes | dtype, ndim |
 | `numpy.ndarray` | Yes | dtype, ndim |
-| `dataclasses.dataclass` | Yes | field types recursively; field values if annotated with `FIELD_METADATA_CACHE_VALUE` (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
+| `dataclasses.dataclass` | Yes | member types recursively; member values if annotated with `FIELD_METADATA_CACHE_VALUE` (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
 | `@qd.data_oriented` objects | Yes | member types recursively; primitive member values folded automatically (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
 | `qd.Template` primitives (int, float, bool) | Yes | type and value (baked into kernel) |
 | Non-template primitives (int, float, bool) | Yes | type only |
@@ -155,7 +155,7 @@ The args hasher walks compound-type kernel parameters recursively. For each leaf
 - Nested `dataclasses.dataclass` member — recurses (with the dataclass rules below).
 - `qd.field` member — fastcache is disabled for the entire kernel call. The kernel still runs via normal compilation; a warn-level log line is emitted.
 
-**`dataclasses.dataclass`:** the walker descends into the declared fields. For each field, only the *type* is folded into the cache key by default — **not** the value. To include a field's value, annotate it:
+**`dataclasses.dataclass`:** the walker descends into the declared members. For each member, only the *type* is folded into the cache key by default — **not** the value. To include a member's value, annotate it:
 
 ```python
 import dataclasses
@@ -167,6 +167,6 @@ class SimConfig:
     dt: float = dataclasses.field(metadata={FIELD_METADATA_CACHE_VALUE: True})
 ```
 
-This is necessary whenever the compiled kernel depends on the field's *value* rather than just its type (for example, when the value is used as a loop bound that the compiler bakes into the generated code). Without the annotation, two `SimConfig` instances with different `num_layers` values would share a fastcache key, and the second instance would silently load a kernel compiled for the wrong value.
+This is necessary whenever the compiled kernel depends on the member's *value* rather than just its type (for example, when the value is used as a loop bound that the compiler bakes into the generated code). Without the annotation, two `SimConfig` instances with different `num_layers` values would share a fastcache key, and the second instance would silently load a kernel compiled for the wrong value.
 
-Note the asymmetry: `@qd.data_oriented` primitive members fold their *values* into the key automatically; `dataclasses.dataclass` fields fold only their *types* unless you opt in per-field.
+Note the asymmetry: `@qd.data_oriented` primitive members fold their *values* into the key automatically; `dataclasses.dataclass` members fold only their *types* unless you opt in per-member.

From 1de65b9d9b993a371b4bd18ea9de8b3bb6c9ca3e Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:10:39 -0700
Subject: [PATCH 17/66] [Doc] Mirror qd.Template wording for @qd.data_oriented
 primitive members ('baked into kernel')

Replace 'folded into the cache key' jargon (which was undefined and
ambiguous: ndarray dtype info is just keyed, whereas data_oriented
primitive children are also Template-style specialised). Mirror the
existing qd.Template row: primitive member values are 'baked into
kernel'. Use 'included in the cache key' for type-only contributions
(ndarray dtype/ndim/layout, dataclass member types).
---
 docs/source/user_guide/compound_types.md |  2 +-
 docs/source/user_guide/fastcache.md      | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index ea2e18e0ba..ab14552c6c 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -174,7 +174,7 @@ Mixing `qd.field` and `qd.ndarray` members in the same class is also supported.
 
 Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
 
-For how `@qd.kernel(fastcache=True)` interacts with compound types (which member types are supported, when primitive values fold into the cache key, the `dataclasses.dataclass` opt-in footgun, and the `qd.field`-disables behaviour), see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) in the fastcache user guide.
+For how `@qd.kernel(fastcache=True)` interacts with compound types (which member types are supported, when primitive values are baked into the kernel, the `dataclasses.dataclass` opt-in footgun, and the `qd.field`-disables behaviour), see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) in the fastcache user guide.
 
 ## Nesting compatibility
 
diff --git a/docs/source/user_guide/fastcache.md b/docs/source/user_guide/fastcache.md
index 822eb4998d..1d63ebf496 100644
--- a/docs/source/user_guide/fastcache.md
+++ b/docs/source/user_guide/fastcache.md
@@ -96,7 +96,7 @@ Fastcache supports the following parameter types:
 | `torch.Tensor` | Yes | dtype, ndim |
 | `numpy.ndarray` | Yes | dtype, ndim |
 | `dataclasses.dataclass` | Yes | member types recursively; member values if annotated with `FIELD_METADATA_CACHE_VALUE` (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
-| `@qd.data_oriented` objects | Yes | member types recursively; primitive member values folded automatically (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
+| `@qd.data_oriented` objects | Yes | member types recursively; primitive member values baked into kernel (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
 | `qd.Template` primitives (int, float, bool) | Yes | type and value (baked into kernel) |
 | Non-template primitives (int, float, bool) | Yes | type only |
 | `enum.Enum` | Yes | name and value |
@@ -145,17 +145,17 @@ On the first run you'll see `cache_stored=True` but `cache_loaded=False`. On the
 
 ### Compound-type cache keying
 
-The args hasher walks compound-type kernel parameters recursively. For each leaf member it decides what (if anything) to fold into the cache key. The headline rules:
+The args hasher walks compound-type kernel parameters recursively. For each leaf member it decides what (if anything) contributes to the cache key. The headline rules:
 
 **`@qd.data_oriented`:** the walker descends into `vars(obj)`. For each child:
 
-- `qd.ndarray` member — `(dtype, ndim, layout)` folded into the key. Element values are not.
-- Primitive (`int` / `float` / `bool` / `enum.Enum`) member — *value* folded into the key. Two instances of the same class with different primitive member values get different cache entries.
+- `qd.ndarray` member — `(dtype, ndim, layout)` is included in the cache key. Element values are not.
+- Primitive (`int` / `float` / `bool` / `enum.Enum`) member — value is baked into the kernel (same semantics as a `qd.Template` primitive). Two instances of the same class with different primitive member values get different cache entries.
 - Nested `@qd.data_oriented` member — recurses.
 - Nested `dataclasses.dataclass` member — recurses (with the dataclass rules below).
 - `qd.field` member — fastcache is disabled for the entire kernel call. The kernel still runs via normal compilation; a warn-level log line is emitted.
 
-**`dataclasses.dataclass`:** the walker descends into the declared members. For each member, only the *type* is folded into the cache key by default — **not** the value. To include a member's value, annotate it:
+**`dataclasses.dataclass`:** the walker descends into the declared members. For each member, only the *type* is included in the cache key by default — **not** the value. To include a member's value, annotate it:
 
 ```python
 import dataclasses
@@ -169,4 +169,4 @@ class SimConfig:
 
 This is necessary whenever the compiled kernel depends on the member's *value* rather than just its type (for example, when the value is used as a loop bound that the compiler bakes into the generated code). Without the annotation, two `SimConfig` instances with different `num_layers` values would share a fastcache key, and the second instance would silently load a kernel compiled for the wrong value.
 
-Note the asymmetry: `@qd.data_oriented` primitive members fold their *values* into the key automatically; `dataclasses.dataclass` members fold only their *types* unless you opt in per-member.
+Note the asymmetry: `@qd.data_oriented` primitive members are baked into the kernel automatically (same semantics as `qd.Template`); `dataclasses.dataclass` members contribute only their *type* to the cache key unless you opt in per-member.

From a648c3f9d6a1c69eacd79898f21fd5198b7e2ad1 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:11:19 -0700
Subject: [PATCH 18/66] [Doc] @qd.data_oriented row: 'types and values' to
 mirror qd.Template row exactly

---
 docs/source/user_guide/fastcache.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/fastcache.md b/docs/source/user_guide/fastcache.md
index 1d63ebf496..5d4e9381c8 100644
--- a/docs/source/user_guide/fastcache.md
+++ b/docs/source/user_guide/fastcache.md
@@ -96,7 +96,7 @@ Fastcache supports the following parameter types:
 | `torch.Tensor` | Yes | dtype, ndim |
 | `numpy.ndarray` | Yes | dtype, ndim |
 | `dataclasses.dataclass` | Yes | member types recursively; member values if annotated with `FIELD_METADATA_CACHE_VALUE` (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
-| `@qd.data_oriented` objects | Yes | member types recursively; primitive member values baked into kernel (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
+| `@qd.data_oriented` objects | Yes | member types recursively; primitive member types and values baked into kernel (see [Advanced — compound-type cache keying](#compound-type-cache-keying)) |
 | `qd.Template` primitives (int, float, bool) | Yes | type and value (baked into kernel) |
 | Non-template primitives (int, float, bool) | Yes | type only |
 | `enum.Enum` | Yes | name and value |

From a55d3605539646de1f2da9f5400c188bdaa04a94 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:17:28 -0700
Subject: [PATCH 19/66] [Doc] Tighten path-cache stability restriction: actual
 failure modes are delete + late-reassign-with-different-dtype

Previous wording said 'don't add new members after the first kernel
launch'. Empirical results show this is overly broad: adding new ndarray
attributes on later instances of the same class is safe (each instance
gets its own spec entry via per-instance weakref; the compile-time
walker registers all reachable ndarrays). The actual failure modes are:

  (a) Deleting an ndarray attribute that was present on the first
      launch -> AttributeError on the next launch (the cached path
      still does getattr on the missing attribute).

  (b) Reassigning a post-first-walk ndarray attribute (a member that
      wasn't on the first instance walked, was added later, and is now
      re-assigned) to one with a different dtype/ndim -> not detected
      by the id-augmented args_hash invalidation tracker; stale
      compiled kernel is silently reused -> bit-reinterpretation of
      the new storage.

Verified empirically via ~/ais/deskai9/tmp/check_path_cache_stability.py
on cluster (cases A/B safe; C errors; D safe via per-instance weakref;
E silent miscompile - f32 array reassigned over i32 displays the i32
bit pattern as ~4e-45).
---
 docs/source/user_guide/compound_types.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index ab14552c6c..e2a0249ca8 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -214,7 +214,9 @@ A few combinations are still unsupported:
 - **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
 - **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` member type** (see [\*1] above) — errors clearly at compile time.
 - **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
-- **The set of ndarray-bearing members on a `@qd.data_oriented` class is assumed stable across instances.** Declare ndarray members in `__init__`, don't add new members after the first kernel launch on an instance of that class; the path cache is per-class and won't pick up members added later.
+- **Declare all ndarray members on a `@qd.data_oriented` class in `__init__`.** The template-mapper caches the set of ndarray-attribute paths reachable from the first instance walked, per class. Adding *new* ndarray attributes on later instances of the same class is safe — the per-instance weakref in the spec key disambiguates them, and the compile-time walker registers all reachable ndarrays. But:
+  - **Deleting an ndarray attribute** that was present on the first launch raises `AttributeError` on the next launch (the cached path still tries to `getattr` the missing attribute).
+  - **Reassigning a post-first-walk ndarray attribute** (one not present on the first instance walked, then added later and re-assigned) to one with a different `dtype` / `ndim` is *not* detected by the in-memory invalidation tracker. The stale compiled kernel is silently reused, leading to bit-reinterpretation of the new array's storage.
 
 ## qd.dataclass / qd.types.struct
 

From 6667ba63831136a5067316aa06fe44e06e87eb42 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:22:06 -0700
Subject: [PATCH 20/66] [Test] Fix fastcache cross-init tests: filter captured
 launches by kernel name

test_data_oriented_ndarray_fastcache_cross_init was asserting on the LAST
launch_kernel call, but the state.x.to_numpy() call between run(state) and
the assertion launches an internal ndarray_to_ext_arr kernel, which has
is_pure=False and so always has compiled_kernel_data=None. The assertion
therefore captured the wrong launch, and the test failed even though the
fastcache load for the user kernel worked correctly (verified via
src_ll_cache_observations.cache_loaded=True in a debug repro).

Filter the captured list to only the user kernel ('run'). Applied the
same filter to the other two cross-init fastcache tests (which happened
to pass because their assertions came before .to_numpy(), but the filter
makes the pattern robust against future test edits).
---
 tests/python/test_data_oriented_ndarray.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index 7bc55b123d..3fd55f3170 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -301,7 +301,12 @@ def test_data_oriented_ndarray_fastcache_cross_init(tmp_path, monkeypatch):
     captured_compiled_kernel_data = []
 
     def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
-        captured_compiled_kernel_data.append(compiled_kernel_data)
+        # Filter to the user kernel only; .to_numpy() launches an internal
+        # ``ndarray_to_ext_arr`` kernel that is not fastcache-eligible
+        # (is_pure=False) and would always make compiled_kernel_data=None,
+        # masking the actual fastcache behaviour of ``run``.
+        if self.func.__name__ == "run":
+            captured_compiled_kernel_data.append(compiled_kernel_data)
         return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
 
     monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)
@@ -344,7 +349,10 @@ def test_data_oriented_nested_ndarray_fastcache_cross_init(tmp_path, monkeypatch
     captured = []
 
     def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
-        captured.append(compiled_kernel_data)
+        # Filter to the user kernel only; .to_numpy() launches a non-fastcache
+        # internal kernel that would otherwise drown the run-kernel data we care about.
+        if self.func.__name__ == "run":
+            captured.append(compiled_kernel_data)
         return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
 
     monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)
@@ -390,7 +398,10 @@ def test_data_oriented_ndarray_fastcache_dtype_key_distinct(tmp_path, monkeypatc
     captured = []
 
     def launch_kernel(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=None):
-        captured.append(compiled_kernel_data)
+        # Filter to the user kernel only; .to_numpy() launches a non-fastcache
+        # internal kernel that would otherwise drown the run-kernel data we care about.
+        if self.func.__name__ == "run":
+            captured.append(compiled_kernel_data)
         return launch_kernel_orig(self, key, t_kernel, compiled_kernel_data, *args, qd_stream=qd_stream)
 
     monkeypatch.setattr("quadrants.lang.kernel_impl.Kernel.launch_kernel", launch_kernel)

From e9c50b4fcdf6f30e65ce08a1adfaf2ce33fcaadc Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sat, 16 May 2026 11:32:23 -0700
Subject: [PATCH 21/66] [Style] pre-commit auto-fixes: black wrap + ruff
 import-sort

---
 python/quadrants/lang/_template_mapper.py       | 7 ++++++-
 tests/python/test_data_oriented_mixed_combos.py | 1 +
 tests/python/test_data_oriented_ndarray.py      | 1 -
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/quadrants/lang/_template_mapper.py b/python/quadrants/lang/_template_mapper.py
index e4ecf4d83e..fd45b9913e 100644
--- a/python/quadrants/lang/_template_mapper.py
+++ b/python/quadrants/lang/_template_mapper.py
@@ -9,7 +9,11 @@
 
 from .._test_tools import warnings_helper
 from ._kernel_types import ArgsHash
-from ._template_mapper_hotpath import _extract_arg, _primitive_types, _struct_nd_paths_for
+from ._template_mapper_hotpath import (
+    _extract_arg,
+    _primitive_types,
+    _struct_nd_paths_for,
+)
 
 
 def _collect_data_oriented_nd_ids(arg: Any, out: list) -> None:
@@ -24,6 +28,7 @@ def _collect_data_oriented_nd_ids(arg: Any, out: list) -> None:
             v = getattr(v, a)
         out.append(id(v))
 
+
 Key: TypeAlias = tuple[Any, ...]
 
 
diff --git a/tests/python/test_data_oriented_mixed_combos.py b/tests/python/test_data_oriented_mixed_combos.py
index 2dc09c7a38..6f17f2043f 100644
--- a/tests/python/test_data_oriented_mixed_combos.py
+++ b/tests/python/test_data_oriented_mixed_combos.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 import quadrants as qd
+
 from tests import test_utils
 
 
diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index 3fd55f3170..083fd1a19d 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -23,7 +23,6 @@
 
 from tests import test_utils
 
-
 # ---------------------------------------------------------------------------
 # 1. Single raw qd.ndarray attribute (scalar element type).
 # ---------------------------------------------------------------------------

From abf242b40738e127178d054b89504b894d8e5181 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:20:44 -0700
Subject: [PATCH 22/66] [Doc] Move @qd.kernel inside @qd.data_oriented class in
 the ndarray-members example

The intro paragraph says '@qd.data_oriented is designed for classes that
define @qd.kernel methods as class members.' The ndarray-members example
just below was defining the kernel outside the class (taking s: qd.template())
which contradicted the paragraph and was inconsistent with the qd.field
example above it. Move step() inside State as a self-bound @qd.kernel
method.
---
 docs/source/user_guide/compound_types.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index e2a0249ca8..3318e8e55b 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -161,13 +161,13 @@ class State:
         self.x = qd.ndarray(qd.f32, shape=(n,))
         self.v = qd.ndarray(qd.f32, shape=(n,))
 
-@qd.kernel
-def step(s: qd.template()):
-    for i in range(s.x.shape[0]):
-        s.x[i] += s.v[i]
+    @qd.kernel
+    def step(self):
+        for i in range(self.x.shape[0]):
+            self.x[i] += self.v[i]
 
 state = State(100)
-step(state)
+state.step()
 ```
 
 Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.

From 4c27e2e55229ea280d7a7e18651386139cfd4dd2 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:25:17 -0700
Subject: [PATCH 23/66] [Doc] Document primitive members on @qd.data_oriented
 self as template-valued (one compile per distinct value)

Primitive members (int/float/bool/enum) on a @qd.data_oriented class are
read at AST-parse time and baked into the kernel IR. Different instances
with different primitive values each trigger a fresh compilation via the
per-instance weakref in the spec key. Add a short subsection with an
example.
---
 docs/source/user_guide/compound_types.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 3318e8e55b..51063609b9 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -150,6 +150,26 @@ sim.step()
 
 `@qd.data_oriented` objects can also be passed as `qd.Template` parameters to kernels defined outside the class, and they support nesting (one `@qd.data_oriented` struct containing another).
 
+### Primitive members
+
+Primitive members on `self` (e.g. `int`, `float`, `bool`, `enum.Enum`) are supported, but they are treated as **template values**: each distinct primitive value across instances triggers a new kernel compilation, with the value baked into the kernel IR.
+
+```python
+@qd.data_oriented
+class Simulation:
+    def __init__(self, n):
+        self.n = n
+        self.x = qd.ndarray(qd.f32, shape=(n,))
+
+    @qd.kernel
+    def step(self):
+        for i in range(self.n):
+            self.x[i] += 1.0
+
+Simulation(100).step()   # compiles kernel #1 with n=100 baked in
+Simulation(200).step()   # compiles kernel #2 with n=200 baked in
+```
+
 ### ndarray members
 
 `@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members. Subscript access inside kernels works the same as for `dataclasses.dataclass`:

From 1f539e6f19fa95a9b08c5ffc9cf490c3cfab28b3 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:26:31 -0700
Subject: [PATCH 24/66] [Doc] State ndarray-member subscript behaviour directly
 instead of cross-referencing dataclasses.dataclass

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 51063609b9..c3fcba0c49 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -172,7 +172,7 @@ Simulation(200).step()   # compiles kernel #2 with n=200 baked in
 
 ### ndarray members
 
-`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members. Subscript access inside kernels works the same as for `dataclasses.dataclass`:
+`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members. Inside a `@qd.kernel`, `self.x[i]` reads and writes the element of the ndarray member at index `i`; `self.x.shape[d]` is the length along dimension `d`.
 
 ```python
 @qd.data_oriented

From 730cbcba1261903af7280dd30f0c55e9509c44e9 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:27:26 -0700
Subject: [PATCH 25/66] [Doc] Drop 'as with dataclasses.dataclass'
 cross-reference in ndarray-reassign note

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index c3fcba0c49..1740c50059 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -192,7 +192,7 @@ state.step()
 
 Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
 
-Note: as with `dataclasses.dataclass`, reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
+Note: reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
 
 For how `@qd.kernel(fastcache=True)` interacts with compound types (which member types are supported, when primitive values are baked into the kernel, the `dataclasses.dataclass` opt-in footgun, and the `qd.field`-disables behaviour), see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) in the fastcache user guide.
 

From 57e1b958a2866087d14010710b84151012a8f872 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:28:39 -0700
Subject: [PATCH 26/66] [Doc] Simplify fastcache cross-link in
 @qd.data_oriented section: drop dataclass mention

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 1740c50059..4493deb827 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -194,7 +194,7 @@ Mixing `qd.field` and `qd.ndarray` members in the same class is also supported.
 
 Note: reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
 
-For how `@qd.kernel(fastcache=True)` interacts with compound types (which member types are supported, when primitive values are baked into the kernel, the `dataclasses.dataclass` opt-in footgun, and the `qd.field`-disables behaviour), see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) in the fastcache user guide.
+`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes, but is disabled at runtime if any `qd.field` member is reachable from `self`. For details see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying).
 
 ## Nesting compatibility
 

From d4ca2119a40f11ae820072074d54a17d069a415c Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:31:37 -0700
Subject: [PATCH 27/66] [Doc] Drop ndarray-reassign note and tighten fastcache
 cross-link in @qd.data_oriented section

---
 docs/source/user_guide/compound_types.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 4493deb827..841445b8fe 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -192,9 +192,7 @@ state.step()
 
 Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
 
-Note: reassigning an ndarray member between kernel calls (`state.x = other_ndarray`) is allowed; the kernel re-binds against the live value on the next launch. Reassigning to an ndarray of a different `dtype` or `ndim` also works — a fresh kernel is compiled and cached for the new shape.
-
-`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes, but is disabled at runtime if any `qd.field` member is reachable from `self`. For details see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying).
+`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for the cache-keying rules and what disables it.
 
 ## Nesting compatibility
 

From b72a7a7982be1c02519f289e0a85493a69590ffa Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:32:05 -0700
Subject: [PATCH 28/66] [Doc] Drop ndarray subscript-access description in
 @qd.data_oriented section

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 841445b8fe..ece8a75791 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -172,7 +172,7 @@ Simulation(200).step()   # compiles kernel #2 with n=200 baked in
 
 ### ndarray members
 
-`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members. Inside a `@qd.kernel`, `self.x[i]` reads and writes the element of the ndarray member at index `i`; `self.x.shape[d]` is the length along dimension `d`.
+`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members.
 
 ```python
 @qd.data_oriented

From 18ff7bd8d099a163422600604598c647094297f5 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:33:38 -0700
Subject: [PATCH 29/66] [Doc] Promote fastcache cross-link to its own ###
 Fastcache subsection under @qd.data_oriented

---
 docs/source/user_guide/compound_types.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index ece8a75791..63b75da5a3 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -192,6 +192,8 @@ state.step()
 
 Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
 
+### Fastcache
+
 `@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for the cache-keying rules and what disables it.
 
 ## Nesting compatibility

From 33f4744e21167568cb112f1065452bfde7a6ba1e Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:34:56 -0700
Subject: [PATCH 30/66] [Doc] Rename '### ndarray members' to '### Tensor
 members'; cover qd.field, qd.ndarray, qd.tensor uniformly

---
 docs/source/user_guide/compound_types.md | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 63b75da5a3..06fffac4b1 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -170,27 +170,31 @@ Simulation(100).step()   # compiles kernel #1 with n=100 baked in
 Simulation(200).step()   # compiles kernel #2 with n=200 baked in
 ```
 
-### ndarray members
+### Tensor members
 
-`@qd.data_oriented` classes may also hold `qd.ndarray` (and `qd.Vector.ndarray` / `qd.Matrix.ndarray`) members.
+`@qd.data_oriented` classes may hold tensor members of any backend: `qd.field`, `qd.ndarray` (including `qd.Vector.ndarray` / `qd.Matrix.ndarray`), and the unified `qd.tensor` dispatcher (which selects a backend via the `backend=` keyword — see [tensor](tensor.md)).
 
 ```python
 @qd.data_oriented
 class State:
     def __init__(self, n):
-        self.x = qd.ndarray(qd.f32, shape=(n,))
-        self.v = qd.ndarray(qd.f32, shape=(n,))
+        self.n = n
+        self.a = qd.field(qd.f32, shape=n)
+        self.b = qd.ndarray(qd.f32, shape=(n,))
+        self.c = qd.tensor(qd.f32, shape=(n,))
 
     @qd.kernel
     def step(self):
-        for i in range(self.x.shape[0]):
-            self.x[i] += self.v[i]
+        for i in range(self.n):
+            self.a[i] += 1.0
+            self.b[i] += 1.0
+            self.c[i] += 1.0
 
 state = State(100)
 state.step()
 ```
 
-Mixing `qd.field` and `qd.ndarray` members in the same class is also supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with ndarrays inside are walked recursively.
+Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with tensor members inside are walked recursively.
 
 ### Fastcache
 

From 883243eefa82789b2e52cb59e8c851bf8c7061c2 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:35:26 -0700
Subject: [PATCH 31/66] [Doc] @qd.data_oriented Fastcache subsection: spell out
 'disabled for fields'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 06fffac4b1..b4e90d3b7a 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -198,7 +198,7 @@ Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented
 
 ### Fastcache
 
-`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for the cache-keying rules and what disables it.
+`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.
 
 ## Nesting compatibility
 

From 3504250feebac997e14a8c6e87aadd543e8cb504 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:36:05 -0700
Subject: [PATCH 32/66] [Doc] Tensor members: shorten qd.tensor description to
 'or qd.Tensor'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index b4e90d3b7a..75c695096b 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -172,7 +172,7 @@ Simulation(200).step()   # compiles kernel #2 with n=200 baked in
 
 ### Tensor members
 
-`@qd.data_oriented` classes may hold tensor members of any backend: `qd.field`, `qd.ndarray` (including `qd.Vector.ndarray` / `qd.Matrix.ndarray`), and the unified `qd.tensor` dispatcher (which selects a backend via the `backend=` keyword — see [tensor](tensor.md)).
+`@qd.data_oriented` classes may hold tensor members of any backend: `qd.field`, `qd.ndarray` (including `qd.Vector.ndarray` / `qd.Matrix.ndarray`), or `qd.Tensor`.
 
 ```python
 @qd.data_oriented

From cc01339b88062890df7ee9e2b2c6dcaf7d65bd0d Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:36:38 -0700
Subject: [PATCH 33/66] [Doc] Tensor members: simplify nested-container
 sentence to 'Nested @qd.data_oriented containers are supported.'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 75c695096b..2d36c0c08c 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -194,7 +194,7 @@ state = State(100)
 state.step()
 ```
 
-Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented` (or nested `dataclasses.dataclass`) containers with tensor members inside are walked recursively.
+Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented` containers are supported.
 
 ### Fastcache
 

From df3113ef3ca4d0e24acf89dc10aa3ba66be3f0f1 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:37:15 -0700
Subject: [PATCH 34/66] [Doc] Fastcache subsection: 'methods of
 @qd.data_oriented classes'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 2d36c0c08c..a579f5b632 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -198,7 +198,7 @@ Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented
 
 ### Fastcache
 
-`@qd.kernel(fastcache=True)` is supported on `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.
+`@qd.kernel(fastcache=True)` is supported on methods of `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.
 
 ## Nesting compatibility
 

From 7f5fd12f5e44e963cd7c78efa8de5a3c2298cadc Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:37:43 -0700
Subject: [PATCH 35/66] [Doc] Tensor members: drop qd.Vector.ndarray /
 qd.Matrix.ndarray parenthetical

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index a579f5b632..80a047a1ac 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -172,7 +172,7 @@ Simulation(200).step()   # compiles kernel #2 with n=200 baked in
 
 ### Tensor members
 
-`@qd.data_oriented` classes may hold tensor members of any backend: `qd.field`, `qd.ndarray` (including `qd.Vector.ndarray` / `qd.Matrix.ndarray`), or `qd.Tensor`.
+`@qd.data_oriented` classes may hold tensor members of any backend: `qd.field`, `qd.ndarray`, or `qd.Tensor`.
 
 ```python
 @qd.data_oriented

From e7fafeb08f4aa4da68d9e259ea98bf3f4c04f10c Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:38:12 -0700
Subject: [PATCH 36/66] [Doc] Tensor members: drop the mixing-backends +
 nesting trailer sentence

---
 docs/source/user_guide/compound_types.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 80a047a1ac..1d9b3c983a 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -194,8 +194,6 @@ state = State(100)
 state.step()
 ```
 
-Mixing tensor backends in the same class is supported. Nested `@qd.data_oriented` containers are supported.
-
 ### Fastcache
 
 `@qd.kernel(fastcache=True)` is supported on methods of `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.

From f9a35df79ace82bf873acc92e1143e40c02b4205 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:38:52 -0700
Subject: [PATCH 37/66] [Doc] Restrictions: drop redundant 'A few combinations
 are still unsupported:' lead-in

---
 docs/source/user_guide/compound_types.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 1d9b3c983a..618a24922d 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -231,8 +231,6 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ### Restrictions
 
-A few combinations are still unsupported:
-
 - **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
 - **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` member type** (see [\*1] above) — errors clearly at compile time.
 - **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.

From d336dcd3f6be154eedaa26a5622b38ee64c6255b Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:39:45 -0700
Subject: [PATCH 38/66] [Doc] @qd.dataclass section opener: cut to the
 constraint

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 618a24922d..024f7d18d8 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -240,7 +240,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-`@qd.dataclass` is a Quadrants-native `StructType` decorator. The function-form factory `qd.types.struct(name1=type1, ...)` produces the same `StructType`. Both can only contain fields and primitive types (and other `StructType` members), not ndarrays.
+`@qd.dataclass` can only contain fields and primitive types (and other `StructType` members), not ndarrays.
 
 ```python
 @qd.dataclass

From 4c5f6226c6d1eed6265798a214dc0401481811e3 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:40:31 -0700
Subject: [PATCH 39/66] [Doc] Remove top-level Recommendation section

---
 docs/source/user_guide/compound_types.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 024f7d18d8..c040ffd8b8 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -17,10 +17,6 @@ The following compound types are available:
 
 See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
 
-## Recommendation
-
-**Use `dataclasses.dataclass` for new code.** It supports both fields and ndarrays, can be nested, and uses standard Python — no Quadrants-specific decorator needed.
-
 The other compound types exist for historical reasons.
 
 ## dataclasses.dataclass

From 56a439925f1519b9c22797e38747935fe541b539 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:42:31 -0700
Subject: [PATCH 40/66] [Doc] Expand @qd.dataclass section: what it does, when
 to use it, constraints

---
 docs/source/user_guide/compound_types.md | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index c040ffd8b8..9d1b54ce21 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -236,7 +236,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-`@qd.dataclass` can only contain fields and primitive types (and other `StructType` members), not ndarrays.
+`@qd.dataclass` is a class decorator that produces a Quadrants-native `StructType` from a class with type-annotated members and optional `@qd.func` methods. The function-form factory `qd.types.struct(name1=type1, ...)` produces the same `StructType`.
 
 ```python
 @qd.dataclass
@@ -244,4 +244,22 @@ class Particle:
     pos: qd.types.vector(3, qd.f32)
     vel: qd.types.vector(3, qd.f32)
     mass: qd.f32
+
+    @qd.func
+    def kinetic_energy(self):
+        return 0.5 * self.mass * self.vel.dot(self.vel)
+
+particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
 ```
+
+Use `@qd.dataclass` when:
+
+- You want to allocate a Quadrants field of struct values (`Type.field(shape=...)`) — typically the SOA layout for field-backed code.
+- You want `@qd.func` methods callable from kernels via `instance.method(...)` syntax.
+
+Constraints:
+
+- Members must be primitives, vectors / matrices, or other `StructType`s — **no ndarrays**.
+- No default values on members.
+
+For plain Python aggregation, mixing in ndarrays, or passing kernel-side state without the field-allocation requirement, prefer `dataclasses.dataclass` (top of this page) or `@qd.data_oriented`.

From ef5f8a60a875bd2c0635c3ecc149d573b6da6585 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:43:52 -0700
Subject: [PATCH 41/66] [Doc] @qd.dataclass section: drop use-cases /
 constraints / cross-reference paragraphs

---
 docs/source/user_guide/compound_types.md | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 9d1b54ce21..fd7344a022 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -251,15 +251,3 @@ class Particle:
 
 particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
 ```
-
-Use `@qd.dataclass` when:
-
-- You want to allocate a Quadrants field of struct values (`Type.field(shape=...)`) — typically the SOA layout for field-backed code.
-- You want `@qd.func` methods callable from kernels via `instance.method(...)` syntax.
-
-Constraints:
-
-- Members must be primitives, vectors / matrices, or other `StructType`s — **no ndarrays**.
-- No default values on members.
-
-For plain Python aggregation, mixing in ndarrays, or passing kernel-side state without the field-allocation requirement, prefer `dataclasses.dataclass` (top of this page) or `@qd.data_oriented`.

From 06580f1a0524f0ac0f5d2a7b4c6f252775c38e8f Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:47:41 -0700
Subject: [PATCH 42/66] [Doc] @qd.dataclass section opener: explain the
 kernel-side vs python-side distinction and the no-ndarray reason (by-value
 embedding)

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index fd7344a022..68320a1516 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -236,7 +236,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-`@qd.dataclass` is a class decorator that produces a Quadrants-native `StructType` from a class with type-annotated members and optional `@qd.func` methods. The function-form factory `qd.types.struct(name1=type1, ...)` produces the same `StructType`.
+Unlike `@qd.data_oriented` and `dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves; the former types are only used for structure on the Python side, before compilation. `@qd.dataclass` can be used as the element type of tensors. One key downside of `@qd.dataclass` is that it can only be used with fields and primitives, not with ndarrays. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
 ```python
 @qd.dataclass

From 8899357e4b6428f2d99e46e045996da839b82a28 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:48:39 -0700
Subject: [PATCH 43/66] [Doc] Restore verbatim prose for the @qd.struct vs
 other-compound-types distinction

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 68320a1516..7e56e8b492 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -236,7 +236,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-Unlike `@qd.data_oriented` and `dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves; the former types are only used for structure on the Python side, before compilation. `@qd.dataclass` can be used as the element type of tensors. One key downside of `@qd.dataclass` is that it can only be used with fields and primitives, not with ndarrays. This is because tensors are embedded in the struct by value, not as a reference pointer.
+Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.struct` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.struct` can be used as the element type of tensors. One key downside of `@qd.struct` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
 ```python
 @qd.dataclass

From 8fef5074b94dba76cdf62ae03d51001b97f99eea Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:51:03 -0700
Subject: [PATCH 44/66] [Doc] Replace @qd.struct with @qd.dataclass in opener
 prose (actual API name)

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 7e56e8b492..d3cd65e489 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -236,7 +236,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.struct` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.struct` can be used as the element type of tensors. One key downside of `@qd.struct` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
+Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of tensors. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
 ```python
 @qd.dataclass

From 92f5fe1aab400de0fc35e4debeeaad4c45dfe364 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:51:31 -0700
Subject: [PATCH 45/66] [Doc] @qd.dataclass: 'element type of fields' not
 'tensors'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index d3cd65e489..cb8ed475d9 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -236,7 +236,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 ## qd.dataclass / qd.types.struct
 
-Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of tensors. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
+Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
 ```python
 @qd.dataclass

From 9ea8e5b3b409cbb5b2e09c79a1cdc3f141df4ca4 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:52:13 -0700
Subject: [PATCH 46/66] [Doc] @qd.dataclass: add sentences about @qd.func
 methods and qd.types.struct factory

---
 docs/source/user_guide/compound_types.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index cb8ed475d9..d56a5c32ff 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -238,6 +238,10 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
+Methods on a `@qd.dataclass` may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).
+
+`qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType`, but without a class body — useful when the members are computed dynamically.
+
 ```python
 @qd.dataclass
 class Particle:

From 6ff0848bd6aba9a97fd01a8a734c3d2d42cb6293 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:53:20 -0700
Subject: [PATCH 47/66] [Doc] @qd.dataclass methods sentence: 'Methods can be
 added to ... and ...'

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index d56a5c32ff..f1c6a58808 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -238,7 +238,7 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
-Methods on a `@qd.dataclass` may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).
+Methods can be added to a `@qd.dataclass` and may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).
 
 `qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType`, but without a class body — useful when the members are computed dynamically.
 

From fd8cd0ae07da149c04c8866801af08c16fd22341 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:54:07 -0700
Subject: [PATCH 48/66] [Doc] @qd.dataclass section: move qd.types.struct
 paragraph to end with its own example

---
 docs/source/user_guide/compound_types.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index f1c6a58808..d5caad46a9 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -240,8 +240,6 @@ Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates
 
 Methods can be added to a `@qd.dataclass` and may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).
 
-`qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType`, but without a class body — useful when the members are computed dynamically.
-
 ```python
 @qd.dataclass
 class Particle:
@@ -255,3 +253,11 @@ class Particle:
 
 particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
 ```
+
+`qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType` without a class body — useful when the members are computed dynamically.
+
+```python
+vec3 = qd.types.vector(3, qd.f32)
+Particle = qd.types.struct(pos=vec3, vel=vec3, mass=qd.f32)
+particles = Particle.field(shape=(N,))
+```

From 004cd9a15a05294406e82f00ca01166774f04986 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:55:09 -0700
Subject: [PATCH 49/66] [Doc] qd.types.struct sentence: drop 'useful when
 members are computed dynamically' tail

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index d5caad46a9..957607a83a 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -254,7 +254,7 @@ class Particle:
 particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
 ```
 
-`qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType` without a class body — useful when the members are computed dynamically.
+`qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType` without a class body.
 
 ```python
 vec3 = qd.types.vector(3, qd.f32)

From bf85e4e2302d0a88e925992f5a66745b82b47faf Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:56:43 -0700
Subject: [PATCH 50/66] [Doc] @qd.dataclass: split into bare-struct example,
 then methods + @qd.func kernel-call example

---
 docs/source/user_guide/compound_types.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 957607a83a..3b82e4b42e 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -238,6 +238,16 @@ For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `
 
 Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
 
+```python
+@qd.dataclass
+class Particle:
+    pos: qd.types.vector(3, qd.f32)
+    vel: qd.types.vector(3, qd.f32)
+    mass: qd.f32
+
+particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
+```
+
 Methods can be added to a `@qd.dataclass` and may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).
 
 ```python
@@ -251,7 +261,14 @@ class Particle:
     def kinetic_energy(self):
         return 0.5 * self.mass * self.vel.dot(self.vel)
 
-particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
+particles = Particle.field(shape=(N,))
+
+@qd.kernel
+def total_ke() -> qd.f32:
+    total = 0.0
+    for i in range(N):
+        total += particles[i].kinetic_energy()
+    return total
 ```
 
 `qd.types.struct(name1=type1, ...)` is the function-form equivalent of `@qd.dataclass`: it builds the same `StructType` without a class body.

From ccaae546e3ae20f893994dcf9e950ed7da26a54b Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 01:58:04 -0700
Subject: [PATCH 51/66] [Doc] First @qd.dataclass example uses AOS layout (the
 unique-to-StructType capability)

Replaces the misleading 'SOA-style' comment. AOS is already the default
for StructType-backed fields, but the example now passes
layout=qd.Layout.AOS explicitly to highlight that an AOS-of-N-cells
allocation is the capability that distinguishes @qd.dataclass /
qd.types.struct from the other compound types (@qd.data_oriented and
dataclasses.dataclass cannot be the element type of a tensor).
---
 docs/source/user_guide/compound_types.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 3b82e4b42e..5526d4bed6 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -245,7 +245,10 @@ class Particle:
     vel: qd.types.vector(3, qd.f32)
     mass: qd.f32
 
-particles = Particle.field(shape=(N,))  # SOA-style allocation of N Particles
+# AOS layout: each element of `particles` is a (pos, vel, mass) cell contiguous in memory.
+# Only possible because Particle is a StructType — `@qd.data_oriented` and
+# `dataclasses.dataclass` containers can't be the element type of a tensor.
+particles = Particle.field(shape=(N,), layout=qd.Layout.AOS)
 ```
 
 Methods can be added to a `@qd.dataclass` and may be decorated with `@qd.func` so they can be called from kernels via `instance.method(...)` syntax (the call is inlined at compile time, like any other `@qd.func`).

From 820c01a2940b33b8ff4ed0cfeebeb8490c326d8f Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:00:58 -0700
Subject: [PATCH 52/66] [Doc] Move 'Nesting compatibility' section to end of
 compound_types.md

---
 docs/source/user_guide/compound_types.md | 80 ++++++++++++------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 5526d4bed6..ce2d4decab 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -194,46 +194,6 @@ state.step()
 
 `@qd.kernel(fastcache=True)` is supported on methods of `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.
 
-## Nesting compatibility
-
-This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.
-
-| Container ↓ &nbsp;&nbsp;&nbsp; / &nbsp;&nbsp;&nbsp; Member → | `qd.ndarray` | `qd.field` | primitive | `dataclasses.dataclass` | `@qd.data_oriented` | `@qd.dataclass` |
-|---|:---:|:---:|:---:|:---:|:---:|:---:|
-| `dataclasses.dataclass`         | yes | yes | yes | yes | yes [\*1] | yes |
-| `@qd.data_oriented`             | yes | yes | yes | yes | yes      | yes |
-| `@qd.dataclass`                 | no  | yes | yes | no  | no       | yes |
-
-[\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose member type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-member annotations — its members are walked from the live instance, which only happens on the template path.
-
-### Outer kernel-arg annotation
-
-The outermost annotation you put on the kernel parameter determines how the container is walked:
-
-| Annotation | Kernel-arg walker | Notes |
-|---|---|---|
-| `qd.types.NDArray[...]`           | ndarray slot                                       | leaf-level only |
-| `MyDataclass` (dataclass type)    | per-member flatten using annotations               | needs every member to have a quadrants-typed annotation |
-| `qd.template()`                   | value-driven walk of `vars(self)` / dataclass members | supports the full nesting matrix above |
-
-Two practical consequences:
-
-- **Containers with `@qd.data_oriented` anywhere in the tree** must be passed via `qd.template()` (or be the `self` of a `@qd.kernel` method on a `@qd.data_oriented` class). Using a typed-dataclass annotation on the outermost arg errors.
-- **A non-frozen `dataclasses.dataclass`** can be passed via the typed-dataclass annotation, but cannot be the outer `qd.template()` arg — `qd.template()` uses the instance as a dict key inside the template-mapper and a non-frozen dataclass has `__hash__ = None`. Add `frozen=True` if you need to pass it as `qd.template()` (for example, when it holds `@qd.data_oriented` children).
-
-### Reassigning ndarray members
-
-For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `qd.template()`, reassigning an ndarray member between kernel launches is supported, including changes to `dtype`, `ndim`, or layout. A new specialised kernel is compiled and cached for the new shape; subsequent launches with the original shape continue to use the original cached kernel.
-
-### Restrictions
-
-- **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
-- **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` member type** (see [\*1] above) — errors clearly at compile time.
-- **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
-- **Declare all ndarray members on a `@qd.data_oriented` class in `__init__`.** The template-mapper caches the set of ndarray-attribute paths reachable from the first instance walked, per class. Adding *new* ndarray attributes on later instances of the same class is safe — the per-instance weakref in the spec key disambiguates them, and the compile-time walker registers all reachable ndarrays. But:
-  - **Deleting an ndarray attribute** that was present on the first launch raises `AttributeError` on the next launch (the cached path still tries to `getattr` the missing attribute).
-  - **Reassigning a post-first-walk ndarray attribute** (one not present on the first instance walked, then added later and re-assigned) to one with a different `dtype` / `ndim` is *not* detected by the in-memory invalidation tracker. The stale compiled kernel is silently reused, leading to bit-reinterpretation of the new array's storage.
-
 ## qd.dataclass / qd.types.struct
 
 Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
@@ -281,3 +241,43 @@ vec3 = qd.types.vector(3, qd.f32)
 Particle = qd.types.struct(pos=vec3, vel=vec3, mass=qd.f32)
 particles = Particle.field(shape=(N,))
 ```
+
+## Nesting compatibility
+
+This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.
+
+| Container ↓ &nbsp;&nbsp;&nbsp; / &nbsp;&nbsp;&nbsp; Member → | `qd.ndarray` | `qd.field` | primitive | `dataclasses.dataclass` | `@qd.data_oriented` | `@qd.dataclass` |
+|---|:---:|:---:|:---:|:---:|:---:|:---:|
+| `dataclasses.dataclass`         | yes | yes | yes | yes | yes [\*1] | yes |
+| `@qd.data_oriented`             | yes | yes | yes | yes | yes      | yes |
+| `@qd.dataclass`                 | no  | yes | yes | no  | no       | yes |
+
+[\*1] A `dataclasses.dataclass` may *hold* a `@qd.data_oriented` member, but the **outer kernel-arg annotation** must be `qd.template()`, not the dataclass type itself. Passing a typed-dataclass kernel arg (`def k(s: Outer)`) whose member type is a `@qd.data_oriented` class raises a clear `QuadrantsSyntaxError` at compile time pointing you to `qd.template()`. The reason: typed-dataclass kernel args are flattened from annotations, but `@qd.data_oriented` carries no per-member annotations — its members are walked from the live instance, which only happens on the template path.
+
+### Outer kernel-arg annotation
+
+The outermost annotation you put on the kernel parameter determines how the container is walked:
+
+| Annotation | Kernel-arg walker | Notes |
+|---|---|---|
+| `qd.types.NDArray[...]`           | ndarray slot                                       | leaf-level only |
+| `MyDataclass` (dataclass type)    | per-member flatten using annotations               | needs every member to have a quadrants-typed annotation |
+| `qd.template()`                   | value-driven walk of `vars(self)` / dataclass members | supports the full nesting matrix above |
+
+Two practical consequences:
+
+- **Containers with `@qd.data_oriented` anywhere in the tree** must be passed via `qd.template()` (or be the `self` of a `@qd.kernel` method on a `@qd.data_oriented` class). Using a typed-dataclass annotation on the outermost arg errors.
+- **A non-frozen `dataclasses.dataclass`** can be passed via the typed-dataclass annotation, but cannot be the outer `qd.template()` arg — `qd.template()` uses the instance as a dict key inside the template-mapper and a non-frozen dataclass has `__hash__ = None`. Add `frozen=True` if you need to pass it as `qd.template()` (for example, when it holds `@qd.data_oriented` children).
+
+### Reassigning ndarray members
+
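+For both `dataclasses.dataclass` and `@qd.data_oriented` containers passed via `qd.template()`, reassigning an ndarray member between kernel launches is supported, including changes to `dtype`, `ndim`, or layout. A new specialised kernel is compiled and cached for the new `dtype` / `ndim` / layout combination; subsequent launches with the original combination continue to use the original cached kernel. The one exception is an ndarray attribute added to a `@qd.data_oriented` instance after its class was first walked — see [Restrictions](#restrictions) below.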
+
+### Restrictions
+
+- **`@qd.dataclass` (the Quadrants `StructType` decorator) cannot contain ndarrays.** This is a legacy field-only type. Use `dataclasses.dataclass` or `@qd.data_oriented` instead. (The function-form factory `qd.types.struct(...)` produces the same `StructType` and has the same restrictions.)
+- **A typed-dataclass kernel-arg annotation cannot have a `@qd.data_oriented` member type** (see [\*1] above) — errors clearly at compile time.
+- **An outer `qd.template()` arg of dataclass type must be `frozen=True`** — non-frozen dataclasses are unhashable and the template-mapper cannot use them as cache keys.
+- **Declare all ndarray members on a `@qd.data_oriented` class in `__init__`.** The template-mapper caches the set of ndarray-attribute paths reachable from the first instance walked, per class. Adding *new* ndarray attributes on later instances of the same class is safe — the per-instance weakref in the spec key disambiguates them, and the compile-time walker registers all reachable ndarrays. But:
+  - **Deleting an ndarray attribute** that was present on the first launch raises `AttributeError` on the next launch (the cached path still tries to `getattr` the missing attribute).
+  - **Reassigning a post-first-walk ndarray attribute** (one that was not present on the first instance walked, was added later, and is then reassigned) to an ndarray with a different `dtype` / `ndim` is *not* detected by the in-memory invalidation tracker. The stale compiled kernel is silently reused, leading to bit-reinterpretation of the new array's storage.

From 06d2e86257f4f80452fc0df415515be715a9bb56 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:04:55 -0700
Subject: [PATCH 53/66] [Doc] Overview table: dataclasses.dataclass supports
 differentiation (verified empirically)

Old cell said 'no [*1]' with no footnote defined anywhere in the doc.
Empirically verified with ~/ais/deskai9/tmp/check_dataclass_diff.py
on cluster: both typed-dataclass kernel-arg annotation (def k(s: State))
and qd.template() annotation produce correct gradients matching the
non-dataclass baseline for kernels operating on qd.ndarray members.
---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index ce2d4decab..456f501db8 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -11,7 +11,7 @@ The following compound types are available:
 
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
 |------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|
-| `dataclasses.dataclass`            | yes                         | yes                       | yes                  | yes                | yes            | no [*1]                   |
+| `dataclasses.dataclass`            | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
 | `@qd.data_oriented`               | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
 | `@qd.dataclass` / `qd.types.struct` | yes                       | yes                       | no                   | yes                | yes            | yes                       |
 

From f7dd090e339cd2ce9725ea8eb6e0ae99611ba20f Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:10:48 -0700
Subject: [PATCH 54/66] [Test] AD through dataclasses.dataclass with ndarray,
 field, and qd.tensor members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pins that gradients flow correctly when kernel arguments are wrapped in plain
Python dataclasses across the tensor types Quadrants exposes:

- qd.ndarray via typed-dataclass annotation + qd.template() path (kernel.grad()).
- qd.field via qd.template() path (qd.ad.Tape).
- qd.tensor(backend=NDARRAY) — same path as qd.ndarray.
- qd.tensor(backend=FIELD) — xfail (pre-existing TensorWrapper.__getitem__ not
  unwrapped through dataclass member access; forward kernels fail identically;
  unrelated to AD).
- Mixed dataclass holding both qd.ndarray and qd.field members; ndarray-side
  gradient verified via kernel.grad() while a field is written in the same
  kernel.

Backs the 'supports differentiation? yes' cell for dataclasses.dataclass in
the compound_types.md overview table (06d2e8625).
---
 tests/python/test_ad_dataclass.py | 304 ++++++++++++++++++++++++++++++
 1 file changed, 304 insertions(+)
 create mode 100644 tests/python/test_ad_dataclass.py

diff --git a/tests/python/test_ad_dataclass.py b/tests/python/test_ad_dataclass.py
new file mode 100644
index 0000000000..7f3e364c15
--- /dev/null
+++ b/tests/python/test_ad_dataclass.py
@@ -0,0 +1,304 @@
+"""Differentiation through ``dataclasses.dataclass`` containers.
+
+These tests pin that gradients flow correctly when kernel arguments are wrapped in plain Python
+dataclasses, across the tensor types Quadrants exposes:
+
+* ``qd.ndarray`` — typed-dataclass annotation + ``qd.template()`` path; gradient via ``kernel.grad()``.
+* ``qd.field`` — ``qd.template()`` path; gradient via ``qd.ad.Tape``.
+* ``qd.tensor(backend=NDARRAY)`` — same path as ``qd.ndarray``; the dispatcher returns a wrapper
+  whose ndarray ``_impl`` is unwrapped by the dataclass-annotation infrastructure.
+* ``qd.tensor(backend=FIELD)`` — xfail (pre-existing, not AD-specific): ``TensorWrapper`` is not
+  unwrapped through dataclass member access for kernel-side subscript. Use ``qd.field`` directly.
+* mixed — single dataclass holding both a ``qd.ndarray`` and a ``qd.field`` member.
+
+Pattern mirrors ``test_ad_ndarray.py`` (ndarray) and ``test_ad_basics.py`` (field). See
+``docs/source/user_guide/compound_types.md`` overview table — column "supports differentiation?"
+for ``dataclasses.dataclass``.
+"""
+
+import dataclasses
+
+import numpy as np
+import pytest
+
+import quadrants as qd
+
+from tests import test_utils
+
+archs_support_ndarray_ad = [qd.cpu, qd.cuda, qd.amdgpu]
+
+
+# ----------------------------------------------------------------------------
+# qd.ndarray members
+# ----------------------------------------------------------------------------
+
+
+@test_utils.test(arch=archs_support_ndarray_ad, default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_ndarray_typed_annotation():
+    """dataclass holding qd.ndarrays, passed via typed-dataclass kernel-arg annotation."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a: qd.types.NDArray[qd.f64, 1]
+        b: qd.types.NDArray[qd.f64, 1]
+        p: qd.types.NDArray[qd.f64, 1]
+
+    @qd.kernel
+    def compute(s: State):
+        for i in range(N):
+            s.p[i] = s.a[i] * s.b[i] + 1.0
+
+    a = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    b = qd.ndarray(qd.f64, shape=N)
+    p = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    for i in range(N):
+        a[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a=a, b=b, p=p)
+    compute(state)
+    np.testing.assert_allclose(p.to_numpy(), 3.0 * np.arange(1, N + 1) + 1.0)
+
+    for i in range(N):
+        p.grad[i] = 1.0
+
+    compute.grad(state)
+    np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
+
+
+@test_utils.test(arch=archs_support_ndarray_ad, default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_ndarray_template():
+    """dataclass holding qd.ndarrays, passed via qd.template()."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a: qd.types.NDArray[qd.f64, 1]
+        b: qd.types.NDArray[qd.f64, 1]
+        p: qd.types.NDArray[qd.f64, 1]
+
+    @qd.kernel
+    def compute(s: qd.template()):
+        for i in range(N):
+            s.p[i] = s.a[i] * s.b[i] + 1.0
+
+    a = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    b = qd.ndarray(qd.f64, shape=N)
+    p = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    for i in range(N):
+        a[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a=a, b=b, p=p)
+    compute(state)
+    np.testing.assert_allclose(p.to_numpy(), 3.0 * np.arange(1, N + 1) + 1.0)
+
+    for i in range(N):
+        p.grad[i] = 1.0
+
+    compute.grad(state)
+    np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
+
+
+# ----------------------------------------------------------------------------
+# qd.field members
+# ----------------------------------------------------------------------------
+
+
+@test_utils.test(default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_field_template_tape():
+    """dataclass holding qd.fields, passed via qd.template(), gradient via qd.ad.Tape."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a: object
+        b: object
+        loss: object
+
+    a = qd.field(qd.f64, shape=(N,), needs_grad=True)
+    b = qd.field(qd.f64, shape=(N,))
+    loss = qd.field(qd.f64, shape=(), needs_grad=True)
+    for i in range(N):
+        a[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a=a, b=b, loss=loss)
+
+    @qd.kernel
+    def compute(s: qd.template()):
+        for i in range(N):
+            s.loss[None] += s.a[i] * s.b[i]
+
+    with qd.ad.Tape(loss):
+        compute(state)
+
+    # loss = sum_i a[i] * b[i]; dloss/da[i] = b[i]
+    np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
+    expected_loss = float((3.0 * np.arange(1, N + 1)).sum())
+    np.testing.assert_allclose(loss[None], expected_loss)
+
+
+# ----------------------------------------------------------------------------
+# qd.tensor dispatcher
+# ----------------------------------------------------------------------------
+
+
+@test_utils.test(arch=archs_support_ndarray_ad, default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_tensor_ndarray_backend():
+    """dataclass holding qd.tensor(..., backend=NDARRAY) members; ndarray-AD via kernel.grad()."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a: qd.types.NDArray[qd.f64, 1]
+        b: qd.types.NDArray[qd.f64, 1]
+        p: qd.types.NDArray[qd.f64, 1]
+
+    @qd.kernel
+    def compute(s: State):
+        for i in range(N):
+            s.p[i] = s.a[i] * s.b[i] + 1.0
+
+    a = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.NDARRAY, needs_grad=True)
+    b = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.NDARRAY)
+    p = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.NDARRAY, needs_grad=True)
+    for i in range(N):
+        a[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a=a, b=b, p=p)
+    compute(state)
+    np.testing.assert_allclose(p.to_numpy(), 3.0 * np.arange(1, N + 1) + 1.0)
+
+    for i in range(N):
+        p.grad[i] = 1.0
+
+    compute.grad(state)
+    np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
+
+
+@pytest.mark.xfail(
+    reason=(
+        "Pre-existing TensorWrapper limitation (not AD-specific): qd.tensor(backend=FIELD) returned "
+        "by the qd.tensor dispatcher is wrapped in a TensorWrapper whose __getitem__ is not unwrapped "
+        "when accessed via dataclass member (`s.x[i]`). Forward-only kernels fail identically. "
+        "Workaround: use qd.field directly. See test_ad_dataclass_field_template_tape."
+    ),
+    strict=True,
+)
+@test_utils.test(default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_tensor_field_backend_tape():
+    """dataclass holding qd.tensor(..., backend=FIELD) members; field-AD via qd.ad.Tape."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a: object
+        b: object
+        loss: object
+
+    a = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.FIELD, needs_grad=True)
+    b = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.FIELD)
+    loss = qd.tensor(qd.f64, shape=(), backend=qd.Backend.FIELD, needs_grad=True)
+    for i in range(N):
+        a[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a=a, b=b, loss=loss)
+
+    @qd.kernel
+    def compute(s: qd.template()):
+        for i in range(N):
+            s.loss[None] += s.a[i] * s.b[i]
+
+    with qd.ad.Tape(loss):
+        compute(state)
+
+    np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
+
+
+# ----------------------------------------------------------------------------
+# Mixed: ndarray + field + tensor in the same dataclass
+# ----------------------------------------------------------------------------
+
+
+@test_utils.test(arch=archs_support_ndarray_ad, default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_mixed_ndarray_and_tensor_ndarray_backend():
+    """Single dataclass holds one qd.ndarray member and one qd.tensor(NDARRAY) member; verify the
+    kernel can read/write both and that gradients flow through both."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a_nd: qd.types.NDArray[qd.f64, 1]
+        a_tens: qd.types.NDArray[qd.f64, 1]
+        b: qd.types.NDArray[qd.f64, 1]
+        p: qd.types.NDArray[qd.f64, 1]
+
+    @qd.kernel
+    def compute(s: State):
+        for i in range(N):
+            s.p[i] = (s.a_nd[i] + s.a_tens[i]) * s.b[i]
+
+    a_nd = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    a_tens = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.NDARRAY, needs_grad=True)
+    b = qd.ndarray(qd.f64, shape=N)
+    p = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    for i in range(N):
+        a_nd[i] = 2.0
+        a_tens[i] = 5.0
+        b[i] = float(i + 1)
+
+    state = State(a_nd=a_nd, a_tens=a_tens, b=b, p=p)
+    compute(state)
+    np.testing.assert_allclose(p.to_numpy(), 7.0 * np.arange(1, N + 1))
+
+    for i in range(N):
+        p.grad[i] = 1.0
+
+    compute.grad(state)
+    # dp/da_nd[i] = b[i] ; dp/da_tens[i] = b[i]
+    np.testing.assert_allclose(a_nd.grad.to_numpy(), b.to_numpy())
+    np.testing.assert_allclose(a_tens.grad.to_numpy(), b.to_numpy())
+
+
+@test_utils.test(arch=archs_support_ndarray_ad, default_fp=qd.f64, require=qd.extension.adstack)
+def test_ad_dataclass_mixed_ndarray_and_field_in_same_class():
+    """Single dataclass holds both qd.ndarray members and a qd.field member. The kernel reads the
+    ndarrays and writes to an ndarray and to the field. Differentiation is checked through the ndarray
+    path via ``kernel.grad()`` (the field is created without ``needs_grad`` and is only written)."""
+    N = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class State:
+        a_nd: qd.types.NDArray[qd.f64, 1]
+        out_field: object
+        b: qd.types.NDArray[qd.f64, 1]
+        p: qd.types.NDArray[qd.f64, 1]
+
+    @qd.kernel
+    def compute(s: qd.template()):
+        for i in range(N):
+            s.p[i] = s.a_nd[i] * s.b[i]
+            s.out_field[i] = s.a_nd[i] + s.b[i]
+
+    a_nd = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    out_field = qd.field(qd.f64, shape=(N,))
+    b = qd.ndarray(qd.f64, shape=N)
+    p = qd.ndarray(qd.f64, shape=N, needs_grad=True)
+    for i in range(N):
+        a_nd[i] = 3.0
+        b[i] = float(i + 1)
+
+    state = State(a_nd=a_nd, out_field=out_field, b=b, p=p)
+    compute(state)
+    np.testing.assert_allclose(p.to_numpy(), 3.0 * np.arange(1, N + 1))
+    np.testing.assert_allclose(out_field.to_numpy(), 3.0 + np.arange(1, N + 1))
+
+    for i in range(N):
+        p.grad[i] = 1.0
+
+    compute.grad(state)
+    np.testing.assert_allclose(a_nd.grad.to_numpy(), b.to_numpy())

From 8c0377c5f9940dc4916c702f8ab42239211e41e6 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:11:39 -0700
Subject: [PATCH 55/66] [Doc] compound_types: rephrase intro bullets to
 describe each type's role

Replaces the prior "recommended / for kernel methods / legacy" framing with
objective one-liners: dataclasses.dataclass = lightweight container that can
hold ndarrays; @qd.data_oriented = self-style objects with @qd.kernel methods;
@qd.dataclass = embedded-in-kernel structures (no ndarrays).
---
 docs/source/user_guide/compound_types.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 456f501db8..137b9a0214 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -5,9 +5,9 @@
 It can be useful to combine multiple ndarrays or fields together into a single struct-like object that can be passed into kernels, and into @qd.func's.
 
 The following compound types are available:
-- `dataclasses.dataclass` — **recommended**
-- `@qd.data_oriented` — for classes that define `@qd.kernel` methods
-- `@qd.dataclass` (and its function-form equivalent `qd.types.struct(...)`) — legacy Quadrants `StructType`, field-only
+- `dataclasses.dataclass` — lightweight container of tensors and primitives; can contain ndarrays
+- `@qd.data_oriented` — for creating objects with `self` that define `@qd.kernel` methods
+- `@qd.dataclass` — for structures that are embedded into the kernel, and don't contain ndarrays
 
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
 |------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|

From 71a53daf53b6bdb5f12c47bea8d9d10eced0148f Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:13:16 -0700
Subject: [PATCH 56/66] [Doc] compound_types: prefix dataclasses.dataclass with
 @ in intro/table for consistency

The two other entries (@qd.data_oriented, @qd.dataclass) already use the @
decorator prefix; aligning dataclasses.dataclass with the same form.
---
 docs/source/user_guide/compound_types.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 137b9a0214..07d54f209b 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -5,15 +5,15 @@
 It can be useful to combine multiple ndarrays or fields together into a single struct-like object that can be passed into kernels, and into @qd.func's.
 
 The following compound types are available:
-- `dataclasses.dataclass` — lightweight container of tensors and primitives; can contain ndarrays
+- `@dataclasses.dataclass` — lightweight container of tensors and primitives; can contain ndarrays
 - `@qd.data_oriented` — for creating objects with `self` that define `@qd.kernel` methods
 - `@qd.dataclass` — for structures that are embedded into the kernel, and don't contain ndarrays
 
 | type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
 |------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|
-| `dataclasses.dataclass`            | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
-| `@qd.data_oriented`               | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
-| `@qd.dataclass` / `qd.types.struct` | yes                       | yes                       | no                   | yes                | yes            | yes                       |
+| `@dataclasses.dataclass`           | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
+| `@qd.data_oriented`                | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
+| `@qd.dataclass` / `qd.types.struct`| yes                         | yes                       | no                   | yes                | yes            | yes                       |
 
 See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
 

From 46fef24866bdcb5869ae5b590fc7b1ed86b17695 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:21:23 -0700
Subject: [PATCH 57/66] [Test] AD dataclass: tensor(FIELD) member works when
 annotated as qd.Tensor

Empirical follow-up to f7dd090e3: the failure was not a TensorWrapper-in-kernel
limitation, it was using `object` as the dataclass-member annotation. With
`qd.Tensor` (or `qd.template()`) member annotations,
populate_global_vars_from_dataclass + FlattenAttributeNameTransformer unwraps
the wrapper and rewrites s.a to the flat name bound to the underlying impl, so
kernel-side s.a[i] hits
impl.subscript with a bare Field/Ndarray. The previous xfail was wrong; this
combination is fully supported. qd.ad.Tape(loss) requires the bare impl, so
unwrap loss at the Tape boundary (Tape's API contract is field/scalar).
---
 tests/python/test_ad_dataclass.py | 32 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tests/python/test_ad_dataclass.py b/tests/python/test_ad_dataclass.py
index 7f3e364c15..dfadb24a9b 100644
--- a/tests/python/test_ad_dataclass.py
+++ b/tests/python/test_ad_dataclass.py
@@ -7,8 +7,9 @@
 * ``qd.field`` — ``qd.template()`` path; gradient via ``qd.ad.Tape``.
 * ``qd.tensor(backend=NDARRAY)`` — same path as ``qd.ndarray``; the dispatcher returns a wrapper
   whose ndarray ``_impl`` is unwrapped by the dataclass-annotation infrastructure.
-* ``qd.tensor(backend=FIELD)`` — xfail (pre-existing, not AD-specific): ``TensorWrapper`` is not
-  unwrapped through dataclass member access for kernel-side subscript. Use ``qd.field`` directly.
+* ``qd.tensor(backend=FIELD)`` — works when the dataclass member is annotated ``qd.Tensor``
+  (or ``qd.template()``). With ``object`` / no annotation the wrapper survives into kernel scope
+  and host-side ``__getitem__`` asserts.
 * mixed — single dataclass holding both a ``qd.ndarray`` and a ``qd.field`` member.
 
 Pattern mirrors ``test_ad_ndarray.py`` (ndarray) and ``test_ad_basics.py`` (field). See
@@ -19,7 +20,6 @@
 import dataclasses
 
 import numpy as np
-import pytest
 
 import quadrants as qd
 
@@ -179,25 +179,23 @@ def compute(s: State):
     np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
 
 
-@pytest.mark.xfail(
-    reason=(
-        "Pre-existing TensorWrapper limitation (not AD-specific): qd.tensor(backend=FIELD) returned "
-        "by the qd.tensor dispatcher is wrapped in a TensorWrapper whose __getitem__ is not unwrapped "
-        "when accessed via dataclass member (`s.x[i]`). Forward-only kernels fail identically. "
-        "Workaround: use qd.field directly. See test_ad_dataclass_field_template_tape."
-    ),
-    strict=True,
-)
 @test_utils.test(default_fp=qd.f64, require=qd.extension.adstack)
 def test_ad_dataclass_tensor_field_backend_tape():
-    """dataclass holding qd.tensor(..., backend=FIELD) members; field-AD via qd.ad.Tape."""
+    """dataclass holding qd.tensor(..., backend=FIELD) members; field-AD via qd.ad.Tape.
+
+    Note: members must be annotated as ``qd.Tensor`` (not ``object``) when the value is a
+    ``qd.tensor(...)`` wrapper. The typed-dataclass / template machinery uses the member
+    annotation to decide whether to unwrap the wrapper into its underlying impl before the
+    kernel sees ``s.x[i]``. With ``object`` annotation the wrapper survives into kernel scope
+    and its host-side ``__getitem__`` asserts.
+    """
     N = 5
 
     @dataclasses.dataclass(frozen=True)
     class State:
-        a: object
-        b: object
-        loss: object
+        a: qd.Tensor
+        b: qd.Tensor
+        loss: qd.Tensor
 
     a = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.FIELD, needs_grad=True)
     b = qd.tensor(qd.f64, shape=(N,), backend=qd.Backend.FIELD)
@@ -213,7 +211,7 @@ def compute(s: qd.template()):
         for i in range(N):
             s.loss[None] += s.a[i] * s.b[i]
 
-    with qd.ad.Tape(loss):
+    with qd.ad.Tape(loss._unwrap()):
         compute(state)
 
     np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())

From 18f995b797b09c5540a0c6708089e2b16627ec15 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:22:29 -0700
Subject: [PATCH 58/66] [Doc] tensor: note qd.Tensor is also the
 dataclass-member annotation

---
 docs/source/user_guide/tensor.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docs/source/user_guide/tensor.md b/docs/source/user_guide/tensor.md
index f4b0ab25c3..359413d8a3 100644
--- a/docs/source/user_guide/tensor.md
+++ b/docs/source/user_guide/tensor.md
@@ -203,6 +203,15 @@ fill(b)   # ndarray branch
 
 The kernel argument is unwrapped to the bare impl before the template-mapper / AST sees it, so kernel bodies still write `x[i, j]` and pay no per-call cost for the wrapper.
 
+`qd.Tensor` is also the right annotation when storing a tensor as a `dataclasses.dataclass` member:
+
+```python
+@dataclasses.dataclass
+class State:
+    a: qd.Tensor
+    b: qd.Tensor
+```
+
 ## Pickle
 
 `qd.Tensor` objects are picklable on **both** backends, including under non-identity layouts. Round-trip (pickle then unpickle) preserves the canonical data, the dtype, the shape, and the layout:

From 3ce0ab089163cf1d891af7d8f435ac24220cd4f0 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:25:25 -0700
Subject: [PATCH 59/66] [Doc] compound_types: add 'Under the hood' subsection
 for each type

One short paragraph per type describing the compile-time mechanism:

- dataclasses.dataclass: walked via dataclasses.fields, members flattened into
  kernel globals + AST rewritten; ndarrays registered as kernel params.
- @qd.data_oriented: walked via vars(self), no annotations needed; primitives
  baked into IR; per-class path cache keeps the walk cheap.
- @qd.dataclass / qd.types.struct: real StructType, members by value, can be
  element type of a field / tensor; @qd.func methods inlined.
---
 docs/source/user_guide/compound_types.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 07d54f209b..f2582306f7 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -125,6 +125,10 @@ Sub-struct passing supports:
 
 Note: assigning a sub-struct to a local variable and then passing it (`t = s.inner; touch_inner(t)`) is **not** supported. Pass the attribute access directly at the call site.
 
+### Under the hood
+
+At kernel compile, the dataclass is walked via `dataclasses.fields(...)`. Each member is bound into the kernel's globals under a flat name (`s.a` becomes `s__a`) and attribute accesses in the kernel AST are rewritten to use those flat names. Ndarray members are additionally registered as kernel params so kernel-side subscript hits a real ndarray. `qd.Tensor` wrappers are unwrapped to their underlying impl before injection. The container itself is never materialised on the kernel side — only its leaves are.
+
 ## qd.data_oriented
 
 `@qd.data_oriented` is designed for classes that define `@qd.kernel` methods as class members. It wraps these methods to correctly bind `self` during kernel compilation.
@@ -194,6 +198,10 @@ state.step()
 
 `@qd.kernel(fastcache=True)` is supported on methods of `@qd.data_oriented` classes, but is disabled for fields; see [Advanced — compound-type cache keying](fastcache.md#compound-type-cache-keying) for more information.
 
+### Under the hood
+
+`self` is passed via `qd.template()` semantics. At compile time the live instance is walked through `vars(self)` — no member annotations needed. Ndarrays anywhere in the tree (including in nested `@qd.data_oriented` or `dataclasses.dataclass` children) are registered as kernel params. Primitives are read live and baked into the kernel IR, so each distinct value compiles a new specialised kernel. A per-class cache of attribute paths keeps the per-call walk cheap.
+
 ## qd.dataclass / qd.types.struct
 
 Unlike `@qd.data_oriented` and `@dataclasses.dataclass`, `@qd.dataclass` creates a struct that is available within the kernels themselves. The former types are only used for structure on the python side, before compilation. `@qd.dataclass` can be used as the element type of fields. One key downside of `@qd.dataclass` is that they can only be used with fields and primitives, not with ndarray. This is because tensors are embedded in the struct by value, not as a reference pointer.
@@ -242,6 +250,10 @@ Particle = qd.types.struct(pos=vec3, vel=vec3, mass=qd.f32)
 particles = Particle.field(shape=(N,))
 ```
 
+### Under the hood
+
+`@qd.dataclass` produces a Quadrants `StructType` — a real value type with a fixed memory layout. Members are stored by value, which is why ndarrays (heap-backed, dynamic shape) can't be members but fields, primitives, vectors, matrices, and other `StructType`s can. The struct can be the element type of a `qd.field` / `qd.tensor` (SOA or AOS layout). `@qd.func` methods on the class are inlined at the call site like any other `@qd.func`.
+
 ## Nesting compatibility
 
 This table summarises which member types are allowed inside which container type. "yes" means the member is walked correctly when the container is passed to a kernel; "no" means the member is ignored or the combination raises an error.

From 35be370a50f4e809f210ccb48e4330d959b13799 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:26:22 -0700
Subject: [PATCH 60/66] [Doc] compound_types: rewrite 'Under the hood'
 subsections at a higher level

Drop internal-implementation references (dataclasses.fields, vars(self), flat
names, AST rewrite, etc.). Just the high-level story per type:

- dataclasses.dataclass: Python-only; compiler flattens members into kernel
  params; container has no kernel-side representation; members read-only.
- @qd.data_oriented: same flatten-into-params story; no annotations needed;
  primitive members baked in as constants (one kernel per distinct value).
- @qd.dataclass: real kernel-side type with fixed memory layout; by-value
  storage; can be the element type of a tensor; explains the no-ndarrays
  rule (heap-allocated, dynamic shape).
---
 docs/source/user_guide/compound_types.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index f2582306f7..b507835546 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -127,7 +127,7 @@ Note: assigning a sub-struct to a local variable and then passing it (`t = s.inn
 
 ### Under the hood
 
-At kernel compile, the dataclass is walked via `dataclasses.fields(...)`. Each member is bound into the kernel's globals under a flat name (`s.a` becomes `s__a`) and attribute accesses in the kernel AST are rewritten to use those flat names. Ndarray members are additionally registered as kernel params so kernel-side subscript hits a real ndarray. `qd.Tensor` wrappers are unwrapped to their underlying impl before injection. The container itself is never materialised on the kernel side — only its leaves are.
+A `dataclasses.dataclass` is a Python-only container. The compiler reads it at compile time and flattens its members into individual kernel parameters — the container itself has no memory layout and doesn't exist on the kernel side. That's why members are read-only: the values are captured once at compile time and re-assigning them afterwards has no effect on running kernels.
 
 ## qd.data_oriented
 
@@ -200,7 +200,7 @@ state.step()
 
 ### Under the hood
 
-`self` is passed via `qd.template()` semantics. At compile time the live instance is walked through `vars(self)` — no member annotations needed. Ndarrays anywhere in the tree (including in nested `@qd.data_oriented` or `dataclasses.dataclass` children) are registered as kernel params. Primitives are read live and baked into the kernel IR, so each distinct value compiles a new specialised kernel. A per-class cache of attribute paths keeps the per-call walk cheap.
+Like `dataclasses.dataclass`, a `@qd.data_oriented` object is Python-only — the compiler flattens it into individual kernel parameters and the object itself has no kernel-side representation. Unlike `dataclasses.dataclass`, it needs no member annotations: the compiler reads the live instance's attributes directly. Primitive members are baked into the kernel as constants, so each distinct primitive value compiles a new specialised kernel.
 
 ## qd.dataclass / qd.types.struct
 
@@ -252,7 +252,7 @@ particles = Particle.field(shape=(N,))
 
 ### Under the hood
 
-`@qd.dataclass` produces a Quadrants `StructType` — a real value type with a fixed memory layout. Members are stored by value, which is why ndarrays (heap-backed, dynamic shape) can't be members but fields, primitives, vectors, matrices, and other `StructType`s can. The struct can be the element type of a `qd.field` / `qd.tensor` (SOA or AOS layout). `@qd.func` methods on the class are inlined at the call site like any other `@qd.func`.
+Unlike the other two compound types, `@qd.dataclass` is a real kernel-side type with a fixed memory layout. Each instance is laid out contiguously in memory, members are stored by value, and a tensor of the struct can be allocated (`Particle.field(...)`). Storing by value is also why ndarrays can't be members — ndarrays are heap-allocated buffers with dynamic shape and don't fit into a fixed-size cell.
 
 ## Nesting compatibility
 

From 94e455a7878fe83513478a9948257765c859b20d Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:27:12 -0700
Subject: [PATCH 61/66] [Doc] compound_types: drop 'once' from compile-time
 capture phrasing

---
 docs/source/user_guide/compound_types.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index b507835546..5885fdd64e 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -127,7 +127,7 @@ Note: assigning a sub-struct to a local variable and then passing it (`t = s.inn
 
 ### Under the hood
 
-A `dataclasses.dataclass` is a Python-only container. The compiler reads it at compile time and flattens its members into individual kernel parameters — the container itself has no memory layout and doesn't exist on the kernel side. That's why members are read-only: the values are captured once at compile time and re-assigning them afterwards has no effect on running kernels.
+A `dataclasses.dataclass` is a Python-only container. The compiler reads it at compile time and flattens its members into individual kernel parameters — the container itself has no memory layout and doesn't exist on the kernel side. That's why members are read-only: the values are captured at compile time and re-assigning them afterwards has no effect on running kernels.
 
 ## qd.data_oriented
 

From 31b27d7434dd4ae7859d03ee63290f6b70163f53 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:31:19 -0700
Subject: [PATCH 62/66] [Doc] compound_types: replace overview table with
 differentiating one

Old table was all-yes across most columns and didn't help the reader pick
between the three types. New table has 5 rows that actually distinguish:

- kernel-side representation (flattened-away vs real type)
- tensor-element-type eligibility
- ndarray support
- @qd.kernel methods on self
- member declaration style

The per-member-type nesting matrix lower in the doc still covers the
detailed allowances.
---
 docs/source/user_guide/compound_types.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index 5885fdd64e..e380a3745d 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -9,11 +9,13 @@ The following compound types are available:
 - `@qd.data_oriented` — for creating objects with `self` that define `@qd.kernel` methods
 - `@qd.dataclass` — for structures that are embedded into the kernel, and don't contain ndarrays
 
-| type                               | can be passed to qd.kernel? | can be passed to qd.func? | can contain ndarray? | can contain field? | can be nested? | supports differentiation? |
-|------------------------------------|:---------------------------:|:-------------------------:|:--------------------:|:------------------:|:--------------:|:-------------------------:|
-| `@dataclasses.dataclass`           | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
-| `@qd.data_oriented`                | yes                         | yes                       | yes                  | yes                | yes            | yes                       |
-| `@qd.dataclass` / `qd.types.struct`| yes                         | yes                       | no                   | yes                | yes            | yes                       |
+| property                            | `@dataclasses.dataclass`     | `@qd.data_oriented`              | `@qd.dataclass`                     |
+|-------------------------------------|:----------------------------:|:--------------------------------:|:-----------------------------------:|
+| Kernel-side representation          | none (flattened away)        | none (flattened away)            | real type with fixed memory layout  |
+| Can be tensor element type          | no                           | no                               | yes                                 |
+| Can hold ndarrays                   | yes                          | yes                              | no                                  |
+| `@qd.kernel` methods on `self`      | no                           | yes                              | no                                  |
+| Member declaration                  | type-annotated class fields  | live attributes (no annotations) | type-annotated class fields         |
 
 See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
 

From 36dc933600a8653cd6ee5c45134bfbf3289eb066 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Sun, 17 May 2026 02:31:35 -0700
Subject: [PATCH 63/66] [Doc] compound_types: drop 'historical reasons' line

---
 docs/source/user_guide/compound_types.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/source/user_guide/compound_types.md b/docs/source/user_guide/compound_types.md
index e380a3745d..7b942e4cab 100644
--- a/docs/source/user_guide/compound_types.md
+++ b/docs/source/user_guide/compound_types.md
@@ -19,8 +19,6 @@ The following compound types are available:
 
 See [Nesting compatibility](#nesting-compatibility) below for a per-container × per-member-type breakdown, including the constraints on the outer kernel-arg annotation and ndarray reassignment.
 
-The other compound types exist for historical reasons.
-
 ## dataclasses.dataclass
 
 `dataclasses.dataclass` allows you to create structs containing:

From 07dc4863b3a4562122c45afe626301439f3bb1ce Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Mon, 18 May 2026 01:46:09 -0700
Subject: [PATCH 64/66] [Fix] _build_struct_nd_paths: handle NamedTuple via
 _asdict() fallback

NamedTuples (decorated as ``@qd.data_oriented``) have no instance ``__dict__``,
so ``obj.__dict__.items()`` raises ``AttributeError: 'Geom' object has no
attribute '__dict__'``. Try ``_asdict()`` first and fall back to ``__dict__``
only when ``_asdict`` is absent, mirroring the fallback already used in
``args_hasher.stringify_obj_type``'s data_oriented branch.

Pins ``test_args_hasher_named_tuple`` (added in this branch).
---
 python/quadrants/lang/_template_mapper_hotpath.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/quadrants/lang/_template_mapper_hotpath.py b/python/quadrants/lang/_template_mapper_hotpath.py
index 20910c0d4e..6df1b54358 100644
--- a/python/quadrants/lang/_template_mapper_hotpath.py
+++ b/python/quadrants/lang/_template_mapper_hotpath.py
@@ -86,7 +86,14 @@ def _build_struct_nd_paths(obj: Any, prefix: tuple, out: list) -> None:
     if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
         children = ((f.name, getattr(obj, f.name)) for f in dataclasses.fields(obj))
     else:
-        children = obj.__dict__.items()
+        # ``NamedTuple`` (decorated as ``@qd.data_oriented``) has no instance ``__dict__``, so try ``_asdict()`` first;
+        # it materialises a dict of the named fields. Objects without ``_asdict`` fall through to ``__dict__``. Mirrors
+        # the fallback in ``args_hasher.stringify_obj_type`` so the per-class path cache here picks up ndarray members
+        # on NamedTuples too (regression covered by ``test_args_hasher_named_tuple``).
+        try:
+            children = obj._asdict().items()
+        except AttributeError:
+            children = obj.__dict__.items()
     for k, v in children:
         chain = prefix + (k,)
         if type(v) in _TENSOR_WRAPPER_TYPES:

From 3aa4fe103c8fb91f3e76fdc9794f61285858a2b5 Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Mon, 18 May 2026 06:26:12 -0400
Subject: [PATCH 65/66] [Fix] test_ad_dataclass: require data64 extension for
 f64 tests

Two tests using default_fp=qd.f64 were missing the data64 extension
requirement, causing SPIR-V crashes on Vulkan/Metal backends.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 tests/python/test_ad_dataclass.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/test_ad_dataclass.py b/tests/python/test_ad_dataclass.py
index dfadb24a9b..87c1f177f5 100644
--- a/tests/python/test_ad_dataclass.py
+++ b/tests/python/test_ad_dataclass.py
@@ -106,7 +106,7 @@ def compute(s: qd.template()):
 # ----------------------------------------------------------------------------
 
 
-@test_utils.test(default_fp=qd.f64, require=qd.extension.adstack)
+@test_utils.test(default_fp=qd.f64, require=[qd.extension.adstack, qd.extension.data64])
 def test_ad_dataclass_field_template_tape():
     """dataclass holding qd.fields, passed via qd.template(), gradient via qd.ad.Tape."""
     N = 5
@@ -179,7 +179,7 @@ def compute(s: State):
     np.testing.assert_allclose(a.grad.to_numpy(), b.to_numpy())
 
 
-@test_utils.test(default_fp=qd.f64, require=qd.extension.adstack)
+@test_utils.test(default_fp=qd.f64, require=[qd.extension.adstack, qd.extension.data64])
 def test_ad_dataclass_tensor_field_backend_tape():
     """dataclass holding qd.tensor(..., backend=FIELD) members; field-AD via qd.ad.Tape.
 

From 89bb005e5bbb952cf2af881b5244d05e0eebba0d Mon Sep 17 00:00:00 2001
From: Hugh Perkins <hughperkins@gmail.com>
Date: Mon, 18 May 2026 09:37:31 -0700
Subject: [PATCH 66/66] [Style] test docstrings: reflow at 120c per repo
 line-width

Three module/function docstring paragraphs in the new test files were wrapped near 80-100c,
flagged by the AI-based wrap checker. Reflow at 120c to match the repo convention.
---
 tests/python/test_ad_dataclass.py          | 13 ++++++-------
 tests/python/test_data_oriented_ndarray.py |  3 +--
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/tests/python/test_ad_dataclass.py b/tests/python/test_ad_dataclass.py
index 87c1f177f5..d82b1523f9 100644
--- a/tests/python/test_ad_dataclass.py
+++ b/tests/python/test_ad_dataclass.py
@@ -1,7 +1,7 @@
 """Differentiation through ``dataclasses.dataclass`` containers.
 
-These tests pin that gradients flow correctly when kernel arguments are wrapped in plain Python
-dataclasses, across the tensor types Quadrants exposes:
+These tests pin that gradients flow correctly when kernel arguments are wrapped in plain Python dataclasses, across the
+tensor types Quadrants exposes:
 
 * ``qd.ndarray`` — typed-dataclass annotation + ``qd.template()`` path; gradient via ``kernel.grad()``.
 * ``qd.field`` — ``qd.template()`` path; gradient via ``qd.ad.Tape``.
@@ -183,11 +183,10 @@ def compute(s: State):
 def test_ad_dataclass_tensor_field_backend_tape():
     """dataclass holding qd.tensor(..., backend=FIELD) members; field-AD via qd.ad.Tape.
 
-    Note: members must be annotated as ``qd.Tensor`` (not ``object``) when the value is a
-    ``qd.tensor(...)`` wrapper. The typed-dataclass / template machinery uses the member
-    annotation to decide whether to unwrap the wrapper into its underlying impl before the
-    kernel sees ``s.x[i]``. With ``object`` annotation the wrapper survives into kernel scope
-    and its host-side ``__getitem__`` asserts.
+    Note: members must be annotated as ``qd.Tensor`` (not ``object``) when the value is a ``qd.tensor(...)`` wrapper.
+    The typed-dataclass / template machinery uses the member annotation to decide whether to unwrap the wrapper into
+    its underlying impl before the kernel sees ``s.x[i]``. With ``object`` annotation the wrapper survives into kernel
+    scope and its host-side ``__getitem__`` asserts.
     """
     N = 5
 
diff --git a/tests/python/test_data_oriented_ndarray.py b/tests/python/test_data_oriented_ndarray.py
index 083fd1a19d..df10c958f5 100644
--- a/tests/python/test_data_oriented_ndarray.py
+++ b/tests/python/test_data_oriented_ndarray.py
@@ -7,8 +7,7 @@
 *supported* route, and the ndarray-in-struct infrastructure added by ``#561 [Type] Tensor 24``
 (2026-04-28) — specifically ``_predeclare_struct_ndarrays`` in
 ``python/quadrants/lang/ast/ast_transformers/function_def_transformer.py`` — explicitly walks both
-``dataclasses.is_dataclass(val)`` and ``hasattr(val, "__dict__")`` containers, the latter being the
-data_oriented case.
+``dataclasses.is_dataclass(val)`` and ``hasattr(val, "__dict__")`` containers, the latter being the data_oriented case.
 
 This file pins what actually works, and documents the gaps. See
 ``perso_hugh/doc/data_oriented_ndarray.md`` for the design analysis.