dfa1 · dfa1 · Jun 19, 2026 · Jun 19, 2026
diff --git a/docs/adr/0016-vortex-arrow-bridge.md b/docs/adr/0016-vortex-arrow-bridge.md
@@ -115,8 +115,7 @@ Three pieces of work beyond just handing over the existing mmap slices:
 2. **Lazy materialisation.** Lazy arrays (ZigZag/FoR/ALP/Dict/RLE) store the
    *encoded* form, which is not the Arrow values layout, so they must be materialised
    into a contiguous LE segment first. This is exactly the producer step that
-   `ArraySegments.of(...)` (or a future `Array.materialize(arena)` delegation seam,
-   see below) performs, so the internal materialise path feeds the `values` buffer
+   `Array.materialize(arena)` performs (see below), so it feeds the `values` buffer
    directly. Primitive values, VarBin data+offsets, and StringView are already
    Arrow-shaped (zero-copy).
 3. **Lifetime / release contract.** Buffers are zero-copy slices of the mmap'd file
@@ -127,20 +126,28 @@ Three pieces of work beyond just handing over the existing mmap slices:
    consumer calls `release` is a use-after-unmap → native segfault, not a Java
    exception. This is the highest-risk part.
 
-### Relationship to the internal materialise seam
-
-`ArraySegments.of(Array, SegmentAllocator)` already centralises "turn any array
-(lazy or eager) into a contiguous LE primitive segment", and currently re-states each
-encoding's decode formula (ZigZag/FoR/ALP) in a large switch separate from the
-per-element accessor on the lazy array. A standalone refactor — moving that bulk
-materialisation onto the array types as an `Array.materialize(SegmentAllocator)`
-delegation (mirroring the existing `Array.limited(...)` pattern, kept on a
-package-private seam to avoid widening the public API) — stands on its own as a
-locality cleanup. It is **not** an Arrow feature, but it is the natural producer of
-the Arrow `values` buffer, so Option B should build on it rather than duplicate it.
-The contiguous LE segment it yields already matches Arrow's primitive values-buffer
-layout; the gap to a full Arrow array is validity + offsets + children, per the table
-above.
+### Relationship to the `Array.materialize` seam (shipped)
+
+The bulk-materialisation seam Option B builds on now exists:
+`Array.materialize(SegmentAllocator)` — a pure abstract method (mirroring the existing
+`Array.limited(...)` polymorphism) that turns any array, lazy or eager, into a contiguous
+LE primitive segment. Each type owns its path: segment-backed arrays return their buffer
+zero-copy, the `Lazy*` variants apply their inlined decode formula (ZigZag/FoR/ALP) in a
+vectorisable loop next to their per-element accessor, chunked/dict arrays concat/gather,
+and the families with no primary segment (struct, list, variant, byte-parts decimal, null,
+unknown) throw.
+
+This is **not** an Arrow feature — but it is the natural producer of the Arrow `values`
+buffer, so Option B builds on it. The contiguous LE segment it yields already matches
+Arrow's primitive values-buffer layout. Two gaps remain before it is a full Arrow array,
+both per the table above: validity + offsets + children; and the broadcast edge — a constant
+column materialises to a single-element buffer (`length != elementCount`), which
+`materialize()` returns as-is, so the Arrow producer must expand it to `length` values.
+
+`materialize` is intentionally part of the public `Array` contract (not a package-private
+seam): it is the documented way to obtain a column's contiguous primitive buffer, and a
+future `vortex-arrow` module in a separate package consumes it without further API
+widening.
 
 ### Option C — No bridge; document manual conversion
 

diff --git a/...n/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java b/...n/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java
@@ -12,7 +12,6 @@
 import io.github.dfa1.vortex.core.DType;
 import io.github.dfa1.vortex.core.PType;
 import io.github.dfa1.vortex.reader.array.Array;
-import io.github.dfa1.vortex.reader.array.ArraySegments;
 import io.github.dfa1.vortex.reader.array.DoubleArray;
 import io.github.dfa1.vortex.reader.array.LongArray;
 import io.github.dfa1.vortex.reader.ReadRegistry;
@@ -130,7 +129,7 @@ private static List<JavaChunk> scanAll(VortexReader vf,
     /// into a heap primitive array — long[]/int[]/double[]/float[]/short[]/byte[].
     private static Object snapshotArray(Array arr) {
         var ptype = ((DType.Primitive) arr.dtype()).ptype();
-        var seg = ArraySegments.of(arr, Arena.ofAuto());
+        var seg = arr.materialize(Arena.ofAuto());
         return switch (ptype) {
             case I64, U64 -> seg.toArray(ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN));
             case I32, U32 -> seg.toArray(ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN));

diff --git a/.../src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java b/.../src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java
@@ -9,7 +9,6 @@
 import dev.vortex.arrow.ArrowAllocation;
 import dev.vortex.jni.NativeLoader;
 import io.github.dfa1.vortex.reader.array.Array;
-import io.github.dfa1.vortex.reader.array.ArraySegments;
 import io.github.dfa1.vortex.reader.ReadRegistry;
 import io.github.dfa1.vortex.reader.VortexReader;
 import io.github.dfa1.vortex.reader.Chunk;
@@ -181,7 +180,7 @@ private long scanJava() throws IOException {
             while (iter.hasNext()) {
                 try (Chunk c = iter.next()) {
                     Array arr = c.columns().get("c0");
-                    MemorySegment buf = ArraySegments.of(arr, Arena.ofAuto());
+                    MemorySegment buf = arr.materialize(Arena.ofAuto());
                     long count = buf.byteSize() / Long.BYTES;
                     for (long i = 0; i < count; i++) {
                         sum += buf.getAtIndex(LE_LONG, i);

diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java
@@ -2,7 +2,6 @@
 
 import io.github.dfa1.vortex.core.VortexException;
 import io.github.dfa1.vortex.reader.array.Array;
-import io.github.dfa1.vortex.reader.array.ArraySegments;
 import io.github.dfa1.vortex.reader.array.UnknownArray;
 import io.github.dfa1.vortex.encoding.EncodingId;
 import io.github.dfa1.vortex.reader.decode.ArrayNode;
@@ -101,7 +100,7 @@ public MemorySegment decodeAsSegment(DecodeContext ctx) {
             case UnknownArrayNode _ -> null;
         };
         if (decoder != null) {
-            return ArraySegments.of(decoder.decode(ctx), ctx.arena());
+            return decoder.decode(ctx).materialize(ctx.arena());
         }
         String id = switch (node) {
             case KnownArrayNode k -> k.encodingId().id();

diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java
@@ -603,7 +603,7 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator
             // than the claimed rowCount. Full-decode encodings (e.g. bitpacked) already
             // wrote n * elemBytes to the arena during decodeLayout above, so their buffer
             // matches n.
-            MemorySegment codesSeg = ArraySegments.of(codes, arena);
+            MemorySegment codesSeg = codes.materialize(arena);
             long bufferCodes = codesSeg.byteSize() / (long) codesPType.byteSize();
             if (bufferCodes < n) {
                 throw new VortexException(EncodingId.VORTEX_DICT,
@@ -624,7 +624,7 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator
         }
         // Non-Utf8, non-Primitive dict — e.g. extension types backed by VarBin. Fall through
         // to the existing string expansion for compatibility.
-        MemorySegment codesSegFallback = ArraySegments.of(codes, arena);
+        MemorySegment codesSegFallback = codes.materialize(arena);
         long bufferCodesFallback = codesSegFallback.byteSize() / (long) codesPType.byteSize();
         if (bufferCodesFallback < n) {
             throw new VortexException(EncodingId.VORTEX_DICT,
@@ -641,6 +641,9 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator
     /// @param codes      the decoded codes array
     /// @param codesPType code ptype reported by the dict layout metadata
     /// @param n          claimed dict row count
+    // ArraySegments is deprecated-for-removal; this guard is its only caller and moves to
+    // the decode-limits layer with it.
+    @SuppressWarnings("removal")
     private static void validateDictCodesCapacity(Array codes, PType codesPType, long n) {
         Optional<MemorySegment> maybeSeg = ArraySegments.trySegment(codes);
         if (maybeSeg.isEmpty()) {

diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java
@@ -2,6 +2,9 @@
 
 import io.github.dfa1.vortex.core.DType;
 
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SegmentAllocator;
+
 /// Decoded columnar data. Concrete subtypes specialise element access for the JIT;
 /// each covers a specific dtype family.
 ///
@@ -41,6 +44,24 @@ public sealed interface Array
     /// @return an array of length `rows`
     Array limited(long rows);
 
+    /// Materialises this array into its primary backing [MemorySegment],
+    /// allocating from `arena` for lazy variants.
+    ///
+    /// Segment-backed arrays (the `Materialized*` records, `VarBinArray`,
+    /// `GenericArray`, `LazyDecimalArray`) return their existing buffer with no
+    /// copy. Lazy primitive arrays decode element-by-element, the `Lazy*`
+    /// frame-of-reference / zigzag / ALP variants apply their inlined formula in a
+    /// vectorisable loop, and composite arrays (chunked, dict) concatenate or gather
+    /// their children. This is the single materialisation contract behind
+    /// [io.github.dfa1.vortex.reader.decode.DecodeContext#materialize(Array)].
+    ///
+    /// Array families with no row-addressable primary segment (struct, list, variant,
+    /// byte-parts decimal, null, unknown) throw [io.github.dfa1.vortex.core.VortexException].
+    ///
+    /// @param arena allocator used to materialise lazy variants
+    /// @return the primary [MemorySegment]; a constant column yields one element, not `length`
+    MemorySegment materialize(SegmentAllocator arena);
+
     /// Limits `arr` to its first `rows` elements (semantically `min(length, rows)`),
     /// returning it unchanged when it already fits. Single guard shared by the scan
     /// layer and the composite subtypes that recurse into children, so the