From f32593d5dec6be19a338759f633bba9618fd0cf7 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 22 Jun 2026 22:26:39 +0200 Subject: [PATCH] refactor(reader): hoist Materialized* boilerplate into a shared base The eight buffer-backed Materialized*Array classes each repeated the same cold plumbing: the dtype/length/buffer triple, the trivial dtype()/length() getters, and the zero-copy materialize()/segmentIfPresent() contract. Extract it into a package-private AbstractMaterializedArray that each leaf now extends. The base deliberately does NOT implement Array (that interface is sealed to the typed element families) and does NOT hoist the hot getX/fold/forEach loops: those stay monomorphic and branch-split in the leaf classes so C2 keeps vectorising them (CLAUDE.md hot-loop rule). Only the cold boilerplate moves. Net -206 LOC, kills the SonarQube duplication smell across the family. Reader suite green (854); javadoc enforcement passes. Co-Authored-By: Claude Opus 4.8 --- .../array/AbstractMaterializedArray.java | 53 +++++++++++++++++++ .../reader/array/MaterializedBoolArray.java | 38 +------------ .../reader/array/MaterializedByteArray.java | 36 +------------ .../reader/array/MaterializedDoubleArray.java | 36 +------------ .../array/MaterializedFloat16Array.java | 37 +------------ .../reader/array/MaterializedFloatArray.java | 36 +------------ .../reader/array/MaterializedIntArray.java | 36 +------------ .../reader/array/MaterializedLongArray.java | 36 +------------ .../reader/array/MaterializedShortArray.java | 36 +------------ 9 files changed, 69 insertions(+), 275 deletions(-) create mode 100644 reader/src/main/java/io/github/dfa1/vortex/reader/array/AbstractMaterializedArray.java diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/AbstractMaterializedArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/AbstractMaterializedArray.java new file mode 100644 index 00000000..bc88d6aa --- /dev/null +++ 
b/reader/src/main/java/io/github/dfa1/vortex/reader/array/AbstractMaterializedArray.java @@ -0,0 +1,53 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.util.Optional; + +/// Shared cold-path plumbing for the buffer-backed `Materialized*` arrays: the +/// `dtype` / `length` / `buffer` triple, the trivial accessors, and the zero-copy +/// `materialize` / `segmentIfPresent` contract every variant shares. +/// +/// Subclasses keep their own typed element access and the branch-split hot loops +/// (`getX` / `fold` / `forEach`): those must stay monomorphic in the leaf class so the +/// JIT can vectorise them, and so are deliberately not hoisted here. This base only +/// holds the cold boilerplate. +/// +/// Not `implements Array`: that interface is sealed to the typed element families +/// ([IntArray], [LongArray], …), which each leaf implements directly while inheriting +/// the common methods from here. +abstract class AbstractMaterializedArray { + + final DType dtype; + final long length; + final MemorySegment buffer; + + AbstractMaterializedArray(DType dtype, long length, MemorySegment buffer) { + this.dtype = dtype; + this.length = length; + this.buffer = buffer; + } + + public final DType dtype() { + return dtype; + } + + public final long length() { + return length; + } + + /// Returns the backing buffer directly — it is already the contiguous segment the + /// array's materialize contract promises, so no copy or allocation is needed. 
+ /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing segment + public final MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + + public final Optional<MemorySegment> segmentIfPresent() { + return Optional.of(buffer); + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java index 409c9b92..675b7060 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java @@ -4,17 +4,11 @@ import io.github.dfa1.vortex.core.DType; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; -import java.util.Optional; /// Buffer-backed [BoolArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. -public final class MaterializedBoolArray implements BoolArray { - - private final DType dtype; - private final long length; - private final MemorySegment buffer; +public final class MaterializedBoolArray extends AbstractMaterializedArray implements BoolArray { /// Constructs a `MaterializedBoolArray` backed by the given bit-packed buffer. 
/// @@ -22,35 +16,7 @@ public final class MaterializedBoolArray implements BoolArray { /// @param length number of logical boolean elements /// @param buffer bit-packed boolean data (LSB-first, one byte per 8 elements) public MaterializedBoolArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; - } - - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already an LSB-first packed bitmap, - /// matching the format produced by [BoolArray#materialize(SegmentAllocator)], - /// so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing LSB-first packed bitmap - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); + super(dtype, length, buffer); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java index 1483f81e..e175c4dc 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java @@ -5,18 +5,13 @@ import io.github.dfa1.vortex.core.PType; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; -import java.util.Optional; import java.util.function.LongBinaryOperator; /// Buffer-backed [ByteArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. 
-public final class MaterializedByteArray implements ByteArray { +public final class MaterializedByteArray extends AbstractMaterializedArray implements ByteArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Constructs a `MaterializedByteArray` backed by the given buffer. @@ -25,37 +20,10 @@ public final class MaterializedByteArray implements ByteArray { /// @param length number of logical elements /// @param buffer raw byte data (one byte per element) public MaterializedByteArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous one-byte-per-element - /// segment, so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing byte segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public byte getByte(long i) { return buffer.get(ValueLayout.JAVA_BYTE, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java index 9341e707..62143715 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java @@ -4,18 +4,13 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; /// Buffer-backed [DoubleArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. -public final class MaterializedDoubleArray implements DoubleArray { +public final class MaterializedDoubleArray extends AbstractMaterializedArray implements DoubleArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Constructs a `MaterializedDoubleArray` backed by the given buffer. @@ -24,37 +19,10 @@ public final class MaterializedDoubleArray implements DoubleArray { /// @param length number of logical elements /// @param buffer raw double data (8 bytes per element, little-endian) public MaterializedDoubleArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize() / PTypeIO.LE_DOUBLE.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// `f64` segment, so no copy or allocation is needed. 
- /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `f64` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public double getDouble(long i) { return buffer.getAtIndex(PTypeIO.LE_DOUBLE, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java index e40c3690..6922fbc6 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java @@ -5,16 +5,10 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; /// Buffer-backed [Float16Array] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. -public final class MaterializedFloat16Array implements Float16Array { - - private final DType dtype; - private final long length; - private final MemorySegment buffer; +public final class MaterializedFloat16Array extends AbstractMaterializedArray implements Float16Array { /// Creates a new `MaterializedFloat16Array` backed by the given memory segment. 
/// @@ -22,34 +16,7 @@ public final class MaterializedFloat16Array implements Float16Array { /// @param length number of elements /// @param buffer little-endian half-precision float data (2 bytes per element) public MaterializedFloat16Array(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; - } - - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// half-precision segment (2 bytes per element), so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `f16` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); + super(dtype, length, buffer); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java index b3b6d63d..305ef987 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java @@ -5,17 +5,12 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; import java.util.function.DoubleBinaryOperator; /// Buffer-backed [FloatArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. 
-public final class MaterializedFloatArray implements FloatArray { +public final class MaterializedFloatArray extends AbstractMaterializedArray implements FloatArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Creates a new `MaterializedFloatArray` backed by the given memory segment. @@ -24,37 +19,10 @@ public final class MaterializedFloatArray implements FloatArray { /// @param length number of elements /// @param buffer little-endian float data (4 bytes per element) public MaterializedFloatArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize() / PTypeIO.LE_FLOAT.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// `f32` segment, so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `f32` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public float getFloat(long i) { return buffer.getAtIndex(PTypeIO.LE_FLOAT, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java index 1786bc1a..24ae80f6 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java @@ -5,18 +5,13 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; /// Buffer-backed [IntArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. -public final class MaterializedIntArray implements IntArray { +public final class MaterializedIntArray extends AbstractMaterializedArray implements IntArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Creates a new `MaterializedIntArray` backed by the given memory segment. @@ -25,37 +20,10 @@ public final class MaterializedIntArray implements IntArray { /// @param length number of elements /// @param buffer little-endian int data (4 bytes per element) public MaterializedIntArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize() / PTypeIO.LE_INT.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// `i32` segment, so no copy or allocation is needed. 
- /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `i32` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public int getInt(long i) { return buffer.getAtIndex(PTypeIO.LE_INT, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java index dbff416a..7e0cfc68 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java @@ -5,18 +5,13 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; /// Buffer-backed [LongArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. -public final class MaterializedLongArray implements LongArray { +public final class MaterializedLongArray extends AbstractMaterializedArray implements LongArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Creates a new `MaterializedLongArray` backed by the given memory segment. 
@@ -25,37 +20,10 @@ public final class MaterializedLongArray implements LongArray { /// @param length number of elements /// @param buffer little-endian long data (8 bytes per element) public MaterializedLongArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize() / PTypeIO.LE_LONG.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// `i64` segment, so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `i64` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public long getLong(long i) { return buffer.getAtIndex(PTypeIO.LE_LONG, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java index 250b0b32..eccc10bd 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java @@ -6,17 +6,12 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.util.Optional; import java.util.function.LongBinaryOperator; /// Buffer-backed [ShortArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. 
-public final class MaterializedShortArray implements ShortArray { +public final class MaterializedShortArray extends AbstractMaterializedArray implements ShortArray { - private final DType dtype; - private final long length; - private final MemorySegment buffer; private final long elementCount; /// Creates a new `MaterializedShortArray` backed by the given memory segment. @@ -25,37 +20,10 @@ public final class MaterializedShortArray implements ShortArray { /// @param length number of elements /// @param buffer little-endian short data (2 bytes per element) public MaterializedShortArray(DType dtype, long length, MemorySegment buffer) { - this.dtype = dtype; - this.length = length; - this.buffer = buffer; + super(dtype, length, buffer); this.elementCount = buffer.byteSize() / PTypeIO.LE_SHORT.byteSize(); } - @Override - public DType dtype() { - return dtype; - } - - @Override - public long length() { - return length; - } - - /// Returns the backing buffer directly — already a contiguous little-endian - /// `i16` segment, so no copy or allocation is needed. - /// - /// @param arena unused; the existing buffer is returned as-is - /// @return the backing little-endian `i16` segment - @Override - public MemorySegment materialize(SegmentAllocator arena) { - return buffer; - } - - @Override - public Optional segmentIfPresent() { - return Optional.of(buffer); - } - @Override public short getShort(long i) { return buffer.getAtIndex(PTypeIO.LE_SHORT, length == elementCount ? i : i % elementCount);