diff --git a/docs/adr/0016-vortex-arrow-bridge.md b/docs/adr/0016-vortex-arrow-bridge.md index 6f1f8558..3cf9af38 100644 --- a/docs/adr/0016-vortex-arrow-bridge.md +++ b/docs/adr/0016-vortex-arrow-bridge.md @@ -115,8 +115,7 @@ Three pieces of work beyond just handing over the existing mmap slices: 2. **Lazy materialisation.** Lazy arrays (ZigZag/FoR/ALP/Dict/RLE) store the *encoded* form, which is not the Arrow values layout, so they must be materialised into a contiguous LE segment first. This is exactly the producer step that - `ArraySegments.of(...)` (or a future `Array.materialize(arena)` delegation seam, - see below) performs, so the internal materialise path feeds the `values` buffer + `Array.materialize(arena)` performs (see below), so it feeds the `values` buffer directly. Primitive values, VarBin data+offsets, and StringView are already Arrow-shaped (zero-copy). 3. **Lifetime / release contract.** Buffers are zero-copy slices of the mmap'd file @@ -127,20 +126,28 @@ Three pieces of work beyond just handing over the existing mmap slices: consumer calls `release` is a use-after-unmap → native segfault, not a Java exception. This is the highest-risk part. -### Relationship to the internal materialise seam - -`ArraySegments.of(Array, SegmentAllocator)` already centralises "turn any array -(lazy or eager) into a contiguous LE primitive segment", and currently re-states each -encoding's decode formula (ZigZag/FoR/ALP) in a large switch separate from the -per-element accessor on the lazy array. A standalone refactor — moving that bulk -materialisation onto the array types as an `Array.materialize(SegmentAllocator)` -delegation (mirroring the existing `Array.limited(...)` pattern, kept on a -package-private seam to avoid widening the public API) — stands on its own as a -locality cleanup. It is **not** an Arrow feature, but it is the natural producer of -the Arrow `values` buffer, so Option B should build on it rather than duplicate it. 
-The contiguous LE segment it yields already matches Arrow's primitive values-buffer -layout; the gap to a full Arrow array is validity + offsets + children, per the table -above. +### Relationship to the `Array.materialize` seam (shipped) + +The bulk-materialisation seam Option B builds on now exists: +`Array.materialize(SegmentAllocator)` — a pure abstract method (mirroring the existing +`Array.limited(...)` polymorphism) that turns any array, lazy or eager, into a contiguous +LE primitive segment. Each type owns its path: segment-backed arrays return their buffer +zero-copy, the `Lazy*` variants apply their inlined decode formula (ZigZag/FoR/ALP) in a +vectorisable loop next to their per-element accessor, chunked/dict arrays concat/gather, +and the families with no primary segment (struct, list, variant, byte-parts decimal, null, +unknown) throw. + +This is **not** an Arrow feature — but it is the natural producer of the Arrow `values` +buffer, so Option B builds on it. The contiguous LE segment it yields already matches +Arrow's primitive values-buffer layout. Two gaps remain to a full Arrow array, both per +the table above: validity + offsets + children; and the broadcast edge — a constant column +materialises to a single-element buffer (`length != elementCount`), which `materialize()` +returns as-is, so the Arrow producer must expand it to `length` values. + +`materialize` is intentionally part of the public `Array` contract (not a package-private +seam): it is the documented way to obtain a column's contiguous primitive buffer, and a +future `vortex-arrow` module in a separate package consumes it without further API +widening. 
### Option C — No bridge; document manual conversion diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java index a34851a1..7c028129 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/RustWritesJavaReadsIntegrationTest.java @@ -12,7 +12,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.DoubleArray; import io.github.dfa1.vortex.reader.array.LongArray; import io.github.dfa1.vortex.reader.ReadRegistry; @@ -130,7 +129,7 @@ private static List scanAll(VortexReader vf, /// into a heap primitive array — long[]/int[]/double[]/float[]/short[]/byte[]. 
private static Object snapshotArray(Array arr) { var ptype = ((DType.Primitive) arr.dtype()).ptype(); - var seg = ArraySegments.of(arr, Arena.ofAuto()); + var seg = arr.materialize(Arena.ofAuto()); return switch (ptype) { case I64, U64 -> seg.toArray(ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN)); case I32, U32 -> seg.toArray(ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN)); diff --git a/performance/src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java b/performance/src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java index c998e657..a483cbf3 100644 --- a/performance/src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java +++ b/performance/src/main/java/io/github/dfa1/vortex/performance/RustWritesJavaReadsBigFileBenchmark.java @@ -9,7 +9,6 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.ReadRegistry; import io.github.dfa1.vortex.reader.VortexReader; import io.github.dfa1.vortex.reader.Chunk; @@ -181,7 +180,7 @@ private long scanJava() throws IOException { while (iter.hasNext()) { try (Chunk c = iter.next()) { Array arr = c.columns().get("c0"); - MemorySegment buf = ArraySegments.of(arr, Arena.ofAuto()); + MemorySegment buf = arr.materialize(Arena.ofAuto()); long count = buf.byteSize() / Long.BYTES; for (long i = 0; i < count; i++) { sum += buf.getAtIndex(LE_LONG, i); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java index 02ff6f14..62cae7c1 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ReadRegistry.java @@ -2,7 +2,6 @@ import 
io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.UnknownArray; import io.github.dfa1.vortex.encoding.EncodingId; import io.github.dfa1.vortex.reader.decode.ArrayNode; @@ -101,7 +100,7 @@ public MemorySegment decodeAsSegment(DecodeContext ctx) { case UnknownArrayNode _ -> null; }; if (decoder != null) { - return ArraySegments.of(decoder.decode(ctx), ctx.arena()); + return decoder.decode(ctx).materialize(ctx.arena()); } String id = switch (node) { case KnownArrayNode k -> k.encodingId().id(); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java index c0195029..69986af8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java @@ -603,7 +603,7 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator // than the claimed rowCount. Full-decode encodings (e.g. bitpacked) already // wrote n * elemBytes to the arena during decodeLayout above, so their buffer // matches n. - MemorySegment codesSeg = ArraySegments.of(codes, arena); + MemorySegment codesSeg = codes.materialize(arena); long bufferCodes = codesSeg.byteSize() / (long) codesPType.byteSize(); if (bufferCodes < n) { throw new VortexException(EncodingId.VORTEX_DICT, @@ -624,7 +624,7 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator } // Non-Utf8, non-Primitive dict — e.g. extension types backed by VarBin. Fall through // to the existing string expansion for compatibility. 
- MemorySegment codesSegFallback = ArraySegments.of(codes, arena); + MemorySegment codesSegFallback = codes.materialize(arena); long bufferCodesFallback = codesSegFallback.byteSize() / (long) codesPType.byteSize(); if (bufferCodesFallback < n) { throw new VortexException(EncodingId.VORTEX_DICT, @@ -641,6 +641,9 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator /// @param codes the decoded codes array /// @param codesPType code ptype reported by the dict layout metadata /// @param n claimed dict row count + // ArraySegments is deprecated-for-removal; this guard is its only caller and moves to + // the decode-limits layer with it. + @SuppressWarnings("removal") private static void validateDictCodesCapacity(Array codes, PType codesPType, long n) { Optional maybeSeg = ArraySegments.trySegment(codes); if (maybeSeg.isEmpty()) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java index d67554be..2b8b6622 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java @@ -2,6 +2,9 @@ import io.github.dfa1.vortex.core.DType; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; + /// Decoded columnar data. Concrete subtypes specialise element access for the JIT; /// each covers a specific dtype family. /// @@ -41,6 +44,24 @@ public sealed interface Array /// @return an array of length `rows` Array limited(long rows); + /// Materialises this array into its primary backing [MemorySegment], + /// allocating from `arena` for lazy variants. + /// + /// Segment-backed arrays (the `Materialized*` records, `VarBinArray`, + /// `GenericArray`, `LazyDecimalArray`) return their existing buffer with no + /// copy. 
Lazy primitive arrays decode element-by-element, the `Lazy*` + /// frame-of-reference / zigzag / ALP variants apply their inlined formula in a + /// vectorisable loop, and composite arrays (chunked, dict) concatenate or gather + /// their children. This is the single materialisation contract behind + /// [io.github.dfa1.vortex.reader.decode.DecodeContext#materialize(Array)]. + /// + /// Array families with no row-addressable primary segment (struct, list, variant, + /// the byte-parts decimal layout) throw [io.github.dfa1.vortex.core.VortexException]. + /// + /// @param arena allocator used to materialise lazy variants + /// @return the primary [MemorySegment] + MemorySegment materialize(SegmentAllocator arena); + /// Limits `arr` to its first `rows` elements (semantically `min(length, rows)`), /// returning it unchanged when it already fits. Single guard shared by the scan /// layer and the composite subtypes that recurse into children, so the diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java index a30ee36d..cfd8b91d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java @@ -1,27 +1,30 @@ package io.github.dfa1.vortex.reader.array; -import io.github.dfa1.vortex.core.VortexException; -import io.github.dfa1.vortex.encoding.PTypeIO; - import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; import java.util.Optional; -/// Internal materialization engine: turns any [Array] into its primary -/// [MemorySegment], allocating from a caller-supplied arena for lazy variants. +/// Internal non-allocating probe for an [Array]'s primary [MemorySegment]. /// -/// If `arr` is a [MaskedArray], the inner (data) segment is returned; -/// the validity mask is not surfaced here — callers that need validity must unwrap manually. 
+/// Unwraps a [MaskedArray] to its inner (data) array first; the validity mask is +/// not surfaced here — callers that need validity must read it from the +/// [MaskedArray] separately. To force a segment (materialising lazy variants), +/// call [Array#materialize(java.lang.foreign.SegmentAllocator)] directly. /// /// **Vortex-internal — not public API.** This class is `public` only because the reader, /// writer, and encoding implementations live in separate Maven modules and need cross-package -/// access; its signatures may change without a deprecation cycle. Encoding decoders should not -/// call it directly — use [io.github.dfa1.vortex.reader.decode.DecodeContext#materialize(Array)] -/// (which routes here). It backs that seam plus -/// [io.github.dfa1.vortex.reader.ReadRegistry#decodeAsSegment] and the scan layer's dictionary -/// validation. Application code should prefer the typed accessors on concrete subtypes — +/// access; its signatures may change without a deprecation cycle. It backs the scan layer's +/// dictionary zip-bomb validation, which needs to inspect a backing buffer only when one +/// already exists. Application code should prefer the typed accessors on concrete subtypes — /// [LongArray#getLong(long)], [IntArray#getInt(long)], /// [DoubleArray#getDouble(long)], and friends. +/// +/// @deprecated transitional — this class survives only as the home of +/// [#trySegment(Array)], the non-allocating probe behind the dictionary +/// zip-bomb guard in [io.github.dfa1.vortex.reader.ScanIterator]. Once the +/// decode-limits layer owns that bound, this class is removed; do not add +/// new callers. Use [Array#materialize(java.lang.foreign.SegmentAllocator)] +/// to obtain a column's segment. +@Deprecated(forRemoval = true) public final class ArraySegments { private ArraySegments() { @@ -31,7 +34,7 @@ private ArraySegments() { /// /// Non-throwing probe for callers that want to operate on the raw buffer only when one /// exists (e.g. 
zone-map / capacity validation) and skip lazy variants without allocating. - /// To force a segment for a lazy array, use [#of(Array, SegmentAllocator)]. + /// To force a segment for a lazy array, use [Array#materialize(java.lang.foreign.SegmentAllocator)]. /// /// @param arr the array whose segment is needed /// @return the primary [MemorySegment], or empty if `arr` has no segment backing @@ -52,404 +55,4 @@ public static Optional trySegment(Array arr) { default -> Optional.empty(); }; } - - private static MemorySegment primarySegment(Array arr) { - return trySegment(arr).orElseThrow(() -> { - Array data = arr instanceof MaskedArray m ? m.inner() : arr; - return new VortexException(data.getClass().getSimpleName() + " has no primary segment — use of(arr, arena)"); - }); - } - - /// Returns the primary backing segment of `arr`, materialising lazy variants into a - /// fresh segment allocated from `arena`. - /// - /// Use this overload when the caller already holds a chunk-scoped allocator (e.g. - /// [io.github.dfa1.vortex.reader.ReadRegistry#decodeAsSegment]) so lazy array types - /// do not need to carry the arena as a record component. - /// - /// @param arr the array whose segment is needed - /// @param arena allocator used to materialise lazy variants - /// @return the primary [MemorySegment] - /// @throws VortexException if the array type has no primary segment - public static MemorySegment of(Array arr, SegmentAllocator arena) { - Array data = arr instanceof MaskedArray m ? 
m.inner() : arr; - return switch (data) { - case LazyAlpDoubleArray a -> materialise(a, arena); - case LazyAlpFloatArray a -> materialise(a, arena); - case LazyForLongArray a -> materialise(a, arena); - case LazyForIntArray a -> materialise(a, arena); - case LazyZigZagLongArray a -> materialise(a, arena); - case LazyZigZagIntArray a -> materialise(a, arena); - case ChunkedLongArray a -> materialiseChunkedLong(a, arena); - case ChunkedIntArray a -> materialiseChunkedInt(a, arena); - case ChunkedDoubleArray a -> materialiseChunkedDouble(a, arena); - case ChunkedFloatArray a -> materialiseChunkedFloat(a, arena); - case ChunkedShortArray a -> materialiseChunkedShort(a, arena); - case ChunkedByteArray a -> materialiseChunkedByte(a, arena); - case DictLongArray a -> materialiseDictLong(a, arena); - case DictIntArray a -> materialiseDictInt(a, arena); - case DictDoubleArray a -> materialiseDictDouble(a, arena); - case DictFloatArray a -> materialiseDictFloat(a, arena); - // Generic typed-accessor fallback: any LongArray/IntArray/.../etc. that - // is not segment-backed (e.g. LazyRle*, LazyRunEnd*, LazySparse*, LazyAlpRd*) - // can be materialised via its public typed accessor without a special case. 
- case LongArray a -> materialiseLong(a, arena); - case IntArray a -> materialiseInt(a, arena); - case DoubleArray a -> materialiseDouble(a, arena); - case FloatArray a -> materialiseFloat(a, arena); - case ShortArray a -> materialiseShort(a, arena); - case ByteArray a -> materialiseByte(a, arena); - case LazyConstantDecimalArray a -> materialiseConstantDecimal(a, arena); - case DecimalArray _ -> primarySegment(arr); - default -> primarySegment(arr); - }; - } - - private static MemorySegment materialiseLong(LongArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, a.getLong(i)); - } - return dst; - } - - private static MemorySegment materialiseInt(IntArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, a.getInt(i)); - } - return dst; - } - - private static MemorySegment materialiseDouble(DoubleArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, a.getDouble(i)); - } - return dst; - } - - private static MemorySegment materialiseFloat(FloatArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, a.getFloat(i)); - } - return dst; - } - - private static MemorySegment materialiseShort(ShortArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 2L, 2); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_SHORT, i, a.getShort(i)); - } - return dst; - } - - private static MemorySegment materialiseByte(ByteArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n); - for (long i = 0; i < n; i++) { - 
dst.set(java.lang.foreign.ValueLayout.JAVA_BYTE, i, a.getByte(i)); - } - return dst; - } - - private static MemorySegment materialiseChunkedLong(ChunkedLongArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - long byteOffset = 0; - for (LongArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length() * 8L; - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseChunkedInt(ChunkedIntArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - long byteOffset = 0; - for (IntArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length() * 4L; - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseChunkedDouble(ChunkedDoubleArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - long byteOffset = 0; - for (DoubleArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length() * 8L; - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseChunkedFloat(ChunkedFloatArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - long byteOffset = 0; - for (FloatArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length() * 4L; - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialise(LazyAlpDoubleArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - 
double df = a.factorF(); - double de = a.factorE(); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, (double) src.getAtIndex(PTypeIO.LE_LONG, i) * df * de); - } - return dst; - } - - private static MemorySegment materialise(LazyAlpFloatArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - float df = a.factorF(); - float de = a.factorE(); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, (float) src.getAtIndex(PTypeIO.LE_INT, i) * df * de); - } - return dst; - } - - private static MemorySegment materialise(LazyForLongArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - long ref = a.ref(); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, src.getAtIndex(PTypeIO.LE_LONG, i) + ref); - } - return dst; - } - - private static MemorySegment materialise(LazyForIntArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - int ref = a.ref(); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, src.getAtIndex(PTypeIO.LE_INT, i) + ref); - } - return dst; - } - - private static MemorySegment materialise(LazyZigZagLongArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - long u = src.getAtIndex(PTypeIO.LE_LONG, i); - dst.setAtIndex(PTypeIO.LE_LONG, i, (u >>> 1) ^ -(u & 1L)); - } - return dst; - } - - private static MemorySegment materialise(LazyZigZagIntArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - MemorySegment src = a.encoded(); - for (long i = 0; i < n; i++) { - int u = src.getAtIndex(PTypeIO.LE_INT, i); - 
dst.setAtIndex(PTypeIO.LE_INT, i, (u >>> 1) ^ -(u & 1)); - } - return dst; - } - - private static MemorySegment materialiseChunkedShort(ChunkedShortArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 2L, 2); - long byteOffset = 0; - for (ShortArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length() * 2L; - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseChunkedByte(ChunkedByteArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n); - long byteOffset = 0; - for (ByteArray child : a.children()) { - MemorySegment src = of((Array) child, arena); - long bytes = child.length(); - MemorySegment.copy(src, 0, dst, byteOffset, bytes); - byteOffset += bytes; - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseDictLong(DictLongArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - LongArray vals = a.values(); - Array codes = a.codes(); - switch (codes) { - case ByteArray ba -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, vals.getLong(Byte.toUnsignedLong(ba.getByte(i)))); - } - } - case ShortArray sa -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, vals.getLong(Short.toUnsignedLong(sa.getShort(i)))); - } - } - case IntArray ia -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, vals.getLong(Integer.toUnsignedLong(ia.getInt(i)))); - } - } - case LongArray la -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_LONG, i, vals.getLong(la.getLong(i))); - } - } - default -> throw new VortexException("DictLongArray: invalid codes type: " - + codes.getClass().getSimpleName()); - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseDictInt(DictIntArray a, 
SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - IntArray vals = a.values(); - Array codes = a.codes(); - switch (codes) { - case ByteArray ba -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, vals.getInt(Byte.toUnsignedLong(ba.getByte(i)))); - } - } - case ShortArray sa -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, vals.getInt(Short.toUnsignedLong(sa.getShort(i)))); - } - } - case IntArray ia -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, vals.getInt(Integer.toUnsignedLong(ia.getInt(i)))); - } - } - case LongArray la -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_INT, i, vals.getInt(la.getLong(i))); - } - } - default -> throw new VortexException("DictIntArray: invalid codes type: " - + codes.getClass().getSimpleName()); - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseDictDouble(DictDoubleArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 8L, 8); - DoubleArray vals = a.values(); - Array codes = a.codes(); - switch (codes) { - case ByteArray ba -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, vals.getDouble(Byte.toUnsignedLong(ba.getByte(i)))); - } - } - case ShortArray sa -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, vals.getDouble(Short.toUnsignedLong(sa.getShort(i)))); - } - } - case IntArray ia -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, vals.getDouble(Integer.toUnsignedLong(ia.getInt(i)))); - } - } - case LongArray la -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_DOUBLE, i, vals.getDouble(la.getLong(i))); - } - } - default -> throw new VortexException("DictDoubleArray: invalid codes type: " - + codes.getClass().getSimpleName()); - } - return dst.asReadOnly(); - } - - private static MemorySegment 
materialiseDictFloat(DictFloatArray a, SegmentAllocator arena) { - long n = a.length(); - MemorySegment dst = arena.allocate(n * 4L, 4); - FloatArray vals = a.values(); - Array codes = a.codes(); - switch (codes) { - case ByteArray ba -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, vals.getFloat(Byte.toUnsignedLong(ba.getByte(i)))); - } - } - case ShortArray sa -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, vals.getFloat(Short.toUnsignedLong(sa.getShort(i)))); - } - } - case IntArray ia -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, vals.getFloat(Integer.toUnsignedLong(ia.getInt(i)))); - } - } - case LongArray la -> { - for (long i = 0; i < n; i++) { - dst.setAtIndex(PTypeIO.LE_FLOAT, i, vals.getFloat(la.getLong(i))); - } - } - default -> throw new VortexException("DictFloatArray: invalid codes type: " - + codes.getClass().getSimpleName()); - } - return dst.asReadOnly(); - } - - private static MemorySegment materialiseConstantDecimal(LazyConstantDecimalArray a, SegmentAllocator arena) { - long n = a.length(); - int byteWidth = a.byteWidth(); - MemorySegment dst = arena.allocate(n * byteWidth); - java.math.BigInteger unscaled = a.value().unscaledValue(); - // Write the single constant value in LE two's-complement, repeated n times. 
- long rawBits = unscaled.longValueExact(); - for (long i = 0; i < n; i++) { - long off = i * byteWidth; - switch (byteWidth) { - case 1 -> dst.set(java.lang.foreign.ValueLayout.JAVA_BYTE, off, (byte) rawBits); - case 2 -> dst.set(PTypeIO.LE_SHORT, off, (short) rawBits); - case 4 -> dst.set(PTypeIO.LE_INT, off, (int) rawBits); - case 8 -> dst.set(PTypeIO.LE_LONG, off, rawBits); - default -> throw new VortexException("LazyConstantDecimalArray: unsupported byteWidth " + byteWidth); - } - } - return dst.asReadOnly(); - } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/BoolArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/BoolArray.java index 92597e3a..abf34b96 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/BoolArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/BoolArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.ValueLayout; + /// [Array] for bit-packed boolean columns (LSB-first, one byte per 8 elements). /// /// The default impl is [MaterializedBoolArray], a buffer-backed record @@ -32,4 +36,26 @@ default void forEachBoolean(BooleanConsumer c) { default Array limited(long rows) { return new OffsetBoolArray(dtype(), rows, this, 0); } + + /// Scalar fallback: packs every element through [#getBoolean(long)] into a fresh + /// LSB-first bitmap (one byte per 8 elements), matching the on-disk and Arrow + /// validity-buffer layout. Buffer-backed ([MaterializedBoolArray]) overrides with + /// a zero-copy path. The segment is allocated zero-filled, so only set bits are + /// written. 
+ /// + /// @param arena allocator for the output segment + /// @return an LSB-first packed bitmap covering `length()` elements + @Override + default MemorySegment materialize(SegmentAllocator arena) { + long n = length(); + MemorySegment dst = arena.allocate((n + 7) / 8).fill((byte) 0); // only Arena guarantees zeroed memory; a generic SegmentAllocator may recycle dirty bytes + for (long i = 0; i < n; i++) { + if (getBoolean(i)) { + long byteIndex = i >>> 3; + byte b = dst.get(ValueLayout.JAVA_BYTE, byteIndex); + dst.set(ValueLayout.JAVA_BYTE, byteIndex, (byte) (b | (1 << (i & 7)))); + } + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ByteArray.java index f4c12400..aaa261af 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ByteArray.java @@ -3,6 +3,9 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.ValueLayout; import java.util.function.LongBinaryOperator; /// [Array] for I8/U8 primitive columns. @@ -53,4 +56,20 @@ default void forEachByte(ByteConsumer c) { default Array limited(long rows) { return new OffsetByteArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getByte(long)] into a fresh + /// one-byte-per-element segment. Buffer-backed ([MaterializedByteArray]) overrides + /// with a zero-copy path. 
+ /// + /// @param arena allocator for the output segment + /// @return a segment of `length()` bytes + @Override + default MemorySegment materialize(SegmentAllocator arena) { + long n = length(); + MemorySegment dst = arena.allocate(n); + for (long i = 0; i < n; i++) { + dst.set(ValueLayout.JAVA_BYTE, i, getByte(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedByteArray.java index 39c483a9..17762adf 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedByteArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -87,4 +89,23 @@ public void forEachByte(ByteConsumer c) { public Array limited(long rows) { return ChunkedByteArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// byte buffer, each child materialised through its own + /// [ByteArray#materialize(SegmentAllocator)]. 
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only byte segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(length);
+        long cursor = 0L;
+        for (ByteArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedDoubleArray.java index eef78063..3d5853d2 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedDoubleArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -84,4 +86,23 @@ public double fold(double identity, DoubleBinaryOperator op) { public Array limited(long rows) { return ChunkedDoubleArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// little-endian `f64` buffer, each child materialised through its own + /// [DoubleArray#materialize(SegmentAllocator)].
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only little-endian `f64` segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(8L * length, 8L);
+        long cursor = 0L;
+        for (DoubleArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = 8L * chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedFloatArray.java index 4006002c..8a135eb8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedFloatArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -74,4 +76,23 @@ public double fold(double identity, DoubleBinaryOperator op) { public Array limited(long rows) { return ChunkedFloatArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// little-endian `f32` buffer, each child materialised through its own + /// [FloatArray#materialize(SegmentAllocator)].
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only little-endian `f32` segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(4L * length, 4L);
+        long cursor = 0L;
+        for (FloatArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = 4L * chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedIntArray.java index 16c40af7..ebc3ec0d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedIntArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -82,4 +84,23 @@ public int fold(int identity, IntBinaryOperator op) { public Array limited(long rows) { return ChunkedIntArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// little-endian `i32` buffer, each child materialised through its own + /// [IntArray#materialize(SegmentAllocator)].
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only little-endian `i32` segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(4L * length, 4L);
+        long cursor = 0L;
+        for (IntArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = 4L * chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedLongArray.java index 6531b6f6..90900e71 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedLongArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -107,4 +109,24 @@ public long fold(long identity, LongBinaryOperator op) { public Array limited(long rows) { return ChunkedLongArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// little-endian `i64` buffer. Each child is materialised through its own + /// [LongArray#materialize(SegmentAllocator)], so lazy children decode straight + /// into the shared destination via a bulk copy.
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only little-endian `i64` segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(8L * length, 8L);
+        long cursor = 0L;
+        for (LongArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = 8L * chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedShortArray.java index f6094aeb..8a48f7cc 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ChunkedShortArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -88,4 +90,23 @@ public void forEachShort(ShortConsumer c) { public Array limited(long rows) { return ChunkedShortArray.of(dtype, rows, ChunkedArrays.limitedChildren(children, offsets, rows)); } + + /// Materialises by concatenating each child's segment into one contiguous + /// little-endian `i16` buffer, each child materialised through its own + /// [ShortArray#materialize(SegmentAllocator)].
+    ///
+    /// @param arena allocator for the combined buffer; each child is materialised through it too
+    /// @return a read-only little-endian `i16` segment holding every chunk back to back
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final MemorySegment joined = arena.allocate(2L * length, 2L);
+        long cursor = 0L;
+        for (ShortArray chunk : children) {
+            final MemorySegment part = chunk.materialize(arena);
+            final long partBytes = 2L * chunk.length();
+            MemorySegment.copy(part, 0L, joined, cursor, partBytes);
+            cursor = cursor + partBytes;
+        }
+        return joined.asReadOnly();
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictDoubleArray.java index 2aee6cdf..a91fe7d9 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictDoubleArray.java @@ -2,7 +2,10 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; @@ -45,6 +48,45 @@ public double getDouble(long i) { return values.getDouble(DictArrays.readCode(codes, i)); } + /// Materialises by gathering one dictionary value per code into a fresh + /// little-endian `f64` segment. The codes switch is hoisted outside the loop so + /// each branch is a uniform gather over a single code width.
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a read-only little-endian `f64` segment with one looked-up value per row
+    /// @throws VortexException if `codes` is none of the byte/short/int/long array types
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rows = length;
+        final MemorySegment out = arena.allocate(8L * rows, 8L);
+        final DoubleArray dict = values;
+        switch (codes) {
+            case ByteArray byteCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_DOUBLE, row, dict.getDouble(byteCodes.getByte(row) & 0xFFL));
+                }
+            }
+            case ShortArray shortCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_DOUBLE, row, dict.getDouble(shortCodes.getShort(row) & 0xFFFFL));
+                }
+            }
+            case IntArray intCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_DOUBLE, row, dict.getDouble(intCodes.getInt(row) & 0xFFFF_FFFFL));
+                }
+            }
+            case LongArray longCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_DOUBLE, row, dict.getDouble(longCodes.getLong(row)));
+                }
+            }
+            default -> throw new VortexException("DictDoubleArray: invalid codes type: "
+                    + codes.getClass().getSimpleName());
+        }
+        return out.asReadOnly();
+    }
+
@Override public void forEachDouble(DoubleConsumer cons) { long n = length; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictFloatArray.java index 04e08246..733870b4 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictFloatArray.java @@ -2,7 +2,10 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; /// Dict-encoded [FloatArray] view. ADR 0012 shape.
@@ -44,6 +47,45 @@ public float getFloat(long i) { return values.getFloat(DictArrays.readCode(codes, i)); }
+    /// Gathers the dictionary into a fresh little-endian `f32` segment, one value per
+    /// code. Dispatch on the concrete code-array type happens once, ahead of the loops,
+    /// so every loop reads codes of a single width.
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a read-only little-endian `f32` segment with one looked-up value per row
+    /// @throws VortexException if `codes` is none of the byte/short/int/long array types
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rows = length;
+        final MemorySegment out = arena.allocate(4L * rows, 4L);
+        final FloatArray dict = values;
+        switch (codes) {
+            case ByteArray byteCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_FLOAT, row, dict.getFloat(byteCodes.getByte(row) & 0xFFL));
+                }
+            }
+            case ShortArray shortCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_FLOAT, row, dict.getFloat(shortCodes.getShort(row) & 0xFFFFL));
+                }
+            }
+            case IntArray intCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_FLOAT, row, dict.getFloat(intCodes.getInt(row) & 0xFFFF_FFFFL));
+                }
+            }
+            case LongArray longCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_FLOAT, row, dict.getFloat(longCodes.getLong(row)));
+                }
+            }
+            default -> throw new VortexException("DictFloatArray: invalid codes type: "
+                    + codes.getClass().getSimpleName());
+        }
+        return out.asReadOnly();
+    }
+
@Override public double fold(double identity, DoubleBinaryOperator op) { long n = length; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictIntArray.java index bf3279e4..862e91a7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictIntArray.java @@ -2,7 +2,10 @@ import io.github.dfa1.vortex.core.DType; import
io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; @@ -45,6 +48,45 @@ public int getInt(long i) { return values.getInt(DictArrays.readCode(codes, i)); }
+    /// Gathers the dictionary into a fresh little-endian `i32` segment, one value per
+    /// code. Dispatch on the concrete code-array type happens once, ahead of the loops,
+    /// so every loop reads codes of a single width.
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a read-only little-endian `i32` segment with one looked-up value per row
+    /// @throws VortexException if `codes` is none of the byte/short/int/long array types
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rows = length;
+        final MemorySegment out = arena.allocate(4L * rows, 4L);
+        final IntArray dict = values;
+        switch (codes) {
+            case ByteArray byteCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_INT, row, dict.getInt(byteCodes.getByte(row) & 0xFFL));
+                }
+            }
+            case ShortArray shortCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_INT, row, dict.getInt(shortCodes.getShort(row) & 0xFFFFL));
+                }
+            }
+            case IntArray intCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_INT, row, dict.getInt(intCodes.getInt(row) & 0xFFFF_FFFFL));
+                }
+            }
+            case LongArray longCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_INT, row, dict.getInt(longCodes.getLong(row)));
+                }
+            }
+            default -> throw new VortexException("DictIntArray: invalid codes type: "
+                    + codes.getClass().getSimpleName());
+        }
+        return out.asReadOnly();
+    }
+
@Override public void forEachInt(IntConsumer cons) { long n = length; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictLongArray.java index 80682fda..2bc699b5 100644 ---
a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DictLongArray.java @@ -2,7 +2,10 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; @@ -48,6 +51,45 @@ public long getLong(long i) { return values.getLong(DictArrays.readCode(codes, i)); }
+    /// Gathers the dictionary into a fresh little-endian `i64` segment, one value per
+    /// code. Dispatch on the concrete code-array type happens once, ahead of the loops,
+    /// so every loop reads codes of a single width.
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a read-only little-endian `i64` segment with one looked-up value per row
+    /// @throws VortexException if `codes` is none of the byte/short/int/long array types
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rows = length;
+        final MemorySegment out = arena.allocate(8L * rows, 8L);
+        final LongArray dict = values;
+        switch (codes) {
+            case ByteArray byteCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_LONG, row, dict.getLong(byteCodes.getByte(row) & 0xFFL));
+                }
+            }
+            case ShortArray shortCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_LONG, row, dict.getLong(shortCodes.getShort(row) & 0xFFFFL));
+                }
+            }
+            case IntArray intCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_LONG, row, dict.getLong(intCodes.getInt(row) & 0xFFFF_FFFFL));
+                }
+            }
+            case LongArray longCodes -> {
+                for (long row = 0; row < rows; row++) {
+                    out.setAtIndex(PTypeIO.LE_LONG, row, dict.getLong(longCodes.getLong(row)));
+                }
+            }
+            default -> throw new VortexException("DictLongArray: invalid codes type: "
+                    + codes.getClass().getSimpleName());
+        }
+        return out.asReadOnly();
+    }
+
@Override public void
forEachLong(LongConsumer cons) { long n = length; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DoubleArray.java index a2de9e97..9faa87e1 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/DoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/DoubleArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; +import io.github.dfa1.vortex.encoding.PTypeIO; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; @@ -50,4 +54,21 @@ default double fold(double identity, DoubleBinaryOperator op) { default Array limited(long rows) { return new OffsetDoubleArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getDouble(long)] into a fresh + /// little-endian segment. Buffer-backed ([MaterializedDoubleArray]) and lazy + /// formula-based variants ([LazyAlpDoubleArray], …) override with a zero-copy or + /// vectorised path. 
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a new little-endian `f64` segment holding `length()` elements
+    @Override
+    default MemorySegment materialize(SegmentAllocator arena) {
+        final long rowCount = length();
+        final MemorySegment out = arena.allocate(8L * rowCount, 8L);
+        for (long row = 0; row < rowCount; row += 1) {
+            out.setAtIndex(PTypeIO.LE_DOUBLE, row, getDouble(row));
+        }
+        return out;
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/FixedSizeListArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/FixedSizeListArray.java index f771836d..ab0953d2 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/FixedSizeListArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/FixedSizeListArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Decoded fixed-size list array: holds a flat elements [Array] of length `outerLen * fixedSize`. /// @@ -63,4 +67,15 @@ public Array limited(long rows) { // rows are the first `rows * fixedSize` elements. return new FixedSizeListArray(dtype, rows, Array.limited(elements, rows * fixedSize())); } + + /// Always throws: a fixed-size list wraps a flat elements child, not a single + /// primary segment of its own. Materialise [#elements()] instead.
+    ///
+    /// @param arena ignored; the method throws before any allocation could happen
+    /// @return nothing; control never reaches a return statement
+    /// @throws VortexException unconditionally, on every call
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        throw new VortexException("FixedSizeListArray has no primary segment");
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/FloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/FloatArray.java index 87d188d1..96e274b4 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/FloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/FloatArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; +import io.github.dfa1.vortex.encoding.PTypeIO; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; /// [Array] for F32 primitive columns. @@ -39,4 +43,21 @@ default double fold(double identity, DoubleBinaryOperator op) { default Array limited(long rows) { return new OffsetFloatArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getFloat(long)] into a fresh + /// little-endian segment. Buffer-backed ([MaterializedFloatArray]) and lazy + /// formula-based variants ([LazyAlpFloatArray], …) override with a zero-copy or + /// vectorised path.
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a new little-endian `f32` segment holding `length()` elements
+    @Override
+    default MemorySegment materialize(SegmentAllocator arena) {
+        final long rowCount = length();
+        final MemorySegment out = arena.allocate(4L * rowCount, 4L);
+        for (long row = 0; row < rowCount; row += 1) {
+            out.setAtIndex(PTypeIO.LE_FLOAT, row, getFloat(row));
+        }
+        return out;
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java index ddfca6c8..6b2d5c41 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.VortexException; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.math.BigDecimal; import java.math.BigInteger; @@ -77,6 +78,15 @@ MemorySegment buffer(int i) { return buffers[i]; }
+    /// Hands back `buffers[0]` itself: nothing is copied and the allocator is never consulted.
+    ///
+    /// @param arena ignored; no allocation takes place
+    /// @return the segment stored at index 0 of the backing buffers
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        return buffers[0]; // NOTE(review): assumes at least one buffer is present — confirm at construction
+    }
+
/// Decodes the decimal value at row `i` from a single-buffer layout. /// /// The buffer holds one little-endian two's-complement integer per row. Element
Element diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/IntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/IntArray.java index b5a7ad34..abdfc6f7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/IntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/IntArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; +import io.github.dfa1.vortex.encoding.PTypeIO; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; @@ -50,4 +54,21 @@ default int fold(int identity, IntBinaryOperator op) { default Array limited(long rows) { return new OffsetIntArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getInt(long)] into a fresh + /// little-endian segment. Buffer-backed ([MaterializedIntArray]) and lazy + /// formula-based variants ([LazyForIntArray], [LazyZigZagIntArray], …) override + /// with a zero-copy or vectorised path. 
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a new little-endian `i32` segment holding `length()` elements
+    @Override
+    default MemorySegment materialize(SegmentAllocator arena) {
+        final long rowCount = length();
+        final MemorySegment out = arena.allocate(4L * rowCount, 4L);
+        for (long row = 0; row < rowCount; row += 1) {
+            out.setAtIndex(PTypeIO.LE_INT, row, getInt(row));
+        }
+        return out;
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpDoubleArray.java index beb252d3..a10bb877 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpDoubleArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [DoubleArray] backed by the `vortex.alp` encoded `i64` child segment. /// @@ -29,4 +30,22 @@ public record LazyAlpDoubleArray(DType dtype, long length, MemorySegment encoded public double getDouble(long i) { return (double) encoded.getAtIndex(PTypeIO.LE_LONG, i) * factorF * factorE; } + + /// Bulk-decodes through [#getDouble(long)] into a fresh little-endian `f64` segment. + /// The decode formula (including the two-step factor application that preserves IEEE + /// rounding) lives only in [#getDouble(long)]; this override exists solely to give the + /// JIT a monomorphic, inlinable call site (the shared [DoubleArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise).
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a new little-endian `f64` segment with one decoded value per row
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rowCount = length;
+        final MemorySegment out = arena.allocate(8L * rowCount, 8L);
+        for (long row = 0; row < rowCount; row += 1) {
+            out.setAtIndex(PTypeIO.LE_DOUBLE, row, getDouble(row));
+        }
+        return out;
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpFloatArray.java index 21251707..b3baa74f 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyAlpFloatArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [FloatArray] backed by the `vortex.alp` encoded `i32` child segment. /// @@ -24,4 +25,22 @@ public record LazyAlpFloatArray(DType dtype, long length, MemorySegment encoded, public float getFloat(long i) { return (float) encoded.getAtIndex(PTypeIO.LE_INT, i) * factorF * factorE; } + + /// Bulk-decodes through [#getFloat(long)] into a fresh little-endian `f32` segment. + /// The decode formula (including the two-step factor application that preserves IEEE + /// rounding) lives only in [#getFloat(long)]; this override exists solely to give the + /// JIT a monomorphic, inlinable call site (the shared [FloatArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise).
+    ///
+    /// @param arena allocator that supplies the destination segment
+    /// @return a new little-endian `f32` segment with one decoded value per row
+    @Override
+    public MemorySegment materialize(SegmentAllocator arena) {
+        final long rowCount = length;
+        final MemorySegment out = arena.allocate(4L * rowCount, 4L);
+        for (long row = 0; row < rowCount; row += 1) {
+            out.setAtIndex(PTypeIO.LE_FLOAT, row, getFloat(row));
+        }
+        return out;
+    }
} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java index a1c87169..b48eb551 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java @@ -1,8 +1,14 @@ package io.github.dfa1.vortex.reader.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.ValueLayout; import java.math.BigDecimal; +import java.math.BigInteger; /// Metadata-only decimal array for `vortex.constant` columns. /// @@ -12,8 +18,8 @@ /// @param dtype logical [DType.Decimal] type /// @param length total logical row count /// @param value decoded constant value -/// @param byteWidth element width in bytes (1/2/4/8/16); preserved for materialisation via -/// [io.github.dfa1.vortex.reader.array.ArraySegments] +/// @param byteWidth element width in bytes (1/2/4/8/16); preserved for +/// [#materialize(SegmentAllocator)] public record LazyConstantDecimalArray(DType dtype, long length, BigDecimal value, int byteWidth) implements DecimalArray { /// Returns the constant decimal value for any valid row index.
@@ -31,4 +37,29 @@ public BigDecimal getDecimal(long i) { public Array limited(long rows) { return new LazyConstantDecimalArray(dtype, rows, value, byteWidth); } + + /// Materialises by writing the single constant value, in little-endian + /// two's-complement, `length` times into a fresh `byteWidth`-per-row segment. + /// + /// @param arena allocator for the output segment + /// @return a read-only little-endian two's-complement segment of `length` rows + /// @throws VortexException if `byteWidth` is not 1, 2, 4, or 8 + @Override + public MemorySegment materialize(SegmentAllocator arena) { + long n = length; + MemorySegment dst = arena.allocate(n * byteWidth); + BigInteger unscaled = value.unscaledValue(); + long rawBits = unscaled.longValueExact(); + for (long i = 0; i < n; i++) { + long off = i * byteWidth; + switch (byteWidth) { + case 1 -> dst.set(ValueLayout.JAVA_BYTE, off, (byte) rawBits); + case 2 -> dst.set(PTypeIO.LE_SHORT, off, (short) rawBits); + case 4 -> dst.set(PTypeIO.LE_INT, off, (int) rawBits); + case 8 -> dst.set(PTypeIO.LE_LONG, off, rawBits); + default -> throw new VortexException("LazyConstantDecimalArray: unsupported byteWidth " + byteWidth); + } + } + return dst.asReadOnly(); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java index 8778ba56..d8607133 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.VortexException; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.math.BigDecimal; import java.math.BigInteger; @@ -70,4 +71,15 @@ private static BigInteger readSigned128Le(MemorySegment buf, long offset) { public Array limited(long rows) { return new 
LazyDecimalArray(dtype, rows, buf.asSlice(0, rows * (long) byteWidth), byteWidth); } + + /// Returns the backing buffer directly — already a contiguous little-endian + /// two's-complement integer segment (`byteWidth` bytes per row), so no copy or + /// allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian two's-complement segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buf; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java index df0099b0..d1c92786 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.math.BigDecimal; /// Lazy `vortex.decimal_byte_parts` reassembly. @@ -41,4 +43,15 @@ public BigDecimal getDecimal(long i) { public Array limited(long rows) { return new LazyDecimalBytePartsArray(dtype, rows, Array.limited(msp, rows)); } + + /// Always throws: the byte-parts layout reassembles its mantissa from a child + /// column on demand and has no single contiguous primary segment. 
+ /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("LazyDecimalBytePartsArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForIntArray.java index c902183c..750af6e8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForIntArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [IntArray] backed by the `fastlanes.for` encoded `i32` child segment. /// @@ -22,4 +23,21 @@ public record LazyForIntArray(DType dtype, long length, MemorySegment encoded, i public int getInt(long i) { return encoded.getAtIndex(PTypeIO.LE_INT, i) + ref; } + + /// Bulk-decodes through [#getInt(long)] into a fresh little-endian `i32` segment. + /// The decode formula lives only in [#getInt(long)]; this override exists solely to + /// give the JIT a monomorphic, inlinable call site (the shared [IntArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise). 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i32` segment of decoded values + @Override + public MemorySegment materialize(SegmentAllocator arena) { + long n = length; + MemorySegment dst = arena.allocate(n * 4L, 4); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_INT, i, getInt(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForLongArray.java index 0b9538a2..422918a7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyForLongArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [LongArray] backed by the `fastlanes.for` encoded `i64` child segment. /// @@ -22,4 +23,21 @@ public record LazyForLongArray(DType dtype, long length, MemorySegment encoded, public long getLong(long i) { return encoded.getAtIndex(PTypeIO.LE_LONG, i) + ref; } + + /// Bulk-decodes through [#getLong(long)] into a fresh little-endian `i64` segment. + /// The decode formula lives only in [#getLong(long)]; this override exists solely to + /// give the JIT a monomorphic, inlinable call site (the shared [LongArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise). 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i64` segment of decoded values + @Override + public MemorySegment materialize(SegmentAllocator arena) { + long n = length; + MemorySegment dst = arena.allocate(n * 8L, 8); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_LONG, i, getLong(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagIntArray.java index 1fcfe41f..614d7574 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagIntArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [IntArray] backed by the `vortex.zigzag` encoded `u32` child segment. /// @@ -22,4 +23,21 @@ public int getInt(long i) { int u = encoded.getAtIndex(PTypeIO.LE_INT, i); return (u >>> 1) ^ -(u & 1); } + + /// Bulk-decodes through [#getInt(long)] into a fresh little-endian `i32` segment. + /// The decode formula lives only in [#getInt(long)]; this override exists solely to + /// give the JIT a monomorphic, inlinable call site (the shared [IntArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise). 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i32` segment of decoded values + @Override + public MemorySegment materialize(SegmentAllocator arena) { + long n = length; + MemorySegment dst = arena.allocate(n * 4L, 4); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_INT, i, getInt(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagLongArray.java index 1508239b..4a65eb5e 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyZigZagLongArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Lazy [LongArray] backed by the `vortex.zigzag` encoded `u64` child segment. /// @@ -22,4 +23,21 @@ public long getLong(long i) { long u = encoded.getAtIndex(PTypeIO.LE_LONG, i); return (u >>> 1) ^ -(u & 1L); } + + /// Bulk-decodes through [#getLong(long)] into a fresh little-endian `i64` segment. + /// The decode formula lives only in [#getLong(long)]; this override exists solely to + /// give the JIT a monomorphic, inlinable call site (the shared [LongArray] default is + /// megamorphic across every implementation and will not inline or auto-vectorise). 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i64` segment of decoded values + @Override + public MemorySegment materialize(SegmentAllocator arena) { + long n = length; + MemorySegment dst = arena.allocate(n * 8L, 8); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_LONG, i, getLong(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListArray.java index 4cb4d04b..8d27076c 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListArray.java @@ -3,6 +3,9 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; + /// Decoded variable-length list array. /// /// List `i` covers `elements[offsets[i]..offsets[i+1])`. @@ -69,4 +72,15 @@ public Array limited(long rows) { // shared (trailing elements past offsets[rows] are simply unreferenced). return new ListArray(dtype, rows, elements, Array.limited(offsets, rows + 1)); } + + /// Always throws: a list array is offsets plus a flat elements child, not a + /// single primary segment. Materialise [#elements()] and [#offsets()] separately. 
+ /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("ListArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListViewArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListViewArray.java index b61a10f3..51d03a19 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListViewArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ListViewArray.java @@ -3,6 +3,9 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; + /// Decoded variable-length list-view array (Arrow ListView layout). /// /// Unlike [ListArray], offsets and sizes are independent per row: @@ -82,4 +85,16 @@ public Array limited(long rows) { return new ListViewArray(dtype, rows, elements, Array.limited(offsets, rows), Array.limited(sizes, rows)); } + + /// Always throws: a list-view array is offsets, sizes, and a flat elements child, + /// not a single primary segment. Materialise [#elements()], [#offsets()], and + /// [#sizes()] separately. 
+ /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("ListViewArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LongArray.java index 0abf4bd8..31c752e7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LongArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; +import io.github.dfa1.vortex.encoding.PTypeIO; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; @@ -50,4 +54,21 @@ default long fold(long identity, LongBinaryOperator op) { default Array limited(long rows) { return new OffsetLongArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getLong(long)] into a fresh + /// little-endian segment. Buffer-backed ([MaterializedLongArray]) and lazy + /// formula-based variants ([LazyForLongArray], [LazyZigZagLongArray], …) + /// override with a zero-copy or vectorised path. 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i64` segment of `length()` elements + @Override + default MemorySegment materialize(SegmentAllocator arena) { + long n = length(); + MemorySegment dst = arena.allocate(n * 8L, 8); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_LONG, i, getLong(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java index 5e2e452d..538b9780 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java @@ -2,6 +2,9 @@ import io.github.dfa1.vortex.core.DType; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; + /// Decoded `vortex.masked` array: a non-nullable child paired with an optional validity bitmap. /// /// Invariant: `child` has no actual nulls — nullability is expressed solely via @@ -61,4 +64,16 @@ public Array limited(long rows) { BoolArray truncValidity = validity != null ? (BoolArray) Array.limited(validity, rows) : null; return new MaskedArray(truncChild, truncValidity); } + + /// Materialises the inner (data) payload, ignoring the validity mask — the + /// segment returned is the data buffer only. This matches the prior + /// `ArraySegments` behaviour of unwrapping a masked array to its inner data; + /// callers that need validity must read [#validity()] separately. 
+ /// + /// @param arena allocator used to materialise lazy inner variants + /// @return the inner payload's primary [MemorySegment] + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return child.materialize(arena); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java index 882ffcf2..d12ea6cb 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.DType; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; /// Buffer-backed [BoolArray] — the fallback used when an encoding decoder @@ -39,6 +40,17 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already an LSB-first packed bitmap, + /// matching the format produced by [BoolArray#materialize(SegmentAllocator)], + /// so no copy or allocation is needed. 
+ /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing LSB-first packed bitmap + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public boolean getBoolean(long i) { byte b = buffer.get(ValueLayout.JAVA_BYTE, i >>> 3); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java index 1500b66f..55a73a6f 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java @@ -5,6 +5,7 @@ import io.github.dfa1.vortex.core.PType; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.util.function.LongBinaryOperator; @@ -43,6 +44,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous one-byte-per-element + /// segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing byte segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public byte getByte(long i) { return buffer.get(ValueLayout.JAVA_BYTE, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java index 5ab40d0a..c3583287 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; @@ -42,6 +43,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// `f64` segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `f64` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public double getDouble(long i) { return buffer.getAtIndex(PTypeIO.LE_DOUBLE, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java index 17b29773..2ec5cbf8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java @@ -5,6 +5,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Buffer-backed [Float16Array] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. 
@@ -39,6 +40,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// half-precision segment (2 bytes per element), so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `f16` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public float getFloat(long i) { return Float.float16ToFloat(buffer.getAtIndex(PTypeIO.LE_SHORT, i)); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java index c53b73f7..27b46eba 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java @@ -5,6 +5,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.DoubleBinaryOperator; /// Buffer-backed [FloatArray] — the fallback used when an encoding decoder @@ -42,6 +43,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// `f32` segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `f32` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public float getFloat(long i) { return buffer.getAtIndex(PTypeIO.LE_FLOAT, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java index d91681f4..9dfeb022 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java @@ -5,6 +5,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; @@ -43,6 +44,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// `i32` segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `i32` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public int getInt(long i) { return buffer.getAtIndex(PTypeIO.LE_INT, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java index 6d47f772..45add0bd 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java @@ -5,6 +5,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; @@ -43,6 +44,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// `i64` segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `i64` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public long getLong(long i) { return buffer.getAtIndex(PTypeIO.LE_LONG, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java index 6ed489ec..ebb42106 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java @@ -6,6 +6,7 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.LongBinaryOperator; /// Buffer-backed [ShortArray] — the fallback used when an encoding decoder @@ -43,6 +44,16 @@ MemorySegment buffer() { return buffer; } + /// Returns the backing buffer directly — already a contiguous little-endian + /// `i16` segment, so no copy or allocation is needed. + /// + /// @param arena unused; the existing buffer is returned as-is + /// @return the backing little-endian `i16` segment + @Override + public MemorySegment materialize(SegmentAllocator arena) { + return buffer; + } + @Override public short getShort(long i) { return buffer.getAtIndex(PTypeIO.LE_SHORT, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/NullArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/NullArray.java index 0c86590f..1078055b 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/NullArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/NullArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Concrete [Array] for all-null columns (`DType.Null`). Holds only a row count. 
/// @@ -12,4 +16,14 @@ public record NullArray(DType dtype, long length) implements Array { public Array limited(long rows) { return new NullArray(dtype, rows); } + + /// Always throws: an all-null column holds only a row count, with no data buffer. + /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("NullArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ShortArray.java index 91953537..aa0a294f 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ShortArray.java @@ -2,7 +2,10 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.encoding.PTypeIO; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.function.LongBinaryOperator; /// [Array] for I16/U16 primitive columns. @@ -53,4 +56,20 @@ default void forEachShort(ShortConsumer c) { default Array limited(long rows) { return new OffsetShortArray(dtype(), rows, this, 0); } + + /// Scalar fallback: decodes every element through [#getShort(long)] into a fresh + /// little-endian segment. Buffer-backed ([MaterializedShortArray]) overrides with + /// a zero-copy path. 
+ /// + /// @param arena allocator for the output segment + /// @return a little-endian `i16` segment of `length()` elements + @Override + default MemorySegment materialize(SegmentAllocator arena) { + long n = length(); + MemorySegment dst = arena.allocate(n * 2L, 2); + for (long i = 0; i < n; i++) { + dst.setAtIndex(PTypeIO.LE_SHORT, i, getShort(i)); + } + return dst; + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/StructArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/StructArray.java index 8b1a03ed..35fd501b 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/StructArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/StructArray.java @@ -3,6 +3,8 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.List; /// Decoded struct array: holds one [Array] per field, keyed by position. @@ -69,4 +71,15 @@ public Array field(String name) { public Array limited(long rows) { return new StructArray(dtype, rows, fields.stream().map(f -> Array.limited(f, rows)).toList()); } + + /// Always throws: a struct has one segment per field, not a single primary + /// segment. Materialise each [#field(int)] separately. 
+ /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("StructArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/UnknownArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/UnknownArray.java index 680c7c56..c7a9dbc5 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/UnknownArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/UnknownArray.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.VortexException; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.nio.ByteBuffer; /// Opaque passthrough array for encodings unknown to this reader. @@ -49,4 +50,15 @@ public Array child(int i) { public Array limited(long rows) { throw new VortexException("limit: not supported for undecoded encoding '" + encodingId + "'"); } + + /// Unsupported: an unknown encoding's `buffers` are raw, undecoded bytes with no + /// row-addressable structure, so there is no decoded primary segment. 
+ /// + /// @param arena ignored + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("materialize: not supported for undecoded encoding '" + encodingId + "'"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java index 54cf714f..f97c8995 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java @@ -78,6 +78,18 @@ public VarBinArray limited(long rows) { /// @return the bytes [MemorySegment] MemorySegment bytesSegment(); + /// Returns the concatenated raw bytes segment directly — the primary data + /// buffer is already materialised, so no copy or allocation is needed. + /// Note this is the data buffer only; the per-row offsets are exposed + /// separately by [OffsetMode#offsetsSegment()]. + /// + /// @param arena unused; the existing bytes segment is returned as-is + /// @return the concatenated raw bytes [MemorySegment] + @Override + default MemorySegment materialize(SegmentAllocator arena) { + return bytesSegment(); + } + /// Returns a copy of the raw bytes for element `i`. 
/// /// @param i zero-based logical index (must be in `[0, length)`) diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VariantArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VariantArray.java index a5fa605e..854d5097 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VariantArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VariantArray.java @@ -1,6 +1,10 @@ package io.github.dfa1.vortex.reader.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; /// Decoded `vortex.variant` array: semi-structured data with a logical variant dtype. /// @@ -58,4 +62,16 @@ public Array limited(long rows) { return new VariantArray(dtype, rows, Array.limited(coreStorage, rows), shredded != null ? Array.limited(shredded, rows) : null); } + + /// Always throws: a variant array is core-storage plus optional shredded children, + /// not a single primary segment. Materialise [#coreStorage()] / [#shredded()] + /// separately. 
+ /// + /// @param arena unused + /// @return never returns + /// @throws VortexException always + @Override + public MemorySegment materialize(SegmentAllocator arena) { + throw new VortexException("VariantArray has no primary segment"); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DecodeContext.java b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DecodeContext.java index 06b7687e..5b38ee83 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DecodeContext.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DecodeContext.java @@ -2,7 +2,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.ReadRegistry; import java.lang.foreign.MemorySegment; @@ -89,7 +88,7 @@ public MemorySegment decodeChildSegment(int i, DType dtype, long rowCount) { /// @param arr the decoded array to materialise /// @return the array's primary [MemorySegment] public MemorySegment materialize(Array arr) { - return ArraySegments.of(arr, arena); + return arr.materialize(arena); } /// Returns the buffer at position `i` in this node's bufferIndices. 
diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java new file mode 100644 index 00000000..fe2564f6 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java @@ -0,0 +1,323 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.PTypeIO; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.math.BigDecimal; +import java.util.List; + +import static io.github.dfa1.vortex.encoding.DTypes.F64; +import static io.github.dfa1.vortex.encoding.DTypes.I64; +import static io.github.dfa1.vortex.reader.array.TestArrays.bools; +import static io.github.dfa1.vortex.reader.array.TestArrays.bytes; +import static io.github.dfa1.vortex.reader.array.TestArrays.longs; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Tests the [Array#materialize(java.lang.foreign.SegmentAllocator)] contract: +/// the zero-copy buffer return on segment-backed arrays, the scalar/bitmap-packing +/// fallbacks on the primitive interfaces, the inlined `Lazy*` decode formulas, the +/// composite concat/gather paths, and the explicit rejection on array families with +/// no primary segment. 
+class ArrayMaterializeTest { + + private final Arena arena = Arena.ofAuto(); + + @Nested + class ZeroCopy { + + @Test + void materializedLongReturnsBackingBufferWithoutCopy() { + // Given a buffer-backed long array + MaterializedLongArray sut = (MaterializedLongArray) longs(1L, 2L, 3L); + + // When + MemorySegment result = sut.materialize(arena); + + // Then the exact backing segment is handed back — no allocation, no copy + assertThat(result).isSameAs(sut.buffer()); + } + + @Test + void materializedBoolReturnsBackingBitmapWithoutCopy() { + // Given a buffer-backed bool array (already an LSB-first bitmap) + MaterializedBoolArray sut = (MaterializedBoolArray) bools(true, false, true); + + // When + MemorySegment result = sut.materialize(arena); + + // Then + assertThat(result).isSameAs(sut.buffer()); + } + } + + @Nested + class ScalarFallback { + + @Test + void longViewDecodesEveryElementThroughGetLong() { + // Given an OffsetLongArray view (uses the LongArray default, not a buffer return) + Array sut = longs(10L, 20L, 30L, 40L).limited(3); + + // When + MemorySegment result = sut.materialize(arena); + + // Then values come back little-endian in order + assertThat(result.byteSize()).isEqualTo(3 * 8L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(10L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(30L); + } + + @Test + void boolViewPacksLsbFirstBitmap() { + // Given a bool view — exercises the BoolArray packing default (lazy bool + // previously had no materialize path at all). Pattern picks bits in two + // different bytes to catch byte-index / shift mistakes. 
+ Array sut = bools(true, false, false, false, false, false, false, false, true).limited(9); + + // When + MemorySegment result = sut.materialize(arena); + + // Then 9 bits need 2 bytes; only positions 0 and 8 are set + assertThat(result.byteSize()).isEqualTo(2L); + assertThat(bit(result, 0)).isTrue(); + assertThat(bit(result, 1)).isFalse(); + assertThat(bit(result, 7)).isFalse(); + assertThat(bit(result, 8)).isTrue(); + } + } + + @Nested + class VectorizedLazy { + + @Test + void frameOfReferenceAddsReference() { + // Given encoded [1,2,3] with ref 100 + LazyForLongArray sut = new LazyForLongArray(I64, 3, encodedLongs(1L, 2L, 3L), 100L); + + // When + MemorySegment result = sut.materialize(arena); + + // Then each element is decoded + ref + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(101L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(103L); + } + + @Test + void zigzagDecodesSignedZigzagPattern() { + // Given zigzag-encoded [0,1,2,3] -> decoded [0,-1,1,-2] + LazyZigZagLongArray sut = new LazyZigZagLongArray(I64, 4, encodedLongs(0L, 1L, 2L, 3L)); + + // When + MemorySegment result = sut.materialize(arena); + + // Then + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(0L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 1)).isEqualTo(-1L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(1L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 3)).isEqualTo(-2L); + } + + @Test + void alpAppliesBothFactors() { + // Given encoded i64 [1,2,3] with unit factors -> doubles [1.0,2.0,3.0] + LazyAlpDoubleArray sut = new LazyAlpDoubleArray(F64, 3, encodedLongs(1L, 2L, 3L), 1.0, 1.0); + + // When + MemorySegment result = sut.materialize(arena); + + // Then + assertThat(result.getAtIndex(PTypeIO.LE_DOUBLE, 0)).isEqualTo(1.0); + assertThat(result.getAtIndex(PTypeIO.LE_DOUBLE, 2)).isEqualTo(3.0); + } + } + + @Nested + class Composite { + + @Test + void chunkedConcatenatesChildrenInOrder() { + // Given two chunks [0,1,2][3,4] + 
ChunkedLongArray sut = ChunkedLongArray.of(I64, 5, + List.of(longs(0L, 1L, 2L), longs(3L, 4L))); + + // When + MemorySegment result = sut.materialize(arena); + + // Then one contiguous segment spanning both chunks + assertThat(result.byteSize()).isEqualTo(5 * 8L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(0L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 3)).isEqualTo(3L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 4)).isEqualTo(4L); + } + + @Test + void dictGathersOneValuePerCode() { + // Given dictionary [10,20] with byte codes [0,1,0] + DictLongArray sut = DictLongArray.of(I64, 3, longs(10L, 20L), bytes((byte) 0, (byte) 1, (byte) 0)); + + // When + MemorySegment result = sut.materialize(arena); + + // Then each row resolves to values[code] + assertThat(result.byteSize()).isEqualTo(3 * 8L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(10L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 1)).isEqualTo(20L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(10L); + } + } + + @Nested + class Decimal { + + @Test + void constantDecimalFillsValueEveryRow() { + // Given the constant unscaled mantissa 12345 (scale 2) at 8-byte width over 3 rows + DType.Decimal dtype = new DType.Decimal((byte) 10, (byte) 2, false); + LazyConstantDecimalArray sut = + new LazyConstantDecimalArray(dtype, 3, new BigDecimal("123.45"), 8); + + // When + MemorySegment result = sut.materialize(arena); + + // Then every row holds the same little-endian mantissa + assertThat(result.byteSize()).isEqualTo(3 * 8L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(12345L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(12345L); + } + } + + @Nested + class Masked { + + @Test + void delegatesToInnerDataIgnoringMask() { + // Given a masked array whose inner payload is a plain long array + MaskedArray sut = new MaskedArray(longs(7L, 8L, 9L), null); + + // When + MemorySegment result = sut.materialize(arena); + + // Then 
the inner data segment is returned (validity is not surfaced here) + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(7L); + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(9L); + } + } + + @Nested + class Unsupported { + + @Test + void nullArrayThrows() { + // Given an all-null column (row count only, no data buffer) + NullArray sut = new NullArray(new DType.Null(true), 3); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void structArrayThrows() { + // Given a two-field struct + DType.Struct dtype = new DType.Struct(List.of("a", "b"), List.of(I64, I64), false); + StructArray sut = new StructArray(dtype, 2, List.of(longs(1L, 2L), longs(3L, 4L))); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void listArrayThrows() { + // Given a list array (offsets + flat elements child) + DType.List dtype = new DType.List(I64, false); + ListArray sut = new ListArray(dtype, 2, longs(1L, 2L, 3L), longs(0L, 2L, 3L)); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void listViewArrayThrows() { + // Given a list-view array (offsets + sizes + flat elements child) + DType.List dtype = new DType.List(I64, false); + ListViewArray sut = new ListViewArray(dtype, 2, longs(1L, 2L, 3L), longs(0L, 2L), longs(2L, 1L)); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void fixedSizeListArrayThrows() { + // Given a fixed-size list (wraps a flat elements child) + DType.FixedSizeList dtype = new DType.FixedSizeList(I64, 2, false); + FixedSizeListArray sut = new FixedSizeListArray(dtype, 2, 
longs(1L, 2L, 3L, 4L)); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void variantArrayThrows() { + // Given a variant array (core storage + optional shredded children) + VariantArray sut = new VariantArray(I64, 2, longs(1L, 2L), null); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void bytePartsDecimalThrows() { + // Given the byte-parts decimal layout (reassembled from a child column on demand) + DType.Decimal dtype = new DType.Decimal((byte) 10, (byte) 2, false); + LazyDecimalBytePartsArray sut = new LazyDecimalBytePartsArray(dtype, 2, longs(1L, 2L)); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no primary segment"); + } + + @Test + void unknownArrayThrows() { + // Given an undecoded foreign encoding + UnknownArray sut = new UnknownArray("vortex.mystery", I64, 3, null, + new MemorySegment[0], new Array[0]); + + // When / Then + assertThatThrownBy(() -> sut.materialize(arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("vortex.mystery"); + } + } + + private MemorySegment encodedLongs(long... 
vs) { + MemorySegment seg = arena.allocate(vs.length * 8L, 8); + for (int i = 0; i < vs.length; i++) { + seg.setAtIndex(PTypeIO.LE_LONG, i, vs[i]); + } + return seg; + } + + private static boolean bit(MemorySegment seg, long i) { + byte b = seg.get(ValueLayout.JAVA_BYTE, i >>> 3); + return ((b & 0xff) & (1 << (i & 7))) != 0; + } +} diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/DictEncodingTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/DictEncodingTest.java index 495eb420..bb30294c 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/DictEncodingTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/DictEncodingTest.java @@ -3,7 +3,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.writer.encode.DictEncodingEncoder; import io.github.dfa1.vortex.reader.ReadRegistry; import io.github.dfa1.vortex.reader.VortexReader; @@ -102,19 +101,19 @@ void roundTrip_multipleChunks(@TempDir Path tmp) throws IOException { try (Chunk c1 = iter.next()) { Array a1 = c1.columns().get("category"); assertThat(a1.length()).isEqualTo(3L); - assertThat(ArraySegments.of(a1, Arena.ofAuto()).get(LE_INT, 0)).isEqualTo(10); - assertThat(ArraySegments.of(a1, Arena.ofAuto()).get(LE_INT, 4)).isEqualTo(20); - assertThat(ArraySegments.of(a1, Arena.ofAuto()).get(LE_INT, 8)).isEqualTo(10); + assertThat(a1.materialize(Arena.ofAuto()).get(LE_INT, 0)).isEqualTo(10); + assertThat(a1.materialize(Arena.ofAuto()).get(LE_INT, 4)).isEqualTo(20); + assertThat(a1.materialize(Arena.ofAuto()).get(LE_INT, 8)).isEqualTo(10); } assertThat(iter.hasNext()).isTrue(); try (Chunk c2 = iter.next()) { Array a2 = c2.columns().get("category"); assertThat(a2.length()).isEqualTo(4L); - assertThat(ArraySegments.of(a2, Arena.ofAuto()).get(LE_INT, 0)).isEqualTo(30); - assertThat(ArraySegments.of(a2, 
Arena.ofAuto()).get(LE_INT, 4)).isEqualTo(10); - assertThat(ArraySegments.of(a2, Arena.ofAuto()).get(LE_INT, 8)).isEqualTo(20); - assertThat(ArraySegments.of(a2, Arena.ofAuto()).get(LE_INT, 12)).isEqualTo(30); + assertThat(a2.materialize(Arena.ofAuto()).get(LE_INT, 0)).isEqualTo(30); + assertThat(a2.materialize(Arena.ofAuto()).get(LE_INT, 4)).isEqualTo(10); + assertThat(a2.materialize(Arena.ofAuto()).get(LE_INT, 8)).isEqualTo(20); + assertThat(a2.materialize(Arena.ofAuto()).get(LE_INT, 12)).isEqualTo(30); } assertThat(iter.hasNext()).isFalse(); diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedConstantPatchesBroadcastTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedConstantPatchesBroadcastTest.java index bc1dfd2a..e1990c78 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedConstantPatchesBroadcastTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedConstantPatchesBroadcastTest.java @@ -3,7 +3,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.decode.ArrayNode; import io.github.dfa1.vortex.reader.decode.DecodeContext; import io.github.dfa1.vortex.encoding.EncodingId; @@ -73,7 +72,7 @@ void bitpackedDecode_withConstantPatchesValues_broadcastsValueAcrossPatches() { // Then assertThat(result.length()).isEqualTo(n); - MemorySegment data = ArraySegments.of(result, Arena.ofAuto()); + MemorySegment data = result.materialize(Arena.ofAuto()); assertThat(data.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(constantPatchValue); for (long i = 0; i < n; i++) { if (i == 2) { diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoderTest.java index 98c2a279..dafe10cd 
100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoderTest.java @@ -1,7 +1,6 @@ package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -56,7 +55,7 @@ void encodeDecode_u32_isLossless(String name, int[] data) { assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -69,7 +68,7 @@ void encodeDecode_u64_isLossless(String name, long[] data) { assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingPatchesTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingPatchesTest.java index dd57a9fe..510b320c 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingPatchesTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingPatchesTest.java @@ -1,7 +1,6 @@ package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.decode.ArrayNode; import 
io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -74,11 +73,11 @@ void decode_appliesPatches_overridingBitPackedValues() { // Then assertThat(result.length()).isEqualTo(base.length); - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, 0L)).isEqualTo(10); - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, 4L)).isEqualTo(777); - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, 8L)).isEqualTo(30); - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, 12L)).isEqualTo(999); - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, 16L)).isEqualTo(50); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 0L)).isEqualTo(10); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 4L)).isEqualTo(777); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 8L)).isEqualTo(30); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 12L)).isEqualTo(999); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 16L)).isEqualTo(50); } } @@ -119,7 +118,7 @@ void encode_thenDecode_roundTripsWithPatches() { assertThat(decoded.length()).isEqualTo(codes.length); for (int i = 0; i < codes.length; i++) { - byte got = ArraySegments.of(decoded, Arena.ofAuto()).get(java.lang.foreign.ValueLayout.JAVA_BYTE, i); + byte got = decoded.materialize(Arena.ofAuto()).get(java.lang.foreign.ValueLayout.JAVA_BYTE, i); assertThat(got).as("idx " + i).isEqualTo(codes[i]); } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BoolEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BoolEncodingEncoderTest.java index 40261b1a..fa0118b9 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BoolEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/BoolEncodingEncoderTest.java @@ -1,7 +1,6 @@ 
package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.BoolArray; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -50,7 +49,7 @@ void encodeDecode_isLossless(boolean[] data) { assertThat(result).isInstanceOf(BoolArray.class); assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - byte byteVal = ArraySegments.of(result, Arena.ofAuto()).get(ValueLayout.JAVA_BYTE, i / 8); + byte byteVal = result.materialize(Arena.ofAuto()).get(ValueLayout.JAVA_BYTE, i / 8); boolean decoded = ((byteVal >>> (i % 8)) & 1) == 1; assertThat(decoded).as("index %d", i).isEqualTo(data[i]); } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DecimalEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DecimalEncodingEncoderTest.java index 436ee800..7f66ba7a 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DecimalEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DecimalEncodingEncoderTest.java @@ -4,7 +4,6 @@ import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.decode.DecodeContext; import io.github.dfa1.vortex.encoding.PTypeIO; @@ -44,7 +43,7 @@ void roundTrip_i64Precision_preservesBuffer() { // Then assertThat(result.length()).isEqualTo(values.length); for (int i = 0; i < values.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(values[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(values[i]); } } diff --git 
a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java index 0f0f0243..604edec4 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoderTest.java @@ -1,7 +1,6 @@ package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -70,7 +69,7 @@ void encodeDecode_i64_isLossless(long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } @@ -87,7 +86,7 @@ void encodeDecode_i32_isLossless(int[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -104,7 +103,7 @@ void encodeDecode_monotoneI64_isLossless(String name, long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } diff --git 
a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DictEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DictEncodingEncoderTest.java index 9072ff81..d358a2ce 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DictEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/DictEncodingEncoderTest.java @@ -1,7 +1,6 @@ package io.github.dfa1.vortex.writer.encode; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.VarBinArray; import io.github.dfa1.vortex.encoding.DTypes; import io.github.dfa1.vortex.reader.decode.DecodeContext; @@ -97,7 +96,7 @@ void encodeDecode_i32_isLossless(int[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -114,7 +113,7 @@ void encodeDecode_i64_isLossless(long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/MaskedEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/MaskedEncodingEncoderTest.java index 5c513066..fef16f35 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/MaskedEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/MaskedEncodingEncoderTest.java @@ -4,7 +4,6 @@ import 
io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.IntArray; import io.github.dfa1.vortex.reader.array.MaskedArray; @@ -117,9 +116,9 @@ void inner_containsChildValues() { IntArray inner = (IntArray) result.inner(); // Then - assertThat(ArraySegments.of(inner, Arena.ofAuto()).get(PTypeIO.LE_INT, 0L)).isEqualTo(7); - assertThat(ArraySegments.of(inner, Arena.ofAuto()).get(PTypeIO.LE_INT, 4L)).isEqualTo(8); - assertThat(ArraySegments.of(inner, Arena.ofAuto()).get(PTypeIO.LE_INT, 8L)).isEqualTo(9); + assertThat(inner.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 0L)).isEqualTo(7); + assertThat(inner.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 4L)).isEqualTo(8); + assertThat(inner.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, 8L)).isEqualTo(9); } @Test diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PcoEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PcoEncodingEncoderTest.java index 274041aa..4d46f9ae 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PcoEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PcoEncodingEncoderTest.java @@ -5,7 +5,6 @@ import io.github.dfa1.vortex.encoding.PTypeIO; import io.github.dfa1.vortex.reader.ReadRegistry; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.decode.DecodeContext; import io.github.dfa1.vortex.reader.decode.PcoEncodingDecoder; import io.github.dfa1.vortex.reader.decode.TestRegistry; @@ -133,7 +132,7 @@ void encodeDecode_i64_isLossless(String name, long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, 
Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } @@ -150,7 +149,7 @@ void encodeDecode_u64_isLossless(String name, long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } @@ -167,7 +166,7 @@ void encodeDecode_i32_isLossless(String name, int[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -184,7 +183,7 @@ void encodeDecode_u32_isLossless(String name, int[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -201,7 +200,7 @@ void encodeDecode_i16_isLossless(String name, short[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_SHORT, (long) i * 2)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_SHORT, (long) i * 2)).as("index %d", i).isEqualTo(data[i]); } } @@ -218,7 +217,7 @@ void 
encodeDecode_u16_isLossless(String name, short[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_SHORT, (long) i * 2)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_SHORT, (long) i * 2)).as("index %d", i).isEqualTo(data[i]); } } @@ -235,7 +234,7 @@ void encodeDecode_f32_isLossless(String name, float[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_FLOAT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_FLOAT, (long) i * 4)).as("index %d", i).isEqualTo(data[i]); } } @@ -252,7 +251,7 @@ void encodeDecode_f64_isLossless(String name, double[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_DOUBLE, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_DOUBLE, (long) i * 8)).as("index %d", i).isEqualTo(data[i]); } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PrimitiveEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PrimitiveEncodingEncoderTest.java index 81aad4c5..92f145b2 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PrimitiveEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/PrimitiveEncodingEncoderTest.java @@ -3,7 +3,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.IntArray; import 
io.github.dfa1.vortex.reader.array.MaskedArray; import io.github.dfa1.vortex.reader.decode.ArrayNode; @@ -80,7 +79,7 @@ void encodeDecode_i64_isLossless(long[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(data[i]); } } @@ -98,7 +97,7 @@ void encodeDecode_i32_isLossless(int[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_INT, (long) i * 4)).isEqualTo(data[i]); } } @@ -116,7 +115,7 @@ void encodeDecode_f64_isLossless(double[] data) { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_DOUBLE, (long) i * 8)).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_DOUBLE, (long) i * 8)).isEqualTo(data[i]); } } diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/StructEncodingEncoderTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/StructEncodingEncoderTest.java index 9bfb341e..69c00f33 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/encode/StructEncodingEncoderTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/encode/StructEncodingEncoderTest.java @@ -2,7 +2,6 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.LongArray; import io.github.dfa1.vortex.reader.array.MaskedArray; import io.github.dfa1.vortex.reader.array.StructArray; @@ 
-142,7 +141,7 @@ void decode_nonNullableWrapper_oneChild_returnsValues() { // Then assertThat(result.length()).isEqualTo(data.length); for (int i = 0; i < data.length; i++) { - assertThat(ArraySegments.of(result, Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(data[i]); + assertThat(result.materialize(Arena.ofAuto()).get(PTypeIO.LE_LONG, (long) i * 8)).isEqualTo(data[i]); } }