|
| 1 | +package io.github.dfa1.vortex.reader.array; |
| 2 | + |
| 3 | +import io.github.dfa1.vortex.core.DType; |
| 4 | +import io.github.dfa1.vortex.core.VortexException; |
| 5 | +import io.github.dfa1.vortex.encoding.PTypeIO; |
| 6 | +import org.junit.jupiter.api.Nested; |
| 7 | +import org.junit.jupiter.api.Test; |
| 8 | + |
| 9 | +import java.lang.foreign.Arena; |
| 10 | +import java.lang.foreign.MemorySegment; |
| 11 | +import java.lang.foreign.ValueLayout; |
| 12 | +import java.math.BigDecimal; |
| 13 | +import java.util.List; |
| 14 | + |
| 15 | +import static io.github.dfa1.vortex.encoding.DTypes.F64; |
| 16 | +import static io.github.dfa1.vortex.encoding.DTypes.I64; |
| 17 | +import static io.github.dfa1.vortex.reader.array.TestArrays.bools; |
| 18 | +import static io.github.dfa1.vortex.reader.array.TestArrays.bytes; |
| 19 | +import static io.github.dfa1.vortex.reader.array.TestArrays.longs; |
| 20 | +import static org.assertj.core.api.Assertions.assertThat; |
| 21 | +import static org.assertj.core.api.Assertions.assertThatThrownBy; |
| 22 | + |
| 23 | +/// Tests the [Array#materialize(java.lang.foreign.SegmentAllocator)] contract: |
| 24 | +/// the zero-copy buffer return on segment-backed arrays, the scalar/bitmap-packing |
| 25 | +/// fallbacks on the primitive interfaces, the inlined `Lazy*` decode formulas, the |
| 26 | +/// composite concat/gather paths, and the explicit rejection on array families with |
| 27 | +/// no primary segment. |
| 28 | +class ArrayMaterializeTest { |
| 29 | + |
| 30 | + private final Arena arena = Arena.ofAuto(); |
| 31 | + |
| 32 | + @Nested |
| 33 | + class ZeroCopy { |
| 34 | + |
| 35 | + @Test |
| 36 | + void materializedLongReturnsBackingBufferWithoutCopy() { |
| 37 | + // Given a buffer-backed long array |
| 38 | + MaterializedLongArray sut = (MaterializedLongArray) longs(1L, 2L, 3L); |
| 39 | + |
| 40 | + // When |
| 41 | + MemorySegment result = sut.materialize(arena); |
| 42 | + |
| 43 | + // Then the exact backing segment is handed back — no allocation, no copy |
| 44 | + assertThat(result).isSameAs(sut.buffer()); |
| 45 | + } |
| 46 | + |
| 47 | + @Test |
| 48 | + void materializedBoolReturnsBackingBitmapWithoutCopy() { |
| 49 | + // Given a buffer-backed bool array (already an LSB-first bitmap) |
| 50 | + MaterializedBoolArray sut = (MaterializedBoolArray) bools(true, false, true); |
| 51 | + |
| 52 | + // When |
| 53 | + MemorySegment result = sut.materialize(arena); |
| 54 | + |
| 55 | + // Then |
| 56 | + assertThat(result).isSameAs(sut.buffer()); |
| 57 | + } |
| 58 | + } |
| 59 | + |
| 60 | + @Nested |
| 61 | + class ScalarFallback { |
| 62 | + |
| 63 | + @Test |
| 64 | + void longViewDecodesEveryElementThroughGetLong() { |
| 65 | + // Given an OffsetLongArray view (uses the LongArray default, not a buffer return) |
| 66 | + Array sut = longs(10L, 20L, 30L, 40L).limited(3); |
| 67 | + |
| 68 | + // When |
| 69 | + MemorySegment result = sut.materialize(arena); |
| 70 | + |
| 71 | + // Then values come back little-endian in order |
| 72 | + assertThat(result.byteSize()).isEqualTo(3 * 8L); |
| 73 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(10L); |
| 74 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(30L); |
| 75 | + } |
| 76 | + |
| 77 | + @Test |
| 78 | + void boolViewPacksLsbFirstBitmap() { |
| 79 | + // Given a bool view — exercises the BoolArray packing default (lazy bool |
| 80 | + // previously had no materialize path at all). Pattern picks bits in two |
| 81 | + // different bytes to catch byte-index / shift mistakes. |
| 82 | + Array sut = bools(true, false, false, false, false, false, false, false, true).limited(9); |
| 83 | + |
| 84 | + // When |
| 85 | + MemorySegment result = sut.materialize(arena); |
| 86 | + |
| 87 | + // Then 9 bits need 2 bytes; only positions 0 and 8 are set |
| 88 | + assertThat(result.byteSize()).isEqualTo(2L); |
| 89 | + assertThat(bit(result, 0)).isTrue(); |
| 90 | + assertThat(bit(result, 1)).isFalse(); |
| 91 | + assertThat(bit(result, 7)).isFalse(); |
| 92 | + assertThat(bit(result, 8)).isTrue(); |
| 93 | + } |
| 94 | + } |
| 95 | + |
| 96 | + @Nested |
| 97 | + class VectorizedLazy { |
| 98 | + |
| 99 | + @Test |
| 100 | + void frameOfReferenceAddsReference() { |
| 101 | + // Given encoded [1,2,3] with ref 100 |
| 102 | + LazyForLongArray sut = new LazyForLongArray(I64, 3, encodedLongs(1L, 2L, 3L), 100L); |
| 103 | + |
| 104 | + // When |
| 105 | + MemorySegment result = sut.materialize(arena); |
| 106 | + |
| 107 | + // Then each element is decoded + ref |
| 108 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(101L); |
| 109 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(103L); |
| 110 | + } |
| 111 | + |
| 112 | + @Test |
| 113 | + void zigzagDecodesSignedZigzagPattern() { |
| 114 | + // Given zigzag-encoded [0,1,2,3] -> decoded [0,-1,1,-2] |
| 115 | + LazyZigZagLongArray sut = new LazyZigZagLongArray(I64, 4, encodedLongs(0L, 1L, 2L, 3L)); |
| 116 | + |
| 117 | + // When |
| 118 | + MemorySegment result = sut.materialize(arena); |
| 119 | + |
| 120 | + // Then |
| 121 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(0L); |
| 122 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 1)).isEqualTo(-1L); |
| 123 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(1L); |
| 124 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 3)).isEqualTo(-2L); |
| 125 | + } |
| 126 | + |
| 127 | + @Test |
| 128 | + void alpAppliesBothFactors() { |
| 129 | + // Given encoded i64 [1,2,3] with unit factors -> doubles [1.0,2.0,3.0] |
| 130 | + LazyAlpDoubleArray sut = new LazyAlpDoubleArray(F64, 3, encodedLongs(1L, 2L, 3L), 1.0, 1.0); |
| 131 | + |
| 132 | + // When |
| 133 | + MemorySegment result = sut.materialize(arena); |
| 134 | + |
| 135 | + // Then |
| 136 | + assertThat(result.getAtIndex(PTypeIO.LE_DOUBLE, 0)).isEqualTo(1.0); |
| 137 | + assertThat(result.getAtIndex(PTypeIO.LE_DOUBLE, 2)).isEqualTo(3.0); |
| 138 | + } |
| 139 | + } |
| 140 | + |
| 141 | + @Nested |
| 142 | + class Composite { |
| 143 | + |
| 144 | + @Test |
| 145 | + void chunkedConcatenatesChildrenInOrder() { |
| 146 | + // Given two chunks [0,1,2][3,4] |
| 147 | + ChunkedLongArray sut = ChunkedLongArray.of(I64, 5, |
| 148 | + List.of(longs(0L, 1L, 2L), longs(3L, 4L))); |
| 149 | + |
| 150 | + // When |
| 151 | + MemorySegment result = sut.materialize(arena); |
| 152 | + |
| 153 | + // Then one contiguous segment spanning both chunks |
| 154 | + assertThat(result.byteSize()).isEqualTo(5 * 8L); |
| 155 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(0L); |
| 156 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 3)).isEqualTo(3L); |
| 157 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 4)).isEqualTo(4L); |
| 158 | + } |
| 159 | + |
| 160 | + @Test |
| 161 | + void dictGathersOneValuePerCode() { |
| 162 | + // Given dictionary [10,20] with byte codes [0,1,0] |
| 163 | + DictLongArray sut = DictLongArray.of(I64, 3, longs(10L, 20L), bytes((byte) 0, (byte) 1, (byte) 0)); |
| 164 | + |
| 165 | + // When |
| 166 | + MemorySegment result = sut.materialize(arena); |
| 167 | + |
| 168 | + // Then each row resolves to values[code] |
| 169 | + assertThat(result.byteSize()).isEqualTo(3 * 8L); |
| 170 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(10L); |
| 171 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 1)).isEqualTo(20L); |
| 172 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(10L); |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + @Nested |
| 177 | + class Decimal { |
| 178 | + |
| 179 | + @Test |
| 180 | + void constantDecimalFillsValueEveryRow() { |
| 181 | + // Given the constant unscaled mantissa 12345 (scale 2) at 8-byte width over 3 rows |
| 182 | + DType.Decimal dtype = new DType.Decimal((byte) 10, (byte) 2, false); |
| 183 | + LazyConstantDecimalArray sut = |
| 184 | + new LazyConstantDecimalArray(dtype, 3, new BigDecimal("123.45"), 8); |
| 185 | + |
| 186 | + // When |
| 187 | + MemorySegment result = sut.materialize(arena); |
| 188 | + |
| 189 | + // Then every row holds the same little-endian mantissa |
| 190 | + assertThat(result.byteSize()).isEqualTo(3 * 8L); |
| 191 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(12345L); |
| 192 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(12345L); |
| 193 | + } |
| 194 | + } |
| 195 | + |
| 196 | + @Nested |
| 197 | + class Masked { |
| 198 | + |
| 199 | + @Test |
| 200 | + void delegatesToInnerDataIgnoringMask() { |
| 201 | + // Given a masked array whose inner payload is a plain long array |
| 202 | + MaskedArray sut = new MaskedArray(longs(7L, 8L, 9L), null); |
| 203 | + |
| 204 | + // When |
| 205 | + MemorySegment result = sut.materialize(arena); |
| 206 | + |
| 207 | + // Then the inner data segment is returned (validity is not surfaced here) |
| 208 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(7L); |
| 209 | + assertThat(result.getAtIndex(PTypeIO.LE_LONG, 2)).isEqualTo(9L); |
| 210 | + } |
| 211 | + } |
| 212 | + |
| 213 | + @Nested |
| 214 | + class Unsupported { |
| 215 | + |
| 216 | + @Test |
| 217 | + void nullArrayThrows() { |
| 218 | + // Given an all-null column (row count only, no data buffer) |
| 219 | + NullArray sut = new NullArray(new DType.Null(true), 3); |
| 220 | + |
| 221 | + // When / Then |
| 222 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 223 | + .isInstanceOf(VortexException.class) |
| 224 | + .hasMessageContaining("no primary segment"); |
| 225 | + } |
| 226 | + |
| 227 | + @Test |
| 228 | + void structArrayThrows() { |
| 229 | + // Given a two-field struct |
| 230 | + DType.Struct dtype = new DType.Struct(List.of("a", "b"), List.of(I64, I64), false); |
| 231 | + StructArray sut = new StructArray(dtype, 2, List.of(longs(1L, 2L), longs(3L, 4L))); |
| 232 | + |
| 233 | + // When / Then |
| 234 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 235 | + .isInstanceOf(VortexException.class) |
| 236 | + .hasMessageContaining("no primary segment"); |
| 237 | + } |
| 238 | + |
| 239 | + @Test |
| 240 | + void listArrayThrows() { |
| 241 | + // Given a list array (offsets + flat elements child) |
| 242 | + DType.List dtype = new DType.List(I64, false); |
| 243 | + ListArray sut = new ListArray(dtype, 2, longs(1L, 2L, 3L), longs(0L, 2L, 3L)); |
| 244 | + |
| 245 | + // When / Then |
| 246 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 247 | + .isInstanceOf(VortexException.class) |
| 248 | + .hasMessageContaining("no primary segment"); |
| 249 | + } |
| 250 | + |
| 251 | + @Test |
| 252 | + void listViewArrayThrows() { |
| 253 | + // Given a list-view array (offsets + sizes + flat elements child) |
| 254 | + DType.List dtype = new DType.List(I64, false); |
| 255 | + ListViewArray sut = new ListViewArray(dtype, 2, longs(1L, 2L, 3L), longs(0L, 2L), longs(2L, 1L)); |
| 256 | + |
| 257 | + // When / Then |
| 258 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 259 | + .isInstanceOf(VortexException.class) |
| 260 | + .hasMessageContaining("no primary segment"); |
| 261 | + } |
| 262 | + |
| 263 | + @Test |
| 264 | + void fixedSizeListArrayThrows() { |
| 265 | + // Given a fixed-size list (wraps a flat elements child) |
| 266 | + DType.FixedSizeList dtype = new DType.FixedSizeList(I64, 2, false); |
| 267 | + FixedSizeListArray sut = new FixedSizeListArray(dtype, 2, longs(1L, 2L, 3L, 4L)); |
| 268 | + |
| 269 | + // When / Then |
| 270 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 271 | + .isInstanceOf(VortexException.class) |
| 272 | + .hasMessageContaining("no primary segment"); |
| 273 | + } |
| 274 | + |
| 275 | + @Test |
| 276 | + void variantArrayThrows() { |
| 277 | + // Given a variant array (core storage + optional shredded children) |
| 278 | + VariantArray sut = new VariantArray(I64, 2, longs(1L, 2L), null); |
| 279 | + |
| 280 | + // When / Then |
| 281 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 282 | + .isInstanceOf(VortexException.class) |
| 283 | + .hasMessageContaining("no primary segment"); |
| 284 | + } |
| 285 | + |
| 286 | + @Test |
| 287 | + void bytePartsDecimalThrows() { |
| 288 | + // Given the byte-parts decimal layout (reassembled from a child column on demand) |
| 289 | + DType.Decimal dtype = new DType.Decimal((byte) 10, (byte) 2, false); |
| 290 | + LazyDecimalBytePartsArray sut = new LazyDecimalBytePartsArray(dtype, 2, longs(1L, 2L)); |
| 291 | + |
| 292 | + // When / Then |
| 293 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 294 | + .isInstanceOf(VortexException.class) |
| 295 | + .hasMessageContaining("no primary segment"); |
| 296 | + } |
| 297 | + |
| 298 | + @Test |
| 299 | + void unknownArrayThrows() { |
| 300 | + // Given an undecoded foreign encoding |
| 301 | + UnknownArray sut = new UnknownArray("vortex.mystery", I64, 3, null, |
| 302 | + new MemorySegment[0], new Array[0]); |
| 303 | + |
| 304 | + // When / Then |
| 305 | + assertThatThrownBy(() -> sut.materialize(arena)) |
| 306 | + .isInstanceOf(VortexException.class) |
| 307 | + .hasMessageContaining("vortex.mystery"); |
| 308 | + } |
| 309 | + } |
| 310 | + |
| 311 | + private MemorySegment encodedLongs(long... vs) { |
| 312 | + MemorySegment seg = arena.allocate(vs.length * 8L, 8); |
| 313 | + for (int i = 0; i < vs.length; i++) { |
| 314 | + seg.setAtIndex(PTypeIO.LE_LONG, i, vs[i]); |
| 315 | + } |
| 316 | + return seg; |
| 317 | + } |
| 318 | + |
| 319 | + private static boolean bit(MemorySegment seg, long i) { |
| 320 | + byte b = seg.get(ValueLayout.JAVA_BYTE, i >>> 3); |
| 321 | + return ((b & 0xff) & (1 << (i & 7))) != 0; |
| 322 | + } |
| 323 | +} |
0 commit comments