Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.array.GenericArray;
import io.github.dfa1.vortex.reader.array.IntArray;
import io.github.dfa1.vortex.reader.array.LazyDecimalBytePartsArray;
import io.github.dfa1.vortex.reader.array.LongArray;
import io.github.dfa1.vortex.reader.array.ShortArray;
import io.github.dfa1.vortex.reader.array.VarBinArray;
Expand Down Expand Up @@ -867,14 +868,16 @@ private static String formatValue(Array array, int i, DType declared) {
? "\"" + a.getString(i) + "\""
: bytesToShortHex(a.getBytes(i));
case GenericArray a when a.dtype() instanceof DType.Decimal ->
tryDecimal(a, i);
tryDecimal(a::getDecimal, a, i);
case LazyDecimalBytePartsArray a -> tryDecimal(a::getDecimal, a, i);
default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">";
};
}

private static String tryDecimal(GenericArray a, int i) {
private static String tryDecimal(java.util.function.LongFunction<java.math.BigDecimal> reader,
Array a, int i) {
try {
return a.getDecimal(i).toPlainString();
return reader.apply(i).toPlainString();
} catch (RuntimeException e) {
String msg = e.getMessage();
if (msg != null && msg.contains("null cell")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.array.GenericArray;
import io.github.dfa1.vortex.reader.array.IntArray;
import io.github.dfa1.vortex.reader.array.LazyDecimalBytePartsArray;
import io.github.dfa1.vortex.reader.array.LongArray;
import io.github.dfa1.vortex.reader.array.MaskedArray;
import io.github.dfa1.vortex.reader.array.MaterializedBoolArray;
Expand Down Expand Up @@ -279,6 +280,8 @@ private static Array truncateArray(Array arr, long rows, SegmentAllocator arena)
}
case EmptyArray a -> a;
case GenericArray a -> a.withLength(rows);
case LazyDecimalBytePartsArray a ->
new LazyDecimalBytePartsArray(a.dtype(), rows, truncateArray(a.msp(), rows, arena));
default ->
throw new VortexException("limit: truncation not supported for " + arr.getClass().getSimpleName());
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
/// is tied to the `VortexFile`'s Arena.
public sealed interface Array
permits BoolArray, ByteArray, DoubleArray, EmptyArray, FixedSizeListArray, Float16Array,
FloatArray, GenericArray, IntArray, ListArray, ListViewArray, LongArray,
MaskedArray, NullArray, ShortArray, StructArray, UnknownArray, VarBinArray,
VariantArray {
FloatArray, GenericArray, IntArray, LazyDecimalBytePartsArray, ListArray,
ListViewArray, LongArray, MaskedArray, NullArray, ShortArray, StructArray,
UnknownArray, VarBinArray, VariantArray {

/// Returns the number of elements in this array.
///
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package io.github.dfa1.vortex.reader.array;

import io.github.dfa1.vortex.core.VortexException;

import java.math.BigInteger;

/// Static, package-private support code for {@link LazyDecimalBytePartsArray}.
///
/// When `vortex.decimal_byte_parts` is written with `lower_part_count == 0`,
/// the whole decimal mantissa lives in one signed-integer child column; the
/// encoder chooses its ptype from `i8 / i16 / i32 / i64`. Nullable columns may
/// wrap that child in a {@link MaskedArray}. Keeping the per-row type dispatch
/// in {@link #readMantissa(Array, long)} lets the record stay small.
final class DecimalBytePartsArrays {

    /// Not instantiable: this class only hosts static helpers.
    private DecimalBytePartsArrays() {
    }

    /// Reads the cell at row `i` of `arr` and widens it to a {@link BigInteger}.
    ///
    /// Any {@link MaskedArray} layers are peeled off one at a time; each layer's
    /// validity is checked before descending, so a null cell is reported as an
    /// error instead of being decoded from whatever the inner array holds.
    ///
    /// @param arr mantissa column: a Byte/Short/Int/Long array, possibly wrapped in {@link MaskedArray}
    /// @param i   row index
    /// @return the signed integer stored at row `i`, as a {@link BigInteger}
    /// @throws VortexException if the cell is null or the array type is not supported
    static BigInteger readMantissa(Array arr, long i) {
        Array current = arr;
        while (true) {
            if (current instanceof ByteArray bytes) {
                return BigInteger.valueOf(bytes.getByte(i));
            }
            if (current instanceof ShortArray shorts) {
                return BigInteger.valueOf(shorts.getShort(i));
            }
            if (current instanceof IntArray ints) {
                return BigInteger.valueOf(ints.getInt(i));
            }
            if (current instanceof LongArray longs) {
                return BigInteger.valueOf(longs.getLong(i));
            }
            if (current instanceof MaskedArray masked) {
                if (!masked.isValid(i)) {
                    throw new VortexException("DecimalByteParts: null cell at index " + i);
                }
                // Valid cell: continue the dispatch on the wrapped array.
                current = masked.inner();
                continue;
            }
            throw new VortexException(
                    "DecimalByteParts: unsupported mantissa child type: " + current.getClass().getSimpleName());
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,25 +76,21 @@ MemorySegment buffer(int i) {
return buffers[i];
}

/// Decodes the decimal value at row `i`.
/// Decodes the decimal value at row `i` from a single-buffer layout.
///
/// Handles the two shapes produced by Vortex decimal decoders:
/// The buffer holds one little-endian two's-complement integer per row. Element
/// width is derived from the buffer's byte size divided by {@link #length()},
/// not from the dtype's precision — `vortex.decimal` writes whatever width
/// the encoder chose in its `valuesType` metadata, which can be narrower
/// than the precision alone would allow.
///
/// - **single-buffer**: one raw buffer of little-endian two's-complement
/// integers (one element per row). Element width is derived from the
/// buffer's byte size divided by {@link #length()}, not from the
/// dtype's precision — `vortex.decimal` writes whatever width
/// the encoder chose in its `valuesType` metadata, which can be
/// narrower than the precision alone would allow.
/// - **child-array**: zero buffers, one child holding the most-significant
/// integer part as a {@link LongArray}, {@link IntArray}, {@link ShortArray},
/// or {@link ByteArray}. Produced by `vortex.decimal_byte_parts`
/// when `lower_part_count == 0`.
/// The child-array shape produced by `vortex.decimal_byte_parts` is now
/// handled by {@link LazyDecimalBytePartsArray} directly.
///
/// @param i row index, `0 <= i < length()`
/// @return decoded value as a {@link BigDecimal} with the dtype's scale
/// @throws VortexException if the dtype isn't decimal or the array
/// shape doesn't match either supported layout
/// @throws VortexException if the dtype isn't decimal or the array
/// shape isn't the single-buffer layout
/// @throws IndexOutOfBoundsException if `i` is outside `[0, length())`
public BigDecimal getDecimal(long i) {
if (i < 0 || i >= length) {
Expand All @@ -103,15 +99,11 @@ public BigDecimal getDecimal(long i) {
if (!(dtype instanceof DType.Decimal d)) {
throw new VortexException("getDecimal called on non-decimal dtype: " + dtype);
}
BigInteger mantissa;
if (buffers.length == 1 && children.length == 0) {
mantissa = readSingleBufferMantissa(buffers[0], length, i);
} else if (buffers.length == 0 && children.length == 1) {
mantissa = mantissaFromChild(children[0], i);
} else {
if (buffers.length != 1 || children.length != 0) {
throw new VortexException("getDecimal: unsupported decimal shape buffers="
+ buffers.length + " children=" + children.length);
}
BigInteger mantissa = readSingleBufferMantissa(buffers[0], length, i);
return new BigDecimal(mantissa, d.scale());
}

Expand All @@ -128,24 +120,6 @@ private static BigInteger readSingleBufferMantissa(MemorySegment buf, long lengt
return readSignedLe(buf, i * width, width);
}

private static BigInteger mantissaFromChild(Array child, long i) {
return switch (child) {
case LongArray a -> BigInteger.valueOf(a.getLong(i));
case IntArray a -> BigInteger.valueOf(a.getInt(i));
case ShortArray a -> BigInteger.valueOf(a.getShort(i));
case ByteArray a -> BigInteger.valueOf(a.getByte(i));
case MaskedArray a -> {
if (!a.isValid(i)) {
throw new VortexException("getDecimal: null cell at index " + i);
}
yield mantissaFromChild(a.inner(), i);
}
default ->
throw new VortexException("getDecimal: unsupported mantissa child type "
+ child.getClass().getSimpleName());
};
}

private static final ValueLayout.OfShort SHORT_LE =
ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
private static final ValueLayout.OfInt INT_LE =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package io.github.dfa1.vortex.reader.array;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.core.VortexException;

import java.math.BigDecimal;

/// On-demand view over a `vortex.decimal_byte_parts` column.
///
/// This codebase only emits and accepts the `lower_part_count == 0` shape, in
/// which the mantissa is a single signed-integer child column and the parent
/// {@link DType.Decimal} supplies precision and scale. The record merely holds
/// its three components; each {@link #getDecimal(long)} call fetches one
/// mantissa cell through {@link DecimalBytePartsArrays#readMantissa(Array, long)}
/// and attaches the dtype's scale, so no values are decoded up front.
///
/// @param dtype  parent dtype, expected to be a {@link DType.Decimal} (precision, scale, nullability)
/// @param length logical number of rows
/// @param msp    child array carrying the most-significant integer part of the mantissa
public record LazyDecimalBytePartsArray(DType dtype, long length, Array msp) implements Array {

    /// Decodes row `i` into a {@link BigDecimal} scaled by the parent dtype.
    ///
    /// @param i row index, `0 <= i < length()`
    /// @return the decimal value at row `i`
    /// @throws IndexOutOfBoundsException if `i` is outside `[0, length())`
    /// @throws VortexException if the dtype is not a {@link DType.Decimal} or the
    ///                         mantissa cell is null
    public BigDecimal getDecimal(long i) {
        boolean inRange = i >= 0 && i < length;
        if (!inRange) {
            throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length);
        }
        if (dtype instanceof DType.Decimal decimal) {
            return new BigDecimal(DecimalBytePartsArrays.readMantissa(msp, i), decimal.scale());
        }
        throw new VortexException("LazyDecimalBytePartsArray: non-decimal dtype " + dtype);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.core.VortexException;
import io.github.dfa1.vortex.reader.array.Array;
import io.github.dfa1.vortex.reader.array.GenericArray;
import io.github.dfa1.vortex.reader.array.LazyDecimalBytePartsArray;
import io.github.dfa1.vortex.encoding.EncodingId;
import io.github.dfa1.vortex.proto.DecimalBytePartsMetadata;

Expand Down Expand Up @@ -57,7 +57,6 @@ public Array decode(DecodeContext ctx) {
mspNode, mspDtype, ctx.rowCount(),
ctx.segmentBuffers(), ctx.registry(), ctx.arena());
Array mspArray = ctx.registry().decode(mspCtx);
return new GenericArray(ctx.dtype(), ctx.rowCount(), new MemorySegment[0],
new Array[]{mspArray});
return new LazyDecimalBytePartsArray(ctx.dtype(), ctx.rowCount(), mspArray);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -122,27 +122,6 @@ void getDecimal_smallPrecisionUsesNarrowerBuffer() {
}
}

@Test
void getDecimal_childArrayShape_decodesViaMostSignificantPart() {
// Given — the shape vortex.decimal_byte_parts decoders produce when
// lower_part_count == 0: zero buffers, one LongArray child carrying
// the i64 mantissa.
try (Arena arena = Arena.ofConfined()) {
MemorySegment mspBuf = arena.allocate(24);
mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 4321L);
mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -100L);
mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L);
LongArray msp = new MaterializedLongArray(new DType.Primitive(PType.I64, false), 3, mspBuf);
DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false);
GenericArray sut = new GenericArray(dec, 3, new MemorySegment[0], new Array[]{msp});

// When / Then
assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("43.21"));
assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-1.00"));
assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO);
}
}

@Test
void getDecimal_i128Buffer_decodesWideMantissa() {
// Given — decimal(38,4) stores mantissas wider than i64; vortex.decimal
Expand Down Expand Up @@ -242,35 +221,6 @@ void getDecimal_indexOutOfBounds_throws() {
}
}

@Test
void getDecimal_nullCellInMaskedChild_throws() {
// Given — mantissa-child shape with a MaskedArray wrapping a LongArray;
// the validity bitmap says index 1 is null. Without the validity check
// the previous code would happily decode whatever bytes sat at that
// slot and return a garbage BigDecimal.
try (Arena arena = Arena.ofConfined()) {
MemorySegment mspBuf = arena.allocate(16);
mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L);
mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, 9999L);
LongArray msp = new MaterializedLongArray(new DType.Primitive(PType.I64, false), 2, mspBuf);

MemorySegment validityBuf = arena.allocate(1);
// bit 0 set = index 0 valid; bit 1 clear = index 1 null
validityBuf.set(ValueLayout.JAVA_BYTE, 0, (byte) 0b0000_0001);
BoolArray validity = new MaterializedBoolArray(new DType.Bool(false), 2, validityBuf);

MaskedArray masked = new MaskedArray(msp, validity);
DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, true);
GenericArray sut = new GenericArray(dec, 2, new MemorySegment[0], new Array[]{masked});

// When / Then
assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34"));
assertThatThrownBy(() -> sut.getDecimal(1))
.isInstanceOf(io.github.dfa1.vortex.core.VortexException.class)
.hasMessageContaining("null cell at index 1");
}
}

@Test
void getDecimal_nonDecimalDtype_throws() {
// Given — guards against silently returning garbage on misuse
Expand Down
Loading