print dtype (machine-readable)");
diff --git a/core/src/main/java/io/github/dfa1/vortex/core/DType.java b/core/src/main/java/io/github/dfa1/vortex/core/DType.java
index d1113e4e..d4133cae 100644
--- a/core/src/main/java/io/github/dfa1/vortex/core/DType.java
+++ b/core/src/main/java/io/github/dfa1/vortex/core/DType.java
@@ -132,6 +132,16 @@ record Extension(
ByteBuffer metadata,
boolean nullable
) implements DType {
+
+        /// Classifies this extension by its wire-format id.
+        ///
+        /// Delegates to {@link io.github.dfa1.vortex.core.Extension#of(String)}: a
+        /// recognised id yields the matching built-in kind, and every other id is
+        /// wrapped in {@link io.github.dfa1.vortex.core.Extension.Custom}, so a
+        /// {@code switch} over the result can be exhaustive.
+        ///
+        /// @return the {@link io.github.dfa1.vortex.core.Extension} for this
+        ///         extension's id
+        public io.github.dfa1.vortex.core.Extension kind() {
+            String wireId = extensionId;
+            return io.github.dfa1.vortex.core.Extension.of(wireId);
+        }
}
/// Variant logical type for semi-structured data (analogous to Parquet variant / JSON).
diff --git a/core/src/main/java/io/github/dfa1/vortex/core/Extension.java b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java
new file mode 100644
index 00000000..73ce1b83
--- /dev/null
+++ b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java
@@ -0,0 +1,333 @@
+package io.github.dfa1.vortex.core;
+
+import io.github.dfa1.vortex.core.array.Array;
+import io.github.dfa1.vortex.core.array.ByteArray;
+import io.github.dfa1.vortex.core.array.FixedSizeListArray;
+import io.github.dfa1.vortex.core.array.IntArray;
+import io.github.dfa1.vortex.core.array.LongArray;
+import io.github.dfa1.vortex.core.array.MaskedArray;
+import io.github.dfa1.vortex.core.array.ShortArray;
+import io.github.dfa1.vortex.encoding.TimeUnit;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.util.Optional;
+
+/// Sealed hierarchy of Vortex extension dtypes — closed-world view of the
+/// four spec-defined extensions ({@code vortex.date}, {@code vortex.time},
+/// {@code vortex.timestamp}, {@code vortex.uuid}) plus a {@link Custom}
+/// fallback record carrying any other id.
+///
+/// Mirrors the {@link io.github.dfa1.vortex.encoding.Encoding} /
+/// {@link io.github.dfa1.vortex.encoding.EncodingId} pairing in spirit but
+/// merges the kind classification with the typed decode behaviour: each
+/// record exposes its own statically-typed decode methods rather than a
+/// single {@code Object decode(...)} contract that callers would have to
+/// downcast. Pattern-match exhaustively to dispatch:
+///
+/// ```java
+/// switch (ext.kind()) {
+/// case Extension.Date d -> d.decode(storage, i); // LocalDate
+/// case Extension.Time t -> t.decode(ext, storage, i); // LocalTime
+/// case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant
+/// case Extension.Uuid u -> u.decode(storage, i); // UUID
+/// case Extension.Custom c -> renderPlaceholder(c.id());
+/// }
+/// ```
+///
+///
+/// {@link DType.Extension} carries the wire-format id as a {@code String}
+/// so unknown ids round-trip without loss; {@link #of(String)} translates to
+/// the matching record.
+public sealed interface Extension {
+
+ /// Shared instance for {@link Date}; {@link #of(String)} returns it for {@code vortex.date}.
+ Date DATE = new Date();
+ /// Shared instance for {@link Time}; {@link #of(String)} returns it for {@code vortex.time}.
+ Time TIME = new Time();
+ /// Shared instance for {@link Timestamp}; {@link #of(String)} returns it for {@code vortex.timestamp}.
+ Timestamp TIMESTAMP = new Timestamp();
+ /// Shared instance for {@link Uuid}; {@link #of(String)} returns it for {@code vortex.uuid}.
+ /// {@code java.util.UUID} is not imported in this file and is written fully-qualified where it is used.
+ Uuid UUID = new Uuid();
+
+ /// Returns the wire-format id string.
+ ///
+ /// The built-in kinds return their {@code ID} constant; {@link Custom}
+ /// returns the id it was created with.
+ ///
+ /// @return canonical extension id
+ String id();
+
+    /// Maps a wire-format id string onto its {@link Extension} kind.
+    ///
+    /// The four built-in ids resolve to the shared singletons; any other id is
+    /// wrapped in a fresh {@link Custom}. A {@code null} id is rejected with a
+    /// {@link NullPointerException}.
+    ///
+    /// @param id raw extension id from the file footer
+    /// @return matching kind, or {@link Custom} when {@code id} isn't recognised
+    static Extension of(String id) {
+        if (id.equals(Date.ID)) {
+            return DATE;
+        }
+        if (id.equals(Time.ID)) {
+            return TIME;
+        }
+        if (id.equals(Timestamp.ID)) {
+            return TIMESTAMP;
+        }
+        if (id.equals(Uuid.ID)) {
+            return UUID;
+        }
+        return new Custom(id);
+    }
+
+    /// {@code vortex.date} — a day count relative to the Unix epoch, held in
+    /// any signed integer width.
+    final class Date implements Extension {
+        /// Wire id.
+        public static final String ID = "vortex.date";
+
+        private Date() {
+        }
+
+        @Override
+        public String id() {
+            return ID;
+        }
+
+        /// Decodes the date cell at row {@code i}.
+        ///
+        /// @param storage signed-integer storage (Byte/Short/Int/Long, possibly Masked)
+        /// @param i       row index, {@code 0 <= i < storage.length()}
+        /// @return decoded date
+        /// @throws VortexException           if storage isn't an integer primitive or
+        ///                                   the cell is null
+        /// @throws IndexOutOfBoundsException if {@code i} is outside the storage length
+        public LocalDate decode(Array storage, long i) {
+            checkBounds(i, storage.length());
+            long epochDay = epochInteger(storage, i);
+            return LocalDate.ofEpochDay(epochDay);
+        }
+    }
+
+    /// {@code vortex.time} — time of day stored as an integer count of the
+    /// {@link TimeUnit} recorded in byte 0 of {@code ext.metadata()}.
+    final class Time implements Extension {
+        /// Wire id.
+        public static final String ID = "vortex.time";
+
+        private Time() {
+        }
+
+        @Override
+        public String id() {
+            return ID;
+        }
+
+        /// Decodes the time-of-day cell at row {@code i}.
+        ///
+        /// @param ext     declared extension dtype carrying the {@link TimeUnit} byte
+        /// @param storage signed-integer storage (I32 for s/ms, I64 for μs/ns)
+        /// @param i       row index, {@code 0 <= i < storage.length()}
+        /// @return decoded local time
+        /// @throws VortexException             if the metadata unit is {@link TimeUnit#Days},
+        ///                                     the unit byte is missing, the cell is null,
+        ///                                     or storage isn't an integer primitive
+        /// @throws java.time.DateTimeException if the stored count does not fit in a
+        ///                                     single day, including counts so large
+        ///                                     that converting them to nanoseconds
+        ///                                     overflows a {@code long}
+        /// @throws IndexOutOfBoundsException   if {@code i} is outside the storage length
+        public LocalTime decode(DType.Extension ext, Array storage, long i) {
+            checkBounds(i, storage.length());
+            TimeUnit unit = readUnit(ext);
+            if (unit == TimeUnit.Days) {
+                throw new VortexException("Time.decode: Days unit not valid for vortex.time");
+            }
+            long raw = epochInteger(storage, i);
+            long nanosPerUnit = 1_000_000_000L / unit.divisor();
+            long nanos;
+            try {
+                // A plain multiply can wrap around and land on a valid-looking
+                // nano-of-day, which would decode corrupt data to a wrong time.
+                nanos = Math.multiplyExact(raw, nanosPerUnit);
+            } catch (ArithmeticException overflow) {
+                throw new java.time.DateTimeException(
+                        "Time.decode: value " + raw + " overflows nano-of-day", overflow);
+            }
+            return LocalTime.ofNanoOfDay(nanos);
+        }
+
+        /// Returns the {@link TimeUnit} recorded in the extension metadata.
+        ///
+        /// @param ext extension dtype
+        /// @return decoded time unit
+        /// @throws VortexException if the metadata buffer is null or empty
+        public TimeUnit unit(DType.Extension ext) {
+            return readUnit(ext);
+        }
+    }
+
+    /// {@code vortex.timestamp} — signed-integer epoch count plus an optional
+    /// IANA timezone.
+    ///
+    /// Metadata layout, relative to the buffer's current position:
+    /// {@code byte[0] = TimeUnit tag, bytes[1..3] = tz_len (u16 LE),
+    /// bytes[3..3+tz_len] = tz UTF-8}.
+    final class Timestamp implements Extension {
+        /// Wire id.
+        public static final String ID = "vortex.timestamp";
+
+        private Timestamp() {
+        }
+
+        @Override
+        public String id() {
+            return ID;
+        }
+
+        /// Decodes the timestamp cell at row {@code i} to an {@link Instant},
+        /// ignoring any timezone the metadata carries.
+        ///
+        /// @param ext     declared extension dtype
+        /// @param storage signed-integer storage array
+        /// @param i       row index, {@code 0 <= i < storage.length()}
+        /// @return decoded instant
+        /// @throws VortexException           if the metadata unit is {@link TimeUnit#Days},
+        ///                                   the unit byte is missing, the cell is null,
+        ///                                   or storage isn't an integer primitive
+        /// @throws IndexOutOfBoundsException if {@code i} is outside the storage length
+        public Instant instant(DType.Extension ext, Array storage, long i) {
+            checkBounds(i, storage.length());
+            TimeUnit unit = readUnit(ext);
+            if (unit == TimeUnit.Days) {
+                throw new VortexException("Timestamp.instant: Days unit not valid");
+            }
+            return instantFromRaw(epochInteger(storage, i), unit);
+        }
+
+        /// Decodes the timestamp cell at row {@code i} to a {@link ZonedDateTime}
+        /// using the timezone from the metadata, defaulting to UTC when absent.
+        ///
+        /// @param ext     declared extension dtype
+        /// @param storage signed-integer storage array
+        /// @param i       row index, {@code 0 <= i < storage.length()}
+        /// @return decoded zoned date-time
+        /// @throws VortexException             under the same conditions as
+        ///                                     {@link #instant} and {@link #timezone}
+        /// @throws java.time.DateTimeException if the metadata names an invalid zone id
+        public ZonedDateTime zonedDateTime(DType.Extension ext, Array storage, long i) {
+            ZoneId zone = timezone(ext).orElse(ZoneOffset.UTC);
+            return instant(ext, storage, i).atZone(zone);
+        }
+
+        /// Returns the IANA timezone recorded in the extension metadata.
+        ///
+        /// The metadata buffer is read through a duplicate, so its position and
+        /// limit are left untouched.
+        ///
+        /// @param ext declared extension dtype
+        /// @return parsed zone id, or empty when the metadata is null, shorter
+        ///         than the 3-byte header, or declares {@code tz_len == 0}
+        /// @throws VortexException             if the metadata is truncated mid-string
+        /// @throws java.time.DateTimeException if the stored string is not a valid zone id
+        public Optional<ZoneId> timezone(DType.Extension ext) {
+            ByteBuffer meta = ext.metadata();
+            if (meta == null || meta.remaining() < 3) {
+                return Optional.empty();
+            }
+            ByteBuffer le = meta.duplicate().order(ByteOrder.LITTLE_ENDIAN);
+            int basePos = le.position();
+            int tzLen = Short.toUnsignedInt(le.getShort(basePos + 1));
+            if (tzLen == 0) {
+                return Optional.empty();
+            }
+            if (le.remaining() < 3 + tzLen) {
+                throw new VortexException("timestamp metadata truncated: declared tz_len="
+                        + tzLen + " but only " + (le.remaining() - 3) + " bytes available");
+            }
+            byte[] tzBytes = new byte[tzLen];
+            // Absolute bulk read: copies tz_len bytes without moving the position.
+            le.get(basePos + 3, tzBytes);
+            return Optional.of(ZoneId.of(new String(tzBytes, StandardCharsets.UTF_8)));
+        }
+
+        /// Returns the {@link TimeUnit} recorded in the extension metadata.
+        ///
+        /// @param ext extension dtype
+        /// @return decoded time unit
+        /// @throws VortexException if the metadata buffer is null or empty
+        public TimeUnit unit(DType.Extension ext) {
+            return readUnit(ext);
+        }
+    }
+
+    /// {@code vortex.uuid} — 16-byte UUID stored as
+    /// {@code FixedSizeList(Primitive(U8), 16)}.
+    final class Uuid implements Extension {
+        /// Wire id.
+        public static final String ID = "vortex.uuid";
+
+        private Uuid() {
+        }
+
+        @Override
+        public String id() {
+            return ID;
+        }
+
+        /// Decodes the UUID cell at row {@code i}.
+        ///
+        /// The 16 bytes of the row are read most-significant byte first: bytes
+        /// 0–7 form the most-significant half, bytes 8–15 the least-significant.
+        ///
+        /// @param storage UUID storage array
+        /// @param i       row index, {@code 0 <= i < storage.length()}
+        /// @return decoded {@link java.util.UUID}
+        /// @throws VortexException           if storage isn't a {@code FixedSizeListArray}
+        ///                                   of size 16 over {@code ByteArray} elements
+        /// @throws IndexOutOfBoundsException if {@code i} is outside the storage length
+        public java.util.UUID decode(Array storage, long i) {
+            checkBounds(i, storage.length());
+            if (!(storage instanceof FixedSizeListArray fsl)) {
+                throw new VortexException("Uuid.decode: expected FixedSizeListArray, got "
+                        + storage.getClass().getSimpleName());
+            }
+            if (fsl.fixedSize() != 16) {
+                throw new VortexException("Uuid.decode: expected fixedSize 16, got " + fsl.fixedSize());
+            }
+            if (!(fsl.elements() instanceof ByteArray bytes)) {
+                throw new VortexException("Uuid.decode: expected ByteArray elements, got "
+                        + fsl.elements().getClass().getSimpleName());
+            }
+            long rowStart = i * 16;
+            long high = readBigEndianLong(bytes, rowStart);
+            long low = readBigEndianLong(bytes, rowStart + 8);
+            return new java.util.UUID(high, low);
+        }
+
+        /// Folds eight consecutive bytes, most-significant first, into a {@code long}.
+        private static long readBigEndianLong(ByteArray bytes, long start) {
+            long value = 0L;
+            for (int k = 0; k < 8; k++) {
+                // Mask to 0..255 so negative bytes do not sign-extend into the upper bits.
+                value = (value << 8) | (bytes.getByte(start + k) & 0xffL);
+            }
+            return value;
+        }
+    }
+
+ /// Open-world escape hatch for any extension id that is not one of the
+ /// four built-in ids; {@link Extension#of(String)} creates one for every
+ /// id it does not recognise. Pattern-match branches that need to render or
+ /// decode an unknown extension read its raw id via {@link #id()}.
+ ///
+ /// @param id raw extension id string, kept verbatim
+ record Custom(String id) implements Extension {
+ }
+
+ // ── Shared helpers ────────────────────────────────────────────────────
+
+    /// Widens the integer cell at row {@code i} to a {@code long}.
+    ///
+    /// Accepts the Byte/Short/Int/Long primitive arrays directly. A
+    /// {@link MaskedArray} is unwrapped recursively after its validity at
+    /// {@code i} has been checked, so a null cell raises instead of yielding
+    /// whatever bytes sit underneath it.
+    ///
+    /// @throws VortexException if the cell is null or the storage type is not
+    ///                         one of the supported integer arrays
+    private static long epochInteger(Array storage, long i) {
+        if (storage instanceof ByteArray bytes) {
+            return bytes.getByte(i);
+        }
+        if (storage instanceof ShortArray shorts) {
+            return shorts.getShort(i);
+        }
+        if (storage instanceof IntArray ints) {
+            return ints.getInt(i);
+        }
+        if (storage instanceof LongArray longs) {
+            return longs.getLong(i);
+        }
+        if (storage instanceof MaskedArray masked) {
+            if (!masked.isValid(i)) {
+                throw new VortexException("null cell at index " + i);
+            }
+            return epochInteger(masked.inner(), i);
+        }
+        throw new VortexException(
+                "unsupported storage type " + storage.getClass().getSimpleName());
+    }
+
+    /// Decodes the {@link TimeUnit} tag stored at the metadata buffer's current
+    /// position. The read is absolute, so the buffer's position is not advanced.
+    ///
+    /// @throws VortexException if the metadata buffer is null or has no bytes remaining
+    private static TimeUnit readUnit(DType.Extension ext) {
+        ByteBuffer meta = ext.metadata();
+        boolean hasUnitByte = meta != null && meta.hasRemaining();
+        if (!hasUnitByte) {
+            throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId());
+        }
+        byte tag = meta.get(meta.position());
+        return TimeUnit.fromTag(tag);
+    }
+
+    /// Converts an epoch count in the given unit to an {@link Instant}.
+    ///
+    /// Sub-second units are split with floor division, so pre-epoch (negative)
+    /// counts produce a non-negative nanosecond adjustment.
+    ///
+    /// @throws VortexException if {@code unit} is {@link TimeUnit#Days}
+    private static Instant instantFromRaw(long raw, TimeUnit unit) {
+        return switch (unit) {
+            case Seconds -> Instant.ofEpochSecond(raw);
+            case Milliseconds -> Instant.ofEpochMilli(raw);
+            case Microseconds -> splitEpochCount(raw, 1_000_000L, 1_000L);
+            case Nanoseconds -> splitEpochCount(raw, 1_000_000_000L, 1L);
+            case Days -> throw new VortexException("Days unit not valid for instant");
+        };
+    }
+
+    /// Splits {@code raw} into whole seconds and a nanosecond remainder.
+    ///
+    /// @param unitsPerSecond how many counts make up one second
+    /// @param nanosPerUnit   nanoseconds represented by a single count
+    private static Instant splitEpochCount(long raw, long unitsPerSecond, long nanosPerUnit) {
+        long wholeSeconds = Math.floorDiv(raw, unitsPerSecond);
+        long nanoAdjustment = Math.floorMod(raw, unitsPerSecond) * nanosPerUnit;
+        return Instant.ofEpochSecond(wholeSeconds, nanoAdjustment);
+    }
+
+    /// Rejects any row index outside {@code [0, length)}.
+    ///
+    /// @throws IndexOutOfBoundsException if {@code i} is negative or not below {@code length}
+    private static void checkBounds(long i, long length) {
+        boolean inRange = 0 <= i && i < length;
+        if (inRange) {
+            return;
+        }
+        throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length);
+    }
+}
diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java
index d319637c..9147f8be 100644
--- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java
+++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java
@@ -1,8 +1,13 @@
package io.github.dfa1.vortex.core.array;
import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.VortexException;
import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteOrder;
/// Fallback [Array] for dtypes that lack a dedicated concrete subtype.
///
@@ -48,10 +53,153 @@ public long length() {
return length;
}
+    /// Returns a view of this array limited to {@code newLength} logical rows.
+    ///
+    /// No data is copied: the view shares this array's buffers and children, and
+    /// readers are expected to honour {@link #length()}. When {@code newLength}
+    /// equals the current length the receiver itself is returned. Used by the
+    /// scan iterator to honour {@code ScanOptions.limit} for dtypes that don't
+    /// have a typed array.
+    ///
+    /// NOTE(review): the shared buffers keep their full byte size, while
+    /// {@link #getDecimal(long)} derives the element width of a single-buffer
+    /// decimal from {@code byteSize / length()}. On a shortened view that
+    /// quotient no longer equals the real element width — confirm clamped
+    /// decimal arrays are never decoded through that path.
+    ///
+    /// @param newLength desired logical length; must satisfy {@code 0 <= newLength <= length()}
+    /// @return this array when the length is unchanged, otherwise a new
+    ///         {@code GenericArray} sharing this array's buffers and children
+    /// @throws IllegalArgumentException if {@code newLength} is negative or
+    ///                                  exceeds the current length
+    public GenericArray withLength(long newLength) {
+        boolean withinRange = 0 <= newLength && newLength <= length;
+        if (!withinRange) {
+            throw new IllegalArgumentException(
+                    "newLength " + newLength + " out of range [0," + length + "]");
+        }
+        return newLength == length
+                ? this
+                : new GenericArray(dtype, newLength, buffers, children);
+    }
+
MemorySegment buffer(int i) {
return buffers[i];
}
+ /// Returns the number of raw memory buffers backing this array, i.e. the
+ /// exclusive upper bound for the index accepted by {@link #bufferAt(int)}.
+ ///
+ /// @return buffer count
+ public int bufferCount() {
+ return buffers.length;
+ }
+
+ /// Returns the raw buffer at position {@code i}. Used by callers that need
+ /// to inspect encoded bytes when no typed accessor exists for the dtype
+ /// (e.g. the TUI inspector decoding {@code Decimal} cells). The segment is
+ /// returned as stored, not copied.
+ ///
+ /// @param i buffer index, {@code 0 <= i < bufferCount()}
+ /// @return the underlying {@link MemorySegment}
+ /// @throws ArrayIndexOutOfBoundsException if {@code i} is outside the buffer range
+ public MemorySegment bufferAt(int i) {
+ return buffers[i];
+ }
+
+ /// Returns the number of child arrays held by this array.
+ ///
+ /// @return child count
+ public int childCount() {
+ return children.length;
+ }
+
+    /// Decodes the decimal value at row {@code i}.
+    ///
+    /// Two array shapes are understood:
+    ///
+    /// - **single-buffer** (one buffer, no children): little-endian
+    ///   two's-complement integers, one element per row. The element width is
+    ///   taken from the buffer's byte size divided by {@link #length()}, not
+    ///   from the dtype's precision — {@code vortex.decimal} writes whatever
+    ///   width the encoder chose in its {@code valuesType} metadata, which can
+    ///   be narrower than the precision alone would allow.
+    /// - **child-array** (no buffers, one child): the child is a
+    ///   {@link LongArray}, {@link IntArray}, {@link ShortArray} or
+    ///   {@link ByteArray} (optionally masked) holding the unscaled value.
+    ///   Produced by {@code vortex.decimal_byte_parts} when
+    ///   {@code lower_part_count == 0}.
+    ///
+    /// @param i row index, {@code 0 <= i < length()}
+    /// @return decoded value as a {@link BigDecimal} with the dtype's scale
+    /// @throws VortexException           if the dtype isn't decimal or the array
+    ///                                   shape doesn't match either supported layout
+    /// @throws IndexOutOfBoundsException if {@code i} is outside {@code [0, length())}
+    public BigDecimal getDecimal(long i) {
+        if (!(0 <= i && i < length)) {
+            throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length);
+        }
+        if (!(dtype instanceof DType.Decimal decimal)) {
+            throw new VortexException("getDecimal called on non-decimal dtype: " + dtype);
+        }
+        int bufferTotal = buffers.length;
+        int childTotal = children.length;
+        final BigInteger unscaled;
+        if (bufferTotal == 1 && childTotal == 0) {
+            unscaled = readSingleBufferMantissa(buffers[0], length, i);
+        } else if (bufferTotal == 0 && childTotal == 1) {
+            unscaled = mantissaFromChild(children[0], i);
+        } else {
+            throw new VortexException("getDecimal: unsupported decimal shape buffers="
+                    + bufferTotal + " children=" + childTotal);
+        }
+        return new BigDecimal(unscaled, decimal.scale());
+    }
+
+    /// Reads the unscaled value of row {@code i} from a single packed buffer.
+    ///
+    /// The element width is inferred as {@code buf.byteSize() / length} and
+    /// must be 1, 2, 4, 8 or 16 bytes. The width is validated as a {@code long}
+    /// before narrowing, so an oversized quotient cannot truncate into one of
+    /// the supported widths.
+    ///
+    /// NOTE(review): {@code length} is the array's logical length. A view made
+    /// by {@code withLength} keeps the full-size buffer, so the inferred width
+    /// would be wrong for it — confirm such views never reach this method.
+    ///
+    /// @param buf    packed little-endian two's-complement elements
+    /// @param length logical row count used to infer the element width
+    /// @param i      row index, already bounds-checked by the caller
+    /// @return the unscaled integer at row {@code i}
+    /// @throws VortexException if the buffer size is not a multiple of
+    ///                         {@code length} or the width is unsupported
+    private static BigInteger readSingleBufferMantissa(MemorySegment buf, long length, long i) {
+        long bufBytes = buf.byteSize();
+        if (length == 0 || bufBytes % length != 0) {
+            throw new VortexException("getDecimal: buffer size " + bufBytes
+                    + " is not a multiple of length " + length);
+        }
+        long width = bufBytes / length;
+        if (width != 1 && width != 2 && width != 4 && width != 8 && width != 16) {
+            throw new VortexException("getDecimal: unsupported element width " + width + " bytes");
+        }
+        return readSignedLe(buf, i * width, (int) width);
+    }
+
+    /// Reads the unscaled value of row {@code i} from a child integer array.
+    ///
+    /// A {@link MaskedArray} is unwrapped recursively once the cell's validity
+    /// has been confirmed.
+    ///
+    /// @throws VortexException if the cell is null or the child is not a
+    ///                         supported integer array
+    private static BigInteger mantissaFromChild(Array child, long i) {
+        if (child instanceof LongArray longs) {
+            return BigInteger.valueOf(longs.getLong(i));
+        }
+        if (child instanceof IntArray ints) {
+            return BigInteger.valueOf(ints.getInt(i));
+        }
+        if (child instanceof ShortArray shorts) {
+            return BigInteger.valueOf(shorts.getShort(i));
+        }
+        if (child instanceof ByteArray bytes) {
+            return BigInteger.valueOf(bytes.getByte(i));
+        }
+        if (child instanceof MaskedArray masked) {
+            if (!masked.isValid(i)) {
+                throw new VortexException("getDecimal: null cell at index " + i);
+            }
+            return mantissaFromChild(masked.inner(), i);
+        }
+        throw new VortexException("getDecimal: unsupported mantissa child type "
+                + child.getClass().getSimpleName());
+    }
+
+ // Unaligned little-endian layouts used by readSignedLe for the 2-, 4- and
+ // 8-byte element widths, so reads do not depend on the platform byte order
+ // or on the element offset being aligned.
+ private static final ValueLayout.OfShort SHORT_LE =
+ ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
+ private static final ValueLayout.OfInt INT_LE =
+ ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
+ private static final ValueLayout.OfLong LONG_LE =
+ ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
+
+    /// Reads one little-endian two's-complement integer of {@code width} bytes
+    /// starting at byte {@code offset}.
+    ///
+    /// @throws VortexException if {@code width} is not 1, 2, 4, 8 or 16
+    private static BigInteger readSignedLe(MemorySegment buf, long offset, int width) {
+        if (width == 1) {
+            return BigInteger.valueOf(buf.get(ValueLayout.JAVA_BYTE, offset));
+        }
+        if (width == 2) {
+            return BigInteger.valueOf(buf.get(SHORT_LE, offset));
+        }
+        if (width == 4) {
+            return BigInteger.valueOf(buf.get(INT_LE, offset));
+        }
+        if (width == 8) {
+            return BigInteger.valueOf(buf.get(LONG_LE, offset));
+        }
+        if (width == 16) {
+            return readSigned128Le(buf, offset);
+        }
+        throw new VortexException("readSignedLe: unsupported width " + width);
+    }
+
+    /// Reads a 16-byte little-endian two's-complement integer at {@code offset}.
+    private static BigInteger readSigned128Le(MemorySegment buf, long offset) {
+        // BigInteger's byte[] constructor expects big-endian two's-complement,
+        // so the 16 stored bytes are copied into a heap array in reverse order.
+        // This is the only width that allocates a temporary array; the narrower
+        // widths are read directly as primitives.
+        byte[] bigEndian = new byte[16];
+        int dst = bigEndian.length - 1;
+        for (long src = offset; dst >= 0; src++, dst--) {
+            bigEndian[dst] = buf.get(ValueLayout.JAVA_BYTE, src);
+        }
+        return new BigInteger(bigEndian);
+    }
+
/// Returns the child array at position {@code i}.
///
/// @param i child index
diff --git a/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java
new file mode 100644
index 00000000..ad684466
--- /dev/null
+++ b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java
@@ -0,0 +1,285 @@
+package io.github.dfa1.vortex.core;
+
+import io.github.dfa1.vortex.core.array.ByteArray;
+import io.github.dfa1.vortex.core.array.FixedSizeListArray;
+import io.github.dfa1.vortex.core.array.IntArray;
+import io.github.dfa1.vortex.core.array.LongArray;
+import org.junit.jupiter.api.Test;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/// Unit tests for the {@link Extension} hierarchy: id resolution, exhaustive
+/// pattern matching, and the typed decoders for date, time, timestamp and UUID.
+class ExtensionTest {
+
+    private static final DType.Primitive I32 = new DType.Primitive(PType.I32, false);
+    private static final DType.Primitive I64 = new DType.Primitive(PType.I64, false);
+    private static final DType.Primitive U8 = new DType.Primitive(PType.U8, false);
+
+    @Test
+    void of_recognisedIds_returnSingletons() {
+        // Known ids must resolve to the shared singletons, so identity checks and
+        // pattern-match cases work without allocating per call.
+        assertThat(Extension.of("vortex.date")).isSameAs(Extension.DATE);
+        assertThat(Extension.of("vortex.time")).isSameAs(Extension.TIME);
+        assertThat(Extension.of("vortex.timestamp")).isSameAs(Extension.TIMESTAMP);
+        assertThat(Extension.of("vortex.uuid")).isSameAs(Extension.UUID);
+    }
+
+    @Test
+    void of_unknownId_returnsCustomWithRawString() {
+        // Given — an id outside the spec; it has to survive verbatim so callers
+        // can plug in their own decoding
+        String unknownId = "acme.geopoint";
+
+        // When
+        Extension sut = Extension.of(unknownId);
+
+        // Then
+        assertThat(sut).isInstanceOf(Extension.Custom.class);
+        assertThat(sut.id()).isEqualTo("acme.geopoint");
+    }
+
+    @Test
+    void kind_onDTypeExtension_dispatchesViaPatternMatch() {
+        // Given — one built-in and one foreign extension dtype
+        DType.Extension builtIn = ext("vortex.date", I32, null);
+        DType.Extension foreign = ext("acme.thing", I32, null);
+
+        // When / Then — the sealed switch in classify() covers both
+        assertThat(classify(builtIn)).isEqualTo("date");
+        assertThat(classify(foreign)).isEqualTo("custom:acme.thing");
+    }
+
+    private static String classify(DType.Extension ext) {
+        return switch (ext.kind()) {
+            case Extension.Date date -> "date";
+            case Extension.Time time -> "time";
+            case Extension.Timestamp timestamp -> "timestamp";
+            case Extension.Uuid uuid -> "uuid";
+            case Extension.Custom custom -> "custom:" + custom.id();
+        };
+    }
+
+    @Test
+    void date_decodes_tpchSample() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — known TPC-H anchor: day 9538 is 1996-02-12
+            IntArray storage = i32(arena, 9538);
+
+            // When
+            LocalDate decoded = Extension.DATE.decode(storage, 0);
+
+            // Then
+            assertThat(decoded).isEqualTo(LocalDate.of(1996, 2, 12));
+        }
+    }
+
+    @Test
+    void date_negativeDays_returnsPreEpoch() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — signed storage: one day before the epoch
+            IntArray storage = i32(arena, -1);
+
+            // When
+            LocalDate decoded = Extension.DATE.decode(storage, 0);
+
+            // Then
+            assertThat(decoded).isEqualTo(LocalDate.of(1969, 12, 31));
+        }
+    }
+
+    @Test
+    void time_eachUnit_decodesCorrectly() {
+        // A known time-of-day is pushed through every sub-day TimeUnit tag.
+        try (Arena arena = Arena.ofConfined()) {
+            // Seconds (tag 3): 3661 s = 01:01:01
+            DType.Extension seconds = ext("vortex.time", I32, unitByte((byte) 3));
+            assertThat(Extension.TIME.decode(seconds, i32(arena, 3661), 0))
+                    .isEqualTo(LocalTime.of(1, 1, 1));
+
+            // Milliseconds (tag 2): 3_661_500 ms = 01:01:01.500
+            DType.Extension millis = ext("vortex.time", I32, unitByte((byte) 2));
+            assertThat(Extension.TIME.decode(millis, i32(arena, 3_661_500), 0))
+                    .isEqualTo(LocalTime.of(1, 1, 1, 500_000_000));
+
+            // Microseconds (tag 1): 1_000_001 µs = 00:00:01.000001
+            DType.Extension micros = ext("vortex.time", I64, unitByte((byte) 1));
+            assertThat(Extension.TIME.decode(micros, i64(arena, 1_000_001L), 0))
+                    .isEqualTo(LocalTime.of(0, 0, 1, 1_000));
+
+            // Nanoseconds (tag 0): 42 ns past midnight
+            DType.Extension nanos = ext("vortex.time", I64, unitByte((byte) 0));
+            assertThat(Extension.TIME.decode(nanos, i64(arena, 42L), 0))
+                    .isEqualTo(LocalTime.ofNanoOfDay(42));
+        }
+    }
+
+    @Test
+    void time_daysUnit_throws() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — tag 4 (Days) is not a sub-day unit
+            DType.Extension daysExt = ext("vortex.time", I32, unitByte((byte) 4));
+
+            // When / Then
+            assertThatThrownBy(() -> Extension.TIME.decode(daysExt, i32(arena, 0), 0))
+                    .isInstanceOf(VortexException.class)
+                    .hasMessageContaining("Days unit not valid");
+        }
+    }
+
+    @Test
+    void timestamp_instant_microsecondsPath_handlesNegativeRaw() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — -1.500001 s in microseconds exercises floorDiv / floorMod
+            long preEpochMicros = -1_500_001L;
+            DType.Extension microsExt = ext("vortex.timestamp", I64, tzMeta((byte) 1, null));
+
+            // When
+            Instant decoded = Extension.TIMESTAMP.instant(microsExt, i64(arena, preEpochMicros), 0);
+
+            // Then
+            assertThat(decoded.getEpochSecond()).isEqualTo(-2L);
+            assertThat(decoded.getNano()).isEqualTo(499_999_000);
+        }
+    }
+
+    @Test
+    void timestamp_zonedDateTime_withTimezone_appliesIt() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — millisecond timestamp with Europe/Paris in the metadata
+            DType.Extension parisExt = ext("vortex.timestamp", I64, tzMeta((byte) 2, "Europe/Paris"));
+
+            // When
+            ZonedDateTime decoded = Extension.TIMESTAMP.zonedDateTime(parisExt, i64(arena, 1_000L), 0);
+
+            // Then
+            assertThat(decoded.getZone()).isEqualTo(ZoneId.of("Europe/Paris"));
+            assertThat(decoded.toInstant()).isEqualTo(Instant.ofEpochMilli(1_000L));
+        }
+    }
+
+    @Test
+    void timestamp_zonedDateTime_noTimezone_defaultsToUtc() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — tz_len = 0, so the decoder must fall back to UTC
+            DType.Extension noZoneExt = ext("vortex.timestamp", I64, tzMeta((byte) 2, null));
+
+            // When
+            ZonedDateTime decoded = Extension.TIMESTAMP.zonedDateTime(noZoneExt, i64(arena, 0L), 0);
+
+            // Then
+            assertThat(decoded.getZone()).isEqualTo(ZoneOffset.UTC);
+        }
+    }
+
+    @Test
+    void timestamp_timezone_truncatedMetadata_throws() {
+        // Given — header declares tz_len = 5 but only 3 bytes ("UTC") follow
+        ByteBuffer meta = ByteBuffer.allocate(6).order(ByteOrder.LITTLE_ENDIAN)
+                .put(0, (byte) 2)
+                .putShort(1, (short) 5)
+                .put(3, (byte) 'U')
+                .put(4, (byte) 'T')
+                .put(5, (byte) 'C');
+        DType.Extension truncated = ext("vortex.timestamp", I64, meta);
+
+        // When / Then
+        assertThatThrownBy(() -> Extension.TIMESTAMP.timezone(truncated))
+                .isInstanceOf(VortexException.class)
+                .hasMessageContaining("truncated");
+    }
+
+    @Test
+    void uuid_roundTripsKnownValue() {
+        // Given — RFC 9562 example, serialised most-significant byte first
+        java.util.UUID expected = java.util.UUID.fromString("123e4567-e89b-12d3-a456-426614174000");
+        byte[] raw = ByteBuffer.allocate(16)
+                .putLong(expected.getMostSignificantBits())
+                .putLong(expected.getLeastSignificantBits())
+                .array();
+        try (Arena arena = Arena.ofConfined()) {
+            FixedSizeListArray storage = fixedSizeBytes(arena, raw);
+
+            // When / Then
+            assertThat(Extension.UUID.decode(storage, 0)).isEqualTo(expected);
+        }
+    }
+
+    @Test
+    void uuid_allOnes_decodesWithoutSignExtension() {
+        // Given — 0xff in every byte exposes sign-extension bugs in the mask
+        byte[] raw = new byte[16];
+        for (int k = 0; k < raw.length; k++) {
+            raw[k] = (byte) 0xff;
+        }
+        try (Arena arena = Arena.ofConfined()) {
+            FixedSizeListArray storage = fixedSizeBytes(arena, raw);
+
+            // When / Then
+            assertThat(Extension.UUID.decode(storage, 0))
+                    .isEqualTo(new java.util.UUID(-1L, -1L));
+        }
+    }
+
+    @Test
+    void uuid_wrongFixedSize_throws() {
+        try (Arena arena = Arena.ofConfined()) {
+            // Given — a fixed size of 8 instead of 16 must be rejected up front
+            FixedSizeListArray storage = fixedSizeBytes(arena, new byte[8]);
+
+            // When / Then
+            assertThatThrownBy(() -> Extension.UUID.decode(storage, 0))
+                    .isInstanceOf(VortexException.class)
+                    .hasMessageContaining("fixedSize 16");
+        }
+    }
+
+    // ── helpers ──────────────────────────────────────────────────────────
+
+    /// Builds a non-nullable extension dtype with the given id, storage and metadata.
+    private static DType.Extension ext(String id, DType storage, ByteBuffer meta) {
+        return new DType.Extension(id, storage, meta, false);
+    }
+
+    /// Metadata consisting solely of the TimeUnit tag byte.
+    private static ByteBuffer unitByte(byte tag) {
+        return ByteBuffer.allocate(1).put(0, tag);
+    }
+
+    /// Timestamp metadata: unit tag, u16 little-endian tz length, then the tz bytes.
+    private static ByteBuffer tzMeta(byte unitTag, String tz) {
+        byte[] tzBytes = tz == null ? new byte[0] : tz.getBytes(StandardCharsets.UTF_8);
+        ByteBuffer meta = ByteBuffer.allocate(3 + tzBytes.length).order(ByteOrder.LITTLE_ENDIAN);
+        meta.put(0, unitTag);
+        meta.putShort(1, (short) tzBytes.length);
+        meta.put(3, tzBytes);
+        return meta;
+    }
+
+    /// Single-row I32 array holding {@code value}.
+    private static IntArray i32(Arena arena, int value) {
+        MemorySegment buf = arena.allocate(4);
+        buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, value);
+        return new IntArray(I32, 1, buf);
+    }
+
+    /// Single-row I64 array holding {@code value}.
+    private static LongArray i64(Arena arena, long value) {
+        MemorySegment buf = arena.allocate(8);
+        buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, value);
+        return new LongArray(I64, 1, buf);
+    }
+
+    /// Single-row fixed-size list of U8 whose fixed size is {@code raw.length}.
+    private static FixedSizeListArray fixedSizeBytes(Arena arena, byte[] raw) {
+        MemorySegment buf = arena.allocate(raw.length);
+        for (int k = 0; k < raw.length; k++) {
+            buf.set(ValueLayout.JAVA_BYTE, k, raw[k]);
+        }
+        ByteArray inner = new ByteArray(U8, raw.length, buf);
+        return new FixedSizeListArray(
+                new DType.FixedSizeList(U8, raw.length, false), 1, inner);
+    }
+}
diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java
new file mode 100644
index 00000000..f3f56a0a
--- /dev/null
+++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java
@@ -0,0 +1,284 @@
+package io.github.dfa1.vortex.core.array;
+
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.PType;
+import io.github.dfa1.vortex.core.VortexException;
+import org.junit.jupiter.api.Test;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.math.BigDecimal;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class GenericArrayTest {
+
+ private static final DType DTYPE = new DType.Primitive(PType.I64, false);
+
+ @Test
+ void withLength_shorterLength_returnsClampedView() {
+ // Given — full-size array of 10 elements
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment seg = arena.allocate(80);
+ GenericArray sut = new GenericArray(DTYPE, 10, seg);
+
+ // When
+ GenericArray clamped = sut.withLength(4);
+
+ // Then — length reflects new bound; buffer is reused (no copy)
+ assertThat(clamped.length()).isEqualTo(4);
+ assertThat(clamped.dtype()).isEqualTo(DTYPE);
+ }
+ }
+
+ @Test
+ void withLength_sameLength_returnsSameInstance() {
+ // Given
+ try (Arena arena = Arena.ofConfined()) {
+ GenericArray sut = new GenericArray(DTYPE, 10, arena.allocate(80));
+
+ // When / Then — no-op short-circuits to avoid wrapper allocation
+ assertThat(sut.withLength(10)).isSameAs(sut);
+ }
+ }
+
+ @Test
+ void withLength_zero_returnsEmptyView() {
+ // Given — boundary case: truncating to zero must still produce a valid
+ // GenericArray (length() == 0) rather than throw
+ try (Arena arena = Arena.ofConfined()) {
+ GenericArray sut = new GenericArray(DTYPE, 5, arena.allocate(40));
+
+ // When
+ GenericArray clamped = sut.withLength(0);
+
+ // Then
+ assertThat(clamped.length()).isZero();
+ }
+ }
+
+ @Test
+ void withLength_greaterThanCurrent_throws() {
+ // Given — protects against silently extending past the backing buffer
+ try (Arena arena = Arena.ofConfined()) {
+ GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24));
+
+ // When / Then
+ assertThatThrownBy(() -> sut.withLength(4))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("out of range");
+ }
+ }
+
+ @Test
+ void withLength_negative_throws() {
+ // Given
+ try (Arena arena = Arena.ofConfined()) {
+ GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24));
+
+ // When / Then
+ assertThatThrownBy(() -> sut.withLength(-1))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("out of range");
+ }
+ }
+
+ @Test
+ void getDecimal_i64Buffer_decodesMantissaScaledByDtype() {
+ // Given — decimal(15,2): precision 15 → 8-byte (I64) mantissa; values
+ // 1234 / -50 / 0 should render as 12.34 / -0.50 / 0.00.
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment buf = arena.allocate(24);
+ buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L);
+ buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -50L);
+ buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L);
+ DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false);
+ GenericArray sut = new GenericArray(dec, 3, buf);
+
+ // When / Then
+ assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34"));
+ assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50"));
+ assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO);
+ }
+ }
+
+ @Test
+ void getDecimal_smallPrecisionUsesNarrowerBuffer() {
+ // Given — decimal(4,1): precision 4 → 2-byte (I16) mantissa
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment buf = arena.allocate(4);
+ buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 0, (short) 99);
+ buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 2, (short) -1);
+ DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 1, false);
+ GenericArray sut = new GenericArray(dec, 2, buf);
+
+ // When / Then — 99 / 10 = 9.9; -1 / 10 = -0.1 (signed extension matters)
+ assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("9.9"));
+ assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.1"));
+ }
+ }
+
+ @Test
+ void getDecimal_childArrayShape_decodesViaMostSignificantPart() {
+ // Given — the shape vortex.decimal_byte_parts decoders produce when
+ // lower_part_count == 0: zero buffers, one LongArray child carrying
+ // the i64 mantissa.
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment mspBuf = arena.allocate(24);
+ mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 4321L);
+ mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -100L);
+ mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L);
+ LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 3, mspBuf);
+ DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false);
+ GenericArray sut = new GenericArray(dec, 3, new MemorySegment[0], new Array[]{msp});
+
+ // When / Then
+ assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("43.21"));
+ assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-1.00"));
+ assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO);
+ }
+ }
+
+ @Test
+ void getDecimal_i128Buffer_decodesWideMantissa() {
+ // Given — decimal(38,4) stores mantissas wider than i64; vortex.decimal
+ // writes 16-byte little-endian two's-complement. Two values: 2^70 (way
+ // above I64.MAX) and -2^70 anchor the high-precision path the
+ // narrower-width tests never exercise.
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment buf = arena.allocate(32);
+ java.math.BigInteger pos = java.math.BigInteger.TWO.pow(70);
+ java.math.BigInteger neg = pos.negate();
+ writeI128Le(buf, 0, pos);
+ writeI128Le(buf, 16, neg);
+ DType.Decimal dec = new DType.Decimal((byte) 38, (byte) 4, false);
+ GenericArray sut = new GenericArray(dec, 2, buf);
+
+ // When / Then
+ assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal(pos, 4));
+ assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal(neg, 4));
+ }
+ }
+
+    // Writes value at offset in buf as a 16-byte little-endian two's-complement
+    // integer. Values narrower than 16 bytes are sign-extended; wider ones keep
+    // only their low 16 bytes.
+    private static void writeI128Le(MemorySegment buf, long offset, java.math.BigInteger value) {
+        // toByteArray() is minimum-length big-endian two's-complement, so walk it
+        // from the tail to emit the least-significant byte first.
+        byte[] bigEndian = value.toByteArray();
+        byte filler = value.signum() < 0 ? (byte) 0xFF : (byte) 0x00;
+        for (int slot = 0; slot < 16; slot++) {
+            // Slots beyond the source length carry the sign-extension byte.
+            byte b = filler;
+            if (slot < bigEndian.length) {
+                b = bigEndian[bigEndian.length - 1 - slot];
+            }
+            buf.set(ValueLayout.JAVA_BYTE, offset + slot, b);
+        }
+    }
+
+ @Test
+ void getDecimal_widthDerivedFromBufferNotPrecision() {
+ // Given — decimal(15,2) is precision 15 (≤18 → "should" be I64), but
+ // vortex.decimal stores at whatever valuesType the encoder picked. A
+ // narrower width fits if all values are small. The old precision-based
+ // table picked 8 bytes here and read garbage. The current impl derives
+ // width from buffer.byteSize / length, so storing 3 I32 values at the
+ // same precision 15 decodes correctly.
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment buf = arena.allocate(12); // 3 × 4 bytes (I32 mantissa)
+ buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 1234);
+ buf.set(ValueLayout.JAVA_INT_UNALIGNED, 4, -50);
+ buf.set(ValueLayout.JAVA_INT_UNALIGNED, 8, 0);
+ DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false);
+ GenericArray sut = new GenericArray(dec, 3, buf);
+
+ // When / Then
+ assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34"));
+ assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50"));
+ assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO);
+ }
+ }
+
+    @Test
+    void getDecimal_unalignedBufferSize_throws() {
+        // Given — buffer size not a clean multiple of length means we can't
+        // derive a sensible per-element width; fail fast rather than silently
+        // reading garbage from a half-element offset.
+        try (Arena arena = Arena.ofConfined()) {
+            MemorySegment buf = arena.allocate(7); // not divisible by length=2
+            DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false);
+            GenericArray sut = new GenericArray(dec, 2, buf);
+
+            // When / Then — VortexException is imported; use the simple name like the sibling tests
+            assertThatThrownBy(() -> sut.getDecimal(0))
+                    .isInstanceOf(VortexException.class)
+                    .hasMessageContaining("not a multiple");
+        }
+    }
+
+ @Test
+ void getDecimal_indexOutOfBounds_throws() {
+ // Given — explicit bounds check guards against silent garbage reads
+ // when callers don't respect length()
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment buf = arena.allocate(8);
+ DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false);
+ GenericArray sut = new GenericArray(dec, 1, buf);
+
+ // When / Then
+ assertThatThrownBy(() -> sut.getDecimal(-1))
+ .isInstanceOf(IndexOutOfBoundsException.class);
+ assertThatThrownBy(() -> sut.getDecimal(1))
+ .isInstanceOf(IndexOutOfBoundsException.class)
+ .hasMessageContaining("out of bounds");
+ assertThatThrownBy(() -> sut.getDecimal(Long.MAX_VALUE))
+ .isInstanceOf(IndexOutOfBoundsException.class);
+ }
+ }
+
+    @Test
+    void getDecimal_nullCellInMaskedChild_throws() {
+        // Given — mantissa-child shape with a MaskedArray wrapping a LongArray;
+        // the validity bitmap says index 1 is null. Without the validity check
+        // the previous code would happily decode whatever bytes sat at that
+        // slot and return a garbage BigDecimal.
+        try (Arena arena = Arena.ofConfined()) {
+            MemorySegment mspBuf = arena.allocate(16);
+            mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L);
+            mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, 9999L);
+            LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 2, mspBuf);
+
+            MemorySegment validityBuf = arena.allocate(1);
+            // bit 0 set = index 0 valid; bit 1 clear = index 1 null
+            validityBuf.set(ValueLayout.JAVA_BYTE, 0, (byte) 0b0000_0001);
+            BoolArray validity = new BoolArray(new DType.Bool(false), 2, validityBuf);
+
+            MaskedArray masked = new MaskedArray(msp, validity);
+            DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, true);
+            GenericArray sut = new GenericArray(dec, 2, new MemorySegment[0], new Array[]{masked});
+
+            // When / Then — index 0 decodes normally, the masked-out index 1 must be rejected
+            assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34"));
+            assertThatThrownBy(() -> sut.getDecimal(1))
+                    .isInstanceOf(VortexException.class)
+                    .hasMessageContaining("null cell at index 1");
+        }
+    }
+
+ @Test
+ void getDecimal_nonDecimalDtype_throws() {
+ // Given — guards against silently returning garbage on misuse
+ try (Arena arena = Arena.ofConfined()) {
+ GenericArray sut = new GenericArray(DTYPE, 1, arena.allocate(8));
+
+ // When / Then
+ assertThatThrownBy(() -> sut.getDecimal(0))
+ .isInstanceOf(VortexException.class)
+ .hasMessageContaining("non-decimal");
+ }
+ }
+}
diff --git a/docs/compatibility.md b/docs/compatibility.md
index 683879b4..aad65434 100644
--- a/docs/compatibility.md
+++ b/docs/compatibility.md
@@ -56,6 +56,50 @@ try (VortexReader vf = VortexReader.open(path, registry)) {
}
```
+## Extension types
+
+Extension dtypes wrap a primitive storage array with a logical-id tag plus optional
+metadata. The Rust catalogue lives in
+[`vortex-array/src/extension/`](https://github.com/vortex-data/vortex/tree/develop/vortex-array/src/extension);
+each subdir below names a canonical extension id and its on-disk shape.
+
+Extensions are exposed as a sealed `Extension` hierarchy. Each record carries
+its own typed decode methods; pattern-match on `ext.kind()` to dispatch:
+
+```java
+switch (ext.kind()) {
+ case Extension.Date d -> d.decode(storage, i); // LocalDate
+ case Extension.Time t -> t.decode(ext, storage, i); // LocalTime
+ case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant
+ case Extension.Uuid u -> u.decode(storage, i); // UUID
+ case Extension.Custom c -> ... // any other id, raw String available
+}
+```
+
+| Extension id | Record | Storage | Metadata | Status |
+|---------------------|----------------------|-------------------------------------------------|-------------------------------------------|--------|
+| `vortex.date` | `Extension.Date` | Signed integer days since 1970-01-01 | none | ✅ |
+| `vortex.time` | `Extension.Time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | ✅ |
+| `vortex.timestamp` | `Extension.Timestamp`| I64 epoch count in the recorded `TimeUnit` | unit byte + u16 LE tz_len + UTF-8 tz | ✅ |
+| `vortex.uuid` | `Extension.Uuid` | `FixedSizeList(Primitive(U8), 16)` | none | ✅ |
+| _custom ids_ | `Extension.Custom` | _whatever the column declares_ | _opaque bytes_ | passthrough |
+
+`TimeUnit` (see [`extension/datetime/unit.rs`](https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/extension/datetime/unit.rs))
+encodes precision in the first metadata byte:
+
+| Value | Unit |
+|-------|--------------|
+| 0 | Nanoseconds |
+| 1 | Microseconds |
+| 2 | Milliseconds |
+| 3 | Seconds |
+| 4 | Days |
+
+For unsupported extension ids the inspector falls back to a placeholder cell
+(`>`). The underlying storage array still decodes correctly via the
+primitive accessors; callers just have to format the value
+themselves.
+
## S3 Fixture Status (v0.72.0)
Cross-language round-trips tested against Rust-written fixture files hosted at
diff --git a/inspector/pom.xml b/inspector/pom.xml
new file mode 100644
index 00000000..e88d9212
--- /dev/null
+++ b/inspector/pom.xml
@@ -0,0 +1,49 @@
+
+
+ 4.0.0
+
+ io.github.dfa1.vortex
+ vortex-java
+ 0.5.0-SNAPSHOT
+
+
+ vortex-inspector
+
+ vortex-inspector
+ Inspector for the Vortex columnar file format. Produces a structural tree model of a file's
+ schema, layout, and encodings, plus text and Lanterna-based TUI renderers.
+
+
+
+
+
+ io.github.dfa1.vortex
+ vortex-core
+
+
+ io.github.dfa1.vortex
+ vortex-reader
+
+
+ com.google.flatbuffers
+ flatbuffers-java
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ test
+
+
+ org.assertj
+ assertj-core
+ test
+
+
+ org.mockito
+ mockito-junit-jupiter
+ test
+
+
+
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java
new file mode 100644
index 00000000..ca4fae4d
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java
@@ -0,0 +1,281 @@
+package io.github.dfa1.vortex.inspect;
+
+import io.github.dfa1.vortex.core.ArrayStats;
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.Footer;
+import io.github.dfa1.vortex.core.Layout;
+import io.github.dfa1.vortex.core.SegmentSpec;
+import io.github.dfa1.vortex.fbs.Array;
+import io.github.dfa1.vortex.fbs.ArrayNode;
+import io.github.dfa1.vortex.io.VortexHandle;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+/// Structured snapshot of a Vortex file's schema, layout, and encoding usage.
+///
+/// Built once from a [VortexHandle] via [#build(VortexHandle)] and then consumed by renderers
+/// (text or TUI). Immutable — does not retain the handle.
+///
+/// @param version Vortex file format version stored in the trailer
+/// @param fileSize total file length in bytes
+/// @param dtype top-level data type (typically [DType.Struct])
+/// @param registeredEncodings encoding IDs declared in the file footer
+/// @param usedEncodings encoding IDs actually referenced by Flat layout segments
+/// @param segmentSpecs all on-disk segments referenced by the footer, in index order
+/// @param totalRowCount total logical rows in the file (root layout's row count)
+/// @param root root layout node
+public record InspectorTree(
+ int version,
+ long fileSize,
+ DType dtype,
+ List registeredEncodings,
+ Set usedEncodings,
+ List segmentSpecs,
+ long totalRowCount,
+ Node root) {
+
+ /// Number of on-disk segments referenced by the footer.
+ ///
+ /// @return segment count
+ public int segmentCount() {
+ return segmentSpecs.size();
+ }
+
+    /// Total on-disk size of the segments referenced by the footer.
+    ///
+    /// @return sum of all segment lengths, in bytes (0 when there are no segments)
+    public long totalSegmentBytes() {
+        // A primitive long stream keeps the accumulation unboxed, exactly like
+        // a hand-written running total.
+        return segmentSpecs.stream()
+                .mapToLong(SegmentSpec::length)
+                .sum();
+    }
+
+ /// One layout node in the inspector tree.
+ ///
+ /// @param layout underlying [Layout] from the file footer
+ /// @param fieldName column name when this node is a direct child of a top-level struct
+ /// @param usedEncodings encoding IDs referenced by this subtree
+ /// @param stats per-array statistics decoded from the segment's FlatBuffer
+ /// @param children child nodes
+ public record Node(
+ Layout layout,
+ Optional fieldName,
+ Set usedEncodings,
+ ArrayStats stats,
+ List children) {
+ }
+
+ /// Builds an inspector tree from an open Vortex file handle.
+ ///
+ /// @param handle open file handle
+ /// @return immutable inspector tree
+ public static InspectorTree build(VortexHandle handle) {
+ return build(handle, Progress.NOOP);
+ }
+
+ /// Builds an inspector tree without peeking segments — every node starts
+ /// with an empty encoding set and {@link ArrayStats#empty()} stats. The
+ /// resulting tree contains only structure derived from the file's footer
+ /// and layout, so the call is essentially free on remote handles.
+ ///
+ /// Use with {@link #peek(Node, VortexHandle)} for lazy on-demand resolution.
+ ///
+ /// @param handle open file handle
+ /// @return immutable shallow inspector tree
+ public static InspectorTree buildShallow(VortexHandle handle) {
+ Footer footer = handle.footer();
+ Layout layout = handle.layout();
+ DType dtype = handle.dtype();
+ List colNames = (dtype instanceof DType.Struct s) ? s.fieldNames() : List.of();
+ Node root = shallowNode(layout, Optional.empty());
+ if (layout.isStruct()) {
+ List named = new ArrayList<>(root.children().size());
+ for (int i = 0; i < root.children().size(); i++) {
+ Node child = root.children().get(i);
+ String name = i < colNames.size() ? colNames.get(i) : "col" + i;
+ named.add(new Node(child.layout(), Optional.of(name),
+ Set.of(), ArrayStats.empty(), child.children()));
+ }
+ root = new Node(root.layout(), Optional.empty(), Set.of(),
+ ArrayStats.empty(), List.copyOf(named));
+ }
+ return new InspectorTree(
+ handle.version(),
+ handle.fileSize(),
+ dtype,
+ footer.arraySpecs(),
+ Set.of(),
+ footer.segmentSpecs(),
+ layout.rowCount(),
+ root);
+ }
+
+    // Recursively mirrors the layout tree into Nodes that carry no encodings and empty stats.
+    private static Node shallowNode(Layout layout, Optional<String> fieldName) {
+        List<Node> children = layout.children().stream()
+                .map(child -> shallowNode(child, Optional.empty()))
+                .toList();
+        return new Node(layout, fieldName, Set.of(), ArrayStats.empty(), children);
+    }
+
+    /// Reads the first segment of a single Flat node and resolves its encoding id
+    /// and stats. Yields [Peek#EMPTY] when the node is not Flat, lists no segments,
+    /// or its first segment is stored compressed.
+    ///
+    /// Results are worth caching: each call issues a new {@code handle.slice()},
+    /// which is a network round-trip on remote handles.
+    ///
+    /// @param node node to resolve
+    /// @param handle open file handle
+    /// @return peek result; never {@code null}
+    public static Peek peek(Node node, VortexHandle handle) {
+        Layout layout = node.layout();
+        boolean peekable = layout.isFlat() && !layout.segments().isEmpty();
+        if (!peekable) {
+            return Peek.EMPTY;
+        }
+        SegmentSpec first = handle.footer().segmentSpecs().get(layout.segments().getFirst());
+        // A non-zero compression code means the segment is not peeked.
+        if (first.compression().code != 0) {
+            return Peek.EMPTY;
+        }
+        return peekFlatRoot(handle.slice(first.offset(), first.length()), handle.footer().arraySpecs());
+    }
+
+    /// Builds an inspector tree from an open Vortex file handle, reporting
+    /// progress on each Flat-segment peek (which on remote-storage handles
+    /// triggers a separate HTTP range request).
+    ///
+    /// @param handle open file handle
+    /// @param progress progress sink receiving {@code (current, total)} after each segment peek
+    /// @return immutable inspector tree; {@code usedEncodings} iterates in first-seen order
+    public static InspectorTree build(VortexHandle handle, Progress progress) {
+        Footer footer = handle.footer();
+        Layout layout = handle.layout();
+        DType dtype = handle.dtype();
+
+        int total = countPeekableSegments(layout, footer);
+        int[] counter = {0};
+
+        List<String> colNames = (dtype instanceof DType.Struct s) ? s.fieldNames() : List.of();
+        Set<String> overallUsed = new LinkedHashSet<>();
+        Node root = buildNode(layout, Optional.empty(), handle, footer.arraySpecs(),
+                overallUsed, progress, counter, total);
+        if (layout.isStruct()) {
+            List<Node> namedChildren = new ArrayList<>(root.children().size());
+            for (int i = 0; i < root.children().size(); i++) {
+                Node child = root.children().get(i);
+                String name = i < colNames.size() ? colNames.get(i) : "col" + i;
+                namedChildren.add(new Node(child.layout(), Optional.of(name),
+                        child.usedEncodings(), child.stats(), child.children()));
+            }
+            root = new Node(root.layout(), Optional.empty(), root.usedEncodings(),
+                    root.stats(), List.copyOf(namedChildren));
+        }
+        // Wrap instead of Set.copyOf: copyOf's iteration order is unspecified and would drop first-seen order.
+        return new InspectorTree(
+                handle.version(),
+                handle.fileSize(),
+                dtype,
+                footer.arraySpecs(),
+                java.util.Collections.unmodifiableSet(overallUsed),
+                footer.segmentSpecs(),
+                layout.rowCount(),
+                root);
+    }
+
+    // Builds the node for one layout, peeking uncompressed Flat segments and folding child encodings upward.
+    private static Node buildNode(Layout layout, Optional<String> fieldName, VortexHandle handle,
+                                  List<String> arraySpecs, Set<String> overallUsed,
+                                  Progress progress, int[] counter, int total) {
+        Set<String> localUsed = new LinkedHashSet<>();
+        ArrayStats stats = ArrayStats.empty();
+        if (layout.isFlat() && !layout.segments().isEmpty()) {
+            int segIdx = layout.segments().getFirst();
+            SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx);
+            if (spec.compression().code == 0) {
+                Peek peek = peekFlatRoot(handle.slice(spec.offset(), spec.length()), arraySpecs);
+                if (peek.encoding() != null) {
+                    localUsed.add(peek.encoding());
+                    overallUsed.add(peek.encoding());
+                }
+                stats = peek.stats();
+                progress.update(++counter[0], total);
+            }
+        }
+        List<Node> children = new ArrayList<>(layout.children().size());
+        for (Layout child : layout.children()) {
+            Node n = buildNode(child, Optional.empty(), handle, arraySpecs, overallUsed, progress, counter, total);
+            localUsed.addAll(n.usedEncodings());
+            children.add(n);
+        }
+        // Wrap rather than Set.copyOf: copyOf would discard the LinkedHashSet's first-seen order.
+        return new Node(layout, fieldName, java.util.Collections.unmodifiableSet(localUsed),
+                stats, List.copyOf(children));
+    }
+
+    // Counts the layouts in this subtree that qualify for a peek: Flat, at least one
+    // segment, and a first segment whose compression code is 0.
+    private static int countPeekableSegments(Layout layout, Footer footer) {
+        boolean candidate = layout.isFlat() && !layout.segments().isEmpty();
+        int count = 0;
+        if (candidate && footer.segmentSpecs().get(layout.segments().getFirst()).compression().code == 0) {
+            count = 1;
+        }
+        for (Layout sub : layout.children()) {
+            count += countPeekableSegments(sub, footer);
+        }
+        return count;
+    }
+
+ /// Callback used by [#build(VortexHandle, Progress)] to report how many
+ /// flat segments have been peeked so far. Implementations may render a
+ /// progress bar, log, or ignore (see [#NOOP]).
+ @FunctionalInterface
+ public interface Progress {
+ /// Sink that discards updates.
+ Progress NOOP = (current, total) -> {
+ };
+
+ /// Reports progress.
+ ///
+ /// @param current number of segments peeked so far
+ /// @param total total peekable segments in the file
+ void update(int current, int total);
+ }
+
+    // Parses the trailing [flatbuffer][4-byte LE length] of a Flat segment; malformed framing yields Peek.EMPTY.
+    private static Peek peekFlatRoot(MemorySegment seg, List<String> arraySpecs) {
+        long size = seg.byteSize(); // stay in long: whole-segment asByteBuffer() fails past 2 GiB
+        var i32le = java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
+        long fbLen = size < 8 ? 0 : Integer.toUnsignedLong(seg.get(i32le, size - 4));
+        if (fbLen < 4 || fbLen > Math.min(size - 4, Integer.MAX_VALUE)) {
+            return Peek.EMPTY; // too short, or the length word points outside the segment
+        }
+        ByteBuffer fbBuf = seg.asSlice(size - 4 - fbLen, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
+        ArrayNode root = Array.getRootAsArray(fbBuf).root();
+        boolean known = root != null && root.encoding() >= 0 && root.encoding() < arraySpecs.size();
+        return known ? new Peek(arraySpecs.get(root.encoding()), ArrayStats.fromFbs(root.stats())) : Peek.EMPTY;
+    }
+
+ /// Result of a single Flat segment peek - the resolved encoding id (or
+ /// {@code null} when the FlatBuffer carried no root) plus the per-array
+ /// statistics decoded from the same FlatBuffer.
+ ///
+ /// @param encoding resolved encoding id from the array spec table, or {@code null}
+ /// @param stats per-array stats, or {@link ArrayStats#empty()} if unknown
+ public record Peek(String encoding, ArrayStats stats) {
+ /// Sentinel returned for non-Flat nodes, compressed segments, or
+ /// segments that don't carry an array root.
+ public static final Peek EMPTY = new Peek(null, ArrayStats.empty());
+ }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java
new file mode 100644
index 00000000..87580501
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java
@@ -0,0 +1,103 @@
+package io.github.dfa1.vortex.inspect;
+
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/// Single-threaded I/O executor that owns one {@link io.github.dfa1.vortex.io.VortexHandle}.
+///
+/// Vortex readers use a confined {@link java.lang.foreign.Arena}, so every
+/// {@code slice()} / {@code scan()} call must happen on the same thread that
+/// opened the file. The TUI dispatches all such calls to this worker so the
+/// render loop on the main thread never crosses the arena's owning thread.
+///
+/// {@link #pending()} drives the status-line counter; callers should check it
+/// when computing UI state.
+public final class IoWorker implements AutoCloseable {
+
+ private final BlockingQueue queue = new LinkedBlockingQueue<>();
+ private final Thread thread;
+ private final AtomicInteger pending = new AtomicInteger();
+ private volatile boolean closed;
+
+ /// Creates and starts the worker thread.
+ ///
+ /// @param name thread name
+ public IoWorker(String name) {
+ this.thread = new Thread(this::loop, name);
+ this.thread.setDaemon(true);
+ this.thread.start();
+ }
+
+ /// Submits a task to run on the worker thread. Returns immediately.
+ ///
+ /// @param task task that performs I/O and updates shared state
+ public void submit(Runnable task) {
+ if (closed) {
+ return;
+ }
+ pending.incrementAndGet();
+ queue.offer(() -> {
+ try {
+ task.run();
+ } finally {
+ pending.decrementAndGet();
+ }
+ });
+ }
+
+    /// Runs a task on the worker thread and waits for it to complete.
+    /// Used at startup to open the handle on the worker's owning thread.
+    /// Fails fast instead of hanging when the worker stops before the task ran.
+    ///
+    /// @param task task to execute
+    /// @throws InterruptedException if the calling thread is interrupted while waiting
+    /// @throws IllegalStateException if the worker stopped before the task completed
+    public void runAndAwait(Runnable task) throws InterruptedException {
+        var finished = new java.util.concurrent.CountDownLatch(1);
+        submit(() -> {
+            try {
+                task.run();
+            } finally {
+                finished.countDown();
+            }
+        });
+        // submit() drops tasks once closed and a stopped loop never drains the queue, so poll
+        // worker liveness rather than blocking forever on a latch nobody will release.
+        while (!finished.await(50, java.util.concurrent.TimeUnit.MILLISECONDS)) {
+            if (!thread.isAlive() && finished.getCount() > 0) {
+                throw new IllegalStateException("IoWorker stopped before the task completed");
+            }
+        }
+    }
+
+ /// Number of submitted tasks that have not yet finished.
+ ///
+ /// @return pending count, including the currently running task
+ public int pending() {
+ return pending.get();
+ }
+
+    // Worker body: takes and runs queued tasks one at a time until closed or interrupted.
+    private void loop() {
+        try {
+            while (!closed) {
+                Runnable next = queue.take();
+                try {
+                    next.run();
+                } catch (RuntimeException ignored) {
+                    // Task is expected to capture its own failures into shared state.
+                }
+            }
+        } catch (InterruptedException e) {
+            // Interrupted while blocked in take(): restore the flag and let the thread end.
+            Thread.currentThread().interrupt();
+        }
+    }
+
+ @Override
+ public void close() {
+ closed = true;
+ thread.interrupt();
+ }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java
new file mode 100644
index 00000000..e1eee450
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java
@@ -0,0 +1,203 @@
+package io.github.dfa1.vortex.inspect;
+
+import io.github.dfa1.vortex.core.ArrayStats;
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.Layout;
+import io.github.dfa1.vortex.core.SegmentSpec;
+import io.github.dfa1.vortex.io.VortexHandle;
+
+import java.util.List;
+
+/// Produces a human-readable summary of a Vortex file's structure and encodings.
+public final class VortexInspector {
+
+ private VortexInspector() {
+ }
+
+ /// Builds a multi-line text report for the given file handle.
+ ///
+ /// @param handle open file handle
+ /// @return formatted report
+ public static String inspect(VortexHandle handle) {
+ return render(InspectorTree.build(handle));
+ }
+
+    /// Renders an already-built inspector tree as the multi-line text report:
+    /// header, schema, registered/used encodings, segment table, then layout.
+    ///
+    /// @param tree inspector tree
+    /// @return formatted report
+    public static String render(InspectorTree tree) {
+        StringBuilder out = new StringBuilder();
+
+        // Header line: version, file size, row count.
+        out.append("Vortex v").append(tree.version());
+        out.append(" ").append(formatBytes(tree.fileSize()));
+        out.append(" ").append(tree.totalRowCount()).append(" rows").append("\n\n");
+
+        out.append("Schema:\n");
+        appendSchema(out, tree.dtype(), " ");
+        out.append('\n');
+
+        String registered = String.join(", ", tree.registeredEncodings());
+        out.append("Registered encodings: ").append(registered).append("\n\n");
+
+        String used = String.join(", ", tree.usedEncodings());
+        out.append("Used encodings: ").append(used).append("\n\n");
+
+        out.append("Segments: ").append(tree.segmentCount());
+        out.append(" total ").append(formatBytes(tree.totalSegmentBytes())).append('\n');
+        appendSegmentTable(out, tree.segmentSpecs(), " ");
+        out.append('\n');
+
+        out.append("Layout:\n");
+        appendLayout(out, tree.root(), " ");
+        return out.toString();
+    }
+
+    // Emits one "[index] off=… len=… compression=…" line per segment, in list order.
+    private static void appendSegmentTable(StringBuilder sb, List<SegmentSpec> specs, String indent) {
+        int index = 0;
+        for (SegmentSpec spec : specs) {
+            sb.append(indent).append('[').append(index++).append("] ")
+                    .append("off=").append(spec.offset())
+                    .append(" len=").append(formatBytes(spec.length()))
+                    .append(" compression=").append(spec.compression().name()).append('\n');
+        }
+    }
+
+    // A struct root prints a header plus one line per child; any other root prints one inline line.
+    private static void appendLayout(StringBuilder sb, InspectorTree.Node node, String indent) {
+        Layout layout = node.layout();
+        if (!layout.isStruct()) {
+            sb.append(indent);
+            appendLayoutInline(sb, layout);
+            sb.append('\n');
+            return;
+        }
+        sb.append(indent).append("struct (").append(layout.rowCount()).append(" rows)\n");
+        for (InspectorTree.Node child : node.children()) {
+            sb.append(indent).append(" ").append(child.fieldName().orElse("?")).append(": ");
+            appendLayoutInline(sb, child.layout());
+            if (!child.usedEncodings().isEmpty()) {
+                sb.append(" [").append(String.join(", ", child.usedEncodings())).append("]");
+            }
+            ArrayStats agg = aggregateStats(child);
+            boolean hasBounds = agg.min() != null || agg.max() != null;
+            if (hasBounds) {
+                sb.append(" min=").append(format(agg.min())).append(" max=").append(format(agg.max()));
+            }
+            sb.append('\n');
+        }
+    }
+
+    // Folds min/max over a node and all of its descendants; the remaining
+    // ArrayStats components of the result are passed as null.
+    private static ArrayStats aggregateStats(InspectorTree.Node node) {
+        Object lo = node.stats().min();
+        Object hi = node.stats().max();
+        for (InspectorTree.Node sub : node.children()) {
+            ArrayStats subStats = aggregateStats(sub);
+            lo = pickMin(lo, subStats.min());
+            hi = pickMax(hi, subStats.max());
+        }
+        boolean nothing = lo == null && hi == null;
+        return nothing ? ArrayStats.empty() : new ArrayStats(lo, hi, null, null, null, null);
+    }
+
+    // Returns the smaller of two possibly-null values; keeps a when the two are not mutually comparable.
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    private static Object pickMin(Object a, Object b) {
+        if (a == null || b == null) {
+            return a == null ? b : a;
+        }
+        boolean comparable = a.getClass() == b.getClass() && a instanceof Comparable;
+        if (!comparable) {
+            return a;
+        }
+        Comparable left = (Comparable) a;
+        return left.compareTo(b) <= 0 ? a : b;
+    }
+
+    // Returns the larger of two possibly-null values; keeps a when the two are not mutually comparable.
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    private static Object pickMax(Object a, Object b) {
+        if (a == null || b == null) {
+            return a == null ? b : a;
+        }
+        boolean comparable = a.getClass() == b.getClass() && a instanceof Comparable;
+        if (!comparable) {
+            return a;
+        }
+        Comparable left = (Comparable) a;
+        return left.compareTo(b) >= 0 ? a : b;
+    }
+
+    // Stringifies a stat value for display: "?" for null; text longer than
+    // 30 chars is cut to its first 27 chars followed by "...".
+    private static String format(Object v) {
+        if (v == null) {
+            return "?";
+        }
+        String text = v.toString();
+        // 27 kept chars plus the 3-char ellipsis keeps the result at the 30-char cap.
+        return text.length() <= 30 ? text : text.substring(0, 27) + "...";
+    }
+
+    // Writes "encoding(N rows)" for this layout and, when it has children, follows
+    // the chain after " → " by recursing into the first child.
+    private static void appendLayoutInline(StringBuilder sb, Layout layout) {
+        sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)");
+        int fanOut = layout.children().size();
+        if (fanOut == 0) {
+            return;
+        }
+        // A lone child continues the chain; several children show only the first, wrapped as "k× [...]".
+        boolean wrapped = fanOut != 1;
+        sb.append(" → ").append(wrapped ? fanOut + "× [" : "");
+        appendLayoutInline(sb, layout.children().getFirst());
+        sb.append(wrapped ? "]" : "");
+    }
+
+    // One padded "name type" line per struct field; a non-struct dtype prints as a single line.
+    private static void appendSchema(StringBuilder sb, DType dtype, String indent) {
+        if (!(dtype instanceof DType.Struct struct)) {
+            sb.append(indent).append(formatDType(dtype)).append('\n');
+            return;
+        }
+        List<String> names = struct.fieldNames();
+        int widest = names.stream().mapToInt(String::length).max().orElse(0);
+        for (int idx = 0; idx < names.size(); idx++) {
+            String pad = " ".repeat(widest - names.get(idx).length() + 1);
+            sb.append(indent).append(names.get(idx)).append(pad).append(formatDType(struct.fieldTypes().get(idx))).append('\n');
+        }
+    }
+
+ private static String formatDType(DType dtype) {
+ return switch (dtype) {
+ case DType.Primitive(var pt, var nullable) -> pt.name() + (nullable ? "?" : "");
+ case DType.Utf8(var nullable) -> "utf8" + (nullable ? "?" : "");
+ case DType.Binary(var nullable) -> "binary" + (nullable ? "?" : "");
+ case DType.Bool(var nullable) -> "bool" + (nullable ? "?" : "");
+ case DType.Null ignored -> "null";
+ case DType.Decimal(var p, var s, var nullable) -> "decimal(" + p + "," + s + ")" + (nullable ? "?" : "");
+ case DType.Struct ignored -> "struct";
+ case DType.List(var elem, var nullable) -> "list<" + formatDType(elem) + ">" + (nullable ? "?" : "");
+ case DType.FixedSizeList(var elem, var size, var nullable) ->
+ "list<" + formatDType(elem) + ">[" + size + "]" + (nullable ? "?" : "");
+ case DType.Extension(var id, var storage, var meta, var nullable) ->
+ "ext<" + id + ">" + (nullable ? "?" : "");
+ case DType.Variant(var nullable) -> "variant" + (nullable ? "?" : "");
+ };
+ }
+
+    // Formats a byte count as B / KB / MB with one decimal; Locale.ROOT pins '.' as the decimal separator.
+    private static String formatBytes(long bytes) {
+        if (bytes < 1024) {
+            return bytes + " B";
+        }
+        boolean kb = bytes < 1024 * 1024;
+        return String.format(java.util.Locale.ROOT, kb ? "%.1f KB" : "%.1f MB",
+                kb ? bytes / 1024.0 : bytes / (1024.0 * 1024.0));
+    }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java
new file mode 100644
index 00000000..aa4d2641
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java
@@ -0,0 +1,823 @@
+package io.github.dfa1.vortex.inspect;
+
+import io.github.dfa1.vortex.core.Layout;
+import io.github.dfa1.vortex.core.SegmentSpec;
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.array.Array;
+import io.github.dfa1.vortex.core.array.BoolArray;
+import io.github.dfa1.vortex.core.array.ByteArray;
+import io.github.dfa1.vortex.core.array.DoubleArray;
+import io.github.dfa1.vortex.core.array.FloatArray;
+import io.github.dfa1.vortex.core.array.GenericArray;
+import io.github.dfa1.vortex.core.array.IntArray;
+import io.github.dfa1.vortex.core.array.LongArray;
+import io.github.dfa1.vortex.core.array.ShortArray;
+import io.github.dfa1.vortex.core.array.VarBinArray;
+import io.github.dfa1.vortex.inspect.term.Ansi;
+import io.github.dfa1.vortex.inspect.term.Key;
+import io.github.dfa1.vortex.inspect.term.Terminal;
+import io.github.dfa1.vortex.io.VortexHandle;
+import io.github.dfa1.vortex.scan.Chunk;
+import io.github.dfa1.vortex.scan.ScanIterator;
+import io.github.dfa1.vortex.scan.ScanOptions;
+
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+/// Interactive viewer for a Vortex file's inspector tree, drawn with raw ANSI
+/// escapes — no library dependency.
+///
+/// Renders a two-pane terminal UI: layout tree on the left, node details on
+/// the right. Quit with {@code q} or {@code Esc}.
+public final class VortexInspectorTui {
+
+ private VortexInspectorTui() { // not instantiable: only the static show(...) entry points are exposed
+ }
+
+ /// Opens the terminal, builds a shallow inspector tree, and runs the
+ /// interactive viewer until quit; delegates with {@code InspectorTree.Progress.NOOP}.
+ ///
+ /// @param handle open Vortex file handle
+ /// @throws IOException if the terminal cannot be initialized
+ public static void show(VortexHandle handle) throws IOException {
+ show(handle, InspectorTree.Progress.NOOP);
+ }
+
+ /// Builds a shallow inspector tree and runs the interactive viewer until
+ /// quit. The shallow builder is used so the screen is interactive
+ /// immediately; encoding, stats and data previews are fetched lazily as
+ /// the user navigates. The {@code progress} parameter is retained for
+ /// source compatibility but is never invoked by this method.
+ /// Delegates with a {@code null} worker, i.e. synchronous I/O.
+ ///
+ /// @param handle open Vortex file handle
+ /// @param progress unused; kept for API stability
+ /// @throws IOException if the terminal cannot be initialized
+ public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException {
+ show(handle, null, progress);
+ }
+
+ /// Variant that dispatches the viewer's lazy {@code handle} reads onto the supplied
+ /// {@link IoWorker}. Required when the handle was opened on a different
+ /// thread (Vortex readers use a confined {@link java.lang.foreign.Arena},
+ /// so cross-thread access throws {@code WrongThreadException}).
+ ///
+ /// Passing {@code null} for {@code worker} falls back to synchronous I/O
+ /// on the render thread — fine for tests but causes the sluggishness this
+ /// machinery was built to avoid.
+ ///
+ /// @param handle open Vortex file handle
+ /// @param worker I/O dispatcher that owns the handle's thread; may be {@code null}
+ /// @param progress unused; kept for API stability
+ /// @throws IOException if the terminal cannot be initialized
+ public static void show(VortexHandle handle, IoWorker worker, InspectorTree.Progress progress)
+ throws IOException {
+ InspectorTree tree = InspectorTree.buildShallow(handle); // NOTE(review): runs on the calling thread, not on worker — confirm buildShallow needs no confined access
+ try (Terminal term = Terminal.open()) { // terminal is closed again when the loop returns or throws
+ new Loop(term, tree, handle, worker).run();
+ }
+ }
+
+ private static final class Loop {
+ /// Bytes shown per Flat segment when falling back to the raw hex view.
+ private static final int HEX_PREVIEW_BYTES = 256;
+
+ /// Decoded values shown per column in the data view.
+ private static final int DATA_PREVIEW_ROWS = 32;
+
+ /// Render cadence while idle — drives spinner animation and reaping of
+ /// background fetches so updates land even when the user isn't typing.
+ private static final long POLL_INTERVAL_MS = 80;
+
+ /// ASCII spinner frames; cycled by render tick.
+ private static final char[] SPINNER = {'|', '/', '-', '\\'};
+
+ private final Terminal term;
+ private final InspectorTree tree;
+ private final VortexHandle handle;
+ private final IoWorker worker;
+ // Identity-keyed containers throughout: InspectorTree.Node wraps a
+ // Layout record whose ByteBuffer metadata field crashes with
+ // WrongThreadException when its hashCode reads arena-confined bytes
+ // from any thread other than the handle's owner. Identity hashing
+ // sidesteps that entirely and matches the natural semantics — Nodes
+ // are constructed exactly once per shallow build and uniquely
+ // identify a position in the tree.
+ /// Nodes whose children are currently shown in the tree pane.
+ private final Set<InspectorTree.Node> expanded =
+     Collections.newSetFromMap(new IdentityHashMap<>());
+ /// Completed peeks per node; written by worker tasks, read while rendering.
+ private final Map<InspectorTree.Node, InspectorTree.Peek> peekCache =
+     Collections.synchronizedMap(new IdentityHashMap<>());
+ /// Nodes with a peek task submitted but not yet finished.
+ private final Set<InspectorTree.Node> peekInFlight =
+     Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>()));
+ /// Raw leading bytes of a node's first segment, once fetched.
+ private final Map<InspectorTree.Node, byte[]> hexCache =
+     Collections.synchronizedMap(new IdentityHashMap<>());
+ /// Nodes with a hex fetch submitted but not yet finished.
+ private final Set<InspectorTree.Node> hexInFlight =
+     Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>()));
+ /// Data preview state keyed by column name.
+ private final ConcurrentMap<String, DataState> dataCache = new ConcurrentHashMap<>();
+ /// Dictionary preview state keyed by dict node.
+ private final Map<InspectorTree.Node, DataState> dictCache =
+     Collections.synchronizedMap(new IdentityHashMap<>());
+ /// Top-level column name for every node below a root struct column.
+ private final Map<InspectorTree.Node, String> columnOf = new IdentityHashMap<>();
+ /// Nodes identified as stats payloads; tagged ", stats" in the tree pane.
+ private final Set<InspectorTree.Node> statsChildren =
+     Collections.newSetFromMap(new IdentityHashMap<>());
+ /// Most recent load failure; shown in the status bar. Written from load tasks.
+ private volatile String lastError;
+ /// Frame counter; selects the spinner frame.
+ private long tick;
+ /// Index of the highlighted row in the flattened tree.
+ private int selected;
+ /// Index of the first flattened row visible in the tree pane.
+ private int scrollOffset;
+
+ /// Stores the collaborators, marks the root node as expanded, and runs the
+ /// start-up passes (column tagging, stats-child indexing, data prefetch)
+ /// before the first frame is drawn.
+ ///
+ /// @param term   terminal used for key input and drawing
+ /// @param tree   shallow inspector tree to browse
+ /// @param handle open Vortex file handle used for lazy loads
+ /// @param worker I/O dispatcher, or {@code null} for synchronous I/O
+ Loop(Terminal term, InspectorTree tree, VortexHandle handle, IoWorker worker) {
+     this.handle = handle;
+     this.worker = worker;
+     this.tree = tree;
+     this.term = term;
+
+     InspectorTree.Node root = tree.root();
+     expanded.add(root);
+     indexColumns(root);
+     indexStatsChildrenOnWorker(root);
+     prefetchTopColumns();
+ }
+
+ /// Runs {@link #indexStatsChildren} over the tree: inline when there is no
+ /// worker, otherwise through {@code worker.runAndAwait}. If that call is
+ /// interrupted, the interrupt flag is restored and indexing is not retried.
+ ///
+ /// @param root node to start indexing from
+ private void indexStatsChildrenOnWorker(InspectorTree.Node root) {
+     if (worker != null) {
+         try {
+             worker.runAndAwait(() -> indexStatsChildren(root));
+         } catch (InterruptedException interrupted) {
+             Thread.currentThread().interrupt();
+         }
+         return;
+     }
+     indexStatsChildren(root);
+ }
+
+ /// Recursively records which child of each zoned / chunked node carries the
+ /// stats payload, so the tree pane can tag it.
+ ///
+ /// @param node subtree root to scan
+ private void indexStatsChildren(InspectorTree.Node node) {
+     var kids = node.children();
+     Layout layout = node.layout();
+     if (layout.isZoned() && kids.size() >= 2) {
+         // Zoned: child[0] = data, child[1] = per-chunk stats payload
+         statsChildren.add(kids.get(1));
+     } else if (layout.isChunked() && hasLeadingStats(layout) && !kids.isEmpty()) {
+         // Chunked with metadata[0] == 1: child[0] is the stats payload
+         statsChildren.add(kids.get(0));
+     }
+     kids.forEach(this::indexStatsChildren);
+ }
+
+ /// Tells whether the first remaining metadata byte of {@code layout} equals 1.
+ ///
+ /// @param layout layout whose metadata is inspected
+ /// @return {@code true} if metadata is present, non-empty and starts with byte 1
+ private static boolean hasLeadingStats(Layout layout) {
+     java.nio.ByteBuffer meta = layout.metadata();
+     if (meta == null || !meta.hasRemaining()) {
+         return false;
+     }
+     // Absolute get: the buffer's position is left untouched.
+     return meta.get(meta.position()) == 1;
+ }
+
+ /// Starts a data-preview load for every named child of a struct root.
+ /// Does nothing when the root layout is not a struct.
+ private void prefetchTopColumns() {
+     InspectorTree.Node root = tree.root();
+     if (root.layout().isStruct()) {
+         root.children().forEach(column -> column.fieldName().ifPresent(this::startDataLoad));
+     }
+ }
+
+ /// Tags every node below a named child of a struct root with that child's
+ /// field name. Does nothing when {@code root} is not a struct.
+ ///
+ /// @param root root node of the inspector tree
+ private void indexColumns(InspectorTree.Node root) {
+     if (root.layout().isStruct()) {
+         for (InspectorTree.Node column : root.children()) {
+             var name = column.fieldName();
+             if (name.isPresent()) {
+                 tagSubtree(column, name.get());
+             }
+         }
+     }
+ }
+
+ /// Maps {@code node} and all of its descendants to {@code columnName} in
+ /// {@code columnOf}.
+ ///
+ /// @param node       subtree root
+ /// @param columnName top-level column the subtree belongs to
+ private void tagSubtree(InspectorTree.Node node, String columnName) {
+     // Explicit work list instead of recursion; every node gets the same value,
+     // so visit order does not matter.
+     java.util.ArrayDeque<InspectorTree.Node> pending = new java.util.ArrayDeque<>();
+     pending.push(node);
+     while (!pending.isEmpty()) {
+         InspectorTree.Node current = pending.pop();
+         columnOf.put(current, columnName);
+         for (InspectorTree.Node child : current.children()) {
+             pending.push(child);
+         }
+     }
+ }
+
+ /// Returns the cached peek for {@code node}, loading it when missing.
+ ///
+ /// Without a worker the peek is computed inline and cached. With a worker a
+ /// single task per node is submitted and {@code InspectorTree.Peek.EMPTY} is
+ /// returned until that task has stored its result.
+ ///
+ /// @param node node to peek
+ /// @return cached or freshly computed peek, or {@code Peek.EMPTY} while loading
+ private InspectorTree.Peek peek(InspectorTree.Node node) {
+     InspectorTree.Peek known = peekCache.get(node);
+     if (known != null) {
+         return known;
+     }
+     if (worker != null) {
+         // add() returns false when a task for this node is already queued.
+         if (peekInFlight.add(node)) {
+             worker.submit(() -> {
+                 try {
+                     peekCache.put(node, safePeek(node));
+                 } finally {
+                     peekInFlight.remove(node);
+                 }
+             });
+         }
+         return InspectorTree.Peek.EMPTY;
+     }
+     InspectorTree.Peek fresh = safePeek(node);
+     peekCache.put(node, fresh);
+     return fresh;
+ }
+
+ /// Peeks {@code node}, converting any {@link RuntimeException} into
+ /// {@code Peek.EMPTY} and recording the failure in {@code lastError}.
+ ///
+ /// @param node node to peek
+ /// @return the peek result, or {@code Peek.EMPTY} on failure
+ private InspectorTree.Peek safePeek(InspectorTree.Node node) {
+     InspectorTree.Peek result;
+     try {
+         result = InspectorTree.peek(node, handle);
+     } catch (RuntimeException failure) {
+         lastError = "peek: " + messageOf(failure);
+         result = InspectorTree.Peek.EMPTY;
+     }
+     return result;
+ }
+
+ /// Main loop: flattens the visible tree, draws a frame, then waits up to
+ /// {@code POLL_INTERVAL_MS} for a key. Returns when a quit key is read.
+ ///
+ /// @throws IOException if drawing to or reading from the terminal fails
+ void run() throws IOException {
+     while (true) {
+         List<Item> items = flatten();
+         // Clamp the cursor: collapsing a subtree can shrink the visible list.
+         selected = Math.max(0, Math.min(selected, items.size() - 1));
+         render(items);
+         Optional<Key> maybeKey = term.readKey(POLL_INTERVAL_MS);
+         if (maybeKey.isEmpty()) {
+             // Idle poll: advance the spinner and redraw.
+             tick++;
+             continue;
+         }
+         Key key = maybeKey.get();
+         if (isQuit(key)) {
+             return;
+         }
+         handleKey(key, items);
+         tick++;
+     }
+ }
+
+ /// Applies a navigation key to the selection / expansion state.
+ /// Keys without a binding are ignored.
+ ///
+ /// @param key   decoded key press
+ /// @param items currently visible rows, in display order
+ private void handleKey(Key key, List<Item> items) {
+     int last = items.size() - 1;
+     switch (key) {
+         case Key.ArrowDown ignored -> selected = Math.min(selected + 1, last);
+         case Key.ArrowUp ignored -> selected = Math.max(selected - 1, 0);
+         case Key.ArrowRight ignored -> expandSelected(items);
+         case Key.Enter ignored -> toggleSelected(items);
+         case Key.ArrowLeft ignored -> {
+             if (selected < items.size()) {
+                 expanded.remove(items.get(selected).node());
+             }
+         }
+         // Page keys move ten rows at a time.
+         case Key.PageDown ignored -> selected = Math.min(selected + 10, last);
+         case Key.PageUp ignored -> selected = Math.max(selected - 10, 0);
+         case Key.Home ignored -> selected = 0;
+         case Key.End ignored -> selected = last;
+         default -> {
+         }
+     }
+ }
+
+ /// Expands the selected row if it has children; a no-op for leaves or an
+ /// out-of-range selection.
+ ///
+ /// @param items currently visible rows
+ private void expandSelected(List<Item> items) {
+     if (selected >= items.size()) {
+         return;
+     }
+     InspectorTree.Node node = items.get(selected).node();
+     if (!node.children().isEmpty()) {
+         expanded.add(node);
+     }
+ }
+
+ /// Flips the selected row between expanded and collapsed; a no-op for
+ /// leaves or an out-of-range selection.
+ ///
+ /// @param items currently visible rows
+ private void toggleSelected(List<Item> items) {
+     if (selected >= items.size()) {
+         return;
+     }
+     InspectorTree.Node node = items.get(selected).node();
+     if (node.children().isEmpty()) {
+         return;
+     }
+     // add() returns false when the node was already expanded: collapse it.
+     if (!expanded.add(node)) {
+         expanded.remove(node);
+     }
+ }
+
+ /// Tells whether {@code key} ends the viewer: Escape, end of input, or the
+ /// character {@code q} / {@code Q}.
+ ///
+ /// @param key decoded key press
+ /// @return {@code true} if the loop should exit
+ private static boolean isQuit(Key key) {
+     if (key instanceof Key.Char(char typed)) {
+         return typed == 'q' || typed == 'Q';
+     }
+     return key instanceof Key.Escape || key instanceof Key.Eof;
+ }
+
+ /// Produces the visible rows in display order: the root, then the children
+ /// of every expanded node, depth-first.
+ ///
+ /// @return freshly built list of visible rows
+ private List<Item> flatten() {
+     List<Item> out = new ArrayList<>();
+     walk(tree.root(), 0, out);
+     return out;
+ }
+
+ /// Appends {@code node} to {@code out} and, when it is expanded, recurses
+ /// into its children one level deeper.
+ ///
+ /// @param node  node to emit
+ /// @param depth nesting level of {@code node} (root is 0)
+ /// @param out   accumulator for visible rows
+ private void walk(InspectorTree.Node node, int depth, List<Item> out) {
+     out.add(new Item(node, depth));
+     if (!expanded.contains(node)) {
+         return;
+     }
+     for (InspectorTree.Node child : node.children()) {
+         walk(child, depth + 1, out);
+     }
+ }
+
+ /// Draws one complete frame: header, tree pane, divider, details pane for
+ /// the selected row, status bar and footer. The frame is assembled in a
+ /// single buffer and written to the terminal in one call.
+ ///
+ /// @param items currently visible rows
+ /// @throws IOException if querying or writing to the terminal fails
+ private void render(List<Item> items) throws IOException {
+     Terminal.Size size = term.size();
+     int width = size.cols();
+     int height = size.rows();
+     int leftWidth = Math.max(20, width / 2);
+     int bodyTop = 2;
+     int bodyBottom = height - 2;
+     int bodyHeight = bodyBottom - bodyTop;
+     // leftWidth has a floor of 20, so on very narrow terminals nothing is left
+     // for the details pane; clamp so truncation never sees a negative width.
+     int detailWidth = Math.max(0, width - leftWidth - 2);
+
+     // Keep the selected row inside the visible window.
+     if (selected < scrollOffset) {
+         scrollOffset = selected;
+     } else if (selected >= scrollOffset + bodyHeight) {
+         scrollOffset = selected - bodyHeight + 1;
+     }
+
+     StringBuilder buf = new StringBuilder(Math.max(0, width * height));
+     buf.append(Ansi.CLEAR_SCREEN);
+     drawHeader(buf, width);
+     drawTree(buf, items, bodyTop, bodyHeight, leftWidth);
+     drawDivider(buf, leftWidth, bodyTop, bodyBottom);
+     if (!items.isEmpty()) {
+         drawDetails(buf, items.get(selected).node(),
+             leftWidth + 2, bodyTop, detailWidth, bodyHeight);
+     }
+     drawStatus(buf, width, height - 1);
+     drawFooter(buf, width, height);
+     buf.append(Ansi.moveTo(height, 1));
+     term.write(buf.toString());
+     term.flush();
+ }
+
+ private void drawStatus(StringBuilder buf, int width, int row) {
+ int loads = worker == null ? 0 : worker.pending();
+ String err = lastError;
+ String text;
+ int bg;
+ if (err != null) {
+ text = " ! " + err;
+ bg = 41; // red
+ } else if (loads > 0) {
+ text = " " + SPINNER[(int) (tick % SPINNER.length)]
+ + " I/O " + loads + " pending";
+ bg = 44; // blue
+ } else {
+ text = " ready";
+ bg = 42; // green
+ }
+ buf.append(Ansi.moveTo(row, 1));
+ buf.append(Ansi.bg(bg)).append(Ansi.fg(30));
+ buf.append(pad(text, width));
+ buf.append(Ansi.RESET);
+ }
+
+ /// Draws the title bar on row 1: format version, file size, total row
+ /// count, segment count and total segment bytes.
+ ///
+ /// @param buf   frame buffer to append to
+ /// @param width terminal width in columns
+ private void drawHeader(StringBuilder buf, int width) {
+     String fileSize = formatBytes(tree.fileSize());
+     String segmentBytes = formatBytes(tree.totalSegmentBytes());
+     String header = " vortex-inspect — v" + tree.version()
+         + " " + fileSize
+         + " rows=" + tree.totalRowCount()
+         + " segs=" + tree.segmentCount()
+         + " (" + segmentBytes + ")";
+     buf.append(Ansi.moveTo(1, 1))
+         .append(Ansi.bg(46))
+         .append(Ansi.fg(30))
+         .append(pad(header, width))
+         .append(Ansi.RESET);
+ }
+
+ /// Draws the key-binding hint bar on the last terminal row.
+ ///
+ /// @param buf    frame buffer to append to
+ /// @param width  terminal width in columns
+ /// @param height terminal height in rows; also the row the footer is drawn on
+ private void drawFooter(StringBuilder buf, int width, int height) {
+     String hints = pad(" ↑↓ nav →/Enter expand ← collapse q quit ", width);
+     buf.append(Ansi.moveTo(height, 1))
+         .append(Ansi.bg(47))
+         .append(Ansi.fg(30))
+         .append(hints)
+         .append(Ansi.RESET);
+ }
+
+ /// Draws the tree pane: one padded row per visible item starting at
+ /// {@code scrollOffset}, with the selected row highlighted. Rows past the
+ /// end of {@code items} are filled with blanks.
+ ///
+ /// @param buf       frame buffer to append to
+ /// @param items     currently visible rows
+ /// @param top       0-based row offset of the pane body
+ /// @param rows      number of rows available in the pane
+ /// @param leftWidth width of the left pane including the divider column
+ private void drawTree(StringBuilder buf, List<Item> items, int top, int rows, int leftWidth) {
+     int cellWidth = leftWidth - 1;
+     for (int row = 0; row < rows; row++) {
+         int idx = scrollOffset + row;
+         buf.append(Ansi.moveTo(top + row + 1, 1));
+         if (idx >= items.size()) {
+             buf.append(pad("", cellWidth));
+             continue;
+         }
+         String cell = pad(renderItem(items.get(idx)), cellWidth);
+         if (idx == selected) {
+             buf.append(Ansi.bg(43)).append(Ansi.fg(30)).append(cell).append(Ansi.RESET);
+         } else {
+             buf.append(cell);
+         }
+     }
+ }
+
+ /// Builds the text of one tree row: depth indent, expansion marker, label
+ /// and row count, with a ", stats" tag for stats payload nodes.
+ ///
+ /// @param item visible row to render
+ /// @return unpadded row text
+ private String renderItem(Item item) {
+     InspectorTree.Node node = item.node();
+     StringBuilder text = new StringBuilder(" ".repeat(item.depth() * 2));
+     if (node.children().isEmpty()) {
+         text.append(" ");
+     } else {
+         text.append(expanded.contains(node) ? "v " : "> ");
+     }
+     if (item.depth() == 0 && node.layout().isStruct()) {
+         text.append("struct");
+     } else {
+         text.append(node.fieldName().map(n -> n + ": ").orElse(""))
+             .append(node.layout().encodingId());
+     }
+     text.append(" (").append(node.layout().rowCount()).append(" rows");
+     if (statsChildren.contains(node)) {
+         text.append(", stats");
+     }
+     return text.append(")").toString();
+ }
+
+ /// Draws the vertical {@code |} separator between the two panes.
+ ///
+ /// @param buf    frame buffer to append to
+ /// @param col    0-based column of the divider
+ /// @param top    first 0-based row (inclusive)
+ /// @param bottom last 0-based row (exclusive)
+ private void drawDivider(StringBuilder buf, int col, int top, int bottom) {
+     int y = top;
+     while (y < bottom) {
+         buf.append(Ansi.moveTo(y + 1, col + 1));
+         buf.append('|');
+         y++;
+     }
+ }
+
+ /// Draws the details pane for {@code node}: as many detail lines as fit in
+ /// {@code rows}, each cut to {@code width} characters.
+ ///
+ /// @param buf   frame buffer to append to
+ /// @param node  selected node
+ /// @param col   0-based start column of the pane
+ /// @param top   0-based row offset of the pane body
+ /// @param width available width in columns
+ /// @param rows  available height in rows
+ private void drawDetails(StringBuilder buf, InspectorTree.Node node,
+         int col, int top, int width, int rows) {
+     List<String> lines = detailLines(node);
+     int shown = Math.min(lines.size(), rows);
+     for (int i = 0; i < shown; i++) {
+         buf.append(Ansi.moveTo(top + i + 1, col + 1));
+         buf.append(truncate(lines.get(i), width));
+     }
+ }
+
+ /// Builds the text of the details pane for {@code node}: encoding, field /
+ /// column, row and child counts, per-segment sizes, min/max stats when
+ /// known, a dictionary preview for dict layouts, and either a decoded data
+ /// preview (node belongs to a column) or a hex dump (flat node outside any
+ /// column).
+ ///
+ /// Calling this may start background loads; sections whose data has not
+ /// arrived yet show a spinner line or are omitted.
+ ///
+ /// @param node selected node
+ /// @return detail lines, top to bottom
+ private List<String> detailLines(InspectorTree.Node node) {
+     List<String> lines = new ArrayList<>();
+     Layout layout = node.layout();
+     InspectorTree.Peek p = peek(node);
+     lines.add("Encoding: " + (p.encoding() != null ? p.encoding() : layout.encodingId()));
+     node.fieldName().ifPresent(name -> lines.add("Field: " + name));
+     String col = columnOf.get(node);
+     if (col != null && node.fieldName().isEmpty()) {
+         lines.add("Column: " + col);
+     }
+     lines.add("Rows: " + layout.rowCount());
+     lines.add("Children: " + layout.children().size());
+
+     // Segment summary followed by one line per segment.
+     if (layout.segments().isEmpty()) {
+         lines.add("Segments: 0");
+     } else {
+         long subtotal = 0;
+         for (int idx : layout.segments()) {
+             subtotal += tree.segmentSpecs().get(idx).length();
+         }
+         lines.add("Segments: " + layout.segments().size()
+             + " (" + formatBytes(subtotal) + ")");
+         long rows = layout.rowCount();
+         for (int idx : layout.segments()) {
+             SegmentSpec spec = tree.segmentSpecs().get(idx);
+             // Locale.ROOT keeps the decimal separator stable across JVM locales.
+             String bits = rows > 0
+                 ? " bits/elem=" + String.format(java.util.Locale.ROOT, "%.2f", spec.length() * 8.0 / rows)
+                 : "";
+             lines.add(" [" + idx + "] off=" + spec.offset()
+                 + " len=" + formatBytes(spec.length())
+                 + " compression=" + spec.compression().name()
+                 + bits);
+         }
+     }
+
+     if (p.stats().min() != null || p.stats().max() != null) {
+         lines.add("");
+         lines.add("Stats:");
+         if (p.stats().min() != null) {
+             lines.add(" min: " + p.stats().min());
+         }
+         if (p.stats().max() != null) {
+             lines.add(" max: " + p.stats().max());
+         }
+     }
+
+     if (layout.isDict() && !layout.children().isEmpty()) {
+         DataState dictState = loadDictPreview(node);
+         lines.add("");
+         switch (dictState) {
+             case DataState.Pending ignored ->
+                 lines.add("Dictionary: " + SPINNER[(int) (tick % SPINNER.length)] + " loading...");
+             case DataState.Failed(String msg) ->
+                 lines.add("Dictionary: ! " + msg);
+             case DataState.Loaded(var values) -> {
+                 // NOTE: the count is the number of previewed entries, which the
+                 // loader caps at DATA_PREVIEW_ROWS.
+                 lines.add("Dictionary (" + values.size() + " entries):");
+                 for (int i = 0; i < values.size(); i++) {
+                     lines.add(String.format(" [%2d] %s", i, values.get(i)));
+                 }
+             }
+         }
+     }
+
+     if (col != null) {
+         DataState state = loadDataPreview(col);
+         lines.add("");
+         switch (state) {
+             case DataState.Pending ignored ->
+                 lines.add("Data (column '" + col + "'): "
+                     + SPINNER[(int) (tick % SPINNER.length)] + " loading...");
+             case DataState.Failed(String msg) ->
+                 lines.add("Data (column '" + col + "'): ! " + msg);
+             case DataState.Loaded(var values) -> {
+                 lines.add("Data (column '" + col + "', first " + values.size() + " rows):");
+                 for (int i = 0; i < values.size(); i++) {
+                     lines.add(String.format(" [%2d] %s", i, values.get(i)));
+                 }
+             }
+         }
+     } else if (layout.isFlat() && !layout.segments().isEmpty()) {
+         byte[] preview = loadHexPreview(node);
+         if (preview.length > 0) {
+             lines.add("");
+             int segIdx = layout.segments().getFirst();
+             SegmentSpec spec = tree.segmentSpecs().get(segIdx);
+             lines.add("Hex (first " + preview.length + " B of segment "
+                 + segIdx + ", total " + formatBytes(spec.length()) + "):");
+             for (int off = 0; off < preview.length; off += 16) {
+                 lines.add(formatHexRow(preview, off));
+             }
+         }
+     }
+     return lines;
+ }
+
+ /// Returns the data-preview state for {@code columnName}, starting the load
+ /// on first request.
+ ///
+ /// @param columnName top-level column name
+ /// @return current state; {@code DataState.PENDING} if nothing is recorded yet
+ private DataState loadDataPreview(String columnName) {
+     DataState state = dataCache.get(columnName);
+     if (state == null) {
+         startDataLoad(columnName);
+         state = dataCache.getOrDefault(columnName, DataState.PENDING);
+     }
+     return state;
+ }
+
+ /// Returns the dictionary-preview state for {@code dictNode}, starting the
+ /// load on first request (inline without a worker, submitted otherwise).
+ ///
+ /// @param dictNode node whose layout is a dictionary
+ /// @return current state; {@code DataState.PENDING} while loading
+ private DataState loadDictPreview(InspectorTree.Node dictNode) {
+     DataState cached = dictCache.get(dictNode);
+     if (cached != null) {
+         return cached;
+     }
+     // Claim the slot; a non-null result means a load was already started.
+     boolean alreadyClaimed = dictCache.putIfAbsent(dictNode, DataState.PENDING) != null;
+     if (alreadyClaimed) {
+         return dictCache.get(dictNode);
+     }
+     if (worker != null) {
+         worker.submit(() -> runDictLoad(dictNode));
+     } else {
+         runDictLoad(dictNode);
+     }
+     return dictCache.getOrDefault(dictNode, DataState.PENDING);
+ }
+
+ /// Decodes the first segment of the dictionary's values child (child 0) and
+ /// stores up to {@code DATA_PREVIEW_ROWS} formatted entries in
+ /// {@code dictCache}. Any {@link RuntimeException} is stored as
+ /// {@code DataState.Failed} and mirrored into {@code lastError}.
+ ///
+ /// @param dictNode node whose layout is a dictionary
+ private void runDictLoad(InspectorTree.Node dictNode) {
+     try {
+         Layout values = dictNode.layout().children().get(0);
+         DType dtype = columnDtypeFor(dictNode);
+         if (dtype == null) {
+             dictCache.put(dictNode, new DataState.Loaded(List.of()));
+             return;
+         }
+         // The arena is handed to the decoder; values are copied out as strings
+         // before it is closed.
+         try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) {
+             int segIdx = values.segments().getFirst();
+             SegmentSpec spec = tree.segmentSpecs().get(segIdx);
+             MemorySegment seg = handle.slice(spec.offset(), spec.length());
+             Array arr = new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry())
+                 .decode(seg, handle.footer().arraySpecs(),
+                     dtype, values.rowCount(), arena);
+             int n = (int) Math.min(arr.length(), DATA_PREVIEW_ROWS);
+             List<String> out = new ArrayList<>(n);
+             for (int i = 0; i < n; i++) {
+                 out.add(formatValue(arr, i, dtype));
+             }
+             dictCache.put(dictNode, new DataState.Loaded(List.copyOf(out)));
+         }
+     } catch (RuntimeException e) {
+         dictCache.put(dictNode, new DataState.Failed(messageOf(e)));
+         lastError = "dict: " + messageOf(e);
+     }
+ }
+
+ /// Resolves the dtype to decode {@code node} with: the dtype of the column
+ /// the node was tagged with, or the file's root dtype for untagged nodes.
+ ///
+ /// @param node node to resolve
+ /// @return dtype of the owning column, or the root dtype
+ private DType columnDtypeFor(InspectorTree.Node node) {
+     String owner = columnOf.get(node);
+     return owner != null ? columnDtypeByName(owner) : tree.dtype();
+ }
+
+ /// Looks up the dtype of the top-level field {@code columnName}. Falls back
+ /// to the root dtype when the root is not a struct or has no such field.
+ ///
+ /// @param columnName top-level field name
+ /// @return the field's dtype, or the root dtype
+ private DType columnDtypeByName(String columnName) {
+     DType root = tree.dtype();
+     if (!(root instanceof DType.Struct struct)) {
+         return root;
+     }
+     int position = struct.fieldNames().indexOf(columnName);
+     return position >= 0 ? struct.fieldTypes().get(position) : root;
+ }
+
+ /// Starts loading the data preview for {@code columnName} unless a state is
+ /// already recorded for it. Runs inline without a worker, otherwise the
+ /// load is submitted to the worker.
+ ///
+ /// @param columnName top-level column name
+ private void startDataLoad(String columnName) {
+     boolean claimed = dataCache.putIfAbsent(columnName, DataState.PENDING) == null;
+     if (!claimed) {
+         return;
+     }
+     if (worker != null) {
+         worker.submit(() -> runDataLoad(columnName));
+     } else {
+         runDataLoad(columnName);
+     }
+ }
+
+ /// Scans {@code columnName} with a row limit of {@code DATA_PREVIEW_ROWS},
+ /// formats the values of the first chunk and stores them in
+ /// {@code dataCache}. An empty scan or a missing column yields an empty
+ /// {@code Loaded}; any {@link RuntimeException} is stored as
+ /// {@code DataState.Failed} and mirrored into {@code lastError}.
+ ///
+ /// @param columnName top-level column name
+ private void runDataLoad(String columnName) {
+     try {
+         DType declared = columnDtypeByName(columnName);
+         ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS);
+         try (ScanIterator it = handle.scan(opts)) {
+             if (!it.hasNext()) {
+                 dataCache.put(columnName, new DataState.Loaded(List.of()));
+                 return;
+             }
+             try (Chunk chunk = it.next()) {
+                 Array array = chunk.columns().get(columnName);
+                 if (array == null) {
+                     dataCache.put(columnName, new DataState.Loaded(List.of()));
+                     return;
+                 }
+                 int n = (int) Math.min(array.length(), DATA_PREVIEW_ROWS);
+                 List<String> out = new ArrayList<>(n);
+                 for (int i = 0; i < n; i++) {
+                     out.add(formatValue(array, i, declared));
+                 }
+                 // Values are copied out as strings before the chunk is closed.
+                 dataCache.put(columnName, new DataState.Loaded(List.copyOf(out)));
+             }
+         }
+     } catch (RuntimeException e) {
+         dataCache.put(columnName, new DataState.Failed(messageOf(e)));
+         lastError = columnName + ": " + messageOf(e);
+     }
+ }
+
+ /// Short description of {@code t}: its message, or the simple class name
+ /// when the message is {@code null}.
+ ///
+ /// @param t throwable to describe
+ /// @return non-null description
+ private static String messageOf(Throwable t) {
+     return Optional.ofNullable(t.getMessage())
+         .orElseGet(() -> t.getClass().getSimpleName());
+ }
+
+ /// State of a preview fetch (column data or dictionary values): pending
+ /// while the fetch is in flight, loaded with values once decoded, failed
+ /// with a message on error. Sealed so callers can pattern-match exhaustively.
+ sealed interface DataState {
+     /// Singleton state for a fetch in flight.
+     DataState PENDING = new Pending();
+
+     /// In-flight fetch.
+     record Pending() implements DataState {
+     }
+
+     /// Completed fetch with decoded values.
+     ///
+     /// @param values formatted leading values, one string per row
+     record Loaded(List<String> values) implements DataState {
+     }
+
+     /// Failed fetch carrying a short error description.
+     ///
+     /// @param message short error string
+     record Failed(String message) implements DataState {
+     }
+ }
+
+ /// Formats element {@code i} of {@code array} for the preview panes.
+ ///
+ /// Date extension columns are decoded through the extension first; if that
+ /// throws, or for any other dtype, the element is rendered from the
+ /// array's concrete type. Array types without a dedicated rendering produce
+ /// a {@code <ClassName dtype>} placeholder.
+ ///
+ /// @param array    decoded array
+ /// @param i        element index
+ /// @param declared dtype declared for the column the array belongs to
+ /// @return display text for the element
+ private static String formatValue(Array array, int i, DType declared) {
+     if (declared instanceof DType.Extension ext
+             && ext.kind() instanceof io.github.dfa1.vortex.core.Extension.Date date) {
+         try {
+             return date.decode(array, i).toString();
+         } catch (RuntimeException ignored) {
+             // fall through to generic rendering on shape mismatch
+         }
+     }
+     return switch (array) {
+         case LongArray longs -> Long.toString(longs.getLong(i));
+         case IntArray ints -> Integer.toString(ints.getInt(i));
+         case ShortArray shorts -> Short.toString(shorts.getShort(i));
+         case ByteArray bytes -> Byte.toString(bytes.getByte(i));
+         case DoubleArray doubles -> Double.toString(doubles.getDouble(i));
+         case FloatArray floats -> Float.toString(floats.getFloat(i));
+         case BoolArray bools -> Boolean.toString(bools.getBoolean(i));
+         case VarBinArray varBin -> {
+             if (varBin.dtype() instanceof DType.Utf8) {
+                 yield "\"" + varBin.getString(i) + "\"";
+             }
+             yield bytesToShortHex(varBin.getBytes(i));
+         }
+         case GenericArray generic when generic.dtype() instanceof DType.Decimal ->
+             tryDecimal(generic, i);
+         default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">";
+     };
+ }
+
+ /// Formats a decimal element as a plain (non-scientific) string. If reading
+ /// it throws, the result is {@code "null"} when the exception message
+ /// mentions "null cell", otherwise a {@code <ClassName dtype>} placeholder.
+ ///
+ /// @param a decimal-typed array
+ /// @param i element index
+ /// @return display text for the element
+ private static String tryDecimal(GenericArray a, int i) {
+     try {
+         return a.getDecimal(i).toPlainString();
+     } catch (RuntimeException failure) {
+         String detail = failure.getMessage();
+         boolean nullCell = detail != null && detail.contains("null cell");
+         return nullCell
+             ? "null"
+             : "<" + a.getClass().getSimpleName() + " " + a.dtype() + ">";
+     }
+ }
+
+ /// Renders at most the first 16 bytes as lowercase hex prefixed with
+ /// {@code 0x}; {@code ...} is appended when bytes were left out.
+ ///
+ /// @param bytes binary value to render
+ /// @return short hex form of {@code bytes}
+ private static String bytesToShortHex(byte[] bytes) {
+     int shown = Math.min(bytes.length, 16);
+     String hex = "0x" + java.util.HexFormat.of().formatHex(bytes, 0, shown);
+     return bytes.length > shown ? hex + "..." : hex;
+ }
+
+ /// Returns the cached hex preview for {@code node}, loading it when missing.
+ ///
+ /// Without a worker the bytes are fetched inline and cached. With a worker
+ /// a single task per node is submitted and an empty array is returned until
+ /// that task has stored its result.
+ ///
+ /// @param node flat node with at least one segment
+ /// @return preview bytes, or an empty array while loading
+ private byte[] loadHexPreview(InspectorTree.Node node) {
+     byte[] known = hexCache.get(node);
+     if (known != null) {
+         return known;
+     }
+     if (worker != null) {
+         // add() returns false when a fetch for this node is already queued.
+         if (hexInFlight.add(node)) {
+             worker.submit(() -> {
+                 try {
+                     hexCache.put(node, fetchHex(node));
+                 } finally {
+                     hexInFlight.remove(node);
+                 }
+             });
+         }
+         return new byte[0];
+     }
+     byte[] fresh = fetchHex(node);
+     hexCache.put(node, fresh);
+     return fresh;
+ }
+
+ /// Copies up to {@code HEX_PREVIEW_BYTES} leading bytes of the node's first
+ /// segment into a fresh array. A {@link RuntimeException} while slicing or
+ /// copying is recorded in {@code lastError} and yields an empty array.
+ ///
+ /// @param node node whose layout has at least one segment
+ /// @return copied bytes, or an empty array for an empty segment or on failure
+ private byte[] fetchHex(InspectorTree.Node node) {
+     int segIdx = node.layout().segments().getFirst();
+     SegmentSpec spec = tree.segmentSpecs().get(segIdx);
+     int wanted = (int) Math.min((long) HEX_PREVIEW_BYTES, spec.length());
+     if (wanted > 0) {
+         try {
+             MemorySegment source = handle.slice(spec.offset(), wanted);
+             byte[] copy = new byte[wanted];
+             MemorySegment.copy(source, 0, MemorySegment.ofArray(copy), 0, wanted);
+             return copy;
+         } catch (RuntimeException failure) {
+             lastError = "hex: " + messageOf(failure);
+         }
+     }
+     return new byte[0];
+ }
+
+ /// Formats one hex-dump row of up to 16 bytes starting at {@code offset}:
+ /// the offset, the hex bytes (with an extra gap after the eighth), and the
+ /// printable-ASCII rendering between {@code |} bars. Positions past the end
+ /// of {@code data} are blank; non-printable bytes show as {@code .}.
+ ///
+ /// @param data   bytes being dumped
+ /// @param offset index of the first byte of this row
+ /// @return the formatted row
+ private static String formatHexRow(byte[] data, int offset) {
+     java.util.HexFormat hex = java.util.HexFormat.of();
+     StringBuilder hexPart = new StringBuilder(52);
+     StringBuilder asciiPart = new StringBuilder(16);
+     for (int column = 0; column < 16; column++) {
+         int at = offset + column;
+         if (at < data.length) {
+             int unsigned = data[at] & 0xff;
+             hexPart.append(hex.toHexDigits(data[at])).append(' ');
+             asciiPart.append(unsigned >= 0x20 && unsigned < 0x7f ? (char) unsigned : '.');
+         } else {
+             hexPart.append(" ");
+             asciiPart.append(' ');
+         }
+         if (column == 7) {
+             hexPart.append(' ');
+         }
+     }
+     return String.format("%08x ", offset) + hexPart + " |" + asciiPart + '|';
+ }
+
+ private record Item(InspectorTree.Node node, int depth) { // one visible tree row: the node plus its nesting level (root = 0)
+ }
+
+ /// Fits {@code s} to exactly {@code width} characters: longer strings are
+ /// cut, shorter ones are right-padded with spaces. A negative {@code width}
+ /// (possible on very small terminals) is treated as 0 instead of throwing.
+ ///
+ /// @param s     text to fit
+ /// @param width target length in characters
+ /// @return a string of length {@code max(0, width)}
+ private static String pad(String s, int width) {
+     int target = Math.max(0, width);
+     if (s.length() >= target) {
+         return s.substring(0, target);
+     }
+     return s + " ".repeat(target - s.length());
+ }
+
+ /// Cuts {@code s} to at most {@code width} characters. A negative
+ /// {@code width} (possible on very small terminals) is treated as 0 instead
+ /// of throwing.
+ ///
+ /// @param s     text to cut
+ /// @param width maximum length in characters
+ /// @return {@code s}, or its first {@code max(0, width)} characters
+ private static String truncate(String s, int width) {
+     int limit = Math.max(0, width);
+     return s.length() > limit ? s.substring(0, limit) : s;
+ }
+
+ /// Formats a byte count for display: plain bytes below 1024, otherwise
+ /// {@code KB} or {@code MB} (1024-based) with one decimal place.
+ ///
+ /// Numbers are formatted with {@link java.util.Locale#ROOT} so the decimal
+ /// separator is always {@code .}, independent of the JVM default locale.
+ ///
+ /// @param bytes size in bytes
+ /// @return human-readable size, e.g. {@code 512 B}, {@code 1.5 KB}, {@code 2.0 MB}
+ private static String formatBytes(long bytes) {
+     if (bytes < 1024) {
+         return bytes + " B";
+     }
+     if (bytes < 1024 * 1024) {
+         return String.format(java.util.Locale.ROOT, "%.1f KB", bytes / 1024.0);
+     }
+     return String.format(java.util.Locale.ROOT, "%.1f MB", bytes / (1024.0 * 1024.0));
+ }
+ }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java
new file mode 100644
index 00000000..4395f263
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java
@@ -0,0 +1,63 @@
+package io.github.dfa1.vortex.inspect.term;
+
+ /// ANSI / xterm CSI escape sequences as string constants, plus builders for
+ /// the parameterised ones (cursor positioning and SGR colours).
+ ///
+ /// Every sequence starts with the {@code ESC} (0x1B) byte and is otherwise
+ /// plain ASCII.
+ public final class Ansi {
+
+     /// ESC (0x1B) - the byte every CSI sequence starts with.
+     public static final String ESC = Character.toString(0x1B);
+
+     /// Control Sequence Introducer: {@code ESC + '['}.
+     public static final String CSI = ESC + "[";
+
+     /// Clear entire screen.
+     public static final String CLEAR_SCREEN = CSI + "2J";
+
+     /// Move cursor to top-left.
+     public static final String CURSOR_HOME = CSI + "H";
+
+     /// Reset all SGR attributes.
+     public static final String RESET = CSI + "0m";
+
+     /// Hide the cursor.
+     public static final String HIDE_CURSOR = CSI + "?25l";
+
+     /// Show the cursor.
+     public static final String SHOW_CURSOR = CSI + "?25h";
+
+     /// Switch to the alternate screen buffer.
+     public static final String ENTER_ALT_SCREEN = CSI + "?1049h";
+
+     /// Restore the primary screen buffer.
+     public static final String EXIT_ALT_SCREEN = CSI + "?1049l";
+
+     private Ansi() {
+     }
+
+     /// Move the cursor to (1-based) {@code row}, {@code col}.
+     ///
+     /// @param row 1-based row index
+     /// @param col 1-based column index
+     /// @return CSI sequence
+     public static String moveTo(int row, int col) {
+         return new StringBuilder(CSI)
+             .append(row)
+             .append(';')
+             .append(col)
+             .append('H')
+             .toString();
+     }
+
+     /// Standard SGR foreground colour (codes 30-37 normal, 90-97 bright).
+     ///
+     /// @param code SGR colour code
+     /// @return CSI sequence
+     public static String fg(int code) {
+         return sgr(code);
+     }
+
+     /// Standard SGR background colour (codes 40-47 normal, 100-107 bright).
+     ///
+     /// @param code SGR colour code
+     /// @return CSI sequence
+     public static String bg(int code) {
+         return sgr(code);
+     }
+
+     /// Builds {@code CSI <code> m}; foreground and background differ only in
+     /// the code range the caller passes.
+     private static String sgr(int code) {
+         return CSI + code + "m";
+     }
+ }
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java
new file mode 100644
index 00000000..7f0a663d
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java
@@ -0,0 +1,77 @@
+package io.github.dfa1.vortex.inspect.term;
+
+ /// Decoded terminal input event: one single-constant enum per special key, plus [Char] for every other byte.
+ public sealed interface Key {
+
+ /// Up arrow.
+ enum ArrowUp implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Down arrow.
+ enum ArrowDown implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Left arrow.
+ enum ArrowLeft implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Right arrow.
+ enum ArrowRight implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Page Up.
+ enum PageUp implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Page Down.
+ enum PageDown implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Home key.
+ enum Home implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// End key.
+ enum End implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Enter / Return (LF or CR).
+ enum Enter implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Escape: a bare ESC press; {@code KeyDecoder} also reports escape sequences it does not recognise as this key.
+ enum Escape implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// End of input - stdin closed.
+ enum Eof implements Key {
+ /// Singleton instance.
+ INSTANCE
+ }
+
+ /// Any other single input byte, typically a printable character.
+ ///
+ /// @param value the raw byte widened to {@code char} (multi-byte UTF-8 not decoded here)
+ record Char(char value) implements Key {
+ }
+ }
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java
new file mode 100644
index 00000000..824fe12d
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java
@@ -0,0 +1,89 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+ /// Translates raw stdin bytes into [Key] events.
+ ///
+ /// Recognises common sequences emitted by xterm-compatible terminals after
+ /// {@code ESC [} or {@code ESC O}: {@code A/B/C/D} for arrows, {@code H / F}
+ /// for Home/End, and the numeric forms {@code 1~ / 7~} (Home), {@code 4~ / 8~}
+ /// (End), {@code 5~ / 6~} (PgUp/PgDn).
+ ///
+ /// Any other escape sequence is reported as [Key.Escape]; bytes already
+ /// consumed while parsing it are not pushed back. If the stream ends inside
+ /// a numeric sequence, [Key.Eof] is returned.
+ ///
+ /// Stateless across reads - call [#next(InputStream)] for each event.
+ public final class KeyDecoder {
+
+     private KeyDecoder() {
+     }
+
+     /// Reads the next key from {@code in}, blocking until at least one byte arrives.
+     ///
+     /// @param in raw input stream (typically {@code System.in} in cbreak mode)
+     /// @return the decoded key, or [Key.Eof] if the stream is at EOF
+     /// @throws IOException if the underlying read fails
+     public static Key next(InputStream in) throws IOException {
+         int first = in.read();
+         if (first < 0) {
+             return Key.Eof.INSTANCE;
+         }
+         return switch (first) {
+             case 0x1B -> readAfterEsc(in);
+             case '\r', '\n' -> Key.Enter.INSTANCE;
+             default -> new Key.Char((char) first);
+         };
+     }
+
+     /// Decodes what follows an ESC byte.
+     private static Key readAfterEsc(InputStream in) throws IOException {
+         // Bare ESC: no follow-up byte is available right now. available() is
+         // an approximation; proper terminal IO would use a select() / VTIME
+         // timer, but this is enough for q/Esc quit.
+         if (in.available() == 0) {
+             return Key.Escape.INSTANCE;
+         }
+         int introducer = in.read();
+         boolean knownIntroducer = introducer == '[' || introducer == 'O';
+         if (!knownIntroducer) {
+             return Key.Escape.INSTANCE;
+         }
+         int code = in.read();
+         switch (code) {
+             case 'A':
+                 return Key.ArrowUp.INSTANCE;
+             case 'B':
+                 return Key.ArrowDown.INSTANCE;
+             case 'C':
+                 return Key.ArrowRight.INSTANCE;
+             case 'D':
+                 return Key.ArrowLeft.INSTANCE;
+             case 'H':
+                 return Key.Home.INSTANCE;
+             case 'F':
+                 return Key.End.INSTANCE;
+             default:
+                 return readTildeSequence(in, code);
+         }
+     }
+
+     /// Decodes a numeric sequence {@code <digits>~} whose first byte has
+     /// already been read.
+     private static Key readTildeSequence(InputStream in, int firstDigit) throws IOException {
+         if (firstDigit < '0' || firstDigit > '9') {
+             return Key.Escape.INSTANCE;
+         }
+         // Accumulate all decimal digits (e.g. ESC [ 15~ gives 15) up to the terminator.
+         int number = firstDigit - '0';
+         while (true) {
+             int next = in.read();
+             if (next == -1) {
+                 return Key.Eof.INSTANCE;
+             }
+             if (next >= '0' && next <= '9') {
+                 number = number * 10 + (next - '0');
+                 continue;
+             }
+             if (next != '~') {
+                 return Key.Escape.INSTANCE;
+             }
+             break;
+         }
+         return switch (number) {
+             case 1, 7 -> Key.Home.INSTANCE;
+             case 4, 8 -> Key.End.INSTANCE;
+             case 5 -> Key.PageUp.INSTANCE;
+             case 6 -> Key.PageDown.INSTANCE;
+             default -> Key.Escape.INSTANCE;
+         };
+     }
+ }
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java
new file mode 100644
index 00000000..8ab92e6f
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java
@@ -0,0 +1,208 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+
+/// POSIX (Linux + macOS) raw-mode terminal implementation.
+///
+/// Uses libc {@code tcgetattr} / {@code cfmakeraw} / {@code tcsetattr} via FFM
+/// to put stdin in non-canonical, no-echo mode. {@code ioctl(TIOCGWINSZ)}
+/// queries the terminal size on every call (no SIGWINCH plumbing).
+///
+/// On entry: saves the original {@code termios}, switches to alt screen, hides
+/// the cursor. On [#close()]: restores everything, even on exceptions, via a
+/// shutdown hook that fires if the caller skips try-with-resources.
+public final class PosixTerminal implements Terminal {
+
+    private static final int STDIN_FD = 0;
+    private static final int STDOUT_FD = 1;
+    private static final int TCSANOW = 0;
+
+    private static final long TIOCGWINSZ = isMac() ? 0x40087468L : 0x5413L;
+
+    /// {@code struct termios} is at most 72 bytes (macOS); Linux glibc is 60.
+    /// 128 is a comfortable upper bound and lets the same code work on both.
+    private static final long TERMIOS_SIZE = 128;
+
+    /// {@code struct winsize} is four unsigned shorts; only rows and cols are read.
+    private static final long WINSIZE_SIZE = 8;
+
+    /// Reported whenever the real size cannot be determined.
+    private static final Size FALLBACK_SIZE = new Size(24, 80);
+
+    private static final Linker LINKER = Linker.nativeLinker();
+    private static final SymbolLookup LIBC = LINKER.defaultLookup();
+
+    private static final MethodHandle TCGETATTR = downcall("tcgetattr",
+            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS));
+    private static final MethodHandle TCSETATTR = downcall("tcsetattr",
+            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS));
+    private static final MethodHandle CFMAKERAW = downcall("cfmakeraw",
+            FunctionDescriptor.ofVoid(ValueLayout.ADDRESS));
+    private static final MethodHandle IOCTL = LINKER.downcallHandle(
+            LIBC.find("ioctl").orElseThrow(() -> new UnsatisfiedLinkError("ioctl")),
+            FunctionDescriptor.of(ValueLayout.JAVA_INT,
+                    ValueLayout.JAVA_INT, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS),
+            Linker.Option.firstVariadicArg(2));
+
+    private final Arena arena;
+    private final MemorySegment savedTermios;
+    private final PrintStream out;
+    private final Thread shutdownHook;
+    private boolean closed;
+
+    private PosixTerminal(Arena arena, MemorySegment savedTermios) {
+        this.arena = arena;
+        this.savedTermios = savedTermios;
+        this.out = System.out;
+        this.shutdownHook = new Thread(this::restore, "posix-term-restore");
+        Runtime.getRuntime().addShutdownHook(shutdownHook);
+    }
+
+    /// Enters raw mode and switches to the alternate screen.
+    ///
+    /// @return open terminal
+    /// @throws IOException if {@code tcgetattr} or {@code tcsetattr} fails
+    public static PosixTerminal open() throws IOException {
+        Arena arena = Arena.ofShared();
+        try {
+            MemorySegment saved = arena.allocate(TERMIOS_SIZE);
+            MemorySegment raw = arena.allocate(TERMIOS_SIZE);
+            int rc = (int) TCGETATTR.invokeExact(STDIN_FD, saved);
+            if (rc != 0) {
+                throw new IOException("tcgetattr failed: rc=" + rc);
+            }
+            MemorySegment.copy(saved, 0, raw, 0, TERMIOS_SIZE);
+            CFMAKERAW.invokeExact(raw);
+            rc = (int) TCSETATTR.invokeExact(STDIN_FD, TCSANOW, raw);
+            if (rc != 0) {
+                throw new IOException("tcsetattr failed: rc=" + rc);
+            }
+            PosixTerminal term = new PosixTerminal(arena, saved);
+            term.out.print(Ansi.ENTER_ALT_SCREEN);
+            term.out.print(Ansi.HIDE_CURSOR);
+            term.out.print(Ansi.CLEAR_SCREEN);
+            term.out.flush();
+            return term;
+        } catch (Throwable t) {
+            arena.close();
+            if (t instanceof IOException io) {
+                throw io;
+            }
+            throw new IOException(t);
+        }
+    }
+
+    /// Queries the current window size via {@code ioctl(TIOCGWINSZ)} on stdout.
+    ///
+    /// @return the reported rows and columns, or 24x80 when the query fails or reports zero
+    @Override
+    public Size size() {
+        // Scratch memory is scoped to this call: allocating from the long-lived
+        // shared arena would leak a buffer per query until close().
+        try (Arena scratch = Arena.ofConfined()) {
+            MemorySegment ws = scratch.allocate(WINSIZE_SIZE);
+            int rc = (int) IOCTL.invokeExact(STDOUT_FD, TIOCGWINSZ, ws);
+            if (rc != 0) {
+                return FALLBACK_SIZE;
+            }
+            int rows = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 0));
+            int cols = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 2));
+            if (rows == 0 || cols == 0) {
+                return FALLBACK_SIZE;
+            }
+            return new Size(rows, cols);
+        } catch (Throwable ignored) {
+            // Best-effort: any failure falls back to the conventional default.
+            return FALLBACK_SIZE;
+        }
+    }
+
+    @Override
+    public void write(String s) {
+        out.print(s);
+    }
+
+    @Override
+    public void flush() {
+        out.flush();
+    }
+
+    @Override
+    public Key readKey() throws IOException {
+        return KeyDecoder.next(System.in);
+    }
+
+    @Override
+    public Optional<Key> readKey(long timeoutMs) throws IOException {
+        // toNanos saturates instead of overflowing on huge timeouts, and elapsed
+        // time is measured as a difference because raw nanoTime values are not
+        // comparable with each other directly.
+        long budgetNanos = TimeUnit.MILLISECONDS.toNanos(timeoutMs);
+        long start = System.nanoTime();
+        while (System.in.available() == 0) {
+            if (System.nanoTime() - start >= budgetNanos) {
+                return Optional.empty();
+            }
+            try {
+                Thread.sleep(20);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                return Optional.empty();
+            }
+        }
+        return Optional.of(KeyDecoder.next(System.in));
+    }
+
+    @Override
+    public void close() {
+        if (closed) {
+            return;
+        }
+        closed = true;
+        try {
+            Runtime.getRuntime().removeShutdownHook(shutdownHook);
+        } catch (IllegalStateException ignored) {
+            // JVM already shutting down.
+        }
+        restore();
+        arena.close();
+    }
+
+    /// Undoes what [#open()] did: cursor, alternate screen, colors, then termios.
+    /// Runs from [#close()] or, if that was skipped, from the shutdown hook.
+    private void restore() {
+        try {
+            out.print(Ansi.SHOW_CURSOR);
+            out.print(Ansi.EXIT_ALT_SCREEN);
+            out.print(Ansi.RESET);
+            out.flush();
+            // invokeExact takes its expected return type from the call site: as a
+            // bare statement it would request void, mismatch the handle's int
+            // return and throw WrongMethodTypeException, so the cast is required.
+            int ignoredRc = (int) TCSETATTR.invokeExact(STDIN_FD, TCSANOW, savedTermios);
+        } catch (Throwable ignored) {
+            // Best-effort: JVM is exiting; nothing useful to do.
+        }
+    }
+
+    private static MethodHandle downcall(String name, FunctionDescriptor desc) {
+        return LINKER.downcallHandle(
+                LIBC.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)),
+                desc);
+    }
+
+    private static boolean isMac() {
+        return System.getProperty("os.name", "").toLowerCase(Locale.ROOT).contains("mac");
+    }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java
new file mode 100644
index 00000000..425af92c
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java
@@ -0,0 +1,80 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import java.io.IOException;
+import java.util.Optional;
+
+/// Direct, dependency-free terminal abstraction.
+///
+/// Implementations toggle the OS console into raw / non-canonical mode on
+/// [#open()] and restore the prior state on [#close()]. Output is plain bytes
+/// to {@code System.out}; input is buffered keystrokes from {@code System.in}.
+///
+/// Usage:
+/// ```
+/// try (Terminal term = Terminal.open()) {
+///     term.write(Ansi.CLEAR_SCREEN);
+///     Key k = term.readKey();
+///     ...
+/// }
+/// ```
+public sealed interface Terminal extends AutoCloseable
+        permits PosixTerminal, WindowsTerminal {
+
+    /// Opens the platform-appropriate raw-mode terminal.
+    ///
+    /// Picks [PosixTerminal] on Linux / macOS and [WindowsTerminal] on Windows
+    /// based on {@code os.name}, lower-cased independently of the default locale.
+    ///
+    /// @return an open raw terminal handle
+    /// @throws IOException if the OS-level setup fails
+    static Terminal open() throws IOException {
+        String os = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT);
+        if (os.contains("win")) {
+            return WindowsTerminal.open();
+        }
+        return PosixTerminal.open();
+    }
+
+    /// Current terminal size in cells.
+    ///
+    /// @return rows and columns at this moment (re-queried each call)
+    Size size();
+
+    /// Writes a string of bytes (ASCII / UTF-8) verbatim to the terminal.
+    ///
+    /// @param s text to send (may contain ANSI escapes)
+    /// @throws IOException if the write fails
+    void write(String s) throws IOException;
+
+    /// Flushes any buffered output.
+    ///
+    /// @throws IOException if flush fails
+    void flush() throws IOException;
+
+    /// Blocks until a key is available, then returns the decoded event.
+    ///
+    /// @return next key
+    /// @throws IOException if reading fails
+    Key readKey() throws IOException;
+
+    /// Reads a key with a wall-clock deadline. Returns {@link Optional#empty()}
+    /// if the timeout elapses before any input is available.
+    ///
+    /// @param timeoutMs maximum time to wait, in milliseconds
+    /// @return the decoded key, or empty on timeout
+    /// @throws IOException if reading fails
+    Optional<Key> readKey(long timeoutMs) throws IOException;
+
+    /// Restores the original terminal mode and exits the alternate screen.
+    ///
+    /// Idempotent - safe to call multiple times.
+    @Override
+    void close();
+
+    /// Terminal dimensions in character cells.
+    ///
+    /// @param rows number of rows
+    /// @param cols number of columns
+    record Size(int rows, int cols) {
+    }
+}
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java
new file mode 100644
index 00000000..5a0c6630
--- /dev/null
+++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java
@@ -0,0 +1,235 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+
+/// Windows console raw-mode implementation via kernel32.dll.
+///
+/// Toggles {@code ENABLE_VIRTUAL_TERMINAL_PROCESSING} on stdout (Win10 1809+
+/// required) so ANSI sequences in [Ansi] render natively. Stdin runs with
+/// line-input + echo + processed-input disabled and VT input enabled so xterm
+/// arrow sequences arrive intact.
+///
+/// {@code GetConsoleScreenBufferInfo} drives [#size()]; we report the visible
+/// window rect, not the scrollback buffer.
+public final class WindowsTerminal implements Terminal {
+
+    private static final long STD_INPUT_HANDLE = -10L;
+    private static final long STD_OUTPUT_HANDLE = -11L;
+
+    private static final int ENABLE_PROCESSED_INPUT = 0x0001;
+    private static final int ENABLE_LINE_INPUT = 0x0002;
+    private static final int ENABLE_ECHO_INPUT = 0x0004;
+    private static final int ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200;
+
+    private static final int ENABLE_PROCESSED_OUTPUT = 0x0001;
+    private static final int ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004;
+
+    /// {@code CONSOLE_SCREEN_BUFFER_INFO} is 22 bytes: COORD(4) size, COORD(4) cursor,
+    /// WORD(2) attrs, SMALL_RECT(8) window, COORD(4) max. Only the window is read.
+    private static final long SCREEN_BUFFER_INFO_SIZE = 22;
+
+    /// Reported whenever the real size cannot be determined.
+    private static final Size FALLBACK_SIZE = new Size(24, 80);
+
+    private static final Linker LINKER = Linker.nativeLinker();
+    private static final SymbolLookup KERNEL32 = SymbolLookup.libraryLookup(
+            "kernel32", Arena.global());
+
+    private static final MethodHandle GET_STD_HANDLE = downcall("GetStdHandle",
+            FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.JAVA_LONG));
+    private static final MethodHandle GET_CONSOLE_MODE = downcall("GetConsoleMode",
+            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+    private static final MethodHandle SET_CONSOLE_MODE = downcall("SetConsoleMode",
+            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT));
+    private static final MethodHandle GET_CONSOLE_SCREEN_BUFFER_INFO = downcall(
+            "GetConsoleScreenBufferInfo",
+            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS));
+
+    private final Arena arena;
+    private final MemorySegment stdoutHandle;
+    private final int savedInMode;
+    private final int savedOutMode;
+    private final PrintStream out;
+    private final Thread shutdownHook;
+    private boolean closed;
+
+    private WindowsTerminal(Arena arena, MemorySegment stdoutHandle,
+                            int savedInMode, int savedOutMode) {
+        this.arena = arena;
+        this.stdoutHandle = stdoutHandle;
+        this.savedInMode = savedInMode;
+        this.savedOutMode = savedOutMode;
+        this.out = System.out;
+        this.shutdownHook = new Thread(this::restore, "windows-term-restore");
+        Runtime.getRuntime().addShutdownHook(shutdownHook);
+    }
+
+    /// Enables VT processing on stdout and VT input on stdin.
+    ///
+    /// @return open terminal
+    /// @throws IOException if console handles cannot be obtained or modes set
+    public static WindowsTerminal open() throws IOException {
+        Arena arena = Arena.ofShared();
+        try {
+            MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE);
+            MemorySegment stdout = invokeHandle(GET_STD_HANDLE, STD_OUTPUT_HANDLE);
+            int inMode = readMode(arena, stdin);
+            int outMode = readMode(arena, stdout);
+
+            int newIn = (inMode & ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT
+                    | ENABLE_PROCESSED_INPUT)) | ENABLE_VIRTUAL_TERMINAL_INPUT;
+            int newOut = outMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING | ENABLE_PROCESSED_OUTPUT;
+
+            if ((int) SET_CONSOLE_MODE.invokeExact(stdin, newIn) == 0) {
+                throw new IOException("SetConsoleMode(stdin) failed");
+            }
+            if ((int) SET_CONSOLE_MODE.invokeExact(stdout, newOut) == 0) {
+                // Undo the stdin change made just above so a failed open()
+                // leaves the console the way it was found.
+                int ignoredRc = (int) SET_CONSOLE_MODE.invokeExact(stdin, inMode);
+                throw new IOException("SetConsoleMode(stdout) failed");
+            }
+
+            WindowsTerminal term = new WindowsTerminal(arena, stdout, inMode, outMode);
+            term.out.print(Ansi.ENTER_ALT_SCREEN);
+            term.out.print(Ansi.HIDE_CURSOR);
+            term.out.print(Ansi.CLEAR_SCREEN);
+            term.out.flush();
+            return term;
+        } catch (Throwable t) {
+            arena.close();
+            if (t instanceof IOException io) {
+                throw io;
+            }
+            throw new IOException(t);
+        }
+    }
+
+    /// Reports the visible console window size from {@code GetConsoleScreenBufferInfo}.
+    ///
+    /// @return the window's rows and columns, or 24x80 when the query fails
+    @Override
+    public Size size() {
+        // Scratch memory is scoped to this call: allocating from the long-lived
+        // shared arena would leak a buffer per query until close().
+        try (Arena scratch = Arena.ofConfined()) {
+            MemorySegment info = scratch.allocate(SCREEN_BUFFER_INFO_SIZE);
+            int rc = (int) GET_CONSOLE_SCREEN_BUFFER_INFO.invokeExact(stdoutHandle, info);
+            if (rc == 0) {
+                return FALLBACK_SIZE;
+            }
+            // srWindow (SMALL_RECT) starts at byte 10: left, top, right, bottom.
+            int left = info.get(ValueLayout.JAVA_SHORT, 10);
+            int top = info.get(ValueLayout.JAVA_SHORT, 12);
+            int right = info.get(ValueLayout.JAVA_SHORT, 14);
+            int bottom = info.get(ValueLayout.JAVA_SHORT, 16);
+            int rows = bottom - top + 1;
+            int cols = right - left + 1;
+            if (rows <= 0 || cols <= 0) {
+                return FALLBACK_SIZE;
+            }
+            return new Size(rows, cols);
+        } catch (Throwable ignored) {
+            // Best-effort: any failure falls back to the conventional default.
+            return FALLBACK_SIZE;
+        }
+    }
+
+    @Override
+    public void write(String s) {
+        out.print(s);
+    }
+
+    @Override
+    public void flush() {
+        out.flush();
+    }
+
+    @Override
+    public Key readKey() throws IOException {
+        return KeyDecoder.next(System.in);
+    }
+
+    @Override
+    public Optional<Key> readKey(long timeoutMs) throws IOException {
+        // toNanos saturates instead of overflowing on huge timeouts, and elapsed
+        // time is measured as a difference because raw nanoTime values are not
+        // comparable with each other directly.
+        long budgetNanos = TimeUnit.MILLISECONDS.toNanos(timeoutMs);
+        long start = System.nanoTime();
+        while (System.in.available() == 0) {
+            if (System.nanoTime() - start >= budgetNanos) {
+                return Optional.empty();
+            }
+            try {
+                Thread.sleep(20);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                return Optional.empty();
+            }
+        }
+        return Optional.of(KeyDecoder.next(System.in));
+    }
+
+    @Override
+    public void close() {
+        if (closed) {
+            return;
+        }
+        closed = true;
+        try {
+            Runtime.getRuntime().removeShutdownHook(shutdownHook);
+        } catch (IllegalStateException ignored) {
+            // JVM already shutting down.
+        }
+        restore();
+        arena.close();
+    }
+
+    /// Undoes what [#open()] did: cursor, alternate screen, colors, then both console modes.
+    /// Runs from [#close()] or, if that was skipped, from the shutdown hook.
+    private void restore() {
+        try {
+            out.print(Ansi.SHOW_CURSOR);
+            out.print(Ansi.EXIT_ALT_SCREEN);
+            out.print(Ansi.RESET);
+            out.flush();
+            // invokeExact takes its expected return type from the call site: as a
+            // bare statement it would request void, mismatch the handle's int
+            // return and throw WrongMethodTypeException, so the casts are required.
+            MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE);
+            int inRc = (int) SET_CONSOLE_MODE.invokeExact(stdin, savedInMode);
+            int outRc = (int) SET_CONSOLE_MODE.invokeExact(stdoutHandle, savedOutMode);
+        } catch (Throwable ignored) {
+            // Best-effort: JVM is exiting; nothing useful to do.
+        }
+    }
+
+    private static int readMode(Arena arena, MemorySegment handle) throws Throwable {
+        MemorySegment slot = arena.allocate(4);
+        if ((int) GET_CONSOLE_MODE.invokeExact(handle, slot) == 0) {
+            throw new IOException("GetConsoleMode failed");
+        }
+        return slot.get(ValueLayout.JAVA_INT, 0);
+    }
+
+    private static MemorySegment invokeHandle(MethodHandle mh, long stdHandle) throws Throwable {
+        return (MemorySegment) mh.invokeExact(stdHandle);
+    }
+
+    private static MethodHandle downcall(String name, FunctionDescriptor desc) {
+        return LINKER.downcallHandle(
+                KERNEL32.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)),
+                desc);
+    }
+}
diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java
new file mode 100644
index 00000000..cf4398aa
--- /dev/null
+++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java
@@ -0,0 +1,278 @@
+package io.github.dfa1.vortex.inspect;
+
+import io.github.dfa1.vortex.core.CompressionScheme;
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.Footer;
+import io.github.dfa1.vortex.core.Layout;
+import io.github.dfa1.vortex.core.PType;
+import io.github.dfa1.vortex.core.SegmentSpec;
+import io.github.dfa1.vortex.io.VortexHandle;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.util.List;
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.BDDMockito.given;
+
+@ExtendWith(MockitoExtension.class)
+class InspectorTreeTest {
+
+    @Mock
+    VortexHandle handle;
+
+    @Test
+    void build_withStructDType_assignsFieldNamesToColumns() {
+        // Given
+        Layout idLeaf = leaf("vortex.constant", 10);
+        Layout valLeaf = leaf("vortex.constant", 10);
+        Layout root = struct(10, List.of(idLeaf, valLeaf));
+        DType dtype = new DType.Struct(
+                List.of("id", "value"),
+                List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)),
+                false);
+        givenHandle(dtype, root, List.of("vortex.constant"), List.of());
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.root().fieldName()).isEmpty();
+        assertThat(sut.root().children()).hasSize(2);
+        assertThat(sut.root().children().get(0).fieldName()).contains("id");
+        assertThat(sut.root().children().get(1).fieldName()).contains("value");
+    }
+
+    @Test
+    void build_withFewerColNamesThanChildren_fillsWithSyntheticNames() {
+        // Given — defensive path: malformed footer with a struct layout whose child count
+        // exceeds the dtype's named fields. Should not throw; should fall back to col0/col1...
+        Layout root = struct(0, List.of(leaf("vortex.constant", 0), leaf("vortex.constant", 0)));
+        DType dtype = new DType.Struct(List.of("only"),
+                List.of(new DType.Primitive(PType.I32, false)), false);
+        givenHandle(dtype, root, List.of("vortex.constant"), List.of());
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.root().children().get(0).fieldName()).contains("only");
+        assertThat(sut.root().children().get(1).fieldName()).contains("col1");
+    }
+
+    @Test
+    void build_withNonStructRoot_leavesFieldNameEmpty() {
+        // Given
+        Layout root = leaf("vortex.flat", 100);
+        DType dtype = new DType.Primitive(PType.I64, false);
+        givenHandle(dtype, root, List.of("vortex.flat"), List.of());
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.root().fieldName()).isEmpty();
+        assertThat(sut.root().children()).isEmpty();
+    }
+
+    @Test
+    void build_sumsSegmentBytesAndCountsSegments() {
+        // Given
+        Layout root = leaf("vortex.flat", 0);
+        DType dtype = new DType.Primitive(PType.I32, false);
+        List<SegmentSpec> segs = List.of(
+                new SegmentSpec(0, 128, (byte) 0, CompressionScheme.LZ4),
+                new SegmentSpec(128, 256, (byte) 0, CompressionScheme.LZ4),
+                new SegmentSpec(384, 64, (byte) 0, CompressionScheme.LZ4));
+        givenHandle(dtype, root, List.of("vortex.flat"), segs);
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.segmentCount()).isEqualTo(3);
+        assertThat(sut.totalSegmentBytes()).isEqualTo(128L + 256L + 64L);
+        assertThat(sut.segmentSpecs()).containsExactlyElementsOf(segs);
+    }
+
+    @Test
+    void build_setsTotalRowCountFromRootLayout() {
+        // Given — total rows is the root layout's row count, regardless of struct/non-struct
+        Layout root = struct(12_345L, List.of(leaf("vortex.constant", 12_345L)));
+        DType dtype = new DType.Struct(List.of("c"),
+                List.of(new DType.Primitive(PType.I32, false)), false);
+        givenHandle(dtype, root, List.of("vortex.constant"), List.of());
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.totalRowCount()).isEqualTo(12_345L);
+    }
+
+    @Test
+    void build_carriesVersionAndFileSize() {
+        // Given
+        Layout root = leaf("vortex.flat", 0);
+        DType dtype = new DType.Primitive(PType.I32, false);
+        given(handle.version()).willReturn(7);
+        given(handle.fileSize()).willReturn(123_456L);
+        given(handle.dtype()).willReturn(dtype);
+        given(handle.layout()).willReturn(root);
+        given(handle.footer()).willReturn(new Footer(List.of("vortex.flat"), List.of(), List.of(), List.of()));
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.version()).isEqualTo(7);
+        assertThat(sut.fileSize()).isEqualTo(123_456L);
+    }
+
+    @Test
+    void build_reportsProgressOncePerPeekedSegment() {
+        // Given — struct of three Flat columns whose segments are all compressed.
+        // Only uncompressed leaves trigger peekFlatRoot, so with nothing to peek the
+        // progress callback must never fire.
+        Layout c1 = new Layout("vortex.flat", 0, null, List.of(), List.of(0));
+        Layout c2 = new Layout("vortex.flat", 0, null, List.of(), List.of(1));
+        Layout c3 = new Layout("vortex.flat", 0, null, List.of(), List.of(2));
+        Layout root = struct(0, List.of(c1, c2, c3));
+        DType dtype = new DType.Struct(List.of("a", "b", "c"),
+                List.of(new DType.Primitive(PType.I32, false),
+                        new DType.Primitive(PType.I32, false),
+                        new DType.Primitive(PType.I32, false)),
+                false);
+        List<SegmentSpec> segs = List.of(
+                new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD), // skipped
+                new SegmentSpec(1024, 1024, (byte) 0, CompressionScheme.LZ4), // skipped
+                new SegmentSpec(2048, 1024, (byte) 0, CompressionScheme.LZ4)); // skipped
+        givenHandle(dtype, root, List.of("vortex.flat"), segs);
+
+        java.util.List<int[]> reports = new java.util.ArrayList<>();
+
+        // When
+        InspectorTree.build(handle, (cur, tot) -> reports.add(new int[]{cur, tot}));
+
+        // Then — all three are compressed, so no peeks fire; progress never called
+        assertThat(reports).isEmpty();
+    }
+
+    @Test
+    void build_progressNoop_isAcceptedAndProducesSameTree() {
+        // Given
+        Layout root = struct(0, List.of(leaf("vortex.constant", 0)));
+        DType dtype = new DType.Struct(List.of("c"),
+                List.of(new DType.Primitive(PType.I32, false)), false);
+        givenHandle(dtype, root, List.of("vortex.constant"), List.of());
+
+        // When / Then — NOOP passes; no NPE
+        InspectorTree sut = InspectorTree.build(handle, InspectorTree.Progress.NOOP);
+        assertThat(sut.root().children()).hasSize(1);
+    }
+
+    @Test
+    void buildShallow_skipsAllSlicesAndStillNamesColumns() {
+        // Given — shallow build is the path the TUI uses; it must touch zero segment
+        // bytes (so opening a remote file is instant) yet still populate fieldName on
+        // top-level struct children.
+        Layout col0 = new Layout("vortex.flat", 10, null, List.of(), List.of(0));
+        Layout col1 = new Layout("vortex.flat", 10, null, List.of(), List.of(1));
+        Layout root = struct(10, List.of(col0, col1));
+        DType dtype = new DType.Struct(List.of("id", "value"),
+                List.of(new DType.Primitive(PType.I64, false),
+                        new DType.Primitive(PType.F64, false)),
+                false);
+        List<SegmentSpec> segs = List.of(
+                new SegmentSpec(0, 64, (byte) 0, CompressionScheme.NONE),
+                new SegmentSpec(64, 64, (byte) 0, CompressionScheme.NONE));
+        givenHandle(dtype, root, List.of("vortex.flat"), segs);
+
+        // When
+        InspectorTree sut = InspectorTree.buildShallow(handle);
+
+        // Then — column names assigned, but no peek fired so stats / usedEncodings empty
+        assertThat(sut.root().children().get(0).fieldName()).contains("id");
+        assertThat(sut.root().children().get(1).fieldName()).contains("value");
+        assertThat(sut.usedEncodings()).isEmpty();
+        assertThat(sut.root().children().get(0).usedEncodings()).isEmpty();
+        assertThat(sut.root().children().get(0).stats()).isEqualTo(io.github.dfa1.vortex.core.ArrayStats.empty());
+        // Slice is reserved for lazy peek; shallow build must never call it
+        org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice(
+                org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong());
+    }
+
+    @Test
+    void peek_nonFlatNode_returnsEmptyWithoutSlicing() {
+        // Given — peek is the lazy hook the TUI uses on the selected node. Non-Flat
+        // layouts (struct, chunked, stats wrappers) carry no array root and must short
+        // out without slicing, so navigating to them doesn't hit the network.
+        Layout structLayout = struct(0, List.of());
+        InspectorTree.Node node = new InspectorTree.Node(structLayout, java.util.Optional.empty(),
+                Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of());
+
+        // When
+        InspectorTree.Peek result = InspectorTree.peek(node, handle);
+
+        // Then
+        assertThat(result).isSameAs(InspectorTree.Peek.EMPTY);
+        org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice(
+                org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong());
+    }
+
+    @Test
+    void peek_compressedFlatSegment_returnsEmptyWithoutSlicing() {
+        // Given — compressed segments would need the encoding to decompress before
+        // their FlatBuffer can be parsed; peek skips them rather than slicing garbage.
+        Layout flat = new Layout("vortex.flat", 10, null, List.of(), List.of(0));
+        InspectorTree.Node node = new InspectorTree.Node(flat, java.util.Optional.empty(),
+                Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of());
+        given(handle.footer()).willReturn(new io.github.dfa1.vortex.core.Footer(
+                List.of("vortex.flat"), List.of(),
+                List.of(new SegmentSpec(0, 100, (byte) 0, CompressionScheme.ZSTD)),
+                List.of()));
+
+        // When
+        InspectorTree.Peek result = InspectorTree.peek(node, handle);
+
+        // Then
+        assertThat(result).isSameAs(InspectorTree.Peek.EMPTY);
+        org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice(
+                org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong());
+    }
+
+    @Test
+    void build_flatChildWithCompressedSegment_skipsRootEncodingPeek() {
+        // Given — peekRootEncoding() reads the segment as a FlatBuffer; compressed segments
+        // are intentionally skipped so a malformed or compressed payload can't crash the
+        // inspector. With code != NONE we should still build a tree, with no encodings used.
+        Layout root = new Layout("vortex.flat", 0, null, List.of(), List.of(0));
+        DType dtype = new DType.Primitive(PType.I32, false);
+        SegmentSpec compressed = new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD);
+        givenHandle(dtype, root, List.of("vortex.flat"), List.of(compressed));
+
+        // When
+        InspectorTree sut = InspectorTree.build(handle);
+
+        // Then
+        assertThat(sut.usedEncodings()).isEmpty();
+        assertThat(sut.root().usedEncodings()).isEmpty();
+    }
+
+    private void givenHandle(DType dtype, Layout layout, List<String> arraySpecs, List<SegmentSpec> segs) {
+        given(handle.dtype()).willReturn(dtype);
+        given(handle.layout()).willReturn(layout);
+        given(handle.footer()).willReturn(new Footer(arraySpecs, List.of(), segs, List.of()));
+    }
+
+    private static Layout struct(long rows, List<Layout> children) {
+        return new Layout("vortex.struct", rows, null, children, List.of());
+    }
+
+    private static Layout leaf(String encodingId, long rows) {
+        return new Layout(encodingId, rows, null, List.of(), List.of());
+    }
+}
diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java
new file mode 100644
index 00000000..b1926274
--- /dev/null
+++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java
@@ -0,0 +1,203 @@
+package io.github.dfa1.vortex.inspect;
+
+import io.github.dfa1.vortex.core.ArrayStats;
+import io.github.dfa1.vortex.core.CompressionScheme;
+import io.github.dfa1.vortex.core.DType;
+import io.github.dfa1.vortex.core.Layout;
+import io.github.dfa1.vortex.core.PType;
+import io.github.dfa1.vortex.core.SegmentSpec;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class VortexInspectorTest {
+
+    @Test
+    void render_struct_listsHeaderColumnsAndUsedEncodings() {
+        // Given
+        InspectorTree sut = struct2col(2, 4096L,
+                List.of(
+                        new SegmentSpec(0, 512, (byte) 0, CompressionScheme.NONE),
+                        new SegmentSpec(512, 512, (byte) 0, CompressionScheme.LZ4)),
+                Set.of("fastlanes.bitpacked", "vortex.constant"));
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out)
+                .contains("Vortex v2")
+                .contains("4.0 KB")
+                .contains("1000 rows")
+                .contains("Schema:")
+                .contains("id")
+                .contains("value")
+                .contains("Registered encodings: vortex.flat, fastlanes.bitpacked, vortex.constant")
+                .contains("Used encodings: ")
+                .contains("Segments: 2")
+                .contains("[0] off=0 len=512 B compression=NONE")
+                .contains("[1] off=512 len=512 B compression=LZ4")
+                .contains("Layout:")
+                .contains("struct (1000 rows)")
+                .contains("[fastlanes.bitpacked]");
+    }
+
+    @Test
+    void render_segmentTable_listsEverySegment() {
+        // Given — verify table prints one line per segment with offset + size + compression
+        List<SegmentSpec> specs = List.of(
+                new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD),
+                new SegmentSpec(1024, 2048, (byte) 0, CompressionScheme.NONE),
+                new SegmentSpec(3072, 4096, (byte) 0, CompressionScheme.LZ4));
+        InspectorTree sut = struct2col(1, 8192L, specs, Set.of());
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out)
+                .contains("[0] off=0 len=1.0 KB compression=ZSTD")
+                .contains("[1] off=1024 len=2.0 KB compression=NONE")
+                .contains("[2] off=3072 len=4.0 KB compression=LZ4");
+    }
+
+    @Test
+    void render_nonStruct_inlinesSingleColumnLayout() {
+        // Given
+        Layout leaf = new Layout("vortex.flat", 100, null, List.of(), List.of());
+        InspectorTree.Node root = new InspectorTree.Node(leaf, Optional.empty(), Set.of(), ArrayStats.empty(), List.of());
+        InspectorTree sut = new InspectorTree(
+                1, 256L,
+                new DType.Primitive(PType.I32, false),
+                List.of("vortex.flat"), Set.of(),
+                List.of(new SegmentSpec(0, 256, (byte) 0, CompressionScheme.NONE)),
+                100L, root);
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out).contains("vortex.flat(100 rows)");
+        assertThat(out).doesNotContain("struct (");
+    }
+
+    @Test
+    void render_formatsBytesAcrossUnits() {
+        // Given — bytes / KB / MB boundaries
+        List<SegmentSpec> oneSeg = List.of(new SegmentSpec(0, 1, (byte) 0, CompressionScheme.NONE));
+        InspectorTree small = struct2col(1, 512L, oneSeg, Set.of());
+        InspectorTree medium = struct2col(1, 2048L, oneSeg, Set.of());
+        InspectorTree large = struct2col(1, 5L * 1024 * 1024, oneSeg, Set.of());
+
+        // When / Then
+        assertThat(VortexInspector.render(small)).contains("512 B");
+        assertThat(VortexInspector.render(medium)).contains("2.0 KB");
+        assertThat(VortexInspector.render(large)).contains("5.0 MB");
+    }
+
+    @Test
+    void render_chainsChildrenWithArrow() {
+        // Given — nested zoned → chunked → flat chain
+        Layout flat = new Layout("vortex.flat", 1000, null, List.of(), List.of());
+        Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(flat), List.of());
+        Layout zoned = new Layout("vortex.stats", 1000, null, List.of(chunked), List.of());
+        Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(zoned), List.of());
+
+        InspectorTree.Node flatN = new InspectorTree.Node(flat, Optional.empty(), Set.of(), ArrayStats.empty(), List.of());
+        InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(flatN));
+        InspectorTree.Node zonedN = new InspectorTree.Node(zoned, Optional.of("v"), Set.of(), ArrayStats.empty(), List.of(chunkedN));
+        InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(zonedN));
+
+        InspectorTree sut = new InspectorTree(
+                1, 1024L,
+                new DType.Struct(List.of("v"), List.of(new DType.Primitive(PType.I32, false)), false),
+                List.of("vortex.flat"), Set.of(),
+                List.of(), 1000L, rootN);
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out).contains("vortex.stats(1000 rows) → vortex.chunked(1000 rows) → vortex.flat(1000 rows)");
+    }
+
+    @Test
+    void render_aggregatesMinMaxAcrossChunks() {
+        // Given — column with two chunked Flat leaves; aggregate should fold each leaf's stats
+        Layout chunk1 = new Layout("vortex.flat", 500, null, List.of(), List.of());
+        Layout chunk2 = new Layout("vortex.flat", 500, null, List.of(), List.of());
+        Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(chunk1, chunk2), List.of());
+        Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(chunked), List.of());
+
+        InspectorTree.Node c1 = new InspectorTree.Node(chunk1, Optional.empty(), Set.of(),
+                new ArrayStats(10L, 50L, null, null, null, null), List.of());
+        InspectorTree.Node c2 = new InspectorTree.Node(chunk2, Optional.empty(), Set.of(),
+                new ArrayStats(5L, 100L, null, null, null, null), List.of());
+        InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.of("id"),
+                Set.of("vortex.flat"), ArrayStats.empty(), List.of(c1, c2));
+        InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(),
+                Set.of("vortex.flat"), ArrayStats.empty(), List.of(chunkedN));
+
+        InspectorTree sut = new InspectorTree(1, 1024L,
+                new DType.Struct(List.of("id"), List.of(new DType.Primitive(PType.I64, false)), false),
+                List.of("vortex.flat"), Set.of(), List.of(), 1000L, rootN);
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then — min over (10, 5) = 5; max over (50, 100) = 100
+        assertThat(out).contains("min=5 max=100");
+    }
+
+    @Test
+    void render_columnWithoutStats_omitsMinMax() {
+        // Given — default tree has ArrayStats.empty() on every node
+        InspectorTree sut = struct2col(1, 100L, List.of(), Set.of());
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out).doesNotContain("min=");
+        assertThat(out).doesNotContain("max=");
+    }
+
+    @Test
+    void render_emptyUsedEncodings_omitsBracketSuffix() {
+        // Given — column with no resolved encodings should not emit " []" noise
+        InspectorTree sut = struct2col(1, 100L, List.of(), Set.of());
+
+        // When
+        String out = VortexInspector.render(sut);
+
+        // Then
+        assertThat(out).doesNotContain(" []");
+    }
+
+    private static InspectorTree struct2col(int version, long fileSize, List<SegmentSpec> specs, Set<String> usedById) {
+        Layout idLeaf = new Layout("fastlanes.bitpacked", 1000, null, List.of(), List.of());
+        Layout valLeaf = new Layout("vortex.constant", 1000, null, List.of(), List.of());
+        Layout root = new Layout("vortex.struct", 1000, null, List.of(idLeaf, valLeaf), List.of());
+
+        InspectorTree.Node idNode = new InspectorTree.Node(idLeaf,
+                Optional.of("id"), Set.of("fastlanes.bitpacked"), ArrayStats.empty(), List.of());
+        InspectorTree.Node valNode = new InspectorTree.Node(valLeaf,
+                Optional.of("value"), Set.of("vortex.constant"), ArrayStats.empty(), List.of());
+        InspectorTree.Node rootNode = new InspectorTree.Node(root,
+                Optional.empty(), Set.of("fastlanes.bitpacked", "vortex.constant"),
+                ArrayStats.empty(), List.of(idNode, valNode));
+
+        DType dtype = new DType.Struct(
+                List.of("id", "value"),
+                List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)),
+                false);
+
+        return new InspectorTree(version, fileSize, dtype,
+                List.of("vortex.flat", "fastlanes.bitpacked", "vortex.constant"),
+                usedById, specs, 1000L, rootNode);
+    }
+}
diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java
new file mode 100644
index 00000000..7ab3c4c2
--- /dev/null
+++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java
@@ -0,0 +1,42 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class AnsiTest {
+
+ private static final char ESC = '\u001B';
+
+ @Test
+ void escConstant_isAsciiEscapeByte() {
+ // Given / When / Then — the shared prefix must be exactly the single character 0x1B
+ assertThat(Ansi.ESC).isEqualTo(Character.toString(ESC));
+ }
+
+ @Test
+ void moveTo_formatsRowAndColumn() {
+ // Given / When / Then — row first, then column, terminated by 'H'
+ assertThat(Ansi.moveTo(5, 12)).isEqualTo(ESC + "[5;12H");
+ }
+
+ @Test
+ void fgAndBg_emitSgrCode() {
+ // Given / When / Then — the numeric code is passed through and terminated by 'm'
+ assertThat(Ansi.fg(31)).isEqualTo(ESC + "[31m");
+ assertThat(Ansi.bg(42)).isEqualTo(ESC + "[42m");
+ }
+
+ @Test
+ void clearAndCursorConstants_startWithCsi() {
+ // Given / When / Then — each constant must keep its ESC '[' introducer
+ String introducer = ESC + "[";
+ assertThat(Ansi.CLEAR_SCREEN).startsWith(introducer).endsWith("2J");
+ assertThat(Ansi.CURSOR_HOME).startsWith(introducer).endsWith("H");
+ assertThat(Ansi.HIDE_CURSOR).isEqualTo(introducer + "?25l");
+ assertThat(Ansi.SHOW_CURSOR).isEqualTo(introducer + "?25h");
+ assertThat(Ansi.ENTER_ALT_SCREEN).isEqualTo(introducer + "?1049h");
+ assertThat(Ansi.EXIT_ALT_SCREEN).isEqualTo(introducer + "?1049l");
+ assertThat(Ansi.RESET).isEqualTo(introducer + "0m");
+ }
+}
diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java
new file mode 100644
index 00000000..69009f5a
--- /dev/null
+++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java
@@ -0,0 +1,128 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class KeyDecoderTest {
+
+ @Test
+ void next_arrowUp_decodesCsiA() throws IOException {
+ // Given — the three-byte sequence ESC [ A
+ ByteArrayInputStream stream = bytes(0x1B, '[', 'A');
+
+ // When — one key is decoded from the stream
+ Key key = KeyDecoder.next(stream);
+
+ // Then — it is reported as the up arrow
+ assertThat(key).isEqualTo(Key.ArrowUp.INSTANCE);
+ }
+
+ @Test
+ void next_allArrows_decodeIndependently() throws IOException {
+ // Given / When / Then — final bytes A, B, C, D map to up, down, right, left
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'A'))).isEqualTo(Key.ArrowUp.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'B'))).isEqualTo(Key.ArrowDown.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'C'))).isEqualTo(Key.ArrowRight.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'D'))).isEqualTo(Key.ArrowLeft.INSTANCE);
+ }
+
+ @Test
+ void next_homeAndEnd_decodeBothCsiAndTildeForms() throws IOException {
+ // Given / When / Then — letter form (ESC[H / ESC[F, xterm) and tilde form (ESC[1~ / ESC[4~, rxvt and others)
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'H'))).isEqualTo(Key.Home.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', 'F'))).isEqualTo(Key.End.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', '1', '~'))).isEqualTo(Key.Home.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', '4', '~'))).isEqualTo(Key.End.INSTANCE);
+ }
+
+ @Test
+ void next_pageUpAndDown_decodeTildeSequences() throws IOException {
+ // Given / When / Then — ESC[5~ is page up, ESC[6~ is page down
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', '5', '~'))).isEqualTo(Key.PageUp.INSTANCE);
+ assertThat(KeyDecoder.next(bytes(0x1B, '[', '6', '~'))).isEqualTo(Key.PageDown.INSTANCE);
+ }
+
+ @Test
+ void next_bareEscape_returnsEscapeWhenNoFollowupAvailable() throws IOException {
+ // Given — a lone ESC byte and nothing after it
+ ByteArrayInputStream stream = bytes(0x1B);
+
+ // When — one key is decoded from the stream
+ Key key = KeyDecoder.next(stream);
+
+ // Then — it is reported as a plain Escape key press
+ assertThat(key).isEqualTo(Key.Escape.INSTANCE);
+ }
+
+ @Test
+ void next_enterFromCrAndLf_bothDecodeToEnter() throws IOException {
+ // Given / When / Then — carriage return and line feed are both treated as Enter
+ assertThat(KeyDecoder.next(bytes('\r'))).isEqualTo(Key.Enter.INSTANCE);
+ assertThat(KeyDecoder.next(bytes('\n'))).isEqualTo(Key.Enter.INSTANCE);
+ }
+
+ @Test
+ void next_printableChar_returnsChar() throws IOException {
+ // Given — a single printable byte
+ ByteArrayInputStream stream = bytes('q');
+
+ // When — one key is decoded from the stream
+ Key key = KeyDecoder.next(stream);
+
+ // Then — the key is a Char carrying that same character
+ assertThat(key).isInstanceOf(Key.Char.class);
+ assertThat(((Key.Char) key).value()).isEqualTo('q');
+ }
+
+ @Test
+ void next_eof_returnsEof() throws IOException {
+ // Given — a stream with no bytes at all
+ ByteArrayInputStream stream = bytes();
+
+ // When — one key is decoded from the stream
+ Key key = KeyDecoder.next(stream);
+
+ // Then — end of input is reported as its own key
+ assertThat(key).isEqualTo(Key.Eof.INSTANCE);
+ }
+
+ @Test
+ void next_unknownCsiLetter_yieldsEscape() throws IOException {
+ // Given — ESC [ Z (xterm reverse-tab), a sequence the decoder does not recognise
+ ByteArrayInputStream stream = bytes(0x1B, '[', 'Z');
+
+ // When — one key is decoded from the stream
+ Key key = KeyDecoder.next(stream);
+
+ // Then — an unknown CSI must collapse to Escape rather than surface as a stray Char
+ assertThat(key).isEqualTo(Key.Escape.INSTANCE);
+ }
+
+ @Test
+ void next_multiDigitTildeCode_handlesTwoDigits() throws IOException {
+ // Given — ESC [ 15 ~ (xterm F5) followed by 'x'; the unknown number decodes to Escape,
+ // and the closing '~' has to be swallowed so it is not leaked as a character
+ ByteArrayInputStream stream = bytes(0x1B, '[', '1', '5', '~', 'x');
+
+ // When — two keys are decoded back to back from the same stream
+ Key functionKey = KeyDecoder.next(stream);
+ Key trailing = KeyDecoder.next(stream);
+
+ // Then — Escape first, then the untouched 'x'
+ assertThat(functionKey).isEqualTo(Key.Escape.INSTANCE);
+ assertThat(trailing).isInstanceOf(Key.Char.class);
+ assertThat(((Key.Char) trailing).value()).isEqualTo('x');
+ }
+
+ private static ByteArrayInputStream bytes(int... bs) {
+ byte[] raw = new byte[bs.length];
+ for (int idx = 0; idx != bs.length; ++idx) {
+ raw[idx] = (byte) bs[idx];
+ }
+ return new ByteArrayInputStream(raw);
+ }
+}
diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java
new file mode 100644
index 00000000..3bb9dfbf
--- /dev/null
+++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java
@@ -0,0 +1,63 @@
+package io.github.dfa1.vortex.inspect.term;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/// Smoke test for the FFM-based Windows console binding.
+///
+/// Runs only on Windows (other OSes lack kernel32). The goal is to catch
+/// missing-symbol / signature-mismatch regressions in CI without requiring
+/// a real interactive TTY:
+///
+/// - Class load alone forces every {@code Linker.downcallHandle} to resolve
+/// its kernel32 symbol. A missing entry point throws
+/// {@link UnsatisfiedLinkError} during static initialization.
+/// - Bit-flag math for the VT mode toggles is verified directly so a typo
+/// in a constant fails here, not in a customer's terminal.
+class WindowsTerminalSmokeTest {
+
+ @Test
+ @EnabledOnOs(OS.WINDOWS)
+ void classLoad_resolvesEveryKernel32Symbol() {
+ // Given / When — touching the class triggers , which calls
+ // Linker.downcallHandle for every imported kernel32 function.
+ Class> sut = WindowsTerminal.class;
+
+ // Then
+ assertThat(sut).isNotNull();
+ assertThat(sut.getDeclaredMethods()).isNotEmpty();
+ }
+
+ @Test
+ @EnabledOnOs(OS.WINDOWS)
+ void modeFlagMath_inputModeMasksLineEchoProcessed_andSetsVtInput() {
+ // Given — typical default cmd.exe input mode: line + echo + processed input enabled
+ int defaultInMode = 0x0001 | 0x0002 | 0x0004; // PROCESSED | LINE | ECHO
+
+ // When — same transform that WindowsTerminal.open applies
+ int raw = (defaultInMode & ~(0x0002 | 0x0004 | 0x0001)) | 0x0200;
+
+ // Then — line / echo / processed cleared, VT input set
+ assertThat(raw & 0x0002).isZero();
+ assertThat(raw & 0x0004).isZero();
+ assertThat(raw & 0x0001).isZero();
+ assertThat(raw & 0x0200).isEqualTo(0x0200);
+ }
+
+ @Test
+ @EnabledOnOs(OS.WINDOWS)
+ void modeFlagMath_outputModeAddsVtProcessing() {
+ // Given — default output mode
+ int defaultOutMode = 0x0001; // PROCESSED_OUTPUT only
+
+ // When
+ int withVt = defaultOutMode | 0x0004 | 0x0001;
+
+ // Then
+ assertThat(withVt & 0x0004).isEqualTo(0x0004);
+ assertThat(withVt & 0x0001).isEqualTo(0x0001);
+ }
+}
diff --git a/integration/pom.xml b/integration/pom.xml
index c4e2f978..2c33101b 100644
--- a/integration/pom.xml
+++ b/integration/pom.xml
@@ -38,6 +38,11 @@
vortex-reader
test