diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1cda504..016a13c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,3 +33,26 @@ jobs: - name: Build and test run: ./mvnw verify + + inspector-windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v6 + + - name: Set up Azul Zulu JDK 25 + uses: actions/setup-java@v5 + with: + distribution: zulu + java-version: '25' + + - name: Cache Maven repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Test inspector module + shell: bash + run: ./mvnw test -pl inspector -am diff --git a/bom/pom.xml b/bom/pom.xml index 1bf420e3..071e4904 100644 --- a/bom/pom.xml +++ b/bom/pom.xml @@ -47,6 +47,11 @@ vortex-jdbc ${project.version} + + io.github.dfa1.vortex + vortex-inspector + ${project.version} + diff --git a/cli/pom.xml b/cli/pom.xml index c6f82cd3..f02df04c 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -30,6 +30,10 @@ io.github.dfa1.vortex vortex-reader + + io.github.dfa1.vortex + vortex-inspector + org.junit.jupiter @@ -71,6 +75,23 @@ io.github.dfa1.vortex.cli.VortexCli + + + ALL-UNNAMED + diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java index c79d130e..5e699f38 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java @@ -1,9 +1,13 @@ package io.github.dfa1.vortex.cli; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; +import io.github.dfa1.vortex.io.VortexHandle; +import io.github.dfa1.vortex.io.VortexHttpReader; import io.github.dfa1.vortex.io.VortexReader; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; 
@@ -14,20 +18,54 @@ private InspectCommand() { static int run(String[] args) { if (args.length != 2) { - System.err.println("usage: inspect "); + System.err.println("usage: inspect "); return ExitStatus.USAGE_ERROR; } - Path path = Path.of(args[1]); - if (!Files.exists(path)) { - System.err.println("file not found: " + path); - return ExitStatus.FILE_NOT_FOUND; - } - try (VortexReader reader = VortexReader.open(path)) { - System.out.print(VortexInspector.inspect(reader)); + try (VortexHandle handle = open(args[1])) { + if (handle == null) { + return ExitStatus.FILE_NOT_FOUND; + } + System.out.print(VortexInspector.inspect(handle)); return ExitStatus.OK; - } catch (IOException e) { - System.err.println("error: " + e.getMessage()); + } catch (IOException | RuntimeException e) { + System.err.println("error: " + describe(e)); + if (System.getenv("VORTEX_DEBUG") != null) { + e.printStackTrace(System.err); + } return ExitStatus.ERROR; } } + + private static String describe(Throwable t) { + StringBuilder sb = new StringBuilder(); + Throwable cur = t; + while (cur != null) { + if (!sb.isEmpty()) { + sb.append(" -> "); + } + sb.append(cur.getClass().getSimpleName()); + if (cur.getMessage() != null) { + sb.append(": ").append(cur.getMessage()); + } + cur = cur.getCause(); + } + return sb.toString(); + } + + private static VortexHandle open(String target) throws IOException { + if (target.startsWith("http://") || target.startsWith("https://")) { + try { + return VortexHttpReader.open(new URI(target)); + } catch (URISyntaxException e) { + System.err.println("invalid URL: " + target); + return null; + } + } + Path path = Path.of(target); + if (!Files.exists(path)) { + System.err.println("file not found: " + path); + return null; + } + return VortexReader.open(path); + } } diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java new file mode 100644 index 00000000..602ae982 --- /dev/null +++ 
b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -0,0 +1,129 @@ +package io.github.dfa1.vortex.cli; + +import io.github.dfa1.vortex.inspect.InspectorTree; +import io.github.dfa1.vortex.inspect.IoWorker; +import io.github.dfa1.vortex.inspect.VortexInspectorTui; +import io.github.dfa1.vortex.io.VortexHandle; +import io.github.dfa1.vortex.io.VortexHttpReader; +import io.github.dfa1.vortex.io.VortexReader; + +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicReference; + +final class TuiCommand { + + private TuiCommand() { + } + + static int run(String[] args) { + if (args.length != 2) { + System.err.println("usage: tui "); + return ExitStatus.USAGE_ERROR; + } + try (IoWorker worker = new IoWorker("vortex-tui-io")) { + VortexHandle handle = openOnWorker(worker, args[1]); + if (handle == null) { + return ExitStatus.FILE_NOT_FOUND; + } + try { + VortexInspectorTui.show(handle, worker, progressBar(System.err)); + } finally { + closeOnWorker(worker, handle); + } + return ExitStatus.OK; + } catch (IOException | RuntimeException | InterruptedException e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + System.err.println("error: " + describe(e)); + if (System.getenv("VORTEX_DEBUG") != null) { + e.printStackTrace(System.err); + } + return ExitStatus.ERROR; + } + } + + private static VortexHandle openOnWorker(IoWorker worker, String target) + throws InterruptedException, IOException { + AtomicReference handle = new AtomicReference<>(); + AtomicReference failure = new AtomicReference<>(); + worker.runAndAwait(() -> { + try { + handle.set(open(target)); + } catch (IOException e) { + failure.set(e); + } + }); + if (failure.get() != null) { + throw failure.get(); + } + return handle.get(); + } + + private static void closeOnWorker(IoWorker worker, VortexHandle 
handle) { + try { + worker.runAndAwait(handle::close); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + private static VortexHandle open(String target) throws IOException { + if (target.startsWith("http://") || target.startsWith("https://")) { + try { + return VortexHttpReader.open(new URI(target)); + } catch (URISyntaxException e) { + System.err.println("invalid URL: " + target); + return null; + } + } + Path path = Path.of(target); + if (!Files.exists(path)) { + System.err.println("file not found: " + path); + return null; + } + return VortexReader.open(path); + } + + private static InspectorTree.Progress progressBar(PrintStream out) { + int width = 30; + return (current, total) -> { + if (total <= 0) { + return; + } + int filled = (int) ((long) current * width / total); + StringBuilder bar = new StringBuilder(width + 32); + bar.append('\r').append("Loading metadata ["); + for (int i = 0; i < width; i++) { + bar.append(i < filled ? '#' : '-'); + } + bar.append("] ").append(current).append('/').append(total); + if (current == total) { + bar.append('\n'); + } + out.print(bar); + out.flush(); + }; + } + + private static String describe(Throwable t) { + StringBuilder sb = new StringBuilder(); + Throwable cur = t; + while (cur != null) { + if (!sb.isEmpty()) { + sb.append(" -> "); + } + sb.append(cur.getClass().getSimpleName()); + if (cur.getMessage() != null) { + sb.append(": ").append(cur.getMessage()); + } + cur = cur.getCause(); + } + return sb.toString(); + } +} diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java index 2279e2c9..b958489c 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java @@ -17,6 +17,7 @@ public static void main(String[] args) { } int exit = switch (args[0]) { case "inspect" -> InspectCommand.run(args); + case "tui" -> TuiCommand.run(args); case 
"export" -> ExportCommand.run(args); case "import" -> ImportCommand.run(args); case "schema" -> SchemaCommand.run(args); @@ -35,7 +36,8 @@ public static void main(String[] args) { static void printUsage(PrintStream out) { out.println("Usage: java -jar vortex.jar [args]"); - out.println(" inspect print file structure"); + out.println(" inspect print file structure; url is http(s)://"); + out.println(" tui open interactive inspector; url is http(s)://"); out.println(" export write CSV to stdout"); out.println(" import [out.vortex] convert CSV or Parquet to Vortex"); out.println(" schema print dtype (machine-readable)"); diff --git a/core/src/main/java/io/github/dfa1/vortex/core/DType.java b/core/src/main/java/io/github/dfa1/vortex/core/DType.java index d1113e4e..d4133cae 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/DType.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/DType.java @@ -132,6 +132,16 @@ record Extension( ByteBuffer metadata, boolean nullable ) implements DType { + + /// Returns the closed-world classification of this extension's id. + /// Pattern-match exhaustively: known ids resolve to the matching + /// record, anything else lands in {@link io.github.dfa1.vortex.core.Extension.Custom}. + /// + /// @return the {@link io.github.dfa1.vortex.core.Extension} record + /// for this extension's id + public io.github.dfa1.vortex.core.Extension kind() { + return io.github.dfa1.vortex.core.Extension.of(extensionId); + } } /// Variant logical type for semi-structured data (analogous to Parquet variant / JSON). 
diff --git a/core/src/main/java/io/github/dfa1/vortex/core/Extension.java b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java new file mode 100644 index 00000000..73ce1b83 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java @@ -0,0 +1,333 @@ +package io.github.dfa1.vortex.core; + +import io.github.dfa1.vortex.core.array.Array; +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.FixedSizeListArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import io.github.dfa1.vortex.core.array.MaskedArray; +import io.github.dfa1.vortex.core.array.ShortArray; +import io.github.dfa1.vortex.encoding.TimeUnit; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Optional; + +/// Sealed hierarchy of Vortex extension dtypes — closed-world view of the +/// four spec-defined extensions ({@code vortex.date}, {@code vortex.time}, +/// {@code vortex.timestamp}, {@code vortex.uuid}) plus a {@link Custom} +/// fallback record carrying any other id. +/// +///

Mirrors the {@link io.github.dfa1.vortex.encoding.Encoding} / +/// {@link io.github.dfa1.vortex.encoding.EncodingId} pairing in spirit but +/// merges the kind classification with the typed decode behaviour: each +/// record exposes its own statically-typed decode methods rather than a +/// single {@code Object decode(...)} contract that callers would have to +/// downcast. Pattern-match exhaustively to dispatch: +/// +/// ```java +/// switch (ext.kind()) { +/// case Extension.Date d -> d.decode(storage, i); // LocalDate +/// case Extension.Time t -> t.decode(ext, storage, i); // LocalTime +/// case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant +/// case Extension.Uuid u -> u.decode(storage, i); // UUID +/// case Extension.Custom c -> renderPlaceholder(c.id()); +/// } +/// ``` +/// +///

{@link DType.Extension} carries the wire-format id as a {@code String} +/// so unknown ids round-trip without loss; {@link #of(String)} translates to +/// the matching record. +public sealed interface Extension { + + /// Singleton for {@link Date}. + Date DATE = new Date(); + /// Singleton for {@link Time}. + Time TIME = new Time(); + /// Singleton for {@link Timestamp}. + Timestamp TIMESTAMP = new Timestamp(); + /// Singleton for {@link Uuid}. + Uuid UUID = new Uuid(); + + /// Returns the wire-format id string. + /// + /// @return canonical extension id + String id(); + + /// Resolves a wire-format id string to its {@link Extension} record. + /// Unknown ids land in {@link Custom}. + /// + /// @param id raw extension id from the file footer + /// @return matching record, or {@link Custom} when {@code id} isn't recognised + static Extension of(String id) { + return switch (id) { + case Date.ID -> DATE; + case Time.ID -> TIME; + case Timestamp.ID -> TIMESTAMP; + case Uuid.ID -> UUID; + default -> new Custom(id); + }; + } + + /// {@code vortex.date} — days (any signed integer width) since the + /// Unix epoch. Per Arrow's canonical Date type. + final class Date implements Extension { + /// Wire id. + public static final String ID = "vortex.date"; + + private Date() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the date cell at row {@code i}. + /// + /// @param storage signed-integer storage (Byte/Short/Int/Long, possibly Masked) + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded date + /// @throws VortexException if storage isn't an integer primitive + public LocalDate decode(Array storage, long i) { + checkBounds(i, storage.length()); + return LocalDate.ofEpochDay(epochInteger(storage, i)); + } + } + + /// {@code vortex.time} — sub-day count in the {@link TimeUnit} recorded + /// in {@code ext.metadata()} byte 0. + final class Time implements Extension { + /// Wire id. 
+ public static final String ID = "vortex.time"; + + private Time() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the time-of-day cell at row {@code i}. + /// + /// @param ext declared extension dtype carrying the {@link TimeUnit} byte + /// @param storage signed-integer storage (I32 for s/ms, I64 for μs/ns) + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded local time + /// @throws VortexException if the metadata unit is {@link TimeUnit#Days} + /// or storage isn't an integer primitive + public LocalTime decode(DType.Extension ext, Array storage, long i) { + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("Time.decode: Days unit not valid for vortex.time"); + } + long raw = epochInteger(storage, i); + long nanos = raw * (1_000_000_000L / unit.divisor()); + return LocalTime.ofNanoOfDay(nanos); + } + + /// Returns the {@link TimeUnit} recorded in the extension metadata. + /// + /// @param ext extension dtype + /// @return decoded time unit + public TimeUnit unit(DType.Extension ext) { + return readUnit(ext); + } + } + + /// {@code vortex.timestamp} — I64 epoch count plus optional IANA timezone. + /// Metadata layout: {@code byte[0] = TimeUnit tag, bytes[1..3] = tz_len + /// (u16 LE), bytes[3..3+tz_len] = tz UTF-8}. + final class Timestamp implements Extension { + /// Wire id. + public static final String ID = "vortex.timestamp"; + + private Timestamp() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the timestamp cell at row {@code i} to an {@link Instant}, + /// ignoring any timezone the metadata carries. 
+ /// + /// @param ext declared extension dtype + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded instant + /// @throws VortexException if the metadata unit is {@link TimeUnit#Days} + /// or storage isn't an integer primitive + public Instant instant(DType.Extension ext, Array storage, long i) { + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("Timestamp.instant: Days unit not valid"); + } + return instantFromRaw(epochInteger(storage, i), unit); + } + + /// Decodes the timestamp cell at row {@code i} to a {@link ZonedDateTime} + /// using the timezone from the metadata, defaulting to UTC when absent. + /// + /// @param ext declared extension dtype + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded zoned date-time + public ZonedDateTime zonedDateTime(DType.Extension ext, Array storage, long i) { + return instant(ext, storage, i).atZone(timezone(ext).orElse(ZoneOffset.UTC)); + } + + /// Returns the IANA timezone string recorded in the extension metadata. 
+ /// + /// @param ext declared extension dtype + /// @return parsed zone id, or empty when {@code tz_len == 0} + /// @throws VortexException if the metadata is truncated mid-string + public Optional timezone(DType.Extension ext) { + ByteBuffer meta = ext.metadata(); + if (meta == null || meta.remaining() < 3) { + return Optional.empty(); + } + ByteBuffer le = meta.duplicate().order(ByteOrder.LITTLE_ENDIAN); + int basePos = le.position(); + int tzLen = Short.toUnsignedInt(le.getShort(basePos + 1)); + if (tzLen == 0) { + return Optional.empty(); + } + if (le.remaining() < 3 + tzLen) { + throw new VortexException("timestamp metadata truncated: declared tz_len=" + + tzLen + " but only " + (le.remaining() - 3) + " bytes available"); + } + byte[] tzBytes = new byte[tzLen]; + for (int k = 0; k < tzLen; k++) { + tzBytes[k] = le.get(basePos + 3 + k); + } + return Optional.of(ZoneId.of(new String(tzBytes, StandardCharsets.UTF_8))); + } + + /// Returns the {@link TimeUnit} recorded in the extension metadata. + /// + /// @param ext extension dtype + /// @return decoded time unit + public TimeUnit unit(DType.Extension ext) { + return readUnit(ext); + } + } + + /// {@code vortex.uuid} — 16-byte UUID stored as + /// {@code FixedSizeList(Primitive(U8), 16)}. + final class Uuid implements Extension { + /// Wire id. + public static final String ID = "vortex.uuid"; + + private Uuid() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the UUID cell at row {@code i}. 
+ /// + /// @param storage UUID storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded {@link java.util.UUID} + /// @throws VortexException if storage isn't a {@code FixedSizeListArray} + /// of size 16 + public java.util.UUID decode(Array storage, long i) { + checkBounds(i, storage.length()); + if (!(storage instanceof FixedSizeListArray fsl)) { + throw new VortexException("Uuid.decode: expected FixedSizeListArray, got " + + storage.getClass().getSimpleName()); + } + if (fsl.fixedSize() != 16) { + throw new VortexException("Uuid.decode: expected fixedSize 16, got " + fsl.fixedSize()); + } + if (!(fsl.elements() instanceof ByteArray bytes)) { + throw new VortexException("Uuid.decode: expected ByteArray elements, got " + + fsl.elements().getClass().getSimpleName()); + } + long base = i * 16; + long msb = 0L; + long lsb = 0L; + for (int k = 0; k < 8; k++) { + msb = (msb << 8) | (bytes.getByte(base + k) & 0xffL); + } + for (int k = 0; k < 8; k++) { + lsb = (lsb << 8) | (bytes.getByte(base + 8 + k) & 0xffL); + } + return new java.util.UUID(msb, lsb); + } + } + + /// Open-world escape hatch for any extension id Vortex-java doesn't + /// know about. Pattern-match branches that need to render or decode an + /// unknown extension read its raw id via {@link #id()}. + /// + /// @param id raw extension id string + record Custom(String id) implements Extension { + } + + // ── Shared helpers ──────────────────────────────────────────────────── + + /// Reads a signed integer from any of the integer primitive arrays as + /// {@code long}. Recurses through {@link MaskedArray}; throws on null + /// cells so callers don't silently get garbage for nullable columns. 
+ private static long epochInteger(Array storage, long i) { + return switch (storage) { + case ByteArray a -> a.getByte(i); + case ShortArray a -> a.getShort(i); + case IntArray a -> a.getInt(i); + case LongArray a -> a.getLong(i); + case MaskedArray a -> { + if (!a.isValid(i)) { + throw new VortexException("null cell at index " + i); + } + yield epochInteger(a.inner(), i); + } + default -> throw new VortexException( + "unsupported storage type " + storage.getClass().getSimpleName()); + }; + } + + /// Reads the {@link TimeUnit} metadata byte at the buffer's current + /// position; throws if the buffer is null or empty. + private static TimeUnit readUnit(DType.Extension ext) { + ByteBuffer meta = ext.metadata(); + if (meta == null || !meta.hasRemaining()) { + throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId()); + } + return TimeUnit.fromTag(meta.get(meta.position())); + } + + private static Instant instantFromRaw(long raw, TimeUnit unit) { + return switch (unit) { + case Seconds -> Instant.ofEpochSecond(raw); + case Milliseconds -> Instant.ofEpochMilli(raw); + case Microseconds -> { + long secs = Math.floorDiv(raw, 1_000_000L); + long nanos = Math.floorMod(raw, 1_000_000L) * 1_000L; + yield Instant.ofEpochSecond(secs, nanos); + } + case Nanoseconds -> { + long secs = Math.floorDiv(raw, 1_000_000_000L); + long nanos = Math.floorMod(raw, 1_000_000_000L); + yield Instant.ofEpochSecond(secs, nanos); + } + case Days -> throw new VortexException("Days unit not valid for instant"); + }; + } + + private static void checkBounds(long i, long length) { + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); + } + } +} diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index d319637c..9147f8be 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java 
+++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -1,8 +1,13 @@ package io.github.dfa1.vortex.core.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteOrder; /// Fallback [Array] for dtypes that lack a dedicated concrete subtype. /// @@ -48,10 +53,153 @@ public long length() { return length; } + /// Returns a view of this array clamped to {@code newLength} logical rows. + /// Buffers and children are reused as-is; callers are expected to respect + /// {@link #length()} when reading. Used by the scan iterator to honour + /// {@code ScanOptions.limit} for dtypes that don't have a typed array. + /// + /// @param newLength desired logical length; must be {@code <= length()} + /// @return a new {@code GenericArray} sharing this array's buffers and children + /// @throws IllegalArgumentException if {@code newLength} exceeds the current length + public GenericArray withLength(long newLength) { + if (newLength < 0 || newLength > length) { + throw new IllegalArgumentException( + "newLength " + newLength + " out of range [0," + length + "]"); + } + if (newLength == length) { + return this; + } + return new GenericArray(dtype, newLength, buffers, children); + } + MemorySegment buffer(int i) { return buffers[i]; } + /// Returns the number of raw memory buffers backing this array. + /// + /// @return buffer count + public int bufferCount() { + return buffers.length; + } + + /// Returns the raw buffer at position {@code i}. Used by callers that need + /// to inspect encoded bytes when no typed accessor exists for the dtype + /// (e.g. the TUI inspector decoding {@code Decimal} cells). 
+ /// + /// @param i buffer index + /// @return the underlying {@link MemorySegment} + public MemorySegment bufferAt(int i) { + return buffers[i]; + } + + /// Returns the number of child arrays. + /// + /// @return child count + public int childCount() { + return children.length; + } + + /// Decodes the decimal value at row {@code i}. + /// + /// Handles the two shapes produced by Vortex decimal decoders: + /// + /// - **single-buffer**: one raw buffer of little-endian two's-complement + /// integers (one element per row). Element width is derived from the + /// buffer's byte size divided by {@link #length()}, not from the + /// dtype's precision — {@code vortex.decimal} writes whatever width + /// the encoder chose in its {@code valuesType} metadata, which can be + /// narrower than the precision alone would allow. + /// - **child-array**: zero buffers, one child holding the most-significant + /// integer part as a {@link LongArray}, {@link IntArray}, {@link ShortArray}, + /// or {@link ByteArray}. Produced by {@code vortex.decimal_byte_parts} + /// when {@code lower_part_count == 0}. 
+ /// + /// @param i row index, {@code 0 <= i < length()} + /// @return decoded value as a {@link BigDecimal} with the dtype's scale + /// @throws VortexException if the dtype isn't decimal or the array + /// shape doesn't match either supported layout + /// @throws IndexOutOfBoundsException if {@code i} is outside {@code [0, length())} + public BigDecimal getDecimal(long i) { + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); + } + if (!(dtype instanceof DType.Decimal d)) { + throw new VortexException("getDecimal called on non-decimal dtype: " + dtype); + } + BigInteger mantissa; + if (buffers.length == 1 && children.length == 0) { + mantissa = readSingleBufferMantissa(buffers[0], length, i); + } else if (buffers.length == 0 && children.length == 1) { + mantissa = mantissaFromChild(children[0], i); + } else { + throw new VortexException("getDecimal: unsupported decimal shape buffers=" + + buffers.length + " children=" + children.length); + } + return new BigDecimal(mantissa, d.scale()); + } + + private static BigInteger readSingleBufferMantissa(MemorySegment buf, long length, long i) { + long bufBytes = buf.byteSize(); + if (length == 0 || bufBytes % length != 0) { + throw new VortexException("getDecimal: buffer size " + bufBytes + + " is not a multiple of length " + length); + } + int width = (int) (bufBytes / length); + if (width != 1 && width != 2 && width != 4 && width != 8 && width != 16) { + throw new VortexException("getDecimal: unsupported element width " + width + " bytes"); + } + return readSignedLe(buf, i * width, width); + } + + private static BigInteger mantissaFromChild(Array child, long i) { + return switch (child) { + case LongArray a -> BigInteger.valueOf(a.getLong(i)); + case IntArray a -> BigInteger.valueOf(a.getInt(i)); + case ShortArray a -> BigInteger.valueOf(a.getShort(i)); + case ByteArray a -> BigInteger.valueOf(a.getByte(i)); + case MaskedArray a -> { + if 
(!a.isValid(i)) { + throw new VortexException("getDecimal: null cell at index " + i); + } + yield mantissaFromChild(a.inner(), i); + } + default -> + throw new VortexException("getDecimal: unsupported mantissa child type " + + child.getClass().getSimpleName()); + }; + } + + private static final ValueLayout.OfShort SHORT_LE = + ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfInt INT_LE = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfLong LONG_LE = + ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + private static BigInteger readSignedLe(MemorySegment buf, long offset, int width) { + return switch (width) { + case 1 -> BigInteger.valueOf(buf.get(ValueLayout.JAVA_BYTE, offset)); + case 2 -> BigInteger.valueOf(buf.get(SHORT_LE, offset)); + case 4 -> BigInteger.valueOf(buf.get(INT_LE, offset)); + case 8 -> BigInteger.valueOf(buf.get(LONG_LE, offset)); + case 16 -> readSigned128Le(buf, offset); + default -> throw new VortexException("readSignedLe: unsupported width " + width); + }; + } + + private static BigInteger readSigned128Le(MemorySegment buf, long offset) { + // Two's-complement i128 on disk in little-endian; BigInteger ingests big-endian. + // No SIMD intrinsic for 16-byte signed integer, so we materialise into a heap + // buffer here. Only fires for decimal(>18, _) — narrow-precision fast paths above + // stay allocation-free. + byte[] be = new byte[16]; + for (int k = 0; k < 16; k++) { + be[15 - k] = buf.get(ValueLayout.JAVA_BYTE, offset + k); + } + return new BigInteger(be); + } + /// Returns the child array at position {@code i}. 
/// /// @param i child index diff --git a/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java new file mode 100644 index 00000000..ad684466 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java @@ -0,0 +1,285 @@ +package io.github.dfa1.vortex.core; + +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.FixedSizeListArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class ExtensionTest { + + private static final DType.Primitive I32 = new DType.Primitive(PType.I32, false); + private static final DType.Primitive I64 = new DType.Primitive(PType.I64, false); + private static final DType.Primitive U8 = new DType.Primitive(PType.U8, false); + + @Test + void of_recognisedIds_returnSingletons() { + // Given / When / Then — known ids resolve to the cached singletons so + // identity comparison and pattern-match cases work without per-call alloc + assertThat(Extension.of("vortex.date")).isSameAs(Extension.DATE); + assertThat(Extension.of("vortex.time")).isSameAs(Extension.TIME); + assertThat(Extension.of("vortex.timestamp")).isSameAs(Extension.TIMESTAMP); + assertThat(Extension.of("vortex.uuid")).isSameAs(Extension.UUID); + } + + @Test + void of_unknownId_returnsCustomWithRawString() { + // Given — open-world fallback; the 
id must round-trip verbatim so callers + // can still apply their own decoding for non-spec extensions + Extension sut = Extension.of("acme.geopoint"); + + // Then + assertThat(sut).isInstanceOf(Extension.Custom.class); + assertThat(sut.id()).isEqualTo("acme.geopoint"); + } + + @Test + void kind_onDTypeExtension_dispatchesViaPatternMatch() { + // Given — practical sealed-switch usage that motivates the redesign + DType.Extension date = ext("vortex.date", I32, null); + DType.Extension custom = ext("acme.thing", I32, null); + + // When / Then + assertThat(classify(date)).isEqualTo("date"); + assertThat(classify(custom)).isEqualTo("custom:acme.thing"); + } + + private static String classify(DType.Extension ext) { + return switch (ext.kind()) { + case Extension.Date d -> "date"; + case Extension.Time t -> "time"; + case Extension.Timestamp ts -> "timestamp"; + case Extension.Uuid u -> "uuid"; + case Extension.Custom c -> "custom:" + c.id(); + }; + } + + @Test + void date_decodes_tpchSample() { + // Given — anchor against known TPC-H value 9538 = 1996-02-12 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extension.DATE.decode(storage, 0)).isEqualTo(LocalDate.of(1996, 2, 12)); + } + } + + @Test + void date_negativeDays_returnsPreEpoch() { + // Given — defensive: signed storage, pre-1970 must work + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, -1); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extension.DATE.decode(storage, 0)).isEqualTo(LocalDate.of(1969, 12, 31)); + } + } + + @Test + void time_eachUnit_decodesCorrectly() { + // Given — round-trip a known time-of-day through every TimeUnit + try (Arena arena = Arena.ofConfined()) { + // Seconds: 3661 s = 01:01:01 + 
assertThat(Extension.TIME.decode(ext("vortex.time", I32, unitByte((byte) 3)), + i32(arena, 3661), 0)) + .isEqualTo(LocalTime.of(1, 1, 1)); + // Milliseconds: 3_661_500 = 01:01:01.500 + assertThat(Extension.TIME.decode(ext("vortex.time", I32, unitByte((byte) 2)), + i32(arena, 3_661_500), 0)) + .isEqualTo(LocalTime.of(1, 1, 1, 500_000_000)); + // Microseconds: 1_000_001 = 00:00:01.000001 + assertThat(Extension.TIME.decode(ext("vortex.time", I64, unitByte((byte) 1)), + i64(arena, 1_000_001L), 0)) + .isEqualTo(LocalTime.of(0, 0, 1, 1_000)); + // Nanoseconds: 42 ns past midnight + assertThat(Extension.TIME.decode(ext("vortex.time", I64, unitByte((byte) 0)), + i64(arena, 42L), 0)) + .isEqualTo(LocalTime.ofNanoOfDay(42)); + } + } + + @Test + void time_daysUnit_throws() { + // Given — Days isn't a sub-second unit + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.time", I32, unitByte((byte) 4)); + + // When / Then + assertThatThrownBy(() -> Extension.TIME.decode(ext, i32(arena, 0), 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("Days unit not valid"); + } + } + + @Test + void timestamp_instant_microsecondsPath_handlesNegativeRaw() { + // Given — pre-epoch micros exercise the floorDiv / floorMod path + long micros = -1_500_001L; // -1.500001s + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 1, null)); + + // When + Instant got = Extension.TIMESTAMP.instant(ext, i64(arena, micros), 0); + + // Then + assertThat(got.getEpochSecond()).isEqualTo(-2L); + assertThat(got.getNano()).isEqualTo(499_999_000); + } + } + + @Test + void timestamp_zonedDateTime_withTimezone_appliesIt() { + // Given — ms since epoch + Europe/Paris tz in metadata + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 2, "Europe/Paris")); + + // When + ZonedDateTime got = Extension.TIMESTAMP.zonedDateTime(ext, i64(arena, 1_000L), 0); + + // Then + 
assertThat(got.getZone()).isEqualTo(ZoneId.of("Europe/Paris")); + assertThat(got.toInstant()).isEqualTo(Instant.ofEpochMilli(1_000L)); + } + } + + @Test + void timestamp_zonedDateTime_noTimezone_defaultsToUtc() { + // Given — tz_len = 0 should fall back to UTC for unambiguity + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 2, null)); + + // When + ZonedDateTime got = Extension.TIMESTAMP.zonedDateTime(ext, i64(arena, 0L), 0); + + // Then + assertThat(got.getZone()).isEqualTo(ZoneOffset.UTC); + } + } + + @Test + void timestamp_timezone_truncatedMetadata_throws() { + // Given — declared tz_len longer than buffer can carry + ByteBuffer meta = ByteBuffer.allocate(6).order(ByteOrder.LITTLE_ENDIAN); + meta.put(0, (byte) 2); + meta.putShort(1, (short) 5); + meta.put(3, (byte) 'U'); + meta.put(4, (byte) 'T'); + meta.put(5, (byte) 'C'); + DType.Extension truncated = ext("vortex.timestamp", I64, meta); + + // When / Then + assertThatThrownBy(() -> Extension.TIMESTAMP.timezone(truncated)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("truncated"); + } + + @Test + void uuid_roundTripsKnownValue() { + // Given — RFC 9562 example + java.util.UUID expected = java.util.UUID.fromString("123e4567-e89b-12d3-a456-426614174000"); + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + long msb = expected.getMostSignificantBits(); + long lsb = expected.getLeastSignificantBits(); + for (int k = 0; k < 8; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) ((msb >> (56 - 8 * k)) & 0xff)); + buf.set(ValueLayout.JAVA_BYTE, 8 + k, (byte) ((lsb >> (56 - 8 * k)) & 0xff)); + } + ByteArray inner = new ByteArray(U8, 16, buf); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 16, false), 1, inner); + + // When / Then + assertThat(Extension.UUID.decode(storage, 0)).isEqualTo(expected); + } + } + + @Test + void uuid_allOnes_decodesWithoutSignExtension() { + // 
Given — 0xff in every byte trips sign-extension bugs in the mask + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + for (int k = 0; k < 16; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) 0xff); + } + ByteArray inner = new ByteArray(U8, 16, buf); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 16, false), 1, inner); + + // When / Then + assertThat(Extension.UUID.decode(storage, 0)) + .isEqualTo(new java.util.UUID(-1L, -1L)); + } + } + + @Test + void uuid_wrongFixedSize_throws() { + // Given — 8 != 16; reject up front + try (Arena arena = Arena.ofConfined()) { + ByteArray inner = new ByteArray(U8, 8, arena.allocate(8)); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 8, false), 1, inner); + + // When / Then + assertThatThrownBy(() -> Extension.UUID.decode(storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("fixedSize 16"); + } + } + + // ── helpers ────────────────────────────────────────────────────────── + + private static DType.Extension ext(String id, DType storage, ByteBuffer meta) { + return new DType.Extension(id, storage, meta, false); + } + + private static ByteBuffer unitByte(byte tag) { + ByteBuffer meta = ByteBuffer.allocate(1); + meta.put(0, tag); + return meta; + } + + private static ByteBuffer tzMeta(byte unitTag, String tz) { + byte[] tzBytes = tz == null ? 
new byte[0] : tz.getBytes(StandardCharsets.UTF_8); + ByteBuffer meta = ByteBuffer.allocate(3 + tzBytes.length).order(ByteOrder.LITTLE_ENDIAN); + meta.put(0, unitTag); + meta.putShort(1, (short) tzBytes.length); + for (int k = 0; k < tzBytes.length; k++) { + meta.put(3 + k, tzBytes[k]); + } + return meta; + } + + private static IntArray i32(Arena arena, int value) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, value); + return new IntArray(I32, 1, buf); + } + + private static LongArray i64(Arena arena, long value) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, value); + return new LongArray(I64, 1, buf); + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java new file mode 100644 index 00000000..f3f56a0a --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -0,0 +1,284 @@ +package io.github.dfa1.vortex.core.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.math.BigDecimal; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class GenericArrayTest { + + private static final DType DTYPE = new DType.Primitive(PType.I64, false); + + @Test + void withLength_shorterLength_returnsClampedView() { + // Given — full-size array of 10 elements + try (Arena arena = Arena.ofConfined()) { + MemorySegment seg = arena.allocate(80); + GenericArray sut = new GenericArray(DTYPE, 10, seg); + + // When + GenericArray clamped = sut.withLength(4); + + // Then — length reflects new bound; buffer is reused (no copy) + 
assertThat(clamped.length()).isEqualTo(4); + assertThat(clamped.dtype()).isEqualTo(DTYPE); + } + } + + @Test + void withLength_sameLength_returnsSameInstance() { + // Given + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 10, arena.allocate(80)); + + // When / Then — no-op short-circuits to avoid wrapper allocation + assertThat(sut.withLength(10)).isSameAs(sut); + } + } + + @Test + void withLength_zero_returnsEmptyView() { + // Given — boundary case: truncating to zero must still produce a valid + // GenericArray (length() == 0) rather than throw + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 5, arena.allocate(40)); + + // When + GenericArray clamped = sut.withLength(0); + + // Then + assertThat(clamped.length()).isZero(); + } + } + + @Test + void withLength_greaterThanCurrent_throws() { + // Given — protects against silently extending past the backing buffer + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24)); + + // When / Then + assertThatThrownBy(() -> sut.withLength(4)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("out of range"); + } + } + + @Test + void withLength_negative_throws() { + // Given + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24)); + + // When / Then + assertThatThrownBy(() -> sut.withLength(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("out of range"); + } + } + + @Test + void getDecimal_i64Buffer_decodesMantissaScaledByDtype() { + // Given — decimal(15,2): precision 15 → 8-byte (I64) mantissa; values + // 1234 / -50 / 0 should render as 12.34 / -0.50 / 0.00. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(24); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -50L); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + + @Test + void getDecimal_smallPrecisionUsesNarrowerBuffer() { + // Given — decimal(4,1): precision 4 → 2-byte (I16) mantissa + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 0, (short) 99); + buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 2, (short) -1); + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 1, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then — 99 / 10 = 9.9; -1 / 10 = -0.1 (signed extension matters) + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("9.9")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.1")); + } + } + + @Test + void getDecimal_childArrayShape_decodesViaMostSignificantPart() { + // Given — the shape vortex.decimal_byte_parts decoders produce when + // lower_part_count == 0: zero buffers, one LongArray child carrying + // the i64 mantissa. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment mspBuf = arena.allocate(24); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 4321L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -100L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L); + LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 3, mspBuf); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, new MemorySegment[0], new Array[]{msp}); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("43.21")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-1.00")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + + @Test + void getDecimal_i128Buffer_decodesWideMantissa() { + // Given — decimal(38,4) stores mantissas wider than i64; vortex.decimal + // writes 16-byte little-endian two's-complement. Two values: 2^70 (way + // above I64.MAX) and -2^70 anchor the high-precision path the + // narrower-width tests never exercise. + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(32); + java.math.BigInteger pos = java.math.BigInteger.TWO.pow(70); + java.math.BigInteger neg = pos.negate(); + writeI128Le(buf, 0, pos); + writeI128Le(buf, 16, neg); + DType.Decimal dec = new DType.Decimal((byte) 38, (byte) 4, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal(pos, 4)); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal(neg, 4)); + } + } + + private static void writeI128Le(MemorySegment buf, long offset, java.math.BigInteger value) { + // BigInteger.toByteArray() returns minimum-length big-endian two's-complement. + // Pad / sign-extend to 16 bytes, then reverse into the little-endian wire slot. 
+ byte[] be = value.toByteArray(); + byte[] le16 = new byte[16]; + // sign-extend pad in big-endian form + byte sign = (byte) (value.signum() < 0 ? 0xFF : 0x00); + for (int i = 0; i < 16; i++) { + le16[15 - i] = sign; + } + for (int i = 0; i < be.length && i < 16; i++) { + le16[i] = be[be.length - 1 - i]; + } + for (int i = 0; i < 16; i++) { + buf.set(ValueLayout.JAVA_BYTE, offset + i, le16[i]); + } + } + + @Test + void getDecimal_widthDerivedFromBufferNotPrecision() { + // Given — decimal(15,2) is precision 15 (≤18 → "should" be I64), but + // vortex.decimal stores at whatever valuesType the encoder picked. A + // narrower width fits if all values are small. The old precision-based + // table picked 8 bytes here and read garbage. The current impl derives + // width from buffer.byteSize / length, so storing 3 I32 values at the + // same precision 15 decodes correctly. + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(12); // 3 × 4 bytes (I32 mantissa) + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 1234); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 4, -50); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 8, 0); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + + @Test + void getDecimal_unalignedBufferSize_throws() { + // Given — buffer size not a clean multiple of length means we can't + // derive a sensible per-element width; fail fast rather than silently + // reading garbage from a half-element offset. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(7); // not divisible by length=2 + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(0)) + .isInstanceOf(io.github.dfa1.vortex.core.VortexException.class) + .hasMessageContaining("not a multiple"); + } + } + + @Test + void getDecimal_indexOutOfBounds_throws() { + // Given — explicit bounds check guards against silent garbage reads + // when callers don't respect length() + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false); + GenericArray sut = new GenericArray(dec, 1, buf); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(-1)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> sut.getDecimal(1)) + .isInstanceOf(IndexOutOfBoundsException.class) + .hasMessageContaining("out of bounds"); + assertThatThrownBy(() -> sut.getDecimal(Long.MAX_VALUE)) + .isInstanceOf(IndexOutOfBoundsException.class); + } + } + + @Test + void getDecimal_nullCellInMaskedChild_throws() { + // Given — mantissa-child shape with a MaskedArray wrapping a LongArray; + // the validity bitmap says index 1 is null. Without the validity check + // the previous code would happily decode whatever bytes sat at that + // slot and return a garbage BigDecimal. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment mspBuf = arena.allocate(16); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, 9999L); + LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 2, mspBuf); + + MemorySegment validityBuf = arena.allocate(1); + // bit 0 set = index 0 valid; bit 1 clear = index 1 null + validityBuf.set(ValueLayout.JAVA_BYTE, 0, (byte) 0b0000_0001); + BoolArray validity = new BoolArray(new DType.Bool(false), 2, validityBuf); + + MaskedArray masked = new MaskedArray(msp, validity); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, true); + GenericArray sut = new GenericArray(dec, 2, new MemorySegment[0], new Array[]{masked}); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThatThrownBy(() -> sut.getDecimal(1)) + .isInstanceOf(io.github.dfa1.vortex.core.VortexException.class) + .hasMessageContaining("null cell at index 1"); + } + } + + @Test + void getDecimal_nonDecimalDtype_throws() { + // Given — guards against silently returning garbage on misuse + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 1, arena.allocate(8)); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-decimal"); + } + } +} diff --git a/docs/compatibility.md b/docs/compatibility.md index 683879b4..aad65434 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -56,6 +56,50 @@ try (VortexReader vf = VortexReader.open(path, registry)) { } ``` +## Extension types + +Extension dtypes wrap a primitive storage array with a logical-id tag plus optional +metadata. The Rust catalogue lives in +[`vortex-array/src/extension/`](https://github.com/vortex-data/vortex/tree/develop/vortex-array/src/extension); +each subdir below names a canonical extension id and its on-disk shape. 
+ +Extensions are exposed as a sealed `Extension` hierarchy. Each record carries +its own typed decode methods; pattern-match on `ext.kind()` to dispatch: + +```java +switch (ext.kind()) { + case Extension.Date d -> d.decode(storage, i); // LocalDate + case Extension.Time t -> t.decode(ext, storage, i); // LocalTime + case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant + case Extension.Uuid u -> u.decode(storage, i); // UUID + case Extension.Custom c -> ... // any other id, raw String available +} +``` + +| Extension id | Record | Storage | Metadata | Status | +|---------------------|----------------------|-------------------------------------------------|-------------------------------------------|--------| +| `vortex.date` | `Extension.Date` | Signed integer days since 1970-01-01 | none | ✅ | +| `vortex.time` | `Extension.Time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | ✅ | +| `vortex.timestamp` | `Extension.Timestamp`| I64 epoch count in the recorded `TimeUnit` | unit byte + u16 LE tz_len + UTF-8 tz | ✅ | +| `vortex.uuid` | `Extension.Uuid` | `FixedSizeList(Primitive(U8), 16)` | none | ✅ | +| _custom ids_ | `Extension.Custom` | _whatever the column declares_ | _opaque bytes_ | passthrough | + +`TimeUnit` (see [`extension/datetime/unit.rs`](https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/extension/datetime/unit.rs)) +encodes precision in the first metadata byte: + +| Value | Unit | +|-------|--------------| +| 0 | Nanoseconds | +| 1 | Microseconds | +| 2 | Milliseconds | +| 3 | Seconds | +| 4 | Days | + +For unsupported extension ids the inspector falls back to a placeholder cell +(`>`); the underlying storage array still decodes +correctly via the primitive accessors, callers just have to format the value +themselves. 
+ ## S3 Fixture Status (v0.72.0) Cross-language round-trips tested against Rust-written fixture files hosted at diff --git a/inspector/pom.xml b/inspector/pom.xml new file mode 100644 index 00000000..e88d9212 --- /dev/null +++ b/inspector/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + io.github.dfa1.vortex + vortex-java + 0.5.0-SNAPSHOT + + + vortex-inspector + + vortex-inspector + Inspector for the Vortex columnar file format. Produces a structural tree model of a file's + schema, layout, and encodings, plus text and Lanterna-based TUI renderers. + + + + + + io.github.dfa1.vortex + vortex-core + + + io.github.dfa1.vortex + vortex-reader + + + com.google.flatbuffers + flatbuffers-java + + + + + org.junit.jupiter + junit-jupiter + test + + + org.assertj + assertj-core + test + + + org.mockito + mockito-junit-jupiter + test + + + diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java new file mode 100644 index 00000000..ca4fae4d --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -0,0 +1,281 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Footer; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.fbs.Array; +import io.github.dfa1.vortex.fbs.ArrayNode; +import io.github.dfa1.vortex.io.VortexHandle; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/// Structured snapshot of a Vortex file's schema, layout, and encoding usage. +/// +/// Built once from a [VortexHandle] via [#build(VortexHandle)] and then consumed by renderers +/// (text or TUI). 
Immutable — does not retain the handle. +/// +/// @param version Vortex file format version stored in the trailer +/// @param fileSize total file length in bytes +/// @param dtype top-level data type (typically [DType.Struct]) +/// @param registeredEncodings encoding IDs declared in the file footer +/// @param usedEncodings encoding IDs actually referenced by Flat layout segments +/// @param segmentSpecs all on-disk segments referenced by the footer, in index order +/// @param totalRowCount total logical rows in the file (root layout's row count) +/// @param root root layout node +public record InspectorTree( + int version, + long fileSize, + DType dtype, + List registeredEncodings, + Set usedEncodings, + List segmentSpecs, + long totalRowCount, + Node root) { + + /// Number of on-disk segments referenced by the footer. + /// + /// @return segment count + public int segmentCount() { + return segmentSpecs.size(); + } + + /// Sum of segment lengths in bytes. + /// + /// @return total segment bytes + public long totalSegmentBytes() { + long total = 0; + for (SegmentSpec spec : segmentSpecs) { + total += spec.length(); + } + return total; + } + + /// One layout node in the inspector tree. + /// + /// @param layout underlying [Layout] from the file footer + /// @param fieldName column name when this node is a direct child of a top-level struct + /// @param usedEncodings encoding IDs referenced by this subtree + /// @param stats per-array statistics decoded from the segment's FlatBuffer + /// @param children child nodes + public record Node( + Layout layout, + Optional fieldName, + Set usedEncodings, + ArrayStats stats, + List children) { + } + + /// Builds an inspector tree from an open Vortex file handle. 
+ /// + /// @param handle open file handle + /// @return immutable inspector tree + public static InspectorTree build(VortexHandle handle) { + return build(handle, Progress.NOOP); + } + + /// Builds an inspector tree without peeking segments — every node starts + /// with an empty encoding set and {@link ArrayStats#empty()} stats. The + /// resulting tree contains only structure derived from the file's footer + /// and layout, so the call is essentially free on remote handles. + /// + /// Use with {@link #peek(Node, VortexHandle)} for lazy on-demand resolution. + /// + /// @param handle open file handle + /// @return immutable shallow inspector tree + public static InspectorTree buildShallow(VortexHandle handle) { + Footer footer = handle.footer(); + Layout layout = handle.layout(); + DType dtype = handle.dtype(); + List colNames = (dtype instanceof DType.Struct s) ? s.fieldNames() : List.of(); + Node root = shallowNode(layout, Optional.empty()); + if (layout.isStruct()) { + List named = new ArrayList<>(root.children().size()); + for (int i = 0; i < root.children().size(); i++) { + Node child = root.children().get(i); + String name = i < colNames.size() ? 
colNames.get(i) : "col" + i; + named.add(new Node(child.layout(), Optional.of(name), + Set.of(), ArrayStats.empty(), child.children())); + } + root = new Node(root.layout(), Optional.empty(), Set.of(), + ArrayStats.empty(), List.copyOf(named)); + } + return new InspectorTree( + handle.version(), + handle.fileSize(), + dtype, + footer.arraySpecs(), + Set.of(), + footer.segmentSpecs(), + layout.rowCount(), + root); + } + + private static Node shallowNode(Layout layout, Optional fieldName) { + List children = new ArrayList<>(layout.children().size()); + for (Layout child : layout.children()) { + children.add(shallowNode(child, Optional.empty())); + } + return new Node(layout, fieldName, Set.of(), ArrayStats.empty(), List.copyOf(children)); + } + + /// Resolves encoding id + stats for one Flat node by reading its first + /// segment. Returns [Peek#EMPTY] for non-Flat nodes, segments under + /// compression, or missing data. + /// + /// Callers should cache the result — every call triggers a fresh + /// {@code handle.slice()}, which is a network round-trip on remote handles. + /// + /// @param node node to resolve + /// @param handle open file handle + /// @return peek result; never {@code null} + public static Peek peek(Node node, VortexHandle handle) { + Layout layout = node.layout(); + if (!layout.isFlat() || layout.segments().isEmpty()) { + return Peek.EMPTY; + } + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); + if (spec.compression().code != 0) { + return Peek.EMPTY; + } + MemorySegment seg = handle.slice(spec.offset(), spec.length()); + return peekFlatRoot(seg, handle.footer().arraySpecs()); + } + + /// Builds an inspector tree from an open Vortex file handle, reporting + /// progress on each Flat-segment peek (which on remote-storage handles + /// triggers a separate HTTP range request). 
+ /// + /// @param handle open file handle + /// @param progress progress sink receiving {@code (current, total)} after each segment peek + /// @return immutable inspector tree + public static InspectorTree build(VortexHandle handle, Progress progress) { + Footer footer = handle.footer(); + Layout layout = handle.layout(); + DType dtype = handle.dtype(); + + int total = countPeekableSegments(layout, footer); + int[] counter = {0}; + + List colNames = (dtype instanceof DType.Struct s) ? s.fieldNames() : List.of(); + Set overallUsed = new LinkedHashSet<>(); + Node root = buildNode(layout, Optional.empty(), handle, footer.arraySpecs(), + overallUsed, progress, counter, total); + if (layout.isStruct()) { + List namedChildren = new ArrayList<>(root.children().size()); + for (int i = 0; i < root.children().size(); i++) { + Node child = root.children().get(i); + String name = i < colNames.size() ? colNames.get(i) : "col" + i; + namedChildren.add(new Node(child.layout(), Optional.of(name), + child.usedEncodings(), child.stats(), child.children())); + } + root = new Node(root.layout(), Optional.empty(), root.usedEncodings(), + root.stats(), List.copyOf(namedChildren)); + } + + return new InspectorTree( + handle.version(), + handle.fileSize(), + dtype, + footer.arraySpecs(), + Set.copyOf(overallUsed), + footer.segmentSpecs(), + layout.rowCount(), + root); + } + + private static Node buildNode(Layout layout, Optional fieldName, VortexHandle handle, + List arraySpecs, Set overallUsed, + Progress progress, int[] counter, int total) { + Set localUsed = new LinkedHashSet<>(); + ArrayStats stats = ArrayStats.empty(); + if (layout.isFlat() && !layout.segments().isEmpty()) { + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); + if (spec.compression().code == 0) { + MemorySegment seg = handle.slice(spec.offset(), spec.length()); + Peek peek = peekFlatRoot(seg, arraySpecs); + if (peek.encoding() != null) { + 
localUsed.add(peek.encoding()); + overallUsed.add(peek.encoding()); + } + stats = peek.stats(); + counter[0]++; + progress.update(counter[0], total); + } + } + List children = new ArrayList<>(layout.children().size()); + for (Layout child : layout.children()) { + Node n = buildNode(child, Optional.empty(), handle, arraySpecs, overallUsed, + progress, counter, total); + localUsed.addAll(n.usedEncodings()); + children.add(n); + } + return new Node(layout, fieldName, Set.copyOf(localUsed), stats, List.copyOf(children)); + } + + private static int countPeekableSegments(Layout layout, Footer footer) { + int n = 0; + if (layout.isFlat() && !layout.segments().isEmpty()) { + SegmentSpec spec = footer.segmentSpecs().get(layout.segments().getFirst()); + if (spec.compression().code == 0) { + n++; + } + } + for (Layout child : layout.children()) { + n += countPeekableSegments(child, footer); + } + return n; + } + + /// Callback used by [#build(VortexHandle, Progress)] to report how many + /// flat segments have been peeked so far. Implementations may render a + /// progress bar, log, or ignore (see [#NOOP]). + @FunctionalInterface + public interface Progress { + /// Sink that discards updates. + Progress NOOP = (current, total) -> { + }; + + /// Reports progress. 
+ /// + /// @param current number of segments peeked so far + /// @param total total peekable segments in the file + void update(int current, int total); + } + + private static Peek peekFlatRoot(MemorySegment seg, List arraySpecs) { + int segLen = (int) seg.byteSize(); + ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + int fbLen = bb.getInt(segLen - 4); + int fbStart = segLen - 4 - fbLen; + ByteBuffer fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); + Array fbArray = Array.getRootAsArray(fbBuf); + ArrayNode root = fbArray.root(); + if (root == null) { + return new Peek(null, ArrayStats.empty()); + } + return new Peek(arraySpecs.get(root.encoding()), ArrayStats.fromFbs(root.stats())); + } + + /// Result of a single Flat segment peek - the resolved encoding id (or + /// {@code null} when the FlatBuffer carried no root) plus the per-array + /// statistics decoded from the same FlatBuffer. + /// + /// @param encoding resolved encoding id from the array spec table, or {@code null} + /// @param stats per-array stats, or {@link ArrayStats#empty()} if unknown + public record Peek(String encoding, ArrayStats stats) { + /// Sentinel returned for non-Flat nodes, compressed segments, or + /// segments that don't carry an array root. + public static final Peek EMPTY = new Peek(null, ArrayStats.empty()); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java new file mode 100644 index 00000000..87580501 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java @@ -0,0 +1,103 @@ +package io.github.dfa1.vortex.inspect; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; + +/// Single-threaded I/O executor that owns one {@link io.github.dfa1.vortex.io.VortexHandle}. 
/// Single-threaded I/O executor that owns one {@link io.github.dfa1.vortex.io.VortexHandle}.
///
/// Vortex readers use a confined {@link java.lang.foreign.Arena}, so every
/// {@code slice()} / {@code scan()} call must happen on the same thread that
/// opened the file. The TUI dispatches all such calls to this worker so the
/// render loop on the main thread never crosses the arena's owning thread.
///
/// {@link #pending()} drives the status-line counter; callers should check it
/// when computing UI state.
///
/// All public methods may be called from any thread. Tasks run one at a time,
/// in submission order, on the single daemon thread started by the constructor.
public final class IoWorker implements AutoCloseable {

    /// How often {@link #runAndAwait(Runnable)} re-checks that the worker
    /// thread is still alive while it waits.
    private static final long LIVENESS_POLL_MS = 50;

    private final BlockingQueue<Runnable> queue = new LinkedBlockingQueue<>();
    private final Thread thread;
    private final AtomicInteger pending = new AtomicInteger();
    private volatile boolean closed;

    /// Creates and starts the worker thread.
    ///
    /// @param name thread name
    public IoWorker(String name) {
        this.thread = new Thread(this::loop, name);
        this.thread.setDaemon(true);
        this.thread.start();
    }

    /// Submits a task to run on the worker thread. Returns immediately.
    /// A task submitted after {@link #close()} is silently discarded.
    ///
    /// @param task task that performs I/O and updates shared state
    public void submit(Runnable task) {
        enqueue(task, null);
    }

    /// Runs a task on the worker thread and waits for it to complete.
    /// Used at startup to open the handle on the worker's owning thread.
    ///
    /// A {@link RuntimeException} thrown by the task is not rethrown to the
    /// caller; the task is expected to record its own failures.
    ///
    /// @param task task to execute
    /// @throws InterruptedException if the calling thread is interrupted while waiting
    /// @throws IllegalStateException if the worker is closed, if it stops
    ///         before the task completes, or if this method is called from
    ///         the worker thread itself (which could never complete)
    public void runAndAwait(Runnable task) throws InterruptedException {
        if (Thread.currentThread() == thread) {
            throw new IllegalStateException("runAndAwait called from the worker thread itself");
        }
        java.util.concurrent.CountDownLatch done = new java.util.concurrent.CountDownLatch(1);
        if (!enqueue(task, done)) {
            throw new IllegalStateException("IoWorker is closed");
        }
        // Wait in slices so a worker that died or was closed with the task
        // still queued is detected rather than blocking the caller forever.
        while (!done.await(LIVENESS_POLL_MS, java.util.concurrent.TimeUnit.MILLISECONDS)) {
            if (!thread.isAlive() && done.getCount() > 0) {
                throw new IllegalStateException("IoWorker stopped before the task completed");
            }
        }
    }

    /// Number of submitted tasks that have not yet finished.
    ///
    /// @return pending count, including the currently running task
    public int pending() {
        return pending.get();
    }

    /// Wraps `task` with the pending-count bookkeeping and queues it.
    ///
    /// @param task task to run on the worker thread
    /// @param done latch released after the task finished and the pending
    ///             count was decremented; may be {@code null}
    /// @return {@code false} if the worker is closed and the task was not queued
    private boolean enqueue(Runnable task, java.util.concurrent.CountDownLatch done) {
        if (closed) {
            return false;
        }
        Runnable tracked = () -> {
            try {
                task.run();
            } finally {
                pending.decrementAndGet();
                if (done != null) {
                    done.countDown();
                }
            }
        };
        pending.incrementAndGet();
        queue.offer(tracked);
        // close() may have raced with the check above; take the task back if
        // nobody has claimed it so it is not counted as pending forever.
        if (closed && queue.remove(tracked)) {
            pending.decrementAndGet();
            return false;
        }
        return true;
    }

    private void loop() {
        try {
            while (!closed) {
                Runnable task;
                try {
                    task = queue.take();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
                try {
                    task.run();
                } catch (RuntimeException ignored) {
                    // Task is expected to capture its own failures into shared state.
                }
            }
        } finally {
            // Whatever is still queued will never run; stop counting it.
            while (queue.poll() != null) {
                pending.decrementAndGet();
            }
        }
    }

    /// Stops accepting tasks and interrupts the worker thread. Tasks that
    /// are still queued are dropped without running.
    @Override
    public void close() {
        closed = true;
        thread.interrupt();
    }
}
+ /// + /// @param tree inspector tree + /// @return formatted report + public static String render(InspectorTree tree) { + var sb = new StringBuilder(); + + sb.append("Vortex v").append(tree.version()) + .append(" ").append(formatBytes(tree.fileSize())) + .append(" ").append(tree.totalRowCount()).append(" rows").append('\n'); + sb.append('\n'); + + sb.append("Schema:\n"); + appendSchema(sb, tree.dtype(), " "); + sb.append('\n'); + + sb.append("Registered encodings: ").append(String.join(", ", tree.registeredEncodings())).append('\n'); + sb.append('\n'); + + sb.append("Used encodings: ").append(String.join(", ", tree.usedEncodings())).append('\n'); + sb.append('\n'); + + sb.append("Segments: ").append(tree.segmentCount()) + .append(" total ").append(formatBytes(tree.totalSegmentBytes())).append('\n'); + appendSegmentTable(sb, tree.segmentSpecs(), " "); + sb.append('\n'); + + sb.append("Layout:\n"); + appendLayout(sb, tree.root(), " "); + + return sb.toString(); + } + + private static void appendSegmentTable(StringBuilder sb, List specs, String indent) { + for (int i = 0; i < specs.size(); i++) { + SegmentSpec spec = specs.get(i); + sb.append(indent).append('[').append(i).append("] ") + .append("off=").append(spec.offset()) + .append(" len=").append(formatBytes(spec.length())) + .append(" compression=").append(spec.compression().name()) + .append('\n'); + } + } + + private static void appendLayout(StringBuilder sb, InspectorTree.Node node, String indent) { + Layout layout = node.layout(); + if (layout.isStruct()) { + sb.append(indent).append("struct (").append(layout.rowCount()).append(" rows)\n"); + for (InspectorTree.Node child : node.children()) { + String name = child.fieldName().orElse("?"); + sb.append(indent).append(" ").append(name).append(": "); + appendLayoutInline(sb, child.layout()); + if (!child.usedEncodings().isEmpty()) { + sb.append(" [").append(String.join(", ", child.usedEncodings())).append("]"); + } + ArrayStats agg = aggregateStats(child); + if 
(agg.min() != null || agg.max() != null) { + sb.append(" min=").append(format(agg.min())) + .append(" max=").append(format(agg.max())); + } + sb.append('\n'); + } + } else { + sb.append(indent); + appendLayoutInline(sb, layout); + sb.append('\n'); + } + } + + private static ArrayStats aggregateStats(InspectorTree.Node node) { + Object min = node.stats().min(); + Object max = node.stats().max(); + for (InspectorTree.Node child : node.children()) { + ArrayStats cs = aggregateStats(child); + min = pickMin(min, cs.min()); + max = pickMax(max, cs.max()); + } + if (min == null && max == null) { + return ArrayStats.empty(); + } + return new ArrayStats(min, max, null, null, null, null); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private static Object pickMin(Object a, Object b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + if (a.getClass() != b.getClass() || !(a instanceof Comparable)) { + return a; + } + return ((Comparable) a).compareTo(b) <= 0 ? a : b; + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private static Object pickMax(Object a, Object b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + if (a.getClass() != b.getClass() || !(a instanceof Comparable)) { + return a; + } + return ((Comparable) a).compareTo(b) >= 0 ? 
a : b; + } + + private static String format(Object v) { + if (v == null) { + return "?"; + } + String s = v.toString(); + if (s.length() > 30) { + return s.substring(0, 27) + "..."; + } + return s; + } + + private static void appendLayoutInline(StringBuilder sb, Layout layout) { + sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)"); + if (layout.children().isEmpty()) { + return; + } + sb.append(" → "); + if (layout.children().size() == 1) { + appendLayoutInline(sb, layout.children().getFirst()); + } else { + sb.append(layout.children().size()).append("× ["); + appendLayoutInline(sb, layout.children().getFirst()); + sb.append("]"); + } + } + + private static void appendSchema(StringBuilder sb, DType dtype, String indent) { + if (dtype instanceof DType.Struct s) { + int maxLen = s.fieldNames().stream().mapToInt(String::length).max().orElse(0); + for (int i = 0; i < s.fieldNames().size(); i++) { + String name = s.fieldNames().get(i); + sb.append(indent).append(name) + .append(" ".repeat(maxLen - name.length() + 1)) + .append(formatDType(s.fieldTypes().get(i))).append('\n'); + } + } else { + sb.append(indent).append(formatDType(dtype)).append('\n'); + } + } + + private static String formatDType(DType dtype) { + return switch (dtype) { + case DType.Primitive(var pt, var nullable) -> pt.name() + (nullable ? "?" : ""); + case DType.Utf8(var nullable) -> "utf8" + (nullable ? "?" : ""); + case DType.Binary(var nullable) -> "binary" + (nullable ? "?" : ""); + case DType.Bool(var nullable) -> "bool" + (nullable ? "?" : ""); + case DType.Null ignored -> "null"; + case DType.Decimal(var p, var s, var nullable) -> "decimal(" + p + "," + s + ")" + (nullable ? "?" : ""); + case DType.Struct ignored -> "struct"; + case DType.List(var elem, var nullable) -> "list<" + formatDType(elem) + ">" + (nullable ? "?" : ""); + case DType.FixedSizeList(var elem, var size, var nullable) -> + "list<" + formatDType(elem) + ">[" + size + "]" + (nullable ? "?" 
: ""); + case DType.Extension(var id, var storage, var meta, var nullable) -> + "ext<" + id + ">" + (nullable ? "?" : ""); + case DType.Variant(var nullable) -> "variant" + (nullable ? "?" : ""); + }; + } + + private static String formatBytes(long bytes) { + if (bytes < 1024) { + return bytes + " B"; + } + if (bytes < 1024 * 1024) { + return String.format("%.1f KB", bytes / 1024.0); + } + return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java new file mode 100644 index 00000000..aa4d2641 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -0,0 +1,823 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.array.Array; +import io.github.dfa1.vortex.core.array.BoolArray; +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.DoubleArray; +import io.github.dfa1.vortex.core.array.FloatArray; +import io.github.dfa1.vortex.core.array.GenericArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import io.github.dfa1.vortex.core.array.ShortArray; +import io.github.dfa1.vortex.core.array.VarBinArray; +import io.github.dfa1.vortex.inspect.term.Ansi; +import io.github.dfa1.vortex.inspect.term.Key; +import io.github.dfa1.vortex.inspect.term.Terminal; +import io.github.dfa1.vortex.io.VortexHandle; +import io.github.dfa1.vortex.scan.Chunk; +import io.github.dfa1.vortex.scan.ScanIterator; +import io.github.dfa1.vortex.scan.ScanOptions; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.util.ArrayList; +import java.util.Collections; +import java.util.IdentityHashMap; 
/// Opens the terminal in raw mode, builds an inspector tree, and runs the
/// interactive viewer until quit.
///
/// Equivalent to calling the two-argument overload with
/// {@code InspectorTree.Progress.NOOP}.
///
/// @param handle open Vortex file handle
/// @throws IOException if the terminal cannot be initialized
public static void show(VortexHandle handle) throws IOException {
    show(handle, InspectorTree.Progress.NOOP);
}

/// Runs the interactive viewer until quit, without an I/O worker.
///
/// Delegates to the three-argument overload with a {@code null} worker.
/// The TUI uses the shallow builder so the screen is interactive
/// immediately; encoding, stats and data previews are fetched lazily as the
/// user navigates.
///
/// The {@code progress} parameter is retained for source compatibility but
/// is no longer invoked - the shallow build does no peeks.
///
/// @param handle open Vortex file handle
/// @param progress unused; kept for API stability
/// @throws IOException if the terminal cannot be initialized
public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException {
    show(handle, null, progress);
}
+ /// + /// Passing {@code null} for {@code worker} falls back to synchronous I/O + /// on the render thread — fine for tests but causes the sluggishness this + /// machinery was built to avoid. + /// + /// @param handle open Vortex file handle + /// @param worker I/O dispatcher that owns the handle's thread; may be {@code null} + /// @param progress unused; kept for API stability + /// @throws IOException if the terminal cannot be initialized + public static void show(VortexHandle handle, IoWorker worker, InspectorTree.Progress progress) + throws IOException { + InspectorTree tree = InspectorTree.buildShallow(handle); + try (Terminal term = Terminal.open()) { + new Loop(term, tree, handle, worker).run(); + } + } + + private static final class Loop { + /// Bytes shown per Flat segment when falling back to the raw hex view. + private static final int HEX_PREVIEW_BYTES = 256; + + /// Decoded values shown per column in the data view. + private static final int DATA_PREVIEW_ROWS = 32; + + /// Render cadence while idle — drives spinner animation and reaping of + /// background fetches so updates land even when the user isn't typing. + private static final long POLL_INTERVAL_MS = 80; + + /// ASCII spinner frames; cycled by render tick. + private static final char[] SPINNER = {'|', '/', '-', '\\'}; + + private final Terminal term; + private final InspectorTree tree; + private final VortexHandle handle; + private final IoWorker worker; + // Identity-keyed containers throughout: InspectorTree.Node wraps a + // Layout record whose ByteBuffer metadata field crashes with + // WrongThreadException when its hashCode reads arena-confined bytes + // from any thread other than the handle's owner. Identity hashing + // sidesteps that entirely and matches the natural semantics — Nodes + // are constructed exactly once per shallow build and uniquely + // identify a position in the tree. 
+ private final Set expanded = + Collections.newSetFromMap(new IdentityHashMap<>()); + private final Map peekCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Set peekInFlight = + Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); + private final Map hexCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Set hexInFlight = + Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); + private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); + private final Map dictCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Map columnOf = new IdentityHashMap<>(); + private final Set statsChildren = + Collections.newSetFromMap(new IdentityHashMap<>()); + private volatile String lastError; + private long tick; + private int selected; + private int scrollOffset; + + Loop(Terminal term, InspectorTree tree, VortexHandle handle, IoWorker worker) { + this.term = term; + this.tree = tree; + this.handle = handle; + this.worker = worker; + this.expanded.add(tree.root()); + indexColumns(tree.root()); + indexStatsChildrenOnWorker(tree.root()); + prefetchTopColumns(); + } + + private void indexStatsChildrenOnWorker(InspectorTree.Node root) { + if (worker == null) { + indexStatsChildren(root); + return; + } + try { + worker.runAndAwait(() -> indexStatsChildren(root)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + private void indexStatsChildren(InspectorTree.Node node) { + Layout layout = node.layout(); + if (layout.isZoned() && node.children().size() >= 2) { + // Zoned: child[0] = data, child[1] = per-chunk stats payload + statsChildren.add(node.children().get(1)); + } else if (layout.isChunked() && hasLeadingStats(layout) && !node.children().isEmpty()) { + // Chunked with metadata[0] == 1: child[0] is the stats payload + statsChildren.add(node.children().get(0)); + } + for (InspectorTree.Node child : 
node.children()) { + indexStatsChildren(child); + } + } + + private static boolean hasLeadingStats(Layout layout) { + java.nio.ByteBuffer meta = layout.metadata(); + return meta != null && meta.hasRemaining() && meta.get(meta.position()) == 1; + } + + private void prefetchTopColumns() { + if (!tree.root().layout().isStruct()) { + return; + } + for (InspectorTree.Node col : tree.root().children()) { + col.fieldName().ifPresent(this::startDataLoad); + } + } + + private void indexColumns(InspectorTree.Node root) { + if (!root.layout().isStruct()) { + return; + } + for (InspectorTree.Node colNode : root.children()) { + colNode.fieldName().ifPresent(name -> tagSubtree(colNode, name)); + } + } + + private void tagSubtree(InspectorTree.Node node, String columnName) { + columnOf.put(node, columnName); + for (InspectorTree.Node child : node.children()) { + tagSubtree(child, columnName); + } + } + + private InspectorTree.Peek peek(InspectorTree.Node node) { + InspectorTree.Peek cached = peekCache.get(node); + if (cached != null) { + return cached; + } + if (worker == null) { + InspectorTree.Peek p = safePeek(node); + peekCache.put(node, p); + return p; + } + if (peekInFlight.add(node)) { + worker.submit(() -> { + try { + peekCache.put(node, safePeek(node)); + } finally { + peekInFlight.remove(node); + } + }); + } + return InspectorTree.Peek.EMPTY; + } + + private InspectorTree.Peek safePeek(InspectorTree.Node node) { + try { + return InspectorTree.peek(node, handle); + } catch (RuntimeException e) { + lastError = "peek: " + messageOf(e); + return InspectorTree.Peek.EMPTY; + } + } + + void run() throws IOException { + while (true) { + List items = flatten(); + if (selected >= items.size()) { + selected = items.size() - 1; + } + if (selected < 0) { + selected = 0; + } + render(items); + Optional maybeKey = term.readKey(POLL_INTERVAL_MS); + if (maybeKey.isEmpty()) { + tick++; + continue; + } + Key key = maybeKey.get(); + if (isQuit(key)) { + return; + } + handleKey(key, items); 
+ tick++; + } + } + + private void handleKey(Key key, List items) { + switch (key) { + case Key.ArrowDown ignored -> selected = Math.min(selected + 1, items.size() - 1); + case Key.ArrowUp ignored -> selected = Math.max(selected - 1, 0); + case Key.ArrowRight ignored -> expandSelected(items); + case Key.Enter ignored -> toggleSelected(items); + case Key.ArrowLeft ignored -> { + if (selected < items.size()) { + expanded.remove(items.get(selected).node()); + } + } + case Key.PageDown ignored -> selected = Math.min(selected + 10, items.size() - 1); + case Key.PageUp ignored -> selected = Math.max(selected - 10, 0); + case Key.Home ignored -> selected = 0; + case Key.End ignored -> selected = items.size() - 1; + default -> { + } + } + } + + private void expandSelected(List items) { + if (selected < items.size()) { + InspectorTree.Node n = items.get(selected).node(); + if (!n.children().isEmpty()) { + expanded.add(n); + } + } + } + + private void toggleSelected(List items) { + if (selected >= items.size()) { + return; + } + InspectorTree.Node n = items.get(selected).node(); + if (n.children().isEmpty()) { + return; + } + if (!expanded.add(n)) { + expanded.remove(n); + } + } + + private static boolean isQuit(Key key) { + return key instanceof Key.Escape + || key instanceof Key.Eof + || (key instanceof Key.Char(char c) && (c == 'q' || c == 'Q')); + } + + private List flatten() { + List out = new ArrayList<>(); + walk(tree.root(), 0, out); + return out; + } + + private void walk(InspectorTree.Node node, int depth, List out) { + out.add(new Item(node, depth)); + if (expanded.contains(node)) { + for (InspectorTree.Node child : node.children()) { + walk(child, depth + 1, out); + } + } + } + + private void render(List items) throws IOException { + Terminal.Size size = term.size(); + int width = size.cols(); + int height = size.rows(); + int leftWidth = Math.max(20, width / 2); + int bodyTop = 2; + int bodyBottom = height - 2; + int bodyHeight = bodyBottom - bodyTop; + + if 
(selected < scrollOffset) { + scrollOffset = selected; + } else if (selected >= scrollOffset + bodyHeight) { + scrollOffset = selected - bodyHeight + 1; + } + + StringBuilder buf = new StringBuilder(width * height); + buf.append(Ansi.CLEAR_SCREEN); + drawHeader(buf, width); + drawTree(buf, items, bodyTop, bodyHeight, leftWidth); + drawDivider(buf, leftWidth, bodyTop, bodyBottom); + if (!items.isEmpty()) { + drawDetails(buf, items.get(selected).node(), + leftWidth + 2, bodyTop, width - leftWidth - 2, bodyHeight); + } + drawStatus(buf, width, height - 1); + drawFooter(buf, width, height); + buf.append(Ansi.moveTo(height, 1)); + term.write(buf.toString()); + term.flush(); + } + + private void drawStatus(StringBuilder buf, int width, int row) { + int loads = worker == null ? 0 : worker.pending(); + String err = lastError; + String text; + int bg; + if (err != null) { + text = " ! " + err; + bg = 41; // red + } else if (loads > 0) { + text = " " + SPINNER[(int) (tick % SPINNER.length)] + + " I/O " + loads + " pending"; + bg = 44; // blue + } else { + text = " ready"; + bg = 42; // green + } + buf.append(Ansi.moveTo(row, 1)); + buf.append(Ansi.bg(bg)).append(Ansi.fg(30)); + buf.append(pad(text, width)); + buf.append(Ansi.RESET); + } + + private void drawHeader(StringBuilder buf, int width) { + String header = " vortex-inspect — v" + tree.version() + + " " + formatBytes(tree.fileSize()) + + " rows=" + tree.totalRowCount() + + " segs=" + tree.segmentCount() + + " (" + formatBytes(tree.totalSegmentBytes()) + ")"; + buf.append(Ansi.moveTo(1, 1)); + buf.append(Ansi.bg(46)).append(Ansi.fg(30)); + buf.append(pad(header, width)); + buf.append(Ansi.RESET); + } + + private void drawFooter(StringBuilder buf, int width, int height) { + buf.append(Ansi.moveTo(height, 1)); + buf.append(Ansi.bg(47)).append(Ansi.fg(30)); + buf.append(pad(" ↑↓ nav →/Enter expand ← collapse q quit ", width)); + buf.append(Ansi.RESET); + } + + private void drawTree(StringBuilder buf, List items, int top, 
int rows, int leftWidth) { + for (int row = 0; row < rows; row++) { + int idx = scrollOffset + row; + buf.append(Ansi.moveTo(top + row + 1, 1)); + if (idx >= items.size()) { + buf.append(pad("", leftWidth - 1)); + continue; + } + Item item = items.get(idx); + boolean isSelected = idx == selected; + if (isSelected) { + buf.append(Ansi.bg(43)).append(Ansi.fg(30)); + } + buf.append(pad(renderItem(item), leftWidth - 1)); + if (isSelected) { + buf.append(Ansi.RESET); + } + } + } + + private String renderItem(Item item) { + InspectorTree.Node node = item.node(); + String marker; + if (node.children().isEmpty()) { + marker = " "; + } else if (expanded.contains(node)) { + marker = "v "; + } else { + marker = "> "; + } + String label = item.depth() == 0 && node.layout().isStruct() + ? "struct" + : node.fieldName().map(n -> n + ": ").orElse("") + node.layout().encodingId(); + String tag = statsChildren.contains(node) ? ", stats" : ""; + return " ".repeat(item.depth() * 2) + marker + label + + " (" + node.layout().rowCount() + " rows" + tag + ")"; + } + + private void drawDivider(StringBuilder buf, int col, int top, int bottom) { + for (int y = top; y < bottom; y++) { + buf.append(Ansi.moveTo(y + 1, col + 1)).append('|'); + } + } + + private void drawDetails(StringBuilder buf, InspectorTree.Node node, + int col, int top, int width, int rows) { + List lines = detailLines(node); + for (int i = 0; i < lines.size() && i < rows; i++) { + buf.append(Ansi.moveTo(top + i + 1, col + 1)); + buf.append(truncate(lines.get(i), width)); + } + } + + private List detailLines(InspectorTree.Node node) { + List lines = new ArrayList<>(); + Layout layout = node.layout(); + InspectorTree.Peek p = peek(node); + lines.add("Encoding: " + (p.encoding() != null ? 
p.encoding() : layout.encodingId())); + node.fieldName().ifPresent(name -> lines.add("Field: " + name)); + String col = columnOf.get(node); + if (col != null && !node.fieldName().isPresent()) { + lines.add("Column: " + col); + } + lines.add("Rows: " + layout.rowCount()); + lines.add("Children: " + layout.children().size()); + if (!layout.segments().isEmpty()) { + long subtotal = 0; + for (int idx : layout.segments()) { + subtotal += tree.segmentSpecs().get(idx).length(); + } + lines.add("Segments: " + layout.segments().size() + + " (" + formatBytes(subtotal) + ")"); + long rows = layout.rowCount(); + for (int idx : layout.segments()) { + SegmentSpec spec = tree.segmentSpecs().get(idx); + String bits = rows > 0 + ? " bits/elem=" + String.format("%.2f", spec.length() * 8.0 / rows) + : ""; + lines.add(" [" + idx + "] off=" + spec.offset() + + " len=" + formatBytes(spec.length()) + + " compression=" + spec.compression().name() + + bits); + } + } else { + lines.add("Segments: 0"); + } + if (p.stats().min() != null || p.stats().max() != null) { + lines.add(""); + lines.add("Stats:"); + if (p.stats().min() != null) { + lines.add(" min: " + p.stats().min()); + } + if (p.stats().max() != null) { + lines.add(" max: " + p.stats().max()); + } + } + if (layout.isDict() && layout.children().size() >= 1) { + DataState dictState = loadDictPreview(node); + lines.add(""); + switch (dictState) { + case DataState.Pending ignored -> + lines.add("Dictionary: " + SPINNER[(int) (tick % SPINNER.length)] + " loading..."); + case DataState.Failed(String msg) -> + lines.add("Dictionary: ! 
" + msg); + case DataState.Loaded(List values) -> { + lines.add("Dictionary (" + values.size() + " entries):"); + for (int i = 0; i < values.size(); i++) { + lines.add(String.format(" [%2d] %s", i, values.get(i))); + } + } + } + } + if (col != null) { + DataState state = loadDataPreview(col); + lines.add(""); + switch (state) { + case DataState.Pending ignored -> + lines.add("Data (column '" + col + "'): " + + SPINNER[(int) (tick % SPINNER.length)] + " loading..."); + case DataState.Failed(String msg) -> + lines.add("Data (column '" + col + "'): ! " + msg); + case DataState.Loaded(List values) -> { + lines.add("Data (column '" + col + "', first " + values.size() + " rows):"); + for (int i = 0; i < values.size(); i++) { + lines.add(String.format(" [%2d] %s", i, values.get(i))); + } + } + } + } else if (layout.isFlat() && !layout.segments().isEmpty()) { + byte[] preview = loadHexPreview(node); + if (preview.length > 0) { + lines.add(""); + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + lines.add("Hex (first " + preview.length + " B of segment " + + segIdx + ", total " + formatBytes(spec.length()) + "):"); + for (int off = 0; off < preview.length; off += 16) { + lines.add(formatHexRow(preview, off)); + } + } + } + return lines; + } + + private DataState loadDataPreview(String columnName) { + DataState existing = dataCache.get(columnName); + if (existing != null) { + return existing; + } + startDataLoad(columnName); + return dataCache.getOrDefault(columnName, DataState.PENDING); + } + + private DataState loadDictPreview(InspectorTree.Node dictNode) { + DataState existing = dictCache.get(dictNode); + if (existing != null) { + return existing; + } + if (dictCache.putIfAbsent(dictNode, DataState.PENDING) != null) { + return dictCache.get(dictNode); + } + if (worker == null) { + runDictLoad(dictNode); + } else { + worker.submit(() -> runDictLoad(dictNode)); + } + return dictCache.getOrDefault(dictNode, DataState.PENDING); 
+ } + + private void runDictLoad(InspectorTree.Node dictNode) { + try { + Layout values = dictNode.layout().children().get(0); + DType dtype = columnDtypeFor(dictNode); + if (dtype == null) { + dictCache.put(dictNode, new DataState.Loaded(List.of())); + return; + } + try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) { + int segIdx = values.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()); + io.github.dfa1.vortex.core.array.Array arr = + new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry()) + .decode(seg, handle.footer().arraySpecs(), + dtype, values.rowCount(), arena); + int n = (int) Math.min(arr.length(), DATA_PREVIEW_ROWS); + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + out.add(formatValue(arr, i, dtype)); + } + dictCache.put(dictNode, new DataState.Loaded(List.copyOf(out))); + } + } catch (RuntimeException e) { + dictCache.put(dictNode, new DataState.Failed(messageOf(e))); + lastError = "dict: " + messageOf(e); + } + } + + private DType columnDtypeFor(InspectorTree.Node node) { + String col = columnOf.get(node); + if (col == null) { + return tree.dtype(); + } + return columnDtypeByName(col); + } + + private DType columnDtypeByName(String columnName) { + DType root = tree.dtype(); + if (root instanceof DType.Struct s) { + int idx = s.fieldNames().indexOf(columnName); + if (idx >= 0) { + return s.fieldTypes().get(idx); + } + } + return root; + } + + private void startDataLoad(String columnName) { + if (dataCache.putIfAbsent(columnName, DataState.PENDING) != null) { + return; + } + if (worker == null) { + runDataLoad(columnName); + return; + } + worker.submit(() -> runDataLoad(columnName)); + } + + private void runDataLoad(String columnName) { + try { + DType declared = columnDtypeByName(columnName); + ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS); + try 
(ScanIterator it = handle.scan(opts)) { + if (!it.hasNext()) { + dataCache.put(columnName, new DataState.Loaded(List.of())); + return; + } + try (Chunk chunk = it.next()) { + Array array = chunk.columns().get(columnName); + if (array == null) { + dataCache.put(columnName, new DataState.Loaded(List.of())); + return; + } + int n = (int) Math.min(array.length(), DATA_PREVIEW_ROWS); + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + out.add(formatValue(array, i, declared)); + } + dataCache.put(columnName, new DataState.Loaded(List.copyOf(out))); + } + } + } catch (RuntimeException e) { + dataCache.put(columnName, new DataState.Failed(messageOf(e))); + lastError = columnName + ": " + messageOf(e); + } + } + + private static String messageOf(Throwable t) { + String m = t.getMessage(); + return m != null ? m : t.getClass().getSimpleName(); + } + + /// Per-column data fetch state — pending while a virtual thread is + /// fetching, loaded with values once decoded, failed with a message + /// on error. Sealed so callers can pattern-match exhaustively. + sealed interface DataState { + /// Singleton state for a fetch in flight. + DataState PENDING = new Pending(); + + /// In-flight fetch. + record Pending() implements DataState { + } + + /// Completed fetch with decoded values. + /// + /// @param values formatted first rows of the column + record Loaded(List values) implements DataState { + } + + /// Failed fetch carrying a short error description. 
+ /// + /// @param message short error string + record Failed(String message) implements DataState { + } + } + + private static String formatValue(Array array, int i, DType declared) { + if (declared instanceof DType.Extension ext + && ext.kind() instanceof io.github.dfa1.vortex.core.Extension.Date date) { + try { + return date.decode(array, i).toString(); + } catch (RuntimeException e) { + // fall through to generic rendering on shape mismatch + } + } + return switch (array) { + case LongArray a -> Long.toString(a.getLong(i)); + case IntArray a -> Integer.toString(a.getInt(i)); + case ShortArray a -> Short.toString(a.getShort(i)); + case ByteArray a -> Byte.toString(a.getByte(i)); + case DoubleArray a -> Double.toString(a.getDouble(i)); + case FloatArray a -> Float.toString(a.getFloat(i)); + case BoolArray a -> Boolean.toString(a.getBoolean(i)); + case VarBinArray a -> a.dtype() instanceof DType.Utf8 + ? "\"" + a.getString(i) + "\"" + : bytesToShortHex(a.getBytes(i)); + case GenericArray a when a.dtype() instanceof DType.Decimal -> + tryDecimal(a, i); + default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">"; + }; + } + + private static String tryDecimal(GenericArray a, int i) { + try { + return a.getDecimal(i).toPlainString(); + } catch (RuntimeException e) { + String msg = e.getMessage(); + if (msg != null && msg.contains("null cell")) { + return "null"; + } + return "<" + a.getClass().getSimpleName() + " " + a.dtype() + ">"; + } + } + + private static String bytesToShortHex(byte[] bytes) { + int n = Math.min(bytes.length, 16); + StringBuilder sb = new StringBuilder(n * 3 + 2); + sb.append("0x"); + for (int i = 0; i < n; i++) { + sb.append(String.format("%02x", bytes[i] & 0xff)); + } + if (bytes.length > n) { + sb.append("..."); + } + return sb.toString(); + } + + private byte[] loadHexPreview(InspectorTree.Node node) { + byte[] cached = hexCache.get(node); + if (cached != null) { + return cached; + } + if (worker == null) { + byte[] bytes 
= fetchHex(node); + hexCache.put(node, bytes); + return bytes; + } + if (hexInFlight.add(node)) { + worker.submit(() -> { + try { + hexCache.put(node, fetchHex(node)); + } finally { + hexInFlight.remove(node); + } + }); + } + return new byte[0]; + } + + private byte[] fetchHex(InspectorTree.Node node) { + Layout layout = node.layout(); + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + int wanted = (int) Math.min((long) HEX_PREVIEW_BYTES, spec.length()); + if (wanted <= 0) { + return new byte[0]; + } + try { + MemorySegment seg = handle.slice(spec.offset(), wanted); + byte[] buf = new byte[wanted]; + MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); + return buf; + } catch (RuntimeException e) { + lastError = "hex: " + messageOf(e); + return new byte[0]; + } + } + + private static String formatHexRow(byte[] data, int offset) { + StringBuilder sb = new StringBuilder(80); + sb.append(String.format("%08x ", offset)); + for (int i = 0; i < 16; i++) { + int idx = offset + i; + if (idx < data.length) { + sb.append(String.format("%02x ", data[idx] & 0xff)); + } else { + sb.append(" "); + } + if (i == 7) { + sb.append(' '); + } + } + sb.append(" |"); + for (int i = 0; i < 16; i++) { + int idx = offset + i; + if (idx >= data.length) { + sb.append(' '); + continue; + } + int b = data[idx] & 0xff; + sb.append(b >= 0x20 && b < 0x7f ? (char) b : '.'); + } + sb.append('|'); + return sb.toString(); + } + + private record Item(InspectorTree.Node node, int depth) { + } + + private static String pad(String s, int width) { + if (s.length() >= width) { + return s.substring(0, width); + } + return s + " ".repeat(width - s.length()); + } + + private static String truncate(String s, int width) { + return s.length() > width ? 
s.substring(0, width) : s; + } + + private static String formatBytes(long bytes) { + if (bytes < 1024) { + return bytes + " B"; + } + if (bytes < 1024 * 1024) { + return String.format("%.1f KB", bytes / 1024.0); + } + return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); + } + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java new file mode 100644 index 00000000..4395f263 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java @@ -0,0 +1,63 @@ +package io.github.dfa1.vortex.inspect.term; + +/// ANSI / xterm CSI escape constants and small formatting helpers. +/// +/// Sequences are plain ASCII once the leading {@code ESC} (0x1B) byte is included. +/// They're written verbatim to {@code System.out} once raw mode is enabled. +public final class Ansi { + + /// ESC (0x1B) - the byte every CSI sequence starts with. + public static final String ESC = String.valueOf((char) 0x1B); + + /// Control Sequence Introducer: {@code ESC + '['}. + public static final String CSI = ESC + "["; + + /// Clear entire screen. + public static final String CLEAR_SCREEN = CSI + "2J"; + + /// Move cursor to top-left. + public static final String CURSOR_HOME = CSI + "H"; + + /// Reset all SGR attributes. + public static final String RESET = CSI + "0m"; + + /// Hide the cursor. + public static final String HIDE_CURSOR = CSI + "?25l"; + + /// Show the cursor. + public static final String SHOW_CURSOR = CSI + "?25h"; + + /// Switch to the alternate screen buffer. + public static final String ENTER_ALT_SCREEN = CSI + "?1049h"; + + /// Restore the primary screen buffer. + public static final String EXIT_ALT_SCREEN = CSI + "?1049l"; + + private Ansi() { + } + + /// Move the cursor to (1-based) {@code row}, {@code col}. 
+ /// + /// @param row 1-based row index + /// @param col 1-based column index + /// @return CSI sequence + public static String moveTo(int row, int col) { + return CSI + row + ";" + col + "H"; + } + + /// Standard SGR foreground colour (codes 30-37 normal, 90-97 bright). + /// + /// @param code SGR colour code + /// @return CSI sequence + public static String fg(int code) { + return CSI + code + "m"; + } + + /// Standard SGR background colour (codes 40-47 normal, 100-107 bright). + /// + /// @param code SGR colour code + /// @return CSI sequence + public static String bg(int code) { + return CSI + code + "m"; + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java new file mode 100644 index 00000000..7f0a663d --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java @@ -0,0 +1,77 @@ +package io.github.dfa1.vortex.inspect.term; + +/// Decoded terminal input event. +public sealed interface Key { + + /// Up arrow. + enum ArrowUp implements Key { + /// Singleton instance. + INSTANCE + } + + /// Down arrow. + enum ArrowDown implements Key { + /// Singleton instance. + INSTANCE + } + + /// Left arrow. + enum ArrowLeft implements Key { + /// Singleton instance. + INSTANCE + } + + /// Right arrow. + enum ArrowRight implements Key { + /// Singleton instance. + INSTANCE + } + + /// Page Up. + enum PageUp implements Key { + /// Singleton instance. + INSTANCE + } + + /// Page Down. + enum PageDown implements Key { + /// Singleton instance. + INSTANCE + } + + /// Home key. + enum Home implements Key { + /// Singleton instance. + INSTANCE + } + + /// End key. + enum End implements Key { + /// Singleton instance. + INSTANCE + } + + /// Enter / Return (LF or CR). + enum Enter implements Key { + /// Singleton instance. + INSTANCE + } + + /// Bare Escape key press (no CSI sequence followed). + enum Escape implements Key { + /// Singleton instance. 
+ INSTANCE + } + + /// End of input - stdin closed. + enum Eof implements Key { + /// Singleton instance. + INSTANCE + } + + /// Printable character. + /// + /// @param value ASCII codepoint (multi-byte UTF-8 not decoded here) + record Char(char value) implements Key { + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java new file mode 100644 index 00000000..824fe12d --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java @@ -0,0 +1,89 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.InputStream; + +/// Translates raw stdin bytes into [Key] events. +/// +/// Recognises common CSI sequences emitted by xterm-compatible terminals: +/// {@code ESC [ A/B/C/D} for arrows, {@code ESC [ 5~ / 6~} for PgUp/PgDn, +/// {@code ESC [ H / F} and {@code ESC [ 1~ / 4~} for Home/End. Any unrecognised +/// escape sequence is dropped and decoding continues with the next byte. +/// +/// Stateless across reads - call [#next(InputStream)] for each event. +public final class KeyDecoder { + + private KeyDecoder() { + } + + /// Reads the next key from {@code in}, blocking until at least one byte arrives. + /// + /// @param in raw input stream (typically {@code System.in} in cbreak mode) + /// @return the decoded key, or [Key.Eof] if the stream is at EOF + /// @throws IOException if the underlying read fails + public static Key next(InputStream in) throws IOException { + int b = in.read(); + if (b < 0) { + return Key.Eof.INSTANCE; + } + if (b == 0x1B) { + return readAfterEsc(in); + } + if (b == '\r' || b == '\n') { + return Key.Enter.INSTANCE; + } + return new Key.Char((char) b); + } + + private static Key readAfterEsc(InputStream in) throws IOException { + // Bare ESC: no follow-up byte available within a short window. 
+ // We approximate by peeking via available(); proper terminal IO would + // use a select() / VTIME timer, but this is enough for q/Esc quit. + if (in.available() == 0) { + return Key.Escape.INSTANCE; + } + int b1 = in.read(); + if (b1 != '[' && b1 != 'O') { + return Key.Escape.INSTANCE; + } + int b2 = in.read(); + return switch (b2) { + case 'A' -> Key.ArrowUp.INSTANCE; + case 'B' -> Key.ArrowDown.INSTANCE; + case 'C' -> Key.ArrowRight.INSTANCE; + case 'D' -> Key.ArrowLeft.INSTANCE; + case 'H' -> Key.Home.INSTANCE; + case 'F' -> Key.End.INSTANCE; + default -> readTildeSequence(in, b2); + }; + } + + private static Key readTildeSequence(InputStream in, int firstDigit) throws IOException { + if (firstDigit < '0' || firstDigit > '9') { + return Key.Escape.INSTANCE; + } + int digit = firstDigit - '0'; + int next = in.read(); + if (next == -1) { + return Key.Eof.INSTANCE; + } + // Two-digit codes like ESC [ 15~; collapse to single digit by ignoring extras. + while (next >= '0' && next <= '9') { + digit = digit * 10 + (next - '0'); + next = in.read(); + if (next == -1) { + return Key.Eof.INSTANCE; + } + } + if (next != '~') { + return Key.Escape.INSTANCE; + } + return switch (digit) { + case 1, 7 -> Key.Home.INSTANCE; + case 4, 8 -> Key.End.INSTANCE; + case 5 -> Key.PageUp.INSTANCE; + case 6 -> Key.PageDown.INSTANCE; + default -> Key.Escape.INSTANCE; + }; + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java new file mode 100644 index 00000000..8ab92e6f --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java @@ -0,0 +1,185 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.Optional; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; 
+import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/// POSIX (Linux + macOS) raw-mode terminal implementation. +/// +/// Uses libc {@code tcgetattr} / {@code cfmakeraw} / {@code tcsetattr} via FFM +/// to put stdin in non-canonical, no-echo mode. {@code ioctl(TIOCGWINSZ)} +/// queries the terminal size on every call (no SIGWINCH plumbing). +/// +/// On entry: saves the original {@code termios}, switches to alt screen, hides +/// the cursor. On [#close()]: restores everything, even on exceptions, via a +/// shutdown hook that fires if the caller skips try-with-resources. +public final class PosixTerminal implements Terminal { + + private static final int STDIN_FD = 0; + private static final int STDOUT_FD = 1; + private static final int TCSANOW = 0; + + private static final long TIOCGWINSZ = isMac() ? 0x40087468L : 0x5413L; + + /// {@code struct termios} is at most 72 bytes (macOS); Linux glibc is 60. + /// 128 is a comfortable upper bound and lets the same code work on both. 
+ private static final long TERMIOS_SIZE = 128; + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup LIBC = LINKER.defaultLookup(); + + private static final MethodHandle TCGETATTR = downcall("tcgetattr", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS)); + private static final MethodHandle TCSETATTR = downcall("tcsetattr", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS)); + private static final MethodHandle CFMAKERAW = downcall("cfmakeraw", + FunctionDescriptor.ofVoid(ValueLayout.ADDRESS)); + private static final MethodHandle IOCTL = LINKER.downcallHandle( + LIBC.find("ioctl").orElseThrow(() -> new UnsatisfiedLinkError("ioctl")), + FunctionDescriptor.of(ValueLayout.JAVA_INT, + ValueLayout.JAVA_INT, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS), + Linker.Option.firstVariadicArg(2)); + + private final Arena arena; + private final MemorySegment savedTermios; + private final PrintStream out; + private final Thread shutdownHook; + private boolean closed; + + private PosixTerminal(Arena arena, MemorySegment savedTermios) { + this.arena = arena; + this.savedTermios = savedTermios; + this.out = System.out; + this.shutdownHook = new Thread(this::restore, "posix-term-restore"); + Runtime.getRuntime().addShutdownHook(shutdownHook); + } + + /// Enters raw mode and switches to the alternate screen. 
+ /// + /// @return open terminal + /// @throws IOException if {@code tcgetattr} or {@code tcsetattr} fails + public static PosixTerminal open() throws IOException { + Arena arena = Arena.ofShared(); + try { + MemorySegment saved = arena.allocate(TERMIOS_SIZE); + MemorySegment raw = arena.allocate(TERMIOS_SIZE); + int rc = (int) TCGETATTR.invokeExact(STDIN_FD, saved); + if (rc != 0) { + throw new IOException("tcgetattr failed: rc=" + rc); + } + MemorySegment.copy(saved, 0, raw, 0, TERMIOS_SIZE); + CFMAKERAW.invokeExact(raw); + rc = (int) TCSETATTR.invokeExact(STDIN_FD, TCSANOW, raw); + if (rc != 0) { + throw new IOException("tcsetattr failed: rc=" + rc); + } + PosixTerminal term = new PosixTerminal(arena, saved); + term.out.print(Ansi.ENTER_ALT_SCREEN); + term.out.print(Ansi.HIDE_CURSOR); + term.out.print(Ansi.CLEAR_SCREEN); + term.out.flush(); + return term; + } catch (Throwable t) { + arena.close(); + if (t instanceof IOException io) { + throw io; + } + throw new IOException(t); + } + } + + @Override + public Size size() { + MemorySegment ws = arena.allocate(8); + try { + int rc = (int) IOCTL.invokeExact(STDOUT_FD, TIOCGWINSZ, ws); + if (rc != 0) { + return new Size(24, 80); + } + } catch (Throwable t) { + return new Size(24, 80); + } + int rows = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 0)); + int cols = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 2)); + if (rows == 0 || cols == 0) { + return new Size(24, 80); + } + return new Size(rows, cols); + } + + @Override + public void write(String s) { + out.print(s); + } + + @Override + public void flush() { + out.flush(); + } + + @Override + public Key readKey() throws IOException { + return KeyDecoder.next(System.in); + } + + @Override + public Optional readKey(long timeoutMs) throws IOException { + long deadline = System.nanoTime() + timeoutMs * 1_000_000L; + while (System.in.available() == 0) { + if (System.nanoTime() >= deadline) { + return Optional.empty(); + } + try { + Thread.sleep(20); + } 
catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return Optional.empty(); + } + } + return Optional.of(KeyDecoder.next(System.in)); + } + + @Override + public void close() { + if (closed) { + return; + } + closed = true; + try { + Runtime.getRuntime().removeShutdownHook(shutdownHook); + } catch (IllegalStateException ignored) { + // JVM already shutting down. + } + restore(); + arena.close(); + } + + private void restore() { + try { + out.print(Ansi.SHOW_CURSOR); + out.print(Ansi.EXIT_ALT_SCREEN); + out.print(Ansi.RESET); + out.flush(); + TCSETATTR.invokeExact(STDIN_FD, TCSANOW, savedTermios); + } catch (Throwable ignored) { + // Best-effort: JVM is exiting; nothing useful to do. + } + } + + private static MethodHandle downcall(String name, FunctionDescriptor desc) { + return LINKER.downcallHandle( + LIBC.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)), + desc); + } + + private static boolean isMac() { + return System.getProperty("os.name", "").toLowerCase().contains("mac"); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java new file mode 100644 index 00000000..425af92c --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java @@ -0,0 +1,80 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.util.Optional; + +/// Direct, dependency-free terminal abstraction. +/// +/// Implementations toggle the OS console into raw / non-canonical mode on +/// [#open()] and restore the prior state on [#close()]. Output is plain bytes +/// to {@code System.out}; input is buffered keystrokes from {@code System.in}. +/// +/// Usage: +/// ``` +/// try (Terminal term = Terminal.open()) { +/// term.write(Ansi.CLEAR_SCREEN); +/// Key k = term.readKey(); +/// ... 
+/// } +/// ``` +public sealed interface Terminal extends AutoCloseable + permits PosixTerminal, WindowsTerminal { + + /// Opens the platform-appropriate raw-mode terminal. + /// + /// Picks [PosixTerminal] on Linux / macOS and [WindowsTerminal] on Windows + /// based on {@code os.name}. + /// + /// @return an open raw terminal handle + /// @throws IOException if the OS-level setup fails + static Terminal open() throws IOException { + String os = System.getProperty("os.name", "").toLowerCase(); + if (os.contains("win")) { + return WindowsTerminal.open(); + } + return PosixTerminal.open(); + } + + /// Current terminal size in cells. + /// + /// @return rows and columns at this moment (re-queried each call) + Size size(); + + /// Writes a string of bytes (ASCII / UTF-8) verbatim to the terminal. + /// + /// @param s text to send (may contain ANSI escapes) + /// @throws IOException if the write fails + void write(String s) throws IOException; + + /// Flushes any buffered output. + /// + /// @throws IOException if flush fails + void flush() throws IOException; + + /// Blocks until a key is available, then returns the decoded event. + /// + /// @return next key + /// @throws IOException if reading fails + Key readKey() throws IOException; + + /// Reads a key with a wall-clock deadline. Returns {@link Optional#empty()} + /// if the timeout elapses before any input is available. + /// + /// @param timeoutMs maximum time to wait, in milliseconds + /// @return the decoded key, or empty on timeout + /// @throws IOException if reading fails + Optional readKey(long timeoutMs) throws IOException; + + /// Restores the original terminal mode and exits the alternate screen. + /// + /// Idempotent - safe to call multiple times. + @Override + void close(); + + /// Terminal dimensions in character cells. 
+ /// + /// @param rows number of rows + /// @param cols number of columns + record Size(int rows, int cols) { + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java new file mode 100644 index 00000000..5a0c6630 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java @@ -0,0 +1,210 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.Optional; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/// Windows console raw-mode implementation via kernel32.dll. +/// +/// Toggles {@code ENABLE_VIRTUAL_TERMINAL_PROCESSING} on stdout (Win10 1809+ +/// required) so ANSI sequences in [Ansi] render natively. Stdin runs with +/// line-input + echo + processed-input disabled and VT input enabled so xterm +/// arrow sequences arrive intact. +/// +/// {@code GetConsoleScreenBufferInfo} drives [#size()]; we report the visible +/// window rect, not the scrollback buffer. 
+public final class WindowsTerminal implements Terminal { + + private static final long STD_INPUT_HANDLE = -10L; + private static final long STD_OUTPUT_HANDLE = -11L; + + private static final int ENABLE_PROCESSED_INPUT = 0x0001; + private static final int ENABLE_LINE_INPUT = 0x0002; + private static final int ENABLE_ECHO_INPUT = 0x0004; + private static final int ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200; + + private static final int ENABLE_PROCESSED_OUTPUT = 0x0001; + private static final int ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004; + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup KERNEL32 = SymbolLookup.libraryLookup( + "kernel32", Arena.global()); + + private static final MethodHandle GET_STD_HANDLE = downcall("GetStdHandle", + FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.JAVA_LONG)); + private static final MethodHandle GET_CONSOLE_MODE = downcall("GetConsoleMode", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); + private static final MethodHandle SET_CONSOLE_MODE = downcall("SetConsoleMode", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT)); + private static final MethodHandle GET_CONSOLE_SCREEN_BUFFER_INFO = downcall( + "GetConsoleScreenBufferInfo", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); + + private final Arena arena; + private final MemorySegment stdoutHandle; + private final int savedInMode; + private final int savedOutMode; + private final PrintStream out; + private final Thread shutdownHook; + private boolean closed; + + private WindowsTerminal(Arena arena, MemorySegment stdoutHandle, + int savedInMode, int savedOutMode) { + this.arena = arena; + this.stdoutHandle = stdoutHandle; + this.savedInMode = savedInMode; + this.savedOutMode = savedOutMode; + this.out = System.out; + this.shutdownHook = new Thread(this::restore, "windows-term-restore"); + 
Runtime.getRuntime().addShutdownHook(shutdownHook); + } + + /// Enables VT processing on stdout and VT input on stdin. + /// + /// @return open terminal + /// @throws IOException if console handles cannot be obtained or modes set + public static WindowsTerminal open() throws IOException { + Arena arena = Arena.ofShared(); + try { + MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE); + MemorySegment stdout = invokeHandle(GET_STD_HANDLE, STD_OUTPUT_HANDLE); + int inMode = readMode(arena, stdin); + int outMode = readMode(arena, stdout); + + int newIn = (inMode & ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT + | ENABLE_PROCESSED_INPUT)) | ENABLE_VIRTUAL_TERMINAL_INPUT; + int newOut = outMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING | ENABLE_PROCESSED_OUTPUT; + + if ((int) SET_CONSOLE_MODE.invokeExact(stdin, newIn) == 0) { + throw new IOException("SetConsoleMode(stdin) failed"); + } + if ((int) SET_CONSOLE_MODE.invokeExact(stdout, newOut) == 0) { + throw new IOException("SetConsoleMode(stdout) failed"); + } + + WindowsTerminal term = new WindowsTerminal(arena, stdout, inMode, outMode); + term.out.print(Ansi.ENTER_ALT_SCREEN); + term.out.print(Ansi.HIDE_CURSOR); + term.out.print(Ansi.CLEAR_SCREEN); + term.out.flush(); + return term; + } catch (Throwable t) { + arena.close(); + if (t instanceof IOException io) { + throw io; + } + throw new IOException(t); + } + } + + @Override + public Size size() { + // CONSOLE_SCREEN_BUFFER_INFO is 22 bytes: COORD(4) size, COORD(4) cursor, + // WORD(2) attrs, SMALL_RECT(8) window, COORD(4) max. We only need window. 
+ MemorySegment info = arena.allocate(22); + try { + int rc = (int) GET_CONSOLE_SCREEN_BUFFER_INFO.invokeExact(stdoutHandle, info); + if (rc == 0) { + return new Size(24, 80); + } + } catch (Throwable t) { + return new Size(24, 80); + } + int left = info.get(ValueLayout.JAVA_SHORT, 10); + int top = info.get(ValueLayout.JAVA_SHORT, 12); + int right = info.get(ValueLayout.JAVA_SHORT, 14); + int bottom = info.get(ValueLayout.JAVA_SHORT, 16); + int rows = bottom - top + 1; + int cols = right - left + 1; + if (rows <= 0 || cols <= 0) { + return new Size(24, 80); + } + return new Size(rows, cols); + } + + @Override + public void write(String s) { + out.print(s); + } + + @Override + public void flush() { + out.flush(); + } + + @Override + public Key readKey() throws IOException { + return KeyDecoder.next(System.in); + } + + @Override + public Optional readKey(long timeoutMs) throws IOException { + long deadline = System.nanoTime() + timeoutMs * 1_000_000L; + while (System.in.available() == 0) { + if (System.nanoTime() >= deadline) { + return Optional.empty(); + } + try { + Thread.sleep(20); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return Optional.empty(); + } + } + return Optional.of(KeyDecoder.next(System.in)); + } + + @Override + public void close() { + if (closed) { + return; + } + closed = true; + try { + Runtime.getRuntime().removeShutdownHook(shutdownHook); + } catch (IllegalStateException ignored) { + // JVM already shutting down. + } + restore(); + arena.close(); + } + + private void restore() { + try { + out.print(Ansi.SHOW_CURSOR); + out.print(Ansi.EXIT_ALT_SCREEN); + out.print(Ansi.RESET); + out.flush(); + MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE); + SET_CONSOLE_MODE.invokeExact(stdin, savedInMode); + SET_CONSOLE_MODE.invokeExact(stdoutHandle, savedOutMode); + } catch (Throwable ignored) { + // Best-effort: JVM is exiting; nothing useful to do. 
+ } + } + + private static int readMode(Arena arena, MemorySegment handle) throws Throwable { + MemorySegment slot = arena.allocate(4); + if ((int) GET_CONSOLE_MODE.invokeExact(handle, slot) == 0) { + throw new IOException("GetConsoleMode failed"); + } + return slot.get(ValueLayout.JAVA_INT, 0); + } + + private static MemorySegment invokeHandle(MethodHandle mh, long stdHandle) throws Throwable { + return (MemorySegment) mh.invokeExact(stdHandle); + } + + private static MethodHandle downcall(String name, FunctionDescriptor desc) { + return LINKER.downcallHandle( + KERNEL32.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)), + desc); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java new file mode 100644 index 00000000..cf4398aa --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java @@ -0,0 +1,278 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.CompressionScheme; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Footer; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.io.VortexHandle; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.List; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.BDDMockito.given; + +@ExtendWith(MockitoExtension.class) +class InspectorTreeTest { + + @Mock + VortexHandle handle; + + @Test + void build_withStructDType_assignsFieldNamesToColumns() { + // Given + Layout idLeaf = leaf("vortex.constant", 10); + Layout valLeaf = leaf("vortex.constant", 10); + Layout root = struct(10, List.of(idLeaf, valLeaf)); + 
DType dtype = new DType.Struct( + List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)), + false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().fieldName()).isEmpty(); + assertThat(sut.root().children()).hasSize(2); + assertThat(sut.root().children().get(0).fieldName()).contains("id"); + assertThat(sut.root().children().get(1).fieldName()).contains("value"); + } + + @Test + void build_withFewerColNamesThanChildren_fillsWithSyntheticNames() { + // Given — defensive path: malformed footer with a struct layout whose child count + // exceeds the dtype's named fields. Should not throw; should fall back to col0/col1... + Layout root = struct(0, List.of(leaf("vortex.constant", 0), leaf("vortex.constant", 0))); + DType dtype = new DType.Struct(List.of("only"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().children().get(0).fieldName()).contains("only"); + assertThat(sut.root().children().get(1).fieldName()).contains("col1"); + } + + @Test + void build_withNonStructRoot_leavesFieldNameEmpty() { + // Given + Layout root = leaf("vortex.flat", 100); + DType dtype = new DType.Primitive(PType.I64, false); + givenHandle(dtype, root, List.of("vortex.flat"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().fieldName()).isEmpty(); + assertThat(sut.root().children()).isEmpty(); + } + + @Test + void build_sumsSegmentBytesAndCountsSegments() { + // Given + Layout root = leaf("vortex.flat", 0); + DType dtype = new DType.Primitive(PType.I32, false); + List segs = List.of( + new SegmentSpec(0, 128, (byte) 0, CompressionScheme.LZ4), + new SegmentSpec(128, 256, (byte) 0, 
CompressionScheme.LZ4), + new SegmentSpec(384, 64, (byte) 0, CompressionScheme.LZ4)); + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.segmentCount()).isEqualTo(3); + assertThat(sut.totalSegmentBytes()).isEqualTo(128L + 256L + 64L); + assertThat(sut.segmentSpecs()).containsExactlyElementsOf(segs); + } + + @Test + void build_setsTotalRowCountFromRootLayout() { + // Given — total rows is the root layout's row count, regardless of struct/non-struct + Layout root = struct(12_345L, List.of(leaf("vortex.constant", 12_345L))); + DType dtype = new DType.Struct(List.of("c"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.totalRowCount()).isEqualTo(12_345L); + } + + @Test + void build_carriesVersionAndFileSize() { + // Given + Layout root = leaf("vortex.flat", 0); + DType dtype = new DType.Primitive(PType.I32, false); + given(handle.version()).willReturn(7); + given(handle.fileSize()).willReturn(123_456L); + given(handle.dtype()).willReturn(dtype); + given(handle.layout()).willReturn(root); + given(handle.footer()).willReturn(new Footer(List.of("vortex.flat"), List.of(), List.of(), List.of())); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.version()).isEqualTo(7); + assertThat(sut.fileSize()).isEqualTo(123_456L); + } + + @Test + void build_reportsProgressOncePerPeekedSegment() { + // Given — struct of three Flat columns whose segments are all compressed (skipped). + // Only uncompressed leaves trigger peekFlatRoot, so progress should never fire + // for this file. 
+ Layout c1 = new Layout("vortex.flat", 0, null, List.of(), List.of(0)); + Layout c2 = new Layout("vortex.flat", 0, null, List.of(), List.of(1)); + Layout c3 = new Layout("vortex.flat", 0, null, List.of(), List.of(2)); + Layout root = struct(0, List.of(c1, c2, c3)); + DType dtype = new DType.Struct(List.of("a", "b", "c"), + List.of(new DType.Primitive(PType.I32, false), + new DType.Primitive(PType.I32, false), + new DType.Primitive(PType.I32, false)), + false); + List segs = List.of( + new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD), // skipped + new SegmentSpec(1024, 1024, (byte) 0, CompressionScheme.LZ4), // skipped + new SegmentSpec(2048, 1024, (byte) 0, CompressionScheme.LZ4)); // skipped + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + java.util.List reports = new java.util.ArrayList<>(); + + // When + InspectorTree.build(handle, (cur, tot) -> reports.add(new int[]{cur, tot})); + + // Then — all three are compressed, so no peeks fire; progress never called + assertThat(reports).isEmpty(); + } + + @Test + void build_progressNoop_isAcceptedAndProducesSameTree() { + // Given + Layout root = struct(0, List.of(leaf("vortex.constant", 0))); + DType dtype = new DType.Struct(List.of("c"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When / Then — NOOP passes; no NPE + InspectorTree sut = InspectorTree.build(handle, InspectorTree.Progress.NOOP); + assertThat(sut.root().children()).hasSize(1); + } + + @Test + void buildShallow_skipsAllSlicesAndStillNamesColumns() { + // Given — shallow build is the path the TUI uses; it must touch zero segment + // bytes (so opening a remote file is instant) yet still populate fieldName on + // top-level struct children. 
+ Layout col0 = new Layout("vortex.flat", 10, null, List.of(), List.of(0)); + Layout col1 = new Layout("vortex.flat", 10, null, List.of(), List.of(1)); + Layout root = struct(10, List.of(col0, col1)); + DType dtype = new DType.Struct(List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), + new DType.Primitive(PType.F64, false)), + false); + List segs = List.of( + new SegmentSpec(0, 64, (byte) 0, CompressionScheme.NONE), + new SegmentSpec(64, 64, (byte) 0, CompressionScheme.NONE)); + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + // When + InspectorTree sut = InspectorTree.buildShallow(handle); + + // Then — column names assigned, but no peek fired so stats / usedEncodings empty + assertThat(sut.root().children().get(0).fieldName()).contains("id"); + assertThat(sut.root().children().get(1).fieldName()).contains("value"); + assertThat(sut.usedEncodings()).isEmpty(); + assertThat(sut.root().children().get(0).usedEncodings()).isEmpty(); + assertThat(sut.root().children().get(0).stats()).isEqualTo(io.github.dfa1.vortex.core.ArrayStats.empty()); + // Slice is reserved for lazy peek; shallow build must never call it + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + + @Test + void peek_nonFlatNode_returnsEmptyWithoutSlicing() { + // Given — peek is the lazy hook the TUI uses on the selected node. Non-Flat + // layouts (struct, chunked, stats wrappers) carry no array root and must short + // out without slicing, so navigating to them doesn't hit the network. 
+ Layout structLayout = struct(0, List.of()); + InspectorTree.Node node = new InspectorTree.Node(structLayout, java.util.Optional.empty(), + Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of()); + + // When + InspectorTree.Peek result = InspectorTree.peek(node, handle); + + // Then + assertThat(result).isSameAs(InspectorTree.Peek.EMPTY); + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + + @Test + void peek_compressedFlatSegment_returnsEmptyWithoutSlicing() { + // Given — compressed segments would need the encoding to decompress before + // their FlatBuffer can be parsed; peek skips them rather than slicing garbage. + Layout flat = new Layout("vortex.flat", 10, null, List.of(), List.of(0)); + InspectorTree.Node node = new InspectorTree.Node(flat, java.util.Optional.empty(), + Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of()); + given(handle.footer()).willReturn(new io.github.dfa1.vortex.core.Footer( + List.of("vortex.flat"), List.of(), + List.of(new SegmentSpec(0, 100, (byte) 0, CompressionScheme.ZSTD)), + List.of())); + + // When + InspectorTree.Peek result = InspectorTree.peek(node, handle); + + // Then + assertThat(result).isSameAs(InspectorTree.Peek.EMPTY); + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + + @Test + void build_flatChildWithCompressedSegment_skipsRootEncodingPeek() { + // Given — peekRootEncoding() reads the segment as a FlatBuffer; compressed segments + // are intentionally skipped so a malformed or compressed payload can't crash the + // inspector. With code != NONE we should still build a tree, with no encodings used. 
+ Layout root = new Layout("vortex.flat", 0, null, List.of(), List.of(0)); + DType dtype = new DType.Primitive(PType.I32, false); + SegmentSpec compressed = new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD); + givenHandle(dtype, root, List.of("vortex.flat"), List.of(compressed)); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.usedEncodings()).isEmpty(); + assertThat(sut.root().usedEncodings()).isEmpty(); + } + + private void givenHandle(DType dtype, Layout layout, List arraySpecs, List segs) { + given(handle.dtype()).willReturn(dtype); + given(handle.layout()).willReturn(layout); + given(handle.footer()).willReturn(new Footer(arraySpecs, List.of(), segs, List.of())); + } + + private static Layout struct(long rows, List children) { + return new Layout("vortex.struct", rows, null, children, List.of()); + } + + private static Layout leaf(String encodingId, long rows) { + return new Layout(encodingId, rows, null, List.of(), List.of()); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java new file mode 100644 index 00000000..b1926274 --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java @@ -0,0 +1,203 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.ArrayStats; +import io.github.dfa1.vortex.core.CompressionScheme; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.SegmentSpec; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +class VortexInspectorTest { + + @Test + void render_struct_listsHeaderColumnsAndUsedEncodings() { + // Given + InspectorTree sut = struct2col(2, 4096L, + 
List.of( + new SegmentSpec(0, 512, (byte) 0, CompressionScheme.NONE), + new SegmentSpec(512, 512, (byte) 0, CompressionScheme.LZ4)), + Set.of("fastlanes.bitpacked", "vortex.constant")); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out) + .contains("Vortex v2") + .contains("4.0 KB") + .contains("1000 rows") + .contains("Schema:") + .contains("id") + .contains("value") + .contains("Registered encodings: vortex.flat, fastlanes.bitpacked, vortex.constant") + .contains("Used encodings: ") + .contains("Segments: 2") + .contains("[0] off=0 len=512 B compression=NONE") + .contains("[1] off=512 len=512 B compression=LZ4") + .contains("Layout:") + .contains("struct (1000 rows)") + .contains("[fastlanes.bitpacked]"); + } + + @Test + void render_segmentTable_listsEverySegment() { + // Given — verify table prints one line per segment with offset + size + compression + List specs = List.of( + new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD), + new SegmentSpec(1024, 2048, (byte) 0, CompressionScheme.NONE), + new SegmentSpec(3072, 4096, (byte) 0, CompressionScheme.LZ4)); + InspectorTree sut = struct2col(1, 8192L, specs, Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out) + .contains("[0] off=0 len=1.0 KB compression=ZSTD") + .contains("[1] off=1024 len=2.0 KB compression=NONE") + .contains("[2] off=3072 len=4.0 KB compression=LZ4"); + } + + @Test + void render_nonStruct_inlinesSingleColumnLayout() { + // Given + Layout leaf = new Layout("vortex.flat", 100, null, List.of(), List.of()); + InspectorTree.Node root = new InspectorTree.Node(leaf, Optional.empty(), Set.of(), ArrayStats.empty(), List.of()); + InspectorTree sut = new InspectorTree( + 1, 256L, + new DType.Primitive(PType.I32, false), + List.of("vortex.flat"), Set.of(), + List.of(new SegmentSpec(0, 256, (byte) 0, CompressionScheme.NONE)), + 100L, root); + + // When + String out = VortexInspector.render(sut); + + // Then + 
assertThat(out).contains("vortex.flat(100 rows)"); + assertThat(out).doesNotContain("struct ("); + } + + @Test + void render_formatsBytesAcrossUnits() { + // Given — bytes / KB / MB boundaries + List oneSeg = List.of(new SegmentSpec(0, 1, (byte) 0, CompressionScheme.NONE)); + InspectorTree small = struct2col(1, 512L, oneSeg, Set.of()); + InspectorTree medium = struct2col(1, 2048L, oneSeg, Set.of()); + InspectorTree large = struct2col(1, 5L * 1024 * 1024, oneSeg, Set.of()); + + // When / Then + assertThat(VortexInspector.render(small)).contains("512 B"); + assertThat(VortexInspector.render(medium)).contains("2.0 KB"); + assertThat(VortexInspector.render(large)).contains("5.0 MB"); + } + + @Test + void render_chainsChildrenWithArrow() { + // Given — nested zoned → chunked → flat chain + Layout flat = new Layout("vortex.flat", 1000, null, List.of(), List.of()); + Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(flat), List.of()); + Layout zoned = new Layout("vortex.stats", 1000, null, List.of(chunked), List.of()); + Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(zoned), List.of()); + + InspectorTree.Node flatN = new InspectorTree.Node(flat, Optional.empty(), Set.of(), ArrayStats.empty(), List.of()); + InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(flatN)); + InspectorTree.Node zonedN = new InspectorTree.Node(zoned, Optional.of("v"), Set.of(), ArrayStats.empty(), List.of(chunkedN)); + InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(zonedN)); + + InspectorTree sut = new InspectorTree( + 1, 1024L, + new DType.Struct(List.of("v"), List.of(new DType.Primitive(PType.I32, false)), false), + List.of("vortex.flat"), Set.of(), + List.of(), 1000L, rootN); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).contains("vortex.stats(1000 rows) → vortex.chunked(1000 
rows) → vortex.flat(1000 rows)"); + } + + @Test + void render_aggregatesMinMaxAcrossChunks() { + // Given — column with two chunked Flat leaves; aggregate should fold each leaf's stats + Layout chunk1 = new Layout("vortex.flat", 500, null, List.of(), List.of()); + Layout chunk2 = new Layout("vortex.flat", 500, null, List.of(), List.of()); + Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(chunk1, chunk2), List.of()); + Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(chunked), List.of()); + + InspectorTree.Node c1 = new InspectorTree.Node(chunk1, Optional.empty(), Set.of(), + new ArrayStats(10L, 50L, null, null, null, null), List.of()); + InspectorTree.Node c2 = new InspectorTree.Node(chunk2, Optional.empty(), Set.of(), + new ArrayStats(5L, 100L, null, null, null, null), List.of()); + InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.of("id"), + Set.of("vortex.flat"), ArrayStats.empty(), List.of(c1, c2)); + InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), + Set.of("vortex.flat"), ArrayStats.empty(), List.of(chunkedN)); + + InspectorTree sut = new InspectorTree(1, 1024L, + new DType.Struct(List.of("id"), List.of(new DType.Primitive(PType.I64, false)), false), + List.of("vortex.flat"), Set.of(), List.of(), 1000L, rootN); + + // When + String out = VortexInspector.render(sut); + + // Then — min over (10, 5) = 5; max over (50, 100) = 100 + assertThat(out).contains("min=5 max=100"); + } + + @Test + void render_columnWithoutStats_omitsMinMax() { + // Given — default tree has ArrayStats.empty() on every node + InspectorTree sut = struct2col(1, 100L, List.of(), Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).doesNotContain("min="); + assertThat(out).doesNotContain("max="); + } + + @Test + void render_emptyUsedEncodings_omitsBracketSuffix() { + // Given — column with no resolved encodings should not emit " []" noise + InspectorTree sut 
= struct2col(1, 100L, List.of(), Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).doesNotContain(" []"); + } + + private static InspectorTree struct2col(int version, long fileSize, List specs, Set usedById) { + Layout idLeaf = new Layout("fastlanes.bitpacked", 1000, null, List.of(), List.of()); + Layout valLeaf = new Layout("vortex.constant", 1000, null, List.of(), List.of()); + Layout root = new Layout("vortex.struct", 1000, null, List.of(idLeaf, valLeaf), List.of()); + + InspectorTree.Node idNode = new InspectorTree.Node(idLeaf, + Optional.of("id"), Set.of("fastlanes.bitpacked"), ArrayStats.empty(), List.of()); + InspectorTree.Node valNode = new InspectorTree.Node(valLeaf, + Optional.of("value"), Set.of("vortex.constant"), ArrayStats.empty(), List.of()); + InspectorTree.Node rootNode = new InspectorTree.Node(root, + Optional.empty(), Set.of("fastlanes.bitpacked", "vortex.constant"), + ArrayStats.empty(), List.of(idNode, valNode)); + + DType dtype = new DType.Struct( + List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)), + false); + + return new InspectorTree(version, fileSize, dtype, + List.of("vortex.flat", "fastlanes.bitpacked", "vortex.constant"), + usedById, specs, 1000L, rootNode); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java new file mode 100644 index 00000000..7ab3c4c2 --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java @@ -0,0 +1,42 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class AnsiTest { + + private static final char ESC = (char) 0x1B; + + @Test + void escConstant_isAsciiEscapeByte() { + // Given / When / Then — every CSI sequence relies on this being 0x1B + 
assertThat(Ansi.ESC).isEqualTo(String.valueOf(ESC)); + } + + @Test + void moveTo_formatsRowAndColumn() { + // Given / When / Then + assertThat(Ansi.moveTo(5, 12)).isEqualTo(ESC + "[5;12H"); + } + + @Test + void fgAndBg_emitSgrCode() { + // Given / When / Then + assertThat(Ansi.fg(31)).isEqualTo(ESC + "[31m"); + assertThat(Ansi.bg(42)).isEqualTo(ESC + "[42m"); + } + + @Test + void clearAndCursorConstants_startWithCsi() { + // Given / When / Then — guard against accidental edits dropping the ESC prefix + String csi = ESC + "["; + assertThat(Ansi.CLEAR_SCREEN).startsWith(csi).endsWith("2J"); + assertThat(Ansi.CURSOR_HOME).startsWith(csi).endsWith("H"); + assertThat(Ansi.HIDE_CURSOR).isEqualTo(csi + "?25l"); + assertThat(Ansi.SHOW_CURSOR).isEqualTo(csi + "?25h"); + assertThat(Ansi.ENTER_ALT_SCREEN).isEqualTo(csi + "?1049h"); + assertThat(Ansi.EXIT_ALT_SCREEN).isEqualTo(csi + "?1049l"); + assertThat(Ansi.RESET).isEqualTo(csi + "0m"); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java new file mode 100644 index 00000000..69009f5a --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java @@ -0,0 +1,128 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; + +class KeyDecoderTest { + + @Test + void next_arrowUp_decodesCsiA() throws IOException { + // Given + ByteArrayInputStream in = bytes(0x1B, '[', 'A'); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.ArrowUp.INSTANCE); + } + + @Test + void next_allArrows_decodeIndependently() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'A'))).isEqualTo(Key.ArrowUp.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 
'B'))).isEqualTo(Key.ArrowDown.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'C'))).isEqualTo(Key.ArrowRight.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'D'))).isEqualTo(Key.ArrowLeft.INSTANCE); + } + + @Test + void next_homeAndEnd_decodeBothCsiAndTildeForms() throws IOException { + // Given / When / Then — xterm sends ESC[H/F; rxvt and others send ESC[1~/4~ + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'H'))).isEqualTo(Key.Home.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'F'))).isEqualTo(Key.End.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', '1', '~'))).isEqualTo(Key.Home.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', '4', '~'))).isEqualTo(Key.End.INSTANCE); + } + + @Test + void next_pageUpAndDown_decodeTildeSequences() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes(0x1B, '[', '5', '~'))).isEqualTo(Key.PageUp.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', '6', '~'))).isEqualTo(Key.PageDown.INSTANCE); + } + + @Test + void next_bareEscape_returnsEscapeWhenNoFollowupAvailable() throws IOException { + // Given — single ESC byte with no further input + ByteArrayInputStream in = bytes(0x1B); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.Escape.INSTANCE); + } + + @Test + void next_enterFromCrAndLf_bothDecodeToEnter() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes('\r'))).isEqualTo(Key.Enter.INSTANCE); + assertThat(KeyDecoder.next(bytes('\n'))).isEqualTo(Key.Enter.INSTANCE); + } + + @Test + void next_printableChar_returnsChar() throws IOException { + // Given + ByteArrayInputStream in = bytes('q'); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isInstanceOf(Key.Char.class); + assertThat(((Key.Char) sut).value()).isEqualTo('q'); + } + + @Test + void next_eof_returnsEof() throws IOException { + // Given — empty stream + ByteArrayInputStream in = bytes(); + + // When + Key 
sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.Eof.INSTANCE); + } + + @Test + void next_unknownCsiLetter_yieldsEscape() throws IOException { + // Given — ESC [ Z is xterm reverse-tab; we don't recognise it + ByteArrayInputStream in = bytes(0x1B, '[', 'Z'); + + // When + Key sut = KeyDecoder.next(in); + + // Then — defensive: never emit garbage as Char on an unknown CSI + assertThat(sut).isEqualTo(Key.Escape.INSTANCE); + } + + @Test + void next_multiDigitTildeCode_handlesTwoDigits() throws IOException { + // Given — ESC [ 15 ~ is xterm F5; we treat unknown numbers as Escape but + // must still consume the trailing '~' rather than leak it as a character + ByteArrayInputStream in = bytes(0x1B, '[', '1', '5', '~', 'x'); + + // When + Key first = KeyDecoder.next(in); + Key second = KeyDecoder.next(in); + + // Then + assertThat(first).isEqualTo(Key.Escape.INSTANCE); + assertThat(second).isInstanceOf(Key.Char.class); + assertThat(((Key.Char) second).value()).isEqualTo('x'); + } + + private static ByteArrayInputStream bytes(int... bs) { + byte[] out = new byte[bs.length]; + for (int i = 0; i < bs.length; i++) { + out[i] = (byte) bs[i]; + } + return new ByteArrayInputStream(out); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java new file mode 100644 index 00000000..3bb9dfbf --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java @@ -0,0 +1,63 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledOnOs; +import org.junit.jupiter.api.condition.OS; + +import static org.assertj.core.api.Assertions.assertThat; + +/// Smoke test for the FFM-based Windows console binding. +/// +/// Runs only on Windows (other OSes lack kernel32). 
The goal is to catch +/// missing-symbol / signature-mismatch regressions in CI without requiring +/// a real interactive TTY: +/// +/// - Class load alone forces every {@code Linker.downcallHandle} to resolve +/// its kernel32 symbol. A missing entry point throws +/// {@link UnsatisfiedLinkError} during static initialization. +/// - Bit-flag math for the VT mode toggles is verified directly so a typo +/// in a constant fails here, not in a customer's terminal. +class WindowsTerminalSmokeTest { + + @Test + @EnabledOnOs(OS.WINDOWS) + void classLoad_resolvesEveryKernel32Symbol() { + // Given / When — touching the class is meant to trigger static initialization, which calls + // Linker.downcallHandle for every imported kernel32 function. NOTE(review): a bare class literal may not initialize the class (JLS 12.4.1) — confirm. + Class sut = WindowsTerminal.class; + + // Then + assertThat(sut).isNotNull(); + assertThat(sut.getDeclaredMethods()).isNotEmpty(); + } + + @Test + @EnabledOnOs(OS.WINDOWS) + void modeFlagMath_inputModeMasksLineEchoProcessed_andSetsVtInput() { + // Given — typical default cmd.exe input mode: line + echo + processed input enabled + int defaultInMode = 0x0001 | 0x0002 | 0x0004; // PROCESSED | LINE | ECHO + + // When — same transform that WindowsTerminal.open applies + int raw = (defaultInMode & ~(0x0002 | 0x0004 | 0x0001)) | 0x0200; + + // Then — line / echo / processed cleared, VT input set + assertThat(raw & 0x0002).isZero(); + assertThat(raw & 0x0004).isZero(); + assertThat(raw & 0x0001).isZero(); + assertThat(raw & 0x0200).isEqualTo(0x0200); + } + + @Test + @EnabledOnOs(OS.WINDOWS) + void modeFlagMath_outputModeAddsVtProcessing() { + // Given — default output mode + int defaultOutMode = 0x0001; // PROCESSED_OUTPUT only + + // When + int withVt = defaultOutMode | 0x0004 | 0x0001; + + // Then + assertThat(withVt & 0x0004).isEqualTo(0x0004); + assertThat(withVt & 0x0001).isEqualTo(0x0001); + } +} diff --git a/integration/pom.xml b/integration/pom.xml index c4e2f978..2c33101b 100644 --- a/integration/pom.xml +++ b/integration/pom.xml @@ -38,6 +38,11 @@ vortex-reader 
test + + io.github.dfa1.vortex + vortex-inspector + test + dev.vortex vortex-jni diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java index 7f749562..885cb767 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java @@ -2,7 +2,7 @@ import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java index b98a2153..8ead05dc 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java @@ -5,7 +5,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java index ee9f4839..30c8844f 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java +++ 
b/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java @@ -19,7 +19,7 @@ import io.github.dfa1.vortex.core.array.ShortArray; import io.github.dfa1.vortex.core.array.VarBinArray; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import io.github.dfa1.vortex.scan.Chunk; import org.apache.arrow.memory.BufferAllocator; diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java index f3d7ed76..160a9599 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java @@ -5,7 +5,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; diff --git a/performance/pom.xml b/performance/pom.xml index 1794cf4b..6bac78f8 100644 --- a/performance/pom.xml +++ b/performance/pom.xml @@ -39,6 +39,11 @@ vortex-reader compile + + io.github.dfa1.vortex + vortex-inspector + compile + dev.vortex vortex-jni diff --git a/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java b/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java index a5b1a9de..77c8cd5d 100644 --- a/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java +++ 
b/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java @@ -9,7 +9,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import io.github.dfa1.vortex.parquet.ImportOptions; import io.github.dfa1.vortex.parquet.ParquetImporter; diff --git a/pom.xml b/pom.xml index 9326d963..1f96b9b6 100644 --- a/pom.xml +++ b/pom.xml @@ -46,6 +46,7 @@ parquet bom cli + inspector integration performance @@ -109,6 +110,11 @@ vortex-parquet ${project.version} + + io.github.dfa1.vortex + vortex-inspector + ${project.version} + de.siegmar fastcsv diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java index b48ffc59..1118cd6b 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.encoding.EncodingRegistry; import io.github.dfa1.vortex.scan.ScanIterator; import io.github.dfa1.vortex.scan.ScanOptions; @@ -29,8 +30,8 @@ public interface VortexHandle extends Closeable { /// ///

Internal escape hatch. This method is on the public /// {@link VortexHandle} interface only because {@link io.github.dfa1.vortex.scan.ScanIterator} - /// and {@link VortexInspector} live in sibling packages and need cross-package access to the - /// raw backing segment. It is not part of the supported stability contract; signatures and + /// and the inspector module's {@code VortexInspector} live in sibling packages and need + /// cross-package access to the raw backing segment. It is not part of the supported stability contract; signatures and /// semantics may change without a deprecation cycle. Application code should rely on /// {@link #scan(ScanOptions)} and the typed array accessors instead. /// @@ -44,6 +45,17 @@ public interface VortexHandle extends Closeable { ScanIterator scan(ScanOptions options); + /// Returns the {@link EncodingRegistry} this handle was opened with. + /// + ///

Internal escape hatch. Exposed for tooling + /// (e.g. the inspector's dictionary preview) that needs to decode an + /// internal subtree node directly via {@link io.github.dfa1.vortex.encoding.FlatSegmentDecoder}. + /// Not part of the supported stability contract; signatures may change + /// without deprecation. + /// + /// @return the registry used to resolve encoding ids during scan + EncodingRegistry registry(); + @Override void close(); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 8fe1a037..90f40e85 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -240,6 +240,11 @@ public ScanIterator scan(ScanOptions options) { return new ScanIterator(this, registry, options); } + @Override + public EncodingRegistry registry() { + return registry; + } + @Override public void close() { arena.close(); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java deleted file mode 100644 index 1c7eae20..00000000 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java +++ /dev/null @@ -1,175 +0,0 @@ -package io.github.dfa1.vortex.io; - -import io.github.dfa1.vortex.core.DType; -import io.github.dfa1.vortex.core.Footer; -import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.SegmentSpec; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; - -/// Produces a human-readable summary of a Vortex file's structure and encodings. 
-public final class VortexInspector { - - private VortexInspector() { - } - - public static String inspect(VortexHandle reader) { - Footer footer = reader.footer(); - Layout layout = reader.layout(); - DType dtype = reader.dtype(); - - var sb = new StringBuilder(); - - sb.append("Vortex v").append(reader.version()) - .append(" ").append(formatBytes(reader.fileSize())).append('\n'); - sb.append('\n'); - - sb.append("Schema:\n"); - appendSchema(sb, dtype, " "); - sb.append('\n'); - - sb.append("Registered encodings: ").append(String.join(", ", footer.arraySpecs())).append('\n'); - sb.append('\n'); - - Set usedEncodings = collectUsedEncodings(reader); - sb.append("Used encodings: ").append(String.join(", ", usedEncodings)).append('\n'); - sb.append('\n'); - - int segCount = footer.segmentSpecs().size(); - long totalBytes = footer.segmentSpecs().stream().mapToLong(SegmentSpec::length).sum(); - sb.append("Segments: ").append(segCount) - .append(" total ").append(formatBytes(totalBytes)).append('\n'); - sb.append('\n'); - - sb.append("Layout:\n"); - List colNames = (dtype instanceof DType.Struct s) ? 
s.fieldNames() : List.of(); - appendLayout(sb, layout, colNames, reader, " "); - - return sb.toString(); - } - - // ── Used encodings ──────────────────────────────────────────────────────── - - private static Set collectUsedEncodings(VortexHandle reader) { - var used = new LinkedHashSet(); - collectLayoutEncodings(reader.layout(), reader, used); - return used; - } - - private static void collectLayoutEncodings(Layout layout, VortexHandle reader, Set used) { - if (layout.isFlat() && !layout.segments().isEmpty()) { - int segIdx = layout.segments().getFirst(); - SegmentSpec spec = reader.footer().segmentSpecs().get(segIdx); - if (spec.compression().code == 0) { - MemorySegment seg = reader.slice(spec.offset(), spec.length()); - peekRootEncoding(seg, reader.footer().arraySpecs(), used); - } - } - for (Layout child : layout.children()) { - collectLayoutEncodings(child, reader, used); - } - } - - /// Reads only the root ArrayNode encoding — ignores child/stats sub-nodes. - private static void peekRootEncoding(MemorySegment seg, List arraySpecs, Set used) { - int segLen = (int) seg.byteSize(); - ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - int fbLen = bb.getInt(segLen - 4); - int fbStart = segLen - 4 - fbLen; - ByteBuffer fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); - var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); - if (fbArray.root() != null) { - used.add(arraySpecs.get(fbArray.root().encoding())); - } - } - - // ── Layout tree ─────────────────────────────────────────────────────────── - - @SuppressWarnings("SameParameterValue") - private static void appendLayout(StringBuilder sb, Layout layout, List colNames, - VortexHandle reader, String indent) { - if (layout.isStruct()) { - sb.append(indent).append("struct (").append(layout.rowCount()).append(" rows)\n"); - for (int i = 0; i < layout.children().size(); i++) { - String name = i < colNames.size() ? 
colNames.get(i) : "col" + i; - Set colEncodings = new LinkedHashSet<>(); - collectLayoutEncodings(layout.children().get(i), reader, colEncodings); - sb.append(indent).append(" ").append(name).append(": "); - appendLayoutInline(sb, layout.children().get(i)); - if (!colEncodings.isEmpty()) { - sb.append(" [").append(String.join(", ", colEncodings)).append("]"); - } - sb.append('\n'); - } - } else { - sb.append(indent); - appendLayoutInline(sb, layout); - sb.append('\n'); - } - } - - private static void appendLayoutInline(StringBuilder sb, Layout layout) { - sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)"); - if (layout.children().isEmpty()) { - return; - } - sb.append(" → "); - if (layout.children().size() == 1) { - appendLayoutInline(sb, layout.children().getFirst()); - } else { - sb.append(layout.children().size()).append("× ["); - appendLayoutInline(sb, layout.children().getFirst()); - sb.append("]"); - } - } - - // ── Formatting ──────────────────────────────────────────────────────────── - - @SuppressWarnings("SameParameterValue") - private static void appendSchema(StringBuilder sb, DType dtype, String indent) { - if (dtype instanceof DType.Struct s) { - int maxLen = s.fieldNames().stream().mapToInt(String::length).max().orElse(0); - for (int i = 0; i < s.fieldNames().size(); i++) { - String name = s.fieldNames().get(i); - sb.append(indent).append(name) - .append(" ".repeat(maxLen - name.length() + 1)) - .append(formatDType(s.fieldTypes().get(i))).append('\n'); - } - } else { - sb.append(indent).append(formatDType(dtype)).append('\n'); - } - } - - private static String formatDType(DType dtype) { - return switch (dtype) { - case DType.Primitive(var pt, var nullable) -> pt.name() + (nullable ? "?" : ""); - case DType.Utf8(var nullable) -> "utf8" + (nullable ? "?" : ""); - case DType.Binary(var nullable) -> "binary" + (nullable ? "?" : ""); - case DType.Bool(var nullable) -> "bool" + (nullable ? "?" 
: ""); - case DType.Null ignored -> "null"; - case DType.Decimal(var p, var s, var nullable) -> "decimal(" + p + "," + s + ")" + (nullable ? "?" : ""); - case DType.Struct ignored -> "struct"; - case DType.List(var elem, var nullable) -> "list<" + formatDType(elem) + ">" + (nullable ? "?" : ""); - case DType.FixedSizeList(var elem, var size, var nullable) -> - "list<" + formatDType(elem) + ">[" + size + "]" + (nullable ? "?" : ""); - case DType.Extension(var id, var storage, var meta, var nullable) -> - "ext<" + id + ">" + (nullable ? "?" : ""); - case DType.Variant(var nullable) -> "variant" + (nullable ? "?" : ""); - }; - } - - private static String formatBytes(long bytes) { - if (bytes < 1024) { - return bytes + " B"; - } - if (bytes < 1024 * 1024) { - return String.format("%.1f KB", bytes / 1024.0); - } - return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); - } -} diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index cd7fec87..7c8b9ce3 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -163,6 +163,11 @@ public ScanIterator scan(ScanOptions options) { return new ScanIterator(this, registry, options); } + @Override + public EncodingRegistry registry() { + return registry; + } + /// Aggregated per-column statistics (global min/max across all chunks). /// Returns an empty map if the root layout is not a struct. /// Columns with no embedded stats return [ArrayStats#empty()]. 
diff --git a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java index 235f7840..e96897ef 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java @@ -13,6 +13,7 @@ import io.github.dfa1.vortex.core.array.DoubleArray; import io.github.dfa1.vortex.core.array.EmptyArray; import io.github.dfa1.vortex.core.array.FloatArray; +import io.github.dfa1.vortex.core.array.GenericArray; import io.github.dfa1.vortex.core.array.IntArray; import io.github.dfa1.vortex.core.array.LongArray; import io.github.dfa1.vortex.core.array.MaskedArray; @@ -265,6 +266,7 @@ private static Array truncateArray(Array arr, long rows) { yield new MaskedArray(truncChild, truncValidity); } case EmptyArray a -> a; + case GenericArray a -> a.withLength(rows); default -> throw new VortexException("limit: truncation not supported for " + arr.getClass().getSimpleName()); };