From da610c7739ccaa881f39e1df769d1b3aa1d5acb5 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 18:27:43 +0200 Subject: [PATCH 01/37] feat(inspector): extract inspector module with Lanterna TUI Moves VortexInspector out of reader into a new vortex-inspector module and adds InspectorTree (immutable structural snapshot) plus a Lanterna two-pane TUI (`inspect --tui`). CLI also gains http(s):// URL support. The text report and TUI now display total row count and per-segment size, offset, and compression scheme, resolving every segment index against the footer's segment table. Co-Authored-By: Claude Opus 4.7 --- bom/pom.xml | 5 + cli/pom.xml | 4 + .../dfa1/vortex/cli/InspectCommand.java | 59 +++- .../io/github/dfa1/vortex/cli/VortexCli.java | 2 +- inspector/pom.xml | 53 ++++ .../dfa1/vortex/inspect/InspectorTree.java | 142 +++++++++ .../dfa1/vortex/inspect/VortexInspector.java | 144 +++++++++ .../vortex/inspect/VortexInspectorTui.java | 285 ++++++++++++++++++ .../vortex/inspect/InspectorTreeTest.java | 165 ++++++++++ .../vortex/inspect/VortexInspectorTest.java | 160 ++++++++++ integration/pom.xml | 5 + .../vortex/integration/InspectForTest.java | 2 +- ...OhlcEncodingInspectionIntegrationTest.java | 2 +- ...stJavaReaderComparisonIntegrationTest.java | 2 +- .../VortexInspectorIntegrationTest.java | 2 +- performance/pom.xml | 5 + .../performance/TaxiLayoutInspector.java | 2 +- pom.xml | 12 + .../github/dfa1/vortex/io/VortexHandle.java | 4 +- .../dfa1/vortex/io/VortexInspector.java | 175 ----------- 20 files changed, 1037 insertions(+), 193 deletions(-) create mode 100644 inspector/pom.xml create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java create mode 100644 
inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java create mode 100644 inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java delete mode 100644 reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java diff --git a/bom/pom.xml b/bom/pom.xml index 1bf420e3..071e4904 100644 --- a/bom/pom.xml +++ b/bom/pom.xml @@ -47,6 +47,11 @@ vortex-jdbc ${project.version} + + io.github.dfa1.vortex + vortex-inspector + ${project.version} + diff --git a/cli/pom.xml b/cli/pom.xml index c6f82cd3..d5d4eccd 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -30,6 +30,10 @@ io.github.dfa1.vortex vortex-reader + + io.github.dfa1.vortex + vortex-inspector + org.junit.jupiter diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java index c79d130e..b72775c3 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java @@ -1,9 +1,14 @@ package io.github.dfa1.vortex.cli; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspectorTui; +import io.github.dfa1.vortex.io.VortexHandle; +import io.github.dfa1.vortex.io.VortexHttpReader; import io.github.dfa1.vortex.io.VortexReader; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; @@ -13,21 +18,55 @@ private InspectCommand() { } static int run(String[] args) { - if (args.length != 2) { - System.err.println("usage: inspect "); - return ExitStatus.USAGE_ERROR; + boolean tui = false; + String target = null; + for (int i = 1; i < args.length; i++) { + if ("--tui".equals(args[i])) { + tui = true; + } else if (target == null) { + target = args[i]; + } else { + return usage(); + } } - Path path = Path.of(args[1]); - if (!Files.exists(path)) { - 
System.err.println("file not found: " + path); - return ExitStatus.FILE_NOT_FOUND; + if (target == null) { + return usage(); } - try (VortexReader reader = VortexReader.open(path)) { - System.out.print(VortexInspector.inspect(reader)); + try (VortexHandle handle = open(target)) { + if (handle == null) { + return ExitStatus.FILE_NOT_FOUND; + } + if (tui) { + VortexInspectorTui.show(handle); + } else { + System.out.print(VortexInspector.inspect(handle)); + } return ExitStatus.OK; } catch (IOException e) { System.err.println("error: " + e.getMessage()); return ExitStatus.ERROR; } } + + private static VortexHandle open(String target) throws IOException { + if (target.startsWith("http://") || target.startsWith("https://")) { + try { + return VortexHttpReader.open(new URI(target)); + } catch (URISyntaxException e) { + System.err.println("invalid URL: " + target); + return null; + } + } + Path path = Path.of(target); + if (!Files.exists(path)) { + System.err.println("file not found: " + path); + return null; + } + return VortexReader.open(path); + } + + private static int usage() { + System.err.println("usage: inspect [--tui] "); + return ExitStatus.USAGE_ERROR; + } } diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java index 2279e2c9..58f307ab 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java @@ -35,7 +35,7 @@ public static void main(String[] args) { static void printUsage(PrintStream out) { out.println("Usage: java -jar vortex.jar [args]"); - out.println(" inspect print file structure"); + out.println(" inspect [--tui] print file structure (or open TUI); url is http(s)://"); out.println(" export write CSV to stdout"); out.println(" import [out.vortex] convert CSV or Parquet to Vortex"); out.println(" schema print dtype (machine-readable)"); diff --git a/inspector/pom.xml b/inspector/pom.xml new file mode 
100644 index 00000000..9e898eb3 --- /dev/null +++ b/inspector/pom.xml @@ -0,0 +1,53 @@ + + + 4.0.0 + + io.github.dfa1.vortex + vortex-java + 0.5.0-SNAPSHOT + + + vortex-inspector + + vortex-inspector + Inspector for the Vortex columnar file format. Produces a structural tree model of a file's + schema, layout, and encodings, plus text and Lanterna-based TUI renderers. + + + + + + io.github.dfa1.vortex + vortex-core + + + io.github.dfa1.vortex + vortex-reader + + + com.google.flatbuffers + flatbuffers-java + + + com.googlecode.lanterna + lanterna + + + + + org.junit.jupiter + junit-jupiter + test + + + org.assertj + assertj-core + test + + + org.mockito + mockito-junit-jupiter + test + + + diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java new file mode 100644 index 00000000..7ffbdd03 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -0,0 +1,142 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Footer; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.fbs.Array; +import io.github.dfa1.vortex.io.VortexHandle; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/// Structured snapshot of a Vortex file's schema, layout, and encoding usage. +/// +/// Built once from a [VortexHandle] via [#build(VortexHandle)] and then consumed by renderers +/// (text or TUI). Immutable — does not retain the handle. 
+/// +/// @param version Vortex file format version stored in the trailer +/// @param fileSize total file length in bytes +/// @param dtype top-level data type (typically [DType.Struct]) +/// @param registeredEncodings encoding IDs declared in the file footer +/// @param usedEncodings encoding IDs actually referenced by Flat layout segments +/// @param segmentSpecs all on-disk segments referenced by the footer, in index order +/// @param totalRowCount total logical rows in the file (root layout's row count) +/// @param root root layout node +public record InspectorTree( + int version, + long fileSize, + DType dtype, + List registeredEncodings, + Set usedEncodings, + List segmentSpecs, + long totalRowCount, + Node root) { + + /// Number of on-disk segments referenced by the footer. + /// + /// @return segment count + public int segmentCount() { + return segmentSpecs.size(); + } + + /// Sum of segment lengths in bytes. + /// + /// @return total segment bytes + public long totalSegmentBytes() { + long total = 0; + for (SegmentSpec spec : segmentSpecs) { + total += spec.length(); + } + return total; + } + + /// One layout node in the inspector tree. + /// + /// @param layout underlying [Layout] from the file footer + /// @param fieldName column name when this node is a direct child of a top-level struct + /// @param usedEncodings encoding IDs referenced by this subtree + /// @param children child nodes + public record Node( + Layout layout, + Optional fieldName, + Set usedEncodings, + List children) { + } + + /// Builds an inspector tree from an open Vortex file handle. + /// + /// @param handle open file handle + /// @return immutable inspector tree + public static InspectorTree build(VortexHandle handle) { + Footer footer = handle.footer(); + Layout layout = handle.layout(); + DType dtype = handle.dtype(); + + List colNames = (dtype instanceof DType.Struct s) ? 
s.fieldNames() : List.of(); + Set overallUsed = new LinkedHashSet<>(); + Node root = buildNode(layout, Optional.empty(), handle, footer.arraySpecs(), overallUsed); + if (layout.isStruct()) { + List namedChildren = new ArrayList<>(root.children().size()); + for (int i = 0; i < root.children().size(); i++) { + Node child = root.children().get(i); + String name = i < colNames.size() ? colNames.get(i) : "col" + i; + namedChildren.add(new Node(child.layout(), Optional.of(name), child.usedEncodings(), child.children())); + } + root = new Node(root.layout(), Optional.empty(), root.usedEncodings(), List.copyOf(namedChildren)); + } + + return new InspectorTree( + handle.version(), + handle.fileSize(), + dtype, + footer.arraySpecs(), + Set.copyOf(overallUsed), + footer.segmentSpecs(), + layout.rowCount(), + root); + } + + private static Node buildNode(Layout layout, Optional fieldName, VortexHandle handle, + List arraySpecs, Set overallUsed) { + Set localUsed = new LinkedHashSet<>(); + if (layout.isFlat() && !layout.segments().isEmpty()) { + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); + if (spec.compression().code == 0) { + MemorySegment seg = handle.slice(spec.offset(), spec.length()); + String enc = peekRootEncoding(seg, arraySpecs); + if (enc != null) { + localUsed.add(enc); + overallUsed.add(enc); + } + } + } + List children = new ArrayList<>(layout.children().size()); + for (Layout child : layout.children()) { + Node n = buildNode(child, Optional.empty(), handle, arraySpecs, overallUsed); + localUsed.addAll(n.usedEncodings()); + children.add(n); + } + return new Node(layout, fieldName, Set.copyOf(localUsed), List.copyOf(children)); + } + + private static String peekRootEncoding(MemorySegment seg, List arraySpecs) { + int segLen = (int) seg.byteSize(); + ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + int fbLen = bb.getInt(segLen - 4); + int fbStart = segLen - 4 - fbLen; + ByteBuffer 
fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); + Array fbArray = Array.getRootAsArray(fbBuf); + if (fbArray.root() == null) { + return null; + } + return arraySpecs.get(fbArray.root().encoding()); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java new file mode 100644 index 00000000..dad55b75 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java @@ -0,0 +1,144 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.io.VortexHandle; + +import java.util.List; + +/// Produces a human-readable summary of a Vortex file's structure and encodings. +public final class VortexInspector { + + private VortexInspector() { + } + + /// Builds a multi-line text report for the given file handle. + /// + /// @param handle open file handle + /// @return formatted report + public static String inspect(VortexHandle handle) { + return render(InspectorTree.build(handle)); + } + + /// Builds a multi-line text report from a pre-built inspector tree. 
+ /// + /// @param tree inspector tree + /// @return formatted report + public static String render(InspectorTree tree) { + var sb = new StringBuilder(); + + sb.append("Vortex v").append(tree.version()) + .append(" ").append(formatBytes(tree.fileSize())) + .append(" ").append(tree.totalRowCount()).append(" rows").append('\n'); + sb.append('\n'); + + sb.append("Schema:\n"); + appendSchema(sb, tree.dtype(), " "); + sb.append('\n'); + + sb.append("Registered encodings: ").append(String.join(", ", tree.registeredEncodings())).append('\n'); + sb.append('\n'); + + sb.append("Used encodings: ").append(String.join(", ", tree.usedEncodings())).append('\n'); + sb.append('\n'); + + sb.append("Segments: ").append(tree.segmentCount()) + .append(" total ").append(formatBytes(tree.totalSegmentBytes())).append('\n'); + appendSegmentTable(sb, tree.segmentSpecs(), " "); + sb.append('\n'); + + sb.append("Layout:\n"); + appendLayout(sb, tree.root(), " "); + + return sb.toString(); + } + + private static void appendSegmentTable(StringBuilder sb, List specs, String indent) { + for (int i = 0; i < specs.size(); i++) { + SegmentSpec spec = specs.get(i); + sb.append(indent).append('[').append(i).append("] ") + .append("off=").append(spec.offset()) + .append(" len=").append(formatBytes(spec.length())) + .append(" comp=").append(spec.compression().name()) + .append('\n'); + } + } + + private static void appendLayout(StringBuilder sb, InspectorTree.Node node, String indent) { + Layout layout = node.layout(); + if (layout.isStruct()) { + sb.append(indent).append("struct (").append(layout.rowCount()).append(" rows)\n"); + for (InspectorTree.Node child : node.children()) { + String name = child.fieldName().orElse("?"); + sb.append(indent).append(" ").append(name).append(": "); + appendLayoutInline(sb, child.layout()); + if (!child.usedEncodings().isEmpty()) { + sb.append(" [").append(String.join(", ", child.usedEncodings())).append("]"); + } + sb.append('\n'); + } + } else { + sb.append(indent); 
+ appendLayoutInline(sb, layout); + sb.append('\n'); + } + } + + private static void appendLayoutInline(StringBuilder sb, Layout layout) { + sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)"); + if (layout.children().isEmpty()) { + return; + } + sb.append(" → "); + if (layout.children().size() == 1) { + appendLayoutInline(sb, layout.children().getFirst()); + } else { + sb.append(layout.children().size()).append("× ["); + appendLayoutInline(sb, layout.children().getFirst()); + sb.append("]"); + } + } + + private static void appendSchema(StringBuilder sb, DType dtype, String indent) { + if (dtype instanceof DType.Struct s) { + int maxLen = s.fieldNames().stream().mapToInt(String::length).max().orElse(0); + for (int i = 0; i < s.fieldNames().size(); i++) { + String name = s.fieldNames().get(i); + sb.append(indent).append(name) + .append(" ".repeat(maxLen - name.length() + 1)) + .append(formatDType(s.fieldTypes().get(i))).append('\n'); + } + } else { + sb.append(indent).append(formatDType(dtype)).append('\n'); + } + } + + private static String formatDType(DType dtype) { + return switch (dtype) { + case DType.Primitive(var pt, var nullable) -> pt.name() + (nullable ? "?" : ""); + case DType.Utf8(var nullable) -> "utf8" + (nullable ? "?" : ""); + case DType.Binary(var nullable) -> "binary" + (nullable ? "?" : ""); + case DType.Bool(var nullable) -> "bool" + (nullable ? "?" : ""); + case DType.Null ignored -> "null"; + case DType.Decimal(var p, var s, var nullable) -> "decimal(" + p + "," + s + ")" + (nullable ? "?" : ""); + case DType.Struct ignored -> "struct"; + case DType.List(var elem, var nullable) -> "list<" + formatDType(elem) + ">" + (nullable ? "?" : ""); + case DType.FixedSizeList(var elem, var size, var nullable) -> + "list<" + formatDType(elem) + ">[" + size + "]" + (nullable ? "?" : ""); + case DType.Extension(var id, var storage, var meta, var nullable) -> + "ext<" + id + ">" + (nullable ? "?" 
: ""); + case DType.Variant(var nullable) -> "variant" + (nullable ? "?" : ""); + }; + } + + private static String formatBytes(long bytes) { + if (bytes < 1024) { + return bytes + " B"; + } + if (bytes < 1024 * 1024) { + return String.format("%.1f KB", bytes / 1024.0); + } + return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java new file mode 100644 index 00000000..9464e876 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -0,0 +1,285 @@ +package io.github.dfa1.vortex.inspect; + +import com.googlecode.lanterna.TerminalSize; +import com.googlecode.lanterna.TextCharacter; +import com.googlecode.lanterna.TextColor; +import com.googlecode.lanterna.graphics.TextGraphics; +import com.googlecode.lanterna.input.KeyStroke; +import com.googlecode.lanterna.input.KeyType; +import com.googlecode.lanterna.screen.Screen; +import com.googlecode.lanterna.screen.TerminalScreen; +import com.googlecode.lanterna.terminal.DefaultTerminalFactory; +import com.googlecode.lanterna.terminal.Terminal; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.io.VortexHandle; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/// Lanterna-based interactive viewer for a Vortex file's inspector tree. +/// +/// Renders a two-pane terminal UI: layout tree on the left, node details on the right. +/// Quit with `q` or `Esc`. +public final class VortexInspectorTui { + + private VortexInspectorTui() { + } + + /// Opens a Lanterna terminal, builds an inspector tree, and runs the interactive viewer until quit. 
+ /// + /// @param handle open Vortex file handle + /// @throws IOException if the terminal cannot be initialized + public static void show(VortexHandle handle) throws IOException { + InspectorTree tree = InspectorTree.build(handle); + Terminal terminal = new DefaultTerminalFactory().createTerminal(); + Screen screen = new TerminalScreen(terminal); + screen.startScreen(); + try { + new Loop(screen, tree).run(); + } finally { + screen.stopScreen(); + } + } + + private static final class Loop { + private final Screen screen; + private final InspectorTree tree; + private final Set expanded = new HashSet<>(); + private int selected; + private int scrollOffset; + + Loop(Screen screen, InspectorTree tree) { + this.screen = screen; + this.tree = tree; + this.expanded.add(tree.root()); + } + + void run() throws IOException { + while (true) { + List items = flatten(); + if (selected >= items.size()) { + selected = items.size() - 1; + } + if (selected < 0) { + selected = 0; + } + render(items); + KeyStroke key = screen.readInput(); + if (key == null) { + continue; + } + if (isQuit(key)) { + return; + } + handleKey(key, items); + } + } + + private void handleKey(KeyStroke key, List items) { + switch (key.getKeyType()) { + case ArrowDown -> selected = Math.min(selected + 1, items.size() - 1); + case ArrowUp -> selected = Math.max(selected - 1, 0); + case ArrowRight, Enter -> { + if (selected < items.size()) { + InspectorTree.Node n = items.get(selected).node(); + if (!n.children().isEmpty()) { + expanded.add(n); + } + } + } + case ArrowLeft -> { + if (selected < items.size()) { + expanded.remove(items.get(selected).node()); + } + } + case PageDown -> selected = Math.min(selected + 10, items.size() - 1); + case PageUp -> selected = Math.max(selected - 10, 0); + case Home -> selected = 0; + case End -> selected = items.size() - 1; + default -> { + } + } + } + + private static boolean isQuit(KeyStroke key) { + if (key.getKeyType() == KeyType.Escape || key.getKeyType() == 
KeyType.EOF) { + return true; + } + return key.getKeyType() == KeyType.Character + && key.getCharacter() != null + && (key.getCharacter() == 'q' || key.getCharacter() == 'Q'); + } + + private List flatten() { + List out = new ArrayList<>(); + walk(tree.root(), 0, out); + return out; + } + + private void walk(InspectorTree.Node node, int depth, List out) { + out.add(new Item(node, depth)); + if (expanded.contains(node)) { + for (InspectorTree.Node child : node.children()) { + walk(child, depth + 1, out); + } + } + } + + private void render(List items) throws IOException { + screen.doResizeIfNecessary(); + TerminalSize size = screen.getTerminalSize(); + screen.clear(); + TextGraphics tg = screen.newTextGraphics(); + int width = size.getColumns(); + int height = size.getRows(); + int leftWidth = Math.max(20, width / 2); + + drawHeader(tg, width); + drawFooter(tg, width, height); + + int bodyTop = 2; + int bodyBottom = height - 2; + int bodyHeight = bodyBottom - bodyTop; + + if (selected < scrollOffset) { + scrollOffset = selected; + } else if (selected >= scrollOffset + bodyHeight) { + scrollOffset = selected - bodyHeight + 1; + } + + drawTree(tg, items, bodyTop, bodyHeight, leftWidth); + drawDivider(tg, leftWidth, bodyTop, bodyBottom); + if (!items.isEmpty()) { + drawDetails(tg, items.get(selected).node(), leftWidth + 2, bodyTop, width - leftWidth - 2, bodyHeight); + } + + screen.refresh(); + } + + private void drawHeader(TextGraphics tg, int width) { + tg.setForegroundColor(TextColor.ANSI.BLACK); + tg.setBackgroundColor(TextColor.ANSI.CYAN); + String header = " vortex-inspect — v" + tree.version() + + " " + formatBytes(tree.fileSize()) + + " rows=" + tree.totalRowCount() + + " segs=" + tree.segmentCount() + + " (" + formatBytes(tree.totalSegmentBytes()) + ")"; + String padded = pad(header, width); + tg.putString(0, 0, padded); + tg.setBackgroundColor(TextColor.ANSI.DEFAULT); + tg.setForegroundColor(TextColor.ANSI.DEFAULT); + } + + private void 
drawFooter(TextGraphics tg, int width, int height) { + tg.setForegroundColor(TextColor.ANSI.BLACK); + tg.setBackgroundColor(TextColor.ANSI.WHITE); + String hint = " ↑↓ nav →/Enter expand ← collapse q quit "; + tg.putString(0, height - 1, pad(hint, width)); + tg.setBackgroundColor(TextColor.ANSI.DEFAULT); + tg.setForegroundColor(TextColor.ANSI.DEFAULT); + } + + private void drawTree(TextGraphics tg, List items, int top, int rows, int leftWidth) { + for (int row = 0; row < rows; row++) { + int idx = scrollOffset + row; + if (idx >= items.size()) { + break; + } + Item item = items.get(idx); + boolean isSelected = idx == selected; + if (isSelected) { + tg.setForegroundColor(TextColor.ANSI.BLACK); + tg.setBackgroundColor(TextColor.ANSI.YELLOW); + } else { + tg.setForegroundColor(TextColor.ANSI.DEFAULT); + tg.setBackgroundColor(TextColor.ANSI.DEFAULT); + } + tg.putString(0, top + row, pad(renderItem(item), leftWidth - 1)); + } + tg.setBackgroundColor(TextColor.ANSI.DEFAULT); + tg.setForegroundColor(TextColor.ANSI.DEFAULT); + } + + private String renderItem(Item item) { + InspectorTree.Node node = item.node(); + String marker = node.children().isEmpty() + ? " " + : (expanded.contains(node) ? "▼ " : "▶ "); + String label = item.depth() == 0 && node.layout().isStruct() + ? 
"struct" + : node.fieldName().map(n -> n + ": ").orElse("") + node.layout().encodingId(); + return " ".repeat(item.depth() * 2) + marker + label + + " (" + node.layout().rowCount() + " rows)"; + } + + private void drawDivider(TextGraphics tg, int col, int top, int bottom) { + for (int y = top; y < bottom; y++) { + tg.setCharacter(col, y, new TextCharacter('│')); + } + } + + private void drawDetails(TextGraphics tg, InspectorTree.Node node, int col, int top, int width, int rows) { + List lines = new ArrayList<>(); + Layout layout = node.layout(); + lines.add("Encoding: " + layout.encodingId()); + node.fieldName().ifPresent(name -> lines.add("Field: " + name)); + lines.add("Rows: " + layout.rowCount()); + lines.add("Children: " + layout.children().size()); + if (!layout.segments().isEmpty()) { + long subtotal = 0; + for (int idx : layout.segments()) { + SegmentSpec spec = tree.segmentSpecs().get(idx); + subtotal += spec.length(); + } + lines.add("Segments: " + layout.segments().size() + + " (" + formatBytes(subtotal) + ")"); + for (int idx : layout.segments()) { + SegmentSpec spec = tree.segmentSpecs().get(idx); + lines.add(" [" + idx + "] off=" + spec.offset() + + " len=" + formatBytes(spec.length()) + + " comp=" + spec.compression().name()); + } + } else { + lines.add("Segments: 0"); + } + if (!node.usedEncodings().isEmpty()) { + lines.add(""); + lines.add("Used encodings:"); + for (String enc : node.usedEncodings()) { + lines.add(" • " + enc); + } + } + for (int i = 0; i < lines.size() && i < rows; i++) { + tg.putString(col, top + i, truncate(lines.get(i), width)); + } + } + + private record Item(InspectorTree.Node node, int depth) { + } + + private static String pad(String s, int width) { + if (s.length() >= width) { + return s.substring(0, width); + } + return s + " ".repeat(width - s.length()); + } + + private static String truncate(String s, int width) { + return s.length() > width ? 
s.substring(0, width) : s; + } + + private static String formatBytes(long bytes) { + if (bytes < 1024) { + return bytes + " B"; + } + if (bytes < 1024 * 1024) { + return String.format("%.1f KB", bytes / 1024.0); + } + return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); + } + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java new file mode 100644 index 00000000..acf32453 --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java @@ -0,0 +1,165 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.CompressionScheme; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Footer; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.io.VortexHandle; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.BDDMockito.given; + +@ExtendWith(MockitoExtension.class) +class InspectorTreeTest { + + @Mock + VortexHandle handle; + + @Test + void build_withStructDType_assignsFieldNamesToColumns() { + // Given + Layout idLeaf = leaf("vortex.constant", 10); + Layout valLeaf = leaf("vortex.constant", 10); + Layout root = struct(10, List.of(idLeaf, valLeaf)); + DType dtype = new DType.Struct( + List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)), + false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().fieldName()).isEmpty(); + 
assertThat(sut.root().children()).hasSize(2); + assertThat(sut.root().children().get(0).fieldName()).contains("id"); + assertThat(sut.root().children().get(1).fieldName()).contains("value"); + } + + @Test + void build_withFewerColNamesThanChildren_fillsWithSyntheticNames() { + // Given — defensive path: malformed footer with a struct layout whose child count + // exceeds the dtype's named fields. Should not throw; should fall back to col0/col1... + Layout root = struct(0, List.of(leaf("vortex.constant", 0), leaf("vortex.constant", 0))); + DType dtype = new DType.Struct(List.of("only"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().children().get(0).fieldName()).contains("only"); + assertThat(sut.root().children().get(1).fieldName()).contains("col1"); + } + + @Test + void build_withNonStructRoot_leavesFieldNameEmpty() { + // Given + Layout root = leaf("vortex.flat", 100); + DType dtype = new DType.Primitive(PType.I64, false); + givenHandle(dtype, root, List.of("vortex.flat"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.root().fieldName()).isEmpty(); + assertThat(sut.root().children()).isEmpty(); + } + + @Test + void build_sumsSegmentBytesAndCountsSegments() { + // Given + Layout root = leaf("vortex.flat", 0); + DType dtype = new DType.Primitive(PType.I32, false); + List segs = List.of( + new SegmentSpec(0, 128, (byte) 0, CompressionScheme.LZ4), + new SegmentSpec(128, 256, (byte) 0, CompressionScheme.LZ4), + new SegmentSpec(384, 64, (byte) 0, CompressionScheme.LZ4)); + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.segmentCount()).isEqualTo(3); + assertThat(sut.totalSegmentBytes()).isEqualTo(128L + 256L + 64L); + 
assertThat(sut.segmentSpecs()).containsExactlyElementsOf(segs); + } + + @Test + void build_setsTotalRowCountFromRootLayout() { + // Given — total rows is the root layout's row count, regardless of struct/non-struct + Layout root = struct(12_345L, List.of(leaf("vortex.constant", 12_345L))); + DType dtype = new DType.Struct(List.of("c"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.totalRowCount()).isEqualTo(12_345L); + } + + @Test + void build_carriesVersionAndFileSize() { + // Given + Layout root = leaf("vortex.flat", 0); + DType dtype = new DType.Primitive(PType.I32, false); + given(handle.version()).willReturn(7); + given(handle.fileSize()).willReturn(123_456L); + given(handle.dtype()).willReturn(dtype); + given(handle.layout()).willReturn(root); + given(handle.footer()).willReturn(new Footer(List.of("vortex.flat"), List.of(), List.of(), List.of())); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.version()).isEqualTo(7); + assertThat(sut.fileSize()).isEqualTo(123_456L); + } + + @Test + void build_flatChildWithCompressedSegment_skipsRootEncodingPeek() { + // Given — peekRootEncoding() reads the segment as a FlatBuffer; compressed segments + // are intentionally skipped so a malformed or compressed payload can't crash the + // inspector. With code != NONE we should still build a tree, with no encodings used. 
+ Layout root = new Layout("vortex.flat", 0, null, List.of(), List.of(0)); + DType dtype = new DType.Primitive(PType.I32, false); + SegmentSpec compressed = new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD); + givenHandle(dtype, root, List.of("vortex.flat"), List.of(compressed)); + + // When + InspectorTree sut = InspectorTree.build(handle); + + // Then + assertThat(sut.usedEncodings()).isEmpty(); + assertThat(sut.root().usedEncodings()).isEmpty(); + } + + private void givenHandle(DType dtype, Layout layout, List arraySpecs, List segs) { + given(handle.dtype()).willReturn(dtype); + given(handle.layout()).willReturn(layout); + given(handle.footer()).willReturn(new Footer(arraySpecs, List.of(), segs, List.of())); + } + + private static Layout struct(long rows, List children) { + return new Layout("vortex.struct", rows, null, children, List.of()); + } + + private static Layout leaf(String encodingId, long rows) { + return new Layout(encodingId, rows, null, List.of(), List.of()); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java new file mode 100644 index 00000000..03ecea44 --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java @@ -0,0 +1,160 @@ +package io.github.dfa1.vortex.inspect; + +import io.github.dfa1.vortex.core.CompressionScheme; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.SegmentSpec; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +class VortexInspectorTest { + + @Test + void render_struct_listsHeaderColumnsAndUsedEncodings() { + // Given + InspectorTree sut = struct2col(2, 4096L, + List.of( + new SegmentSpec(0, 512, (byte) 0, 
CompressionScheme.NONE), + new SegmentSpec(512, 512, (byte) 0, CompressionScheme.LZ4)), + Set.of("fastlanes.bitpacked", "vortex.constant")); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out) + .contains("Vortex v2") + .contains("4.0 KB") + .contains("1000 rows") + .contains("Schema:") + .contains("id") + .contains("value") + .contains("Registered encodings: vortex.flat, fastlanes.bitpacked, vortex.constant") + .contains("Used encodings: ") + .contains("Segments: 2") + .contains("[0] off=0 len=512 B comp=NONE") + .contains("[1] off=512 len=512 B comp=LZ4") + .contains("Layout:") + .contains("struct (1000 rows)") + .contains("[fastlanes.bitpacked]"); + } + + @Test + void render_segmentTable_listsEverySegment() { + // Given — verify table prints one line per segment with offset + size + compression + List specs = List.of( + new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD), + new SegmentSpec(1024, 2048, (byte) 0, CompressionScheme.NONE), + new SegmentSpec(3072, 4096, (byte) 0, CompressionScheme.LZ4)); + InspectorTree sut = struct2col(1, 8192L, specs, Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out) + .contains("[0] off=0 len=1.0 KB comp=ZSTD") + .contains("[1] off=1024 len=2.0 KB comp=NONE") + .contains("[2] off=3072 len=4.0 KB comp=LZ4"); + } + + @Test + void render_nonStruct_inlinesSingleColumnLayout() { + // Given + Layout leaf = new Layout("vortex.flat", 100, null, List.of(), List.of()); + InspectorTree.Node root = new InspectorTree.Node(leaf, Optional.empty(), Set.of(), List.of()); + InspectorTree sut = new InspectorTree( + 1, 256L, + new DType.Primitive(PType.I32, false), + List.of("vortex.flat"), Set.of(), + List.of(new SegmentSpec(0, 256, (byte) 0, CompressionScheme.NONE)), + 100L, root); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).contains("vortex.flat(100 rows)"); + assertThat(out).doesNotContain("struct ("); + } + + @Test + 
void render_formatsBytesAcrossUnits() { + // Given — bytes / KB / MB boundaries + List oneSeg = List.of(new SegmentSpec(0, 1, (byte) 0, CompressionScheme.NONE)); + InspectorTree small = struct2col(1, 512L, oneSeg, Set.of()); + InspectorTree medium = struct2col(1, 2048L, oneSeg, Set.of()); + InspectorTree large = struct2col(1, 5L * 1024 * 1024, oneSeg, Set.of()); + + // When / Then + assertThat(VortexInspector.render(small)).contains("512 B"); + assertThat(VortexInspector.render(medium)).contains("2.0 KB"); + assertThat(VortexInspector.render(large)).contains("5.0 MB"); + } + + @Test + void render_chainsChildrenWithArrow() { + // Given — nested zoned → chunked → flat chain + Layout flat = new Layout("vortex.flat", 1000, null, List.of(), List.of()); + Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(flat), List.of()); + Layout zoned = new Layout("vortex.stats", 1000, null, List.of(chunked), List.of()); + Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(zoned), List.of()); + + InspectorTree.Node flatN = new InspectorTree.Node(flat, Optional.empty(), Set.of(), List.of()); + InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.empty(), Set.of(), List.of(flatN)); + InspectorTree.Node zonedN = new InspectorTree.Node(zoned, Optional.of("v"), Set.of(), List.of(chunkedN)); + InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), Set.of(), List.of(zonedN)); + + InspectorTree sut = new InspectorTree( + 1, 1024L, + new DType.Struct(List.of("v"), List.of(new DType.Primitive(PType.I32, false)), false), + List.of("vortex.flat"), Set.of(), + List.of(), 1000L, rootN); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).contains("vortex.stats(1000 rows) → vortex.chunked(1000 rows) → vortex.flat(1000 rows)"); + } + + @Test + void render_emptyUsedEncodings_omitsBracketSuffix() { + // Given — column with no resolved encodings should not emit " []" noise + InspectorTree 
sut = struct2col(1, 100L, List.of(), Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).doesNotContain(" []"); + } + + private static InspectorTree struct2col(int version, long fileSize, List specs, Set usedById) { + Layout idLeaf = new Layout("fastlanes.bitpacked", 1000, null, List.of(), List.of()); + Layout valLeaf = new Layout("vortex.constant", 1000, null, List.of(), List.of()); + Layout root = new Layout("vortex.struct", 1000, null, List.of(idLeaf, valLeaf), List.of()); + + InspectorTree.Node idNode = new InspectorTree.Node(idLeaf, + Optional.of("id"), Set.of("fastlanes.bitpacked"), List.of()); + InspectorTree.Node valNode = new InspectorTree.Node(valLeaf, + Optional.of("value"), Set.of("vortex.constant"), List.of()); + InspectorTree.Node rootNode = new InspectorTree.Node(root, + Optional.empty(), Set.of("fastlanes.bitpacked", "vortex.constant"), List.of(idNode, valNode)); + + DType dtype = new DType.Struct( + List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), new DType.Primitive(PType.F64, false)), + false); + + return new InspectorTree(version, fileSize, dtype, + List.of("vortex.flat", "fastlanes.bitpacked", "vortex.constant"), + usedById, specs, 1000L, rootNode); + } +} diff --git a/integration/pom.xml b/integration/pom.xml index c4e2f978..2c33101b 100644 --- a/integration/pom.xml +++ b/integration/pom.xml @@ -38,6 +38,11 @@ vortex-reader test + + io.github.dfa1.vortex + vortex-inspector + test + dev.vortex vortex-jni diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java index 7f749562..885cb767 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/InspectForTest.java @@ -2,7 +2,7 @@ import io.github.dfa1.vortex.encoding.EncodingRegistry; -import 
io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java index b98a2153..8ead05dc 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/OhlcEncodingInspectionIntegrationTest.java @@ -5,7 +5,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; diff --git a/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java index ee9f4839..30c8844f 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/RustJavaReaderComparisonIntegrationTest.java @@ -19,7 +19,7 @@ import io.github.dfa1.vortex.core.array.ShortArray; import io.github.dfa1.vortex.core.array.VarBinArray; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import io.github.dfa1.vortex.scan.Chunk; import org.apache.arrow.memory.BufferAllocator; diff --git 
a/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java b/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java index f3d7ed76..160a9599 100644 --- a/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java +++ b/integration/src/test/java/io/github/dfa1/vortex/integration/VortexInspectorIntegrationTest.java @@ -5,7 +5,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; diff --git a/performance/pom.xml b/performance/pom.xml index 1794cf4b..6bac78f8 100644 --- a/performance/pom.xml +++ b/performance/pom.xml @@ -39,6 +39,11 @@ vortex-reader compile + + io.github.dfa1.vortex + vortex-inspector + compile + dev.vortex vortex-jni diff --git a/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java b/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java index a5b1a9de..77c8cd5d 100644 --- a/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java +++ b/performance/src/main/java/io/github/dfa1/vortex/performance/TaxiLayoutInspector.java @@ -9,7 +9,7 @@ import dev.vortex.arrow.ArrowAllocation; import dev.vortex.jni.NativeLoader; import io.github.dfa1.vortex.encoding.EncodingRegistry; -import io.github.dfa1.vortex.io.VortexInspector; +import io.github.dfa1.vortex.inspect.VortexInspector; import io.github.dfa1.vortex.io.VortexReader; import io.github.dfa1.vortex.parquet.ImportOptions; import io.github.dfa1.vortex.parquet.ParquetImporter; diff --git a/pom.xml b/pom.xml index 9326d963..a0588eb1 100644 --- a/pom.xml +++ b/pom.xml @@ -46,6 +46,7 @@ parquet bom cli + 
inspector integration performance @@ -61,6 +62,7 @@ 4.3.0 1.0.0.CR1 1.37 + 3.1.3 2.4.240 6.1.0 @@ -109,6 +111,16 @@ vortex-parquet ${project.version} + + io.github.dfa1.vortex + vortex-inspector + ${project.version} + + + com.googlecode.lanterna + lanterna + ${lanterna.version} + de.siegmar fastcsv diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java index b48ffc59..b1391a77 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java @@ -29,8 +29,8 @@ public interface VortexHandle extends Closeable { /// ///

Internal escape hatch. This method is on the public /// {@link VortexHandle} interface only because {@link io.github.dfa1.vortex.scan.ScanIterator} - /// and {@link VortexInspector} live in sibling packages and need cross-package access to the - /// raw backing segment. It is not part of the supported stability contract; signatures and + /// and the inspector module's {@code VortexInspector} live in sibling packages and need + /// cross-package access to the raw backing segment. It is not part of the supported stability contract; signatures and /// semantics may change without a deprecation cycle. Application code should rely on /// {@link #scan(ScanOptions)} and the typed array accessors instead. /// diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java deleted file mode 100644 index 1c7eae20..00000000 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexInspector.java +++ /dev/null @@ -1,175 +0,0 @@ -package io.github.dfa1.vortex.io; - -import io.github.dfa1.vortex.core.DType; -import io.github.dfa1.vortex.core.Footer; -import io.github.dfa1.vortex.core.Layout; -import io.github.dfa1.vortex.core.SegmentSpec; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; - -/// Produces a human-readable summary of a Vortex file's structure and encodings. 
-public final class VortexInspector { - - private VortexInspector() { - } - - public static String inspect(VortexHandle reader) { - Footer footer = reader.footer(); - Layout layout = reader.layout(); - DType dtype = reader.dtype(); - - var sb = new StringBuilder(); - - sb.append("Vortex v").append(reader.version()) - .append(" ").append(formatBytes(reader.fileSize())).append('\n'); - sb.append('\n'); - - sb.append("Schema:\n"); - appendSchema(sb, dtype, " "); - sb.append('\n'); - - sb.append("Registered encodings: ").append(String.join(", ", footer.arraySpecs())).append('\n'); - sb.append('\n'); - - Set usedEncodings = collectUsedEncodings(reader); - sb.append("Used encodings: ").append(String.join(", ", usedEncodings)).append('\n'); - sb.append('\n'); - - int segCount = footer.segmentSpecs().size(); - long totalBytes = footer.segmentSpecs().stream().mapToLong(SegmentSpec::length).sum(); - sb.append("Segments: ").append(segCount) - .append(" total ").append(formatBytes(totalBytes)).append('\n'); - sb.append('\n'); - - sb.append("Layout:\n"); - List colNames = (dtype instanceof DType.Struct s) ? 
s.fieldNames() : List.of(); - appendLayout(sb, layout, colNames, reader, " "); - - return sb.toString(); - } - - // ── Used encodings ──────────────────────────────────────────────────────── - - private static Set collectUsedEncodings(VortexHandle reader) { - var used = new LinkedHashSet(); - collectLayoutEncodings(reader.layout(), reader, used); - return used; - } - - private static void collectLayoutEncodings(Layout layout, VortexHandle reader, Set used) { - if (layout.isFlat() && !layout.segments().isEmpty()) { - int segIdx = layout.segments().getFirst(); - SegmentSpec spec = reader.footer().segmentSpecs().get(segIdx); - if (spec.compression().code == 0) { - MemorySegment seg = reader.slice(spec.offset(), spec.length()); - peekRootEncoding(seg, reader.footer().arraySpecs(), used); - } - } - for (Layout child : layout.children()) { - collectLayoutEncodings(child, reader, used); - } - } - - /// Reads only the root ArrayNode encoding — ignores child/stats sub-nodes. - private static void peekRootEncoding(MemorySegment seg, List arraySpecs, Set used) { - int segLen = (int) seg.byteSize(); - ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - int fbLen = bb.getInt(segLen - 4); - int fbStart = segLen - 4 - fbLen; - ByteBuffer fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); - var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); - if (fbArray.root() != null) { - used.add(arraySpecs.get(fbArray.root().encoding())); - } - } - - // ── Layout tree ─────────────────────────────────────────────────────────── - - @SuppressWarnings("SameParameterValue") - private static void appendLayout(StringBuilder sb, Layout layout, List colNames, - VortexHandle reader, String indent) { - if (layout.isStruct()) { - sb.append(indent).append("struct (").append(layout.rowCount()).append(" rows)\n"); - for (int i = 0; i < layout.children().size(); i++) { - String name = i < colNames.size() ? 
colNames.get(i) : "col" + i; - Set colEncodings = new LinkedHashSet<>(); - collectLayoutEncodings(layout.children().get(i), reader, colEncodings); - sb.append(indent).append(" ").append(name).append(": "); - appendLayoutInline(sb, layout.children().get(i)); - if (!colEncodings.isEmpty()) { - sb.append(" [").append(String.join(", ", colEncodings)).append("]"); - } - sb.append('\n'); - } - } else { - sb.append(indent); - appendLayoutInline(sb, layout); - sb.append('\n'); - } - } - - private static void appendLayoutInline(StringBuilder sb, Layout layout) { - sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)"); - if (layout.children().isEmpty()) { - return; - } - sb.append(" → "); - if (layout.children().size() == 1) { - appendLayoutInline(sb, layout.children().getFirst()); - } else { - sb.append(layout.children().size()).append("× ["); - appendLayoutInline(sb, layout.children().getFirst()); - sb.append("]"); - } - } - - // ── Formatting ──────────────────────────────────────────────────────────── - - @SuppressWarnings("SameParameterValue") - private static void appendSchema(StringBuilder sb, DType dtype, String indent) { - if (dtype instanceof DType.Struct s) { - int maxLen = s.fieldNames().stream().mapToInt(String::length).max().orElse(0); - for (int i = 0; i < s.fieldNames().size(); i++) { - String name = s.fieldNames().get(i); - sb.append(indent).append(name) - .append(" ".repeat(maxLen - name.length() + 1)) - .append(formatDType(s.fieldTypes().get(i))).append('\n'); - } - } else { - sb.append(indent).append(formatDType(dtype)).append('\n'); - } - } - - private static String formatDType(DType dtype) { - return switch (dtype) { - case DType.Primitive(var pt, var nullable) -> pt.name() + (nullable ? "?" : ""); - case DType.Utf8(var nullable) -> "utf8" + (nullable ? "?" : ""); - case DType.Binary(var nullable) -> "binary" + (nullable ? "?" : ""); - case DType.Bool(var nullable) -> "bool" + (nullable ? "?" 
: ""); - case DType.Null ignored -> "null"; - case DType.Decimal(var p, var s, var nullable) -> "decimal(" + p + "," + s + ")" + (nullable ? "?" : ""); - case DType.Struct ignored -> "struct"; - case DType.List(var elem, var nullable) -> "list<" + formatDType(elem) + ">" + (nullable ? "?" : ""); - case DType.FixedSizeList(var elem, var size, var nullable) -> - "list<" + formatDType(elem) + ">[" + size + "]" + (nullable ? "?" : ""); - case DType.Extension(var id, var storage, var meta, var nullable) -> - "ext<" + id + ">" + (nullable ? "?" : ""); - case DType.Variant(var nullable) -> "variant" + (nullable ? "?" : ""); - }; - } - - private static String formatBytes(long bytes) { - if (bytes < 1024) { - return bytes + " B"; - } - if (bytes < 1024 * 1024) { - return String.format("%.1f KB", bytes / 1024.0); - } - return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); - } -} From b8b8ee76cbcf6b9ccdab014473665b3ddea459c6 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 18:40:12 +0200 Subject: [PATCH 02/37] refactor(inspector): replace Lanterna with FFM-based ANSI terminal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the lanterna dependency in favour of a small in-tree terminal abstraction built on Java 25's FFM (MemorySegment / Linker), keeping the project's no-JNI / no-Unsafe stance and shrinking the runtime footprint by ~600 KB (plus the transitive JNA on Windows). New package io.github.dfa1.vortex.inspect.term: - RawTerminal — sealed AutoCloseable abstraction over POSIX / Windows. - PosixTerminal — libc tcgetattr/cfmakeraw/tcsetattr/ioctl(TIOCGWINSZ) via FFM. Saves and restores the prior termios; a shutdown hook guarantees restoration if the caller forgoes try-with-resources. - WindowsTerminal — kernel32 GetStdHandle / Get/SetConsoleMode and GetConsoleScreenBufferInfo via FFM. Enables VT processing on stdout and VT input on stdin (Win10 1809+). 
- Ansi — CSI escape constants + moveTo / fg / bg helpers. - Key — sealed key event type (arrows, PgUp/Dn, Home/End, Enter, Esc, Eof, Char). - KeyDecoder — stateless byte-stream → Key decoder covering xterm CSI letter and tilde sequences. The inspector tree and text renderer are unchanged; only VortexInspectorTui swaps its drawing backend. Co-Authored-By: Claude Opus 4.7 --- inspector/pom.xml | 4 - .../vortex/inspect/VortexInspectorTui.java | 211 +++++++++--------- .../github/dfa1/vortex/inspect/term/Ansi.java | 63 ++++++ .../github/dfa1/vortex/inspect/term/Key.java | 77 +++++++ .../dfa1/vortex/inspect/term/KeyDecoder.java | 89 ++++++++ .../vortex/inspect/term/PosixTerminal.java | 167 ++++++++++++++ .../dfa1/vortex/inspect/term/RawTerminal.java | 71 ++++++ .../vortex/inspect/term/WindowsTerminal.java | 192 ++++++++++++++++ .../dfa1/vortex/inspect/term/AnsiTest.java | 42 ++++ .../vortex/inspect/term/KeyDecoderTest.java | 128 +++++++++++ pom.xml | 6 - 11 files changed, 937 insertions(+), 113 deletions(-) create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Key.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java create mode 100644 inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java create mode 100644 inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java diff --git a/inspector/pom.xml b/inspector/pom.xml index 9e898eb3..e88d9212 100644 --- a/inspector/pom.xml +++ b/inspector/pom.xml @@ -28,10 +28,6 @@ com.google.flatbuffers flatbuffers-java - - com.googlecode.lanterna - lanterna - 
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 9464e876..04e3a451 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -1,17 +1,10 @@ package io.github.dfa1.vortex.inspect; -import com.googlecode.lanterna.TerminalSize; -import com.googlecode.lanterna.TextCharacter; -import com.googlecode.lanterna.TextColor; -import com.googlecode.lanterna.graphics.TextGraphics; -import com.googlecode.lanterna.input.KeyStroke; -import com.googlecode.lanterna.input.KeyType; -import com.googlecode.lanterna.screen.Screen; -import com.googlecode.lanterna.screen.TerminalScreen; -import com.googlecode.lanterna.terminal.DefaultTerminalFactory; -import com.googlecode.lanterna.terminal.Terminal; import io.github.dfa1.vortex.core.Layout; import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.inspect.term.Ansi; +import io.github.dfa1.vortex.inspect.term.Key; +import io.github.dfa1.vortex.inspect.term.RawTerminal; import io.github.dfa1.vortex.io.VortexHandle; import java.io.IOException; @@ -20,40 +13,37 @@ import java.util.List; import java.util.Set; -/// Lanterna-based interactive viewer for a Vortex file's inspector tree. +/// Interactive viewer for a Vortex file's inspector tree, drawn with raw ANSI +/// escapes — no library dependency. /// -/// Renders a two-pane terminal UI: layout tree on the left, node details on the right. -/// Quit with `q` or `Esc`. +/// Renders a two-pane terminal UI: layout tree on the left, node details on +/// the right. Quit with {@code q} or {@code Esc}. public final class VortexInspectorTui { private VortexInspectorTui() { } - /// Opens a Lanterna terminal, builds an inspector tree, and runs the interactive viewer until quit. 
+ /// Opens the terminal in raw mode, builds an inspector tree, and runs the + /// interactive viewer until quit. /// /// @param handle open Vortex file handle /// @throws IOException if the terminal cannot be initialized public static void show(VortexHandle handle) throws IOException { InspectorTree tree = InspectorTree.build(handle); - Terminal terminal = new DefaultTerminalFactory().createTerminal(); - Screen screen = new TerminalScreen(terminal); - screen.startScreen(); - try { - new Loop(screen, tree).run(); - } finally { - screen.stopScreen(); + try (RawTerminal term = RawTerminal.open()) { + new Loop(term, tree).run(); } } private static final class Loop { - private final Screen screen; + private final RawTerminal term; private final InspectorTree tree; private final Set expanded = new HashSet<>(); private int selected; private int scrollOffset; - Loop(Screen screen, InspectorTree tree) { - this.screen = screen; + Loop(RawTerminal term, InspectorTree tree) { + this.term = term; this.tree = tree; this.expanded.add(tree.root()); } @@ -68,10 +58,7 @@ void run() throws IOException { selected = 0; } render(items); - KeyStroke key = screen.readInput(); - if (key == null) { - continue; - } + Key key = term.readKey(); if (isQuit(key)) { return; } @@ -79,39 +66,52 @@ void run() throws IOException { } } - private void handleKey(KeyStroke key, List items) { - switch (key.getKeyType()) { - case ArrowDown -> selected = Math.min(selected + 1, items.size() - 1); - case ArrowUp -> selected = Math.max(selected - 1, 0); - case ArrowRight, Enter -> { - if (selected < items.size()) { - InspectorTree.Node n = items.get(selected).node(); - if (!n.children().isEmpty()) { - expanded.add(n); - } - } - } - case ArrowLeft -> { + private void handleKey(Key key, List items) { + switch (key) { + case Key.ArrowDown ignored -> selected = Math.min(selected + 1, items.size() - 1); + case Key.ArrowUp ignored -> selected = Math.max(selected - 1, 0); + case Key.ArrowRight ignored -> 
expandSelected(items); + case Key.Enter ignored -> toggleSelected(items); + case Key.ArrowLeft ignored -> { if (selected < items.size()) { expanded.remove(items.get(selected).node()); } } - case PageDown -> selected = Math.min(selected + 10, items.size() - 1); - case PageUp -> selected = Math.max(selected - 10, 0); - case Home -> selected = 0; - case End -> selected = items.size() - 1; + case Key.PageDown ignored -> selected = Math.min(selected + 10, items.size() - 1); + case Key.PageUp ignored -> selected = Math.max(selected - 10, 0); + case Key.Home ignored -> selected = 0; + case Key.End ignored -> selected = items.size() - 1; default -> { } } } - private static boolean isQuit(KeyStroke key) { - if (key.getKeyType() == KeyType.Escape || key.getKeyType() == KeyType.EOF) { - return true; + private void expandSelected(List items) { + if (selected < items.size()) { + InspectorTree.Node n = items.get(selected).node(); + if (!n.children().isEmpty()) { + expanded.add(n); + } + } + } + + private void toggleSelected(List items) { + if (selected >= items.size()) { + return; + } + InspectorTree.Node n = items.get(selected).node(); + if (n.children().isEmpty()) { + return; + } + if (!expanded.add(n)) { + expanded.remove(n); } - return key.getKeyType() == KeyType.Character - && key.getCharacter() != null - && (key.getCharacter() == 'q' || key.getCharacter() == 'Q'); + } + + private static boolean isQuit(Key key) { + return key instanceof Key.Escape + || key instanceof Key.Eof + || (key instanceof Key.Char(char c) && (c == 'q' || c == 'Q')); } private List flatten() { @@ -130,19 +130,12 @@ private void walk(InspectorTree.Node node, int depth, List out) { } private void render(List items) throws IOException { - screen.doResizeIfNecessary(); - TerminalSize size = screen.getTerminalSize(); - screen.clear(); - TextGraphics tg = screen.newTextGraphics(); - int width = size.getColumns(); - int height = size.getRows(); + RawTerminal.Size size = term.size(); + int width = 
size.cols(); + int height = size.rows(); int leftWidth = Math.max(20, width / 2); - - drawHeader(tg, width); - drawFooter(tg, width, height); - int bodyTop = 2; - int bodyBottom = height - 2; + int bodyBottom = height - 1; int bodyHeight = bodyBottom - bodyTop; if (selected < scrollOffset) { @@ -151,64 +144,70 @@ private void render(List items) throws IOException { scrollOffset = selected - bodyHeight + 1; } - drawTree(tg, items, bodyTop, bodyHeight, leftWidth); - drawDivider(tg, leftWidth, bodyTop, bodyBottom); + StringBuilder buf = new StringBuilder(width * height); + buf.append(Ansi.CLEAR_SCREEN); + drawHeader(buf, width); + drawTree(buf, items, bodyTop, bodyHeight, leftWidth); + drawDivider(buf, leftWidth, bodyTop, bodyBottom); if (!items.isEmpty()) { - drawDetails(tg, items.get(selected).node(), leftWidth + 2, bodyTop, width - leftWidth - 2, bodyHeight); + drawDetails(buf, items.get(selected).node(), + leftWidth + 2, bodyTop, width - leftWidth - 2, bodyHeight); } - - screen.refresh(); + drawFooter(buf, width, height); + buf.append(Ansi.moveTo(height, 1)); + term.write(buf.toString()); + term.flush(); } - private void drawHeader(TextGraphics tg, int width) { - tg.setForegroundColor(TextColor.ANSI.BLACK); - tg.setBackgroundColor(TextColor.ANSI.CYAN); + private void drawHeader(StringBuilder buf, int width) { String header = " vortex-inspect — v" + tree.version() + " " + formatBytes(tree.fileSize()) + " rows=" + tree.totalRowCount() + " segs=" + tree.segmentCount() + " (" + formatBytes(tree.totalSegmentBytes()) + ")"; - String padded = pad(header, width); - tg.putString(0, 0, padded); - tg.setBackgroundColor(TextColor.ANSI.DEFAULT); - tg.setForegroundColor(TextColor.ANSI.DEFAULT); + buf.append(Ansi.moveTo(1, 1)); + buf.append(Ansi.bg(46)).append(Ansi.fg(30)); + buf.append(pad(header, width)); + buf.append(Ansi.RESET); } - private void drawFooter(TextGraphics tg, int width, int height) { - tg.setForegroundColor(TextColor.ANSI.BLACK); - 
tg.setBackgroundColor(TextColor.ANSI.WHITE); - String hint = " ↑↓ nav →/Enter expand ← collapse q quit "; - tg.putString(0, height - 1, pad(hint, width)); - tg.setBackgroundColor(TextColor.ANSI.DEFAULT); - tg.setForegroundColor(TextColor.ANSI.DEFAULT); + private void drawFooter(StringBuilder buf, int width, int height) { + buf.append(Ansi.moveTo(height, 1)); + buf.append(Ansi.bg(47)).append(Ansi.fg(30)); + buf.append(pad(" ↑↓ nav →/Enter expand ← collapse q quit ", width)); + buf.append(Ansi.RESET); } - private void drawTree(TextGraphics tg, List items, int top, int rows, int leftWidth) { + private void drawTree(StringBuilder buf, List items, int top, int rows, int leftWidth) { for (int row = 0; row < rows; row++) { int idx = scrollOffset + row; + buf.append(Ansi.moveTo(top + row + 1, 1)); if (idx >= items.size()) { - break; + buf.append(pad("", leftWidth - 1)); + continue; } Item item = items.get(idx); boolean isSelected = idx == selected; if (isSelected) { - tg.setForegroundColor(TextColor.ANSI.BLACK); - tg.setBackgroundColor(TextColor.ANSI.YELLOW); - } else { - tg.setForegroundColor(TextColor.ANSI.DEFAULT); - tg.setBackgroundColor(TextColor.ANSI.DEFAULT); + buf.append(Ansi.bg(43)).append(Ansi.fg(30)); + } + buf.append(pad(renderItem(item), leftWidth - 1)); + if (isSelected) { + buf.append(Ansi.RESET); } - tg.putString(0, top + row, pad(renderItem(item), leftWidth - 1)); } - tg.setBackgroundColor(TextColor.ANSI.DEFAULT); - tg.setForegroundColor(TextColor.ANSI.DEFAULT); } private String renderItem(Item item) { InspectorTree.Node node = item.node(); - String marker = node.children().isEmpty() - ? " " - : (expanded.contains(node) ? "▼ " : "▶ "); + String marker; + if (node.children().isEmpty()) { + marker = " "; + } else if (expanded.contains(node)) { + marker = "v "; + } else { + marker = "> "; + } String label = item.depth() == 0 && node.layout().isStruct() ? 
"struct" : node.fieldName().map(n -> n + ": ").orElse("") + node.layout().encodingId(); @@ -216,13 +215,22 @@ private String renderItem(Item item) { + " (" + node.layout().rowCount() + " rows)"; } - private void drawDivider(TextGraphics tg, int col, int top, int bottom) { + private void drawDivider(StringBuilder buf, int col, int top, int bottom) { for (int y = top; y < bottom; y++) { - tg.setCharacter(col, y, new TextCharacter('│')); + buf.append(Ansi.moveTo(y + 1, col + 1)).append('|'); } } - private void drawDetails(TextGraphics tg, InspectorTree.Node node, int col, int top, int width, int rows) { + private void drawDetails(StringBuilder buf, InspectorTree.Node node, + int col, int top, int width, int rows) { + List lines = detailLines(node); + for (int i = 0; i < lines.size() && i < rows; i++) { + buf.append(Ansi.moveTo(top + i + 1, col + 1)); + buf.append(truncate(lines.get(i), width)); + } + } + + private List detailLines(InspectorTree.Node node) { List lines = new ArrayList<>(); Layout layout = node.layout(); lines.add("Encoding: " + layout.encodingId()); @@ -232,8 +240,7 @@ private void drawDetails(TextGraphics tg, InspectorTree.Node node, int col, int if (!layout.segments().isEmpty()) { long subtotal = 0; for (int idx : layout.segments()) { - SegmentSpec spec = tree.segmentSpecs().get(idx); - subtotal += spec.length(); + subtotal += tree.segmentSpecs().get(idx).length(); } lines.add("Segments: " + layout.segments().size() + " (" + formatBytes(subtotal) + ")"); @@ -250,12 +257,10 @@ private void drawDetails(TextGraphics tg, InspectorTree.Node node, int col, int lines.add(""); lines.add("Used encodings:"); for (String enc : node.usedEncodings()) { - lines.add(" • " + enc); + lines.add(" - " + enc); } } - for (int i = 0; i < lines.size() && i < rows; i++) { - tg.putString(col, top + i, truncate(lines.get(i), width)); - } + return lines; } private record Item(InspectorTree.Node node, int depth) { diff --git 
a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java new file mode 100644 index 00000000..4395f263 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Ansi.java @@ -0,0 +1,63 @@ +package io.github.dfa1.vortex.inspect.term; + +/// ANSI / xterm CSI escape constants and small formatting helpers. +/// +/// Sequences are plain ASCII once the leading {@code ESC} (0x1B) byte is included. +/// They're written verbatim to {@code System.out} once raw mode is enabled. +public final class Ansi { + + /// ESC (0x1B) - the byte every CSI sequence starts with. + public static final String ESC = String.valueOf((char) 0x1B); + + /// Control Sequence Introducer: {@code ESC + '['}. + public static final String CSI = ESC + "["; + + /// Clear entire screen. + public static final String CLEAR_SCREEN = CSI + "2J"; + + /// Move cursor to top-left. + public static final String CURSOR_HOME = CSI + "H"; + + /// Reset all SGR attributes. + public static final String RESET = CSI + "0m"; + + /// Hide the cursor. + public static final String HIDE_CURSOR = CSI + "?25l"; + + /// Show the cursor. + public static final String SHOW_CURSOR = CSI + "?25h"; + + /// Switch to the alternate screen buffer. + public static final String ENTER_ALT_SCREEN = CSI + "?1049h"; + + /// Restore the primary screen buffer. + public static final String EXIT_ALT_SCREEN = CSI + "?1049l"; + + private Ansi() { + } + + /// Move the cursor to (1-based) {@code row}, {@code col}. + /// + /// @param row 1-based row index + /// @param col 1-based column index + /// @return CSI sequence + public static String moveTo(int row, int col) { + return CSI + row + ";" + col + "H"; + } + + /// Standard SGR foreground colour (codes 30-37 normal, 90-97 bright). 
/// A single decoded keystroke or input condition.
///
/// The hierarchy is closed: every non-printable key is a one-constant enum whose
/// only value is {@code INSTANCE}, and printable input is carried by [Char].
public sealed interface Key permits
        Key.ArrowUp, Key.ArrowDown, Key.ArrowLeft, Key.ArrowRight,
        Key.PageUp, Key.PageDown, Key.Home, Key.End,
        Key.Enter, Key.Escape, Key.Eof, Key.Char {

    /// Cursor-up arrow.
    enum ArrowUp implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Cursor-down arrow.
    enum ArrowDown implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Cursor-left arrow.
    enum ArrowLeft implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Cursor-right arrow.
    enum ArrowRight implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Page Up.
    enum PageUp implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Page Down.
    enum PageDown implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Home.
    enum Home implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// End.
    enum End implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Enter / Return (LF or CR).
    enum Enter implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Escape pressed on its own, i.e. not as the start of a CSI sequence.
    enum Escape implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// Input exhausted - stdin was closed.
    enum Eof implements Key {
        /// The only value of this key type.
        INSTANCE
    }

    /// A printable character.
    ///
    /// @param value ASCII codepoint (multi-byte UTF-8 is not decoded here)
    record Char(char value) implements Key {
    }
}
+ /// + /// @param value ASCII codepoint (multi-byte UTF-8 not decoded here) + record Char(char value) implements Key { + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java new file mode 100644 index 00000000..824fe12d --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/KeyDecoder.java @@ -0,0 +1,89 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.InputStream; + +/// Translates raw stdin bytes into [Key] events. +/// +/// Recognises common CSI sequences emitted by xterm-compatible terminals: +/// {@code ESC [ A/B/C/D} for arrows, {@code ESC [ 5~ / 6~} for PgUp/PgDn, +/// {@code ESC [ H / F} and {@code ESC [ 1~ / 4~} for Home/End. Any unrecognised +/// escape sequence is dropped and decoding continues with the next byte. +/// +/// Stateless across reads - call [#next(InputStream)] for each event. +public final class KeyDecoder { + + private KeyDecoder() { + } + + /// Reads the next key from {@code in}, blocking until at least one byte arrives. + /// + /// @param in raw input stream (typically {@code System.in} in cbreak mode) + /// @return the decoded key, or [Key.Eof] if the stream is at EOF + /// @throws IOException if the underlying read fails + public static Key next(InputStream in) throws IOException { + int b = in.read(); + if (b < 0) { + return Key.Eof.INSTANCE; + } + if (b == 0x1B) { + return readAfterEsc(in); + } + if (b == '\r' || b == '\n') { + return Key.Enter.INSTANCE; + } + return new Key.Char((char) b); + } + + private static Key readAfterEsc(InputStream in) throws IOException { + // Bare ESC: no follow-up byte available within a short window. + // We approximate by peeking via available(); proper terminal IO would + // use a select() / VTIME timer, but this is enough for q/Esc quit. 
+ if (in.available() == 0) { + return Key.Escape.INSTANCE; + } + int b1 = in.read(); + if (b1 != '[' && b1 != 'O') { + return Key.Escape.INSTANCE; + } + int b2 = in.read(); + return switch (b2) { + case 'A' -> Key.ArrowUp.INSTANCE; + case 'B' -> Key.ArrowDown.INSTANCE; + case 'C' -> Key.ArrowRight.INSTANCE; + case 'D' -> Key.ArrowLeft.INSTANCE; + case 'H' -> Key.Home.INSTANCE; + case 'F' -> Key.End.INSTANCE; + default -> readTildeSequence(in, b2); + }; + } + + private static Key readTildeSequence(InputStream in, int firstDigit) throws IOException { + if (firstDigit < '0' || firstDigit > '9') { + return Key.Escape.INSTANCE; + } + int digit = firstDigit - '0'; + int next = in.read(); + if (next == -1) { + return Key.Eof.INSTANCE; + } + // Two-digit codes like ESC [ 15~; collapse to single digit by ignoring extras. + while (next >= '0' && next <= '9') { + digit = digit * 10 + (next - '0'); + next = in.read(); + if (next == -1) { + return Key.Eof.INSTANCE; + } + } + if (next != '~') { + return Key.Escape.INSTANCE; + } + return switch (digit) { + case 1, 7 -> Key.Home.INSTANCE; + case 4, 8 -> Key.End.INSTANCE; + case 5 -> Key.PageUp.INSTANCE; + case 6 -> Key.PageDown.INSTANCE; + default -> Key.Escape.INSTANCE; + }; + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java new file mode 100644 index 00000000..bbdb9231 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java @@ -0,0 +1,167 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.PrintStream; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/// POSIX (Linux + macOS) raw-mode terminal 
implementation. +/// +/// Uses libc {@code tcgetattr} / {@code cfmakeraw} / {@code tcsetattr} via FFM +/// to put stdin in non-canonical, no-echo mode. {@code ioctl(TIOCGWINSZ)} +/// queries the terminal size on every call (no SIGWINCH plumbing). +/// +/// On entry: saves the original {@code termios}, switches to alt screen, hides +/// the cursor. On [#close()]: restores everything, even on exceptions, via a +/// shutdown hook that fires if the caller skips try-with-resources. +public final class PosixTerminal implements RawTerminal { + + private static final int STDIN_FD = 0; + private static final int STDOUT_FD = 1; + private static final int TCSANOW = 0; + + private static final long TIOCGWINSZ = isMac() ? 0x40087468L : 0x5413L; + + /// {@code struct termios} is at most 72 bytes (macOS); Linux glibc is 60. + /// 128 is a comfortable upper bound and lets the same code work on both. + private static final long TERMIOS_SIZE = 128; + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup LIBC = LINKER.defaultLookup(); + + private static final MethodHandle TCGETATTR = downcall("tcgetattr", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS)); + private static final MethodHandle TCSETATTR = downcall("tcsetattr", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.JAVA_INT, ValueLayout.ADDRESS)); + private static final MethodHandle CFMAKERAW = downcall("cfmakeraw", + FunctionDescriptor.ofVoid(ValueLayout.ADDRESS)); + private static final MethodHandle IOCTL = LINKER.downcallHandle( + LIBC.find("ioctl").orElseThrow(() -> new UnsatisfiedLinkError("ioctl")), + FunctionDescriptor.of(ValueLayout.JAVA_INT, + ValueLayout.JAVA_INT, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS), + Linker.Option.firstVariadicArg(2)); + + private final Arena arena; + private final MemorySegment savedTermios; + private final PrintStream out; + private final Thread shutdownHook; + private 
boolean closed; + + private PosixTerminal(Arena arena, MemorySegment savedTermios) { + this.arena = arena; + this.savedTermios = savedTermios; + this.out = System.out; + this.shutdownHook = new Thread(this::restore, "posix-term-restore"); + Runtime.getRuntime().addShutdownHook(shutdownHook); + } + + /// Enters raw mode and switches to the alternate screen. + /// + /// @return open terminal + /// @throws IOException if {@code tcgetattr} or {@code tcsetattr} fails + public static PosixTerminal open() throws IOException { + Arena arena = Arena.ofShared(); + try { + MemorySegment saved = arena.allocate(TERMIOS_SIZE); + MemorySegment raw = arena.allocate(TERMIOS_SIZE); + int rc = (int) TCGETATTR.invokeExact(STDIN_FD, saved); + if (rc != 0) { + throw new IOException("tcgetattr failed: rc=" + rc); + } + MemorySegment.copy(saved, 0, raw, 0, TERMIOS_SIZE); + CFMAKERAW.invokeExact(raw); + rc = (int) TCSETATTR.invokeExact(STDIN_FD, TCSANOW, raw); + if (rc != 0) { + throw new IOException("tcsetattr failed: rc=" + rc); + } + PosixTerminal term = new PosixTerminal(arena, saved); + term.out.print(Ansi.ENTER_ALT_SCREEN); + term.out.print(Ansi.HIDE_CURSOR); + term.out.print(Ansi.CLEAR_SCREEN); + term.out.flush(); + return term; + } catch (Throwable t) { + arena.close(); + if (t instanceof IOException io) { + throw io; + } + throw new IOException(t); + } + } + + @Override + public Size size() { + MemorySegment ws = arena.allocate(8); + try { + int rc = (int) IOCTL.invokeExact(STDOUT_FD, TIOCGWINSZ, ws); + if (rc != 0) { + return new Size(24, 80); + } + } catch (Throwable t) { + return new Size(24, 80); + } + int rows = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 0)); + int cols = Short.toUnsignedInt(ws.get(ValueLayout.JAVA_SHORT, 2)); + if (rows == 0 || cols == 0) { + return new Size(24, 80); + } + return new Size(rows, cols); + } + + @Override + public void write(String s) { + out.print(s); + } + + @Override + public void flush() { + out.flush(); + } + + @Override + public 
Key readKey() throws IOException { + return KeyDecoder.next(System.in); + } + + @Override + public void close() { + if (closed) { + return; + } + closed = true; + try { + Runtime.getRuntime().removeShutdownHook(shutdownHook); + } catch (IllegalStateException ignored) { + // JVM already shutting down. + } + restore(); + arena.close(); + } + + private void restore() { + try { + out.print(Ansi.SHOW_CURSOR); + out.print(Ansi.EXIT_ALT_SCREEN); + out.print(Ansi.RESET); + out.flush(); + TCSETATTR.invokeExact(STDIN_FD, TCSANOW, savedTermios); + } catch (Throwable ignored) { + // Best-effort: JVM is exiting; nothing useful to do. + } + } + + private static MethodHandle downcall(String name, FunctionDescriptor desc) { + return LINKER.downcallHandle( + LIBC.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)), + desc); + } + + private static boolean isMac() { + return System.getProperty("os.name", "").toLowerCase().contains("mac"); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java new file mode 100644 index 00000000..24098807 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java @@ -0,0 +1,71 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; + +/// Direct, dependency-free terminal abstraction. +/// +/// Implementations toggle the OS console into raw / non-canonical mode on +/// [#open()] and restore the prior state on [#close()]. Output is plain bytes +/// to {@code System.out}; input is buffered keystrokes from {@code System.in}. +/// +/// Usage: +/// ``` +/// try (RawTerminal term = RawTerminal.open()) { +/// term.write(Ansi.CLEAR_SCREEN); +/// Key k = term.readKey(); +/// ... +/// } +/// ``` +public sealed interface RawTerminal extends AutoCloseable + permits PosixTerminal, WindowsTerminal { + + /// Opens the platform-appropriate raw-mode terminal. 
+ /// + /// Picks [PosixTerminal] on Linux / macOS and [WindowsTerminal] on Windows + /// based on {@code os.name}. + /// + /// @return an open raw terminal handle + /// @throws IOException if the OS-level setup fails + static RawTerminal open() throws IOException { + String os = System.getProperty("os.name", "").toLowerCase(); + if (os.contains("win")) { + return WindowsTerminal.open(); + } + return PosixTerminal.open(); + } + + /// Current terminal size in cells. + /// + /// @return rows and columns at this moment (re-queried each call) + Size size(); + + /// Writes a string of bytes (ASCII / UTF-8) verbatim to the terminal. + /// + /// @param s text to send (may contain ANSI escapes) + /// @throws IOException if the write fails + void write(String s) throws IOException; + + /// Flushes any buffered output. + /// + /// @throws IOException if flush fails + void flush() throws IOException; + + /// Blocks until a key is available, then returns the decoded event. + /// + /// @return next key + /// @throws IOException if reading fails + Key readKey() throws IOException; + + /// Restores the original terminal mode and exits the alternate screen. + /// + /// Idempotent - safe to call multiple times. + @Override + void close(); + + /// Terminal dimensions in character cells. 
+ /// + /// @param rows number of rows + /// @param cols number of columns + record Size(int rows, int cols) { + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java new file mode 100644 index 00000000..a35da22d --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java @@ -0,0 +1,192 @@ +package io.github.dfa1.vortex.inspect.term; + +import java.io.IOException; +import java.io.PrintStream; +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; + +/// Windows console raw-mode implementation via kernel32.dll. +/// +/// Toggles {@code ENABLE_VIRTUAL_TERMINAL_PROCESSING} on stdout (Win10 1809+ +/// required) so ANSI sequences in [Ansi] render natively. Stdin runs with +/// line-input + echo + processed-input disabled and VT input enabled so xterm +/// arrow sequences arrive intact. +/// +/// {@code GetConsoleScreenBufferInfo} drives [#size()]; we report the visible +/// window rect, not the scrollback buffer. 
+public final class WindowsTerminal implements RawTerminal { + + private static final long STD_INPUT_HANDLE = -10L; + private static final long STD_OUTPUT_HANDLE = -11L; + + private static final int ENABLE_PROCESSED_INPUT = 0x0001; + private static final int ENABLE_LINE_INPUT = 0x0002; + private static final int ENABLE_ECHO_INPUT = 0x0004; + private static final int ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200; + + private static final int ENABLE_PROCESSED_OUTPUT = 0x0001; + private static final int ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004; + + private static final Linker LINKER = Linker.nativeLinker(); + private static final SymbolLookup KERNEL32 = SymbolLookup.libraryLookup( + "kernel32", Arena.global()); + + private static final MethodHandle GET_STD_HANDLE = downcall("GetStdHandle", + FunctionDescriptor.of(ValueLayout.ADDRESS, ValueLayout.JAVA_LONG)); + private static final MethodHandle GET_CONSOLE_MODE = downcall("GetConsoleMode", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); + private static final MethodHandle SET_CONSOLE_MODE = downcall("SetConsoleMode", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_INT)); + private static final MethodHandle GET_CONSOLE_SCREEN_BUFFER_INFO = downcall( + "GetConsoleScreenBufferInfo", + FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.ADDRESS)); + + private final Arena arena; + private final MemorySegment stdoutHandle; + private final int savedInMode; + private final int savedOutMode; + private final PrintStream out; + private final Thread shutdownHook; + private boolean closed; + + private WindowsTerminal(Arena arena, MemorySegment stdoutHandle, + int savedInMode, int savedOutMode) { + this.arena = arena; + this.stdoutHandle = stdoutHandle; + this.savedInMode = savedInMode; + this.savedOutMode = savedOutMode; + this.out = System.out; + this.shutdownHook = new Thread(this::restore, "windows-term-restore"); + 
Runtime.getRuntime().addShutdownHook(shutdownHook); + } + + /// Enables VT processing on stdout and VT input on stdin. + /// + /// @return open terminal + /// @throws IOException if console handles cannot be obtained or modes set + public static WindowsTerminal open() throws IOException { + Arena arena = Arena.ofShared(); + try { + MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE); + MemorySegment stdout = invokeHandle(GET_STD_HANDLE, STD_OUTPUT_HANDLE); + int inMode = readMode(arena, stdin); + int outMode = readMode(arena, stdout); + + int newIn = (inMode & ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT + | ENABLE_PROCESSED_INPUT)) | ENABLE_VIRTUAL_TERMINAL_INPUT; + int newOut = outMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING | ENABLE_PROCESSED_OUTPUT; + + if ((int) SET_CONSOLE_MODE.invokeExact(stdin, newIn) == 0) { + throw new IOException("SetConsoleMode(stdin) failed"); + } + if ((int) SET_CONSOLE_MODE.invokeExact(stdout, newOut) == 0) { + throw new IOException("SetConsoleMode(stdout) failed"); + } + + WindowsTerminal term = new WindowsTerminal(arena, stdout, inMode, outMode); + term.out.print(Ansi.ENTER_ALT_SCREEN); + term.out.print(Ansi.HIDE_CURSOR); + term.out.print(Ansi.CLEAR_SCREEN); + term.out.flush(); + return term; + } catch (Throwable t) { + arena.close(); + if (t instanceof IOException io) { + throw io; + } + throw new IOException(t); + } + } + + @Override + public Size size() { + // CONSOLE_SCREEN_BUFFER_INFO is 22 bytes: COORD(4) size, COORD(4) cursor, + // WORD(2) attrs, SMALL_RECT(8) window, COORD(4) max. We only need window. 
+ MemorySegment info = arena.allocate(22); + try { + int rc = (int) GET_CONSOLE_SCREEN_BUFFER_INFO.invokeExact(stdoutHandle, info); + if (rc == 0) { + return new Size(24, 80); + } + } catch (Throwable t) { + return new Size(24, 80); + } + int left = info.get(ValueLayout.JAVA_SHORT, 10); + int top = info.get(ValueLayout.JAVA_SHORT, 12); + int right = info.get(ValueLayout.JAVA_SHORT, 14); + int bottom = info.get(ValueLayout.JAVA_SHORT, 16); + int rows = bottom - top + 1; + int cols = right - left + 1; + if (rows <= 0 || cols <= 0) { + return new Size(24, 80); + } + return new Size(rows, cols); + } + + @Override + public void write(String s) { + out.print(s); + } + + @Override + public void flush() { + out.flush(); + } + + @Override + public Key readKey() throws IOException { + return KeyDecoder.next(System.in); + } + + @Override + public void close() { + if (closed) { + return; + } + closed = true; + try { + Runtime.getRuntime().removeShutdownHook(shutdownHook); + } catch (IllegalStateException ignored) { + // JVM already shutting down. + } + restore(); + arena.close(); + } + + private void restore() { + try { + out.print(Ansi.SHOW_CURSOR); + out.print(Ansi.EXIT_ALT_SCREEN); + out.print(Ansi.RESET); + out.flush(); + MemorySegment stdin = invokeHandle(GET_STD_HANDLE, STD_INPUT_HANDLE); + SET_CONSOLE_MODE.invokeExact(stdin, savedInMode); + SET_CONSOLE_MODE.invokeExact(stdoutHandle, savedOutMode); + } catch (Throwable ignored) { + // Best-effort: JVM is exiting; nothing useful to do. 
+ } + } + + private static int readMode(Arena arena, MemorySegment handle) throws Throwable { + MemorySegment slot = arena.allocate(4); + if ((int) GET_CONSOLE_MODE.invokeExact(handle, slot) == 0) { + throw new IOException("GetConsoleMode failed"); + } + return slot.get(ValueLayout.JAVA_INT, 0); + } + + private static MemorySegment invokeHandle(MethodHandle mh, long stdHandle) throws Throwable { + return (MemorySegment) mh.invokeExact(stdHandle); + } + + private static MethodHandle downcall(String name, FunctionDescriptor desc) { + return LINKER.downcallHandle( + KERNEL32.find(name).orElseThrow(() -> new UnsatisfiedLinkError(name)), + desc); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java new file mode 100644 index 00000000..7ab3c4c2 --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/AnsiTest.java @@ -0,0 +1,42 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +class AnsiTest { + + private static final char ESC = (char) 0x1B; + + @Test + void escConstant_isAsciiEscapeByte() { + // Given / When / Then — every CSI sequence relies on this being 0x1B + assertThat(Ansi.ESC).isEqualTo(String.valueOf(ESC)); + } + + @Test + void moveTo_formatsRowAndColumn() { + // Given / When / Then + assertThat(Ansi.moveTo(5, 12)).isEqualTo(ESC + "[5;12H"); + } + + @Test + void fgAndBg_emitSgrCode() { + // Given / When / Then + assertThat(Ansi.fg(31)).isEqualTo(ESC + "[31m"); + assertThat(Ansi.bg(42)).isEqualTo(ESC + "[42m"); + } + + @Test + void clearAndCursorConstants_startWithCsi() { + // Given / When / Then — guard against accidental edits dropping the ESC prefix + String csi = ESC + "["; + assertThat(Ansi.CLEAR_SCREEN).startsWith(csi).endsWith("2J"); + assertThat(Ansi.CURSOR_HOME).startsWith(csi).endsWith("H"); + 
assertThat(Ansi.HIDE_CURSOR).isEqualTo(csi + "?25l"); + assertThat(Ansi.SHOW_CURSOR).isEqualTo(csi + "?25h"); + assertThat(Ansi.ENTER_ALT_SCREEN).isEqualTo(csi + "?1049h"); + assertThat(Ansi.EXIT_ALT_SCREEN).isEqualTo(csi + "?1049l"); + assertThat(Ansi.RESET).isEqualTo(csi + "0m"); + } +} diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java new file mode 100644 index 00000000..69009f5a --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/KeyDecoderTest.java @@ -0,0 +1,128 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import static org.assertj.core.api.Assertions.assertThat; + +class KeyDecoderTest { + + @Test + void next_arrowUp_decodesCsiA() throws IOException { + // Given + ByteArrayInputStream in = bytes(0x1B, '[', 'A'); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.ArrowUp.INSTANCE); + } + + @Test + void next_allArrows_decodeIndependently() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'A'))).isEqualTo(Key.ArrowUp.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'B'))).isEqualTo(Key.ArrowDown.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'C'))).isEqualTo(Key.ArrowRight.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'D'))).isEqualTo(Key.ArrowLeft.INSTANCE); + } + + @Test + void next_homeAndEnd_decodeBothCsiAndTildeForms() throws IOException { + // Given / When / Then — xterm sends ESC[H/F; rxvt and others send ESC[1~/4~ + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'H'))).isEqualTo(Key.Home.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', 'F'))).isEqualTo(Key.End.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', '1', '~'))).isEqualTo(Key.Home.INSTANCE); + 
assertThat(KeyDecoder.next(bytes(0x1B, '[', '4', '~'))).isEqualTo(Key.End.INSTANCE); + } + + @Test + void next_pageUpAndDown_decodeTildeSequences() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes(0x1B, '[', '5', '~'))).isEqualTo(Key.PageUp.INSTANCE); + assertThat(KeyDecoder.next(bytes(0x1B, '[', '6', '~'))).isEqualTo(Key.PageDown.INSTANCE); + } + + @Test + void next_bareEscape_returnsEscapeWhenNoFollowupAvailable() throws IOException { + // Given — single ESC byte with no further input + ByteArrayInputStream in = bytes(0x1B); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.Escape.INSTANCE); + } + + @Test + void next_enterFromCrAndLf_bothDecodeToEnter() throws IOException { + // Given / When / Then + assertThat(KeyDecoder.next(bytes('\r'))).isEqualTo(Key.Enter.INSTANCE); + assertThat(KeyDecoder.next(bytes('\n'))).isEqualTo(Key.Enter.INSTANCE); + } + + @Test + void next_printableChar_returnsChar() throws IOException { + // Given + ByteArrayInputStream in = bytes('q'); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isInstanceOf(Key.Char.class); + assertThat(((Key.Char) sut).value()).isEqualTo('q'); + } + + @Test + void next_eof_returnsEof() throws IOException { + // Given — empty stream + ByteArrayInputStream in = bytes(); + + // When + Key sut = KeyDecoder.next(in); + + // Then + assertThat(sut).isEqualTo(Key.Eof.INSTANCE); + } + + @Test + void next_unknownCsiLetter_yieldsEscape() throws IOException { + // Given — ESC [ Z is xterm reverse-tab; we don't recognise it + ByteArrayInputStream in = bytes(0x1B, '[', 'Z'); + + // When + Key sut = KeyDecoder.next(in); + + // Then — defensive: never emit garbage as Char on an unknown CSI + assertThat(sut).isEqualTo(Key.Escape.INSTANCE); + } + + @Test + void next_multiDigitTildeCode_handlesTwoDigits() throws IOException { + // Given — ESC [ 15 ~ is xterm F5; we treat unknown numbers as Escape but + // must still consume the 
trailing '~' rather than leak it as a character + ByteArrayInputStream in = bytes(0x1B, '[', '1', '5', '~', 'x'); + + // When + Key first = KeyDecoder.next(in); + Key second = KeyDecoder.next(in); + + // Then + assertThat(first).isEqualTo(Key.Escape.INSTANCE); + assertThat(second).isInstanceOf(Key.Char.class); + assertThat(((Key.Char) second).value()).isEqualTo('x'); + } + + private static ByteArrayInputStream bytes(int... bs) { + byte[] out = new byte[bs.length]; + for (int i = 0; i < bs.length; i++) { + out[i] = (byte) bs[i]; + } + return new ByteArrayInputStream(out); + } +} diff --git a/pom.xml b/pom.xml index a0588eb1..1f96b9b6 100644 --- a/pom.xml +++ b/pom.xml @@ -62,7 +62,6 @@ 4.3.0 1.0.0.CR1 1.37 - 3.1.3 2.4.240 6.1.0 @@ -116,11 +115,6 @@ vortex-inspector ${project.version} - - com.googlecode.lanterna - lanterna - ${lanterna.version} - de.siegmar fastcsv From b9b7a5ecfd936107582ec37b2c4d92f4bfe2a4ca Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 18:52:09 +0200 Subject: [PATCH 03/37] fix(cli): print exception class and cause chain on inspect errors The previous catch printed only e.getMessage(), which surfaced as "error: null" whenever the exception had no message (e.g. an IOException constructed from a cause). The new describe() prints the simple class name plus the cause chain so failures are diagnosable without rebuilding. Setting VORTEX_DEBUG=1 still emits the full stack trace. The catch now also covers RuntimeException so unchecked failures during TUI rendering surface the same way. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/cli/InspectCommand.java | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java index b72775c3..45e5cc39 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java @@ -42,12 +42,31 @@ static int run(String[] args) { System.out.print(VortexInspector.inspect(handle)); } return ExitStatus.OK; - } catch (IOException e) { - System.err.println("error: " + e.getMessage()); + } catch (IOException | RuntimeException e) { + System.err.println("error: " + describe(e)); + if (System.getenv("VORTEX_DEBUG") != null) { + e.printStackTrace(System.err); + } return ExitStatus.ERROR; } } + private static String describe(Throwable t) { + StringBuilder sb = new StringBuilder(); + Throwable cur = t; + while (cur != null) { + if (!sb.isEmpty()) { + sb.append(" -> "); + } + sb.append(cur.getClass().getSimpleName()); + if (cur.getMessage() != null) { + sb.append(": ").append(cur.getMessage()); + } + cur = cur.getCause(); + } + return sb.toString(); + } + private static VortexHandle open(String target) throws IOException { if (target.startsWith("http://") || target.startsWith("https://")) { try { From 988c3deee723ae0456d95cc27c1e30acd7e21743 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 18:55:33 +0200 Subject: [PATCH 04/37] feat(cli): split inspect into 'inspect' (text) and 'tui' (interactive) `inspect ` keeps the quick text report; the new `tui ` subcommand opens the interactive viewer. Drops the `--tui` flag from `inspect` - the split matches how the two modes are used in practice and avoids mixing a one-shot output command with one that takes over the terminal. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/cli/InspectCommand.java | 30 ++------ .../io/github/dfa1/vortex/cli/TuiCommand.java | 71 +++++++++++++++++++ .../io/github/dfa1/vortex/cli/VortexCli.java | 4 +- 3 files changed, 79 insertions(+), 26 deletions(-) create mode 100644 cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java index 45e5cc39..5e699f38 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/InspectCommand.java @@ -1,7 +1,6 @@ package io.github.dfa1.vortex.cli; import io.github.dfa1.vortex.inspect.VortexInspector; -import io.github.dfa1.vortex.inspect.VortexInspectorTui; import io.github.dfa1.vortex.io.VortexHandle; import io.github.dfa1.vortex.io.VortexHttpReader; import io.github.dfa1.vortex.io.VortexReader; @@ -18,29 +17,15 @@ private InspectCommand() { } static int run(String[] args) { - boolean tui = false; - String target = null; - for (int i = 1; i < args.length; i++) { - if ("--tui".equals(args[i])) { - tui = true; - } else if (target == null) { - target = args[i]; - } else { - return usage(); - } - } - if (target == null) { - return usage(); + if (args.length != 2) { + System.err.println("usage: inspect "); + return ExitStatus.USAGE_ERROR; } - try (VortexHandle handle = open(target)) { + try (VortexHandle handle = open(args[1])) { if (handle == null) { return ExitStatus.FILE_NOT_FOUND; } - if (tui) { - VortexInspectorTui.show(handle); - } else { - System.out.print(VortexInspector.inspect(handle)); - } + System.out.print(VortexInspector.inspect(handle)); return ExitStatus.OK; } catch (IOException | RuntimeException e) { System.err.println("error: " + describe(e)); @@ -83,9 +68,4 @@ private static VortexHandle open(String target) throws IOException { } return VortexReader.open(path); } - - private static int usage() { - 
System.err.println("usage: inspect [--tui] "); - return ExitStatus.USAGE_ERROR; - } } diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java new file mode 100644 index 00000000..586d4eb9 --- /dev/null +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -0,0 +1,71 @@ +package io.github.dfa1.vortex.cli; + +import io.github.dfa1.vortex.inspect.VortexInspectorTui; +import io.github.dfa1.vortex.io.VortexHandle; +import io.github.dfa1.vortex.io.VortexHttpReader; +import io.github.dfa1.vortex.io.VortexReader; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; + +final class TuiCommand { + + private TuiCommand() { + } + + static int run(String[] args) { + if (args.length != 2) { + System.err.println("usage: tui "); + return ExitStatus.USAGE_ERROR; + } + try (VortexHandle handle = open(args[1])) { + if (handle == null) { + return ExitStatus.FILE_NOT_FOUND; + } + VortexInspectorTui.show(handle); + return ExitStatus.OK; + } catch (IOException | RuntimeException e) { + System.err.println("error: " + describe(e)); + if (System.getenv("VORTEX_DEBUG") != null) { + e.printStackTrace(System.err); + } + return ExitStatus.ERROR; + } + } + + private static VortexHandle open(String target) throws IOException { + if (target.startsWith("http://") || target.startsWith("https://")) { + try { + return VortexHttpReader.open(new URI(target)); + } catch (URISyntaxException e) { + System.err.println("invalid URL: " + target); + return null; + } + } + Path path = Path.of(target); + if (!Files.exists(path)) { + System.err.println("file not found: " + path); + return null; + } + return VortexReader.open(path); + } + + private static String describe(Throwable t) { + StringBuilder sb = new StringBuilder(); + Throwable cur = t; + while (cur != null) { + if (!sb.isEmpty()) { + sb.append(" -> "); + } + 
sb.append(cur.getClass().getSimpleName()); + if (cur.getMessage() != null) { + sb.append(": ").append(cur.getMessage()); + } + cur = cur.getCause(); + } + return sb.toString(); + } +} diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java index 58f307ab..b958489c 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/VortexCli.java @@ -17,6 +17,7 @@ public static void main(String[] args) { } int exit = switch (args[0]) { case "inspect" -> InspectCommand.run(args); + case "tui" -> TuiCommand.run(args); case "export" -> ExportCommand.run(args); case "import" -> ImportCommand.run(args); case "schema" -> SchemaCommand.run(args); @@ -35,7 +36,8 @@ public static void main(String[] args) { static void printUsage(PrintStream out) { out.println("Usage: java -jar vortex.jar [args]"); - out.println(" inspect [--tui] print file structure (or open TUI); url is http(s)://"); + out.println(" inspect print file structure; url is http(s)://"); + out.println(" tui open interactive inspector; url is http(s)://"); out.println(" export write CSV to stdout"); out.println(" import [out.vortex] convert CSV or Parquet to Vortex"); out.println(" schema print dtype (machine-readable)"); From e6a1678984dc3cfdbdf6a70a77ed014ed8b77464 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:01:08 +0200 Subject: [PATCH 05/37] feat(inspector): surface per-array min/max statistics InspectorTree.Node now carries an ArrayStats record decoded from the flat segment's FlatBuffer Array root (the same source the scan reader uses for zone-map pruning). The text renderer aggregates min/max across each column's leaves and prints them after the per-column encoding bracket. The TUI details pane shows the selected node's own min/max under a new 'Stats:' section. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/inspect/InspectorTree.java | 34 +++++++---- .../dfa1/vortex/inspect/VortexInspector.java | 59 +++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 10 ++++ .../vortex/inspect/VortexInspectorTest.java | 59 ++++++++++++++++--- 4 files changed, 143 insertions(+), 19 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index 7ffbdd03..eedfc97a 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -1,10 +1,12 @@ package io.github.dfa1.vortex.inspect; +import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; import io.github.dfa1.vortex.core.SegmentSpec; import io.github.dfa1.vortex.fbs.Array; +import io.github.dfa1.vortex.fbs.ArrayNode; import io.github.dfa1.vortex.io.VortexHandle; import java.lang.foreign.MemorySegment; @@ -62,11 +64,13 @@ public long totalSegmentBytes() { /// @param layout underlying [Layout] from the file footer /// @param fieldName column name when this node is a direct child of a top-level struct /// @param usedEncodings encoding IDs referenced by this subtree + /// @param stats per-array statistics decoded from the segment's FlatBuffer /// @param children child nodes public record Node( Layout layout, Optional fieldName, Set usedEncodings, + ArrayStats stats, List children) { } @@ -87,9 +91,11 @@ public static InspectorTree build(VortexHandle handle) { for (int i = 0; i < root.children().size(); i++) { Node child = root.children().get(i); String name = i < colNames.size() ? 
colNames.get(i) : "col" + i; - namedChildren.add(new Node(child.layout(), Optional.of(name), child.usedEncodings(), child.children())); + namedChildren.add(new Node(child.layout(), Optional.of(name), + child.usedEncodings(), child.stats(), child.children())); } - root = new Node(root.layout(), Optional.empty(), root.usedEncodings(), List.copyOf(namedChildren)); + root = new Node(root.layout(), Optional.empty(), root.usedEncodings(), + root.stats(), List.copyOf(namedChildren)); } return new InspectorTree( @@ -106,16 +112,18 @@ public static InspectorTree build(VortexHandle handle) { private static Node buildNode(Layout layout, Optional fieldName, VortexHandle handle, List arraySpecs, Set overallUsed) { Set localUsed = new LinkedHashSet<>(); + ArrayStats stats = ArrayStats.empty(); if (layout.isFlat() && !layout.segments().isEmpty()) { int segIdx = layout.segments().getFirst(); SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); if (spec.compression().code == 0) { MemorySegment seg = handle.slice(spec.offset(), spec.length()); - String enc = peekRootEncoding(seg, arraySpecs); - if (enc != null) { - localUsed.add(enc); - overallUsed.add(enc); + Peek peek = peekFlatRoot(seg, arraySpecs); + if (peek.encoding() != null) { + localUsed.add(peek.encoding()); + overallUsed.add(peek.encoding()); } + stats = peek.stats(); } } List children = new ArrayList<>(layout.children().size()); @@ -124,19 +132,23 @@ private static Node buildNode(Layout layout, Optional fieldName, VortexH localUsed.addAll(n.usedEncodings()); children.add(n); } - return new Node(layout, fieldName, Set.copyOf(localUsed), List.copyOf(children)); + return new Node(layout, fieldName, Set.copyOf(localUsed), stats, List.copyOf(children)); } - private static String peekRootEncoding(MemorySegment seg, List arraySpecs) { + private static Peek peekFlatRoot(MemorySegment seg, List arraySpecs) { int segLen = (int) seg.byteSize(); ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); int fbLen = 
bb.getInt(segLen - 4); int fbStart = segLen - 4 - fbLen; ByteBuffer fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); Array fbArray = Array.getRootAsArray(fbBuf); - if (fbArray.root() == null) { - return null; + ArrayNode root = fbArray.root(); + if (root == null) { + return new Peek(null, ArrayStats.empty()); } - return arraySpecs.get(fbArray.root().encoding()); + return new Peek(arraySpecs.get(root.encoding()), ArrayStats.fromFbs(root.stats())); + } + + private record Peek(String encoding, ArrayStats stats) { } } diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java index dad55b75..45b12ebc 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.inspect; +import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Layout; import io.github.dfa1.vortex.core.SegmentSpec; @@ -76,6 +77,11 @@ private static void appendLayout(StringBuilder sb, InspectorTree.Node node, Stri if (!child.usedEncodings().isEmpty()) { sb.append(" [").append(String.join(", ", child.usedEncodings())).append("]"); } + ArrayStats agg = aggregateStats(child); + if (agg.min() != null || agg.max() != null) { + sb.append(" min=").append(format(agg.min())) + .append(" max=").append(format(agg.max())); + } sb.append('\n'); } } else { @@ -85,6 +91,59 @@ private static void appendLayout(StringBuilder sb, InspectorTree.Node node, Stri } } + private static ArrayStats aggregateStats(InspectorTree.Node node) { + Object min = node.stats().min(); + Object max = node.stats().max(); + for (InspectorTree.Node child : node.children()) { + ArrayStats cs = aggregateStats(child); + min = pickMin(min, cs.min()); + max = pickMax(max, cs.max()); + } + if (min == null && max == 
null) { + return ArrayStats.empty(); + } + return new ArrayStats(min, max, null, null, null, null); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private static Object pickMin(Object a, Object b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + if (a.getClass() != b.getClass() || !(a instanceof Comparable)) { + return a; + } + return ((Comparable) a).compareTo(b) <= 0 ? a : b; + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + private static Object pickMax(Object a, Object b) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + if (a.getClass() != b.getClass() || !(a instanceof Comparable)) { + return a; + } + return ((Comparable) a).compareTo(b) >= 0 ? a : b; + } + + private static String format(Object v) { + if (v == null) { + return "?"; + } + String s = v.toString(); + if (s.length() > 30) { + return s.substring(0, 27) + "..."; + } + return s; + } + private static void appendLayoutInline(StringBuilder sb, Layout layout) { sb.append(layout.encodingId()).append('(').append(layout.rowCount()).append(" rows)"); if (layout.children().isEmpty()) { diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 04e3a451..5aca120c 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -260,6 +260,16 @@ private List detailLines(InspectorTree.Node node) { lines.add(" - " + enc); } } + if (node.stats().min() != null || node.stats().max() != null) { + lines.add(""); + lines.add("Stats:"); + if (node.stats().min() != null) { + lines.add(" min: " + node.stats().min()); + } + if (node.stats().max() != null) { + lines.add(" max: " + node.stats().max()); + } + } return lines; } diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java 
b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java index 03ecea44..7dd7eff4 100644 --- a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.inspect; +import io.github.dfa1.vortex.core.ArrayStats; import io.github.dfa1.vortex.core.CompressionScheme; import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Layout; @@ -68,7 +69,7 @@ void render_segmentTable_listsEverySegment() { void render_nonStruct_inlinesSingleColumnLayout() { // Given Layout leaf = new Layout("vortex.flat", 100, null, List.of(), List.of()); - InspectorTree.Node root = new InspectorTree.Node(leaf, Optional.empty(), Set.of(), List.of()); + InspectorTree.Node root = new InspectorTree.Node(leaf, Optional.empty(), Set.of(), ArrayStats.empty(), List.of()); InspectorTree sut = new InspectorTree( 1, 256L, new DType.Primitive(PType.I32, false), @@ -106,10 +107,10 @@ void render_chainsChildrenWithArrow() { Layout zoned = new Layout("vortex.stats", 1000, null, List.of(chunked), List.of()); Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(zoned), List.of()); - InspectorTree.Node flatN = new InspectorTree.Node(flat, Optional.empty(), Set.of(), List.of()); - InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.empty(), Set.of(), List.of(flatN)); - InspectorTree.Node zonedN = new InspectorTree.Node(zoned, Optional.of("v"), Set.of(), List.of(chunkedN)); - InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), Set.of(), List.of(zonedN)); + InspectorTree.Node flatN = new InspectorTree.Node(flat, Optional.empty(), Set.of(), ArrayStats.empty(), List.of()); + InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(flatN)); + InspectorTree.Node zonedN = new InspectorTree.Node(zoned, 
Optional.of("v"), Set.of(), ArrayStats.empty(), List.of(chunkedN)); + InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), Set.of(), ArrayStats.empty(), List.of(zonedN)); InspectorTree sut = new InspectorTree( 1, 1024L, @@ -124,6 +125,47 @@ void render_chainsChildrenWithArrow() { assertThat(out).contains("vortex.stats(1000 rows) → vortex.chunked(1000 rows) → vortex.flat(1000 rows)"); } + @Test + void render_aggregatesMinMaxAcrossChunks() { + // Given — column with two chunked Flat leaves; aggregate should fold each leaf's stats + Layout chunk1 = new Layout("vortex.flat", 500, null, List.of(), List.of()); + Layout chunk2 = new Layout("vortex.flat", 500, null, List.of(), List.of()); + Layout chunked = new Layout("vortex.chunked", 1000, null, List.of(chunk1, chunk2), List.of()); + Layout structLayout = new Layout("vortex.struct", 1000, null, List.of(chunked), List.of()); + + InspectorTree.Node c1 = new InspectorTree.Node(chunk1, Optional.empty(), Set.of(), + new ArrayStats(10L, 50L, null, null, null, null), List.of()); + InspectorTree.Node c2 = new InspectorTree.Node(chunk2, Optional.empty(), Set.of(), + new ArrayStats(5L, 100L, null, null, null, null), List.of()); + InspectorTree.Node chunkedN = new InspectorTree.Node(chunked, Optional.of("id"), + Set.of("vortex.flat"), ArrayStats.empty(), List.of(c1, c2)); + InspectorTree.Node rootN = new InspectorTree.Node(structLayout, Optional.empty(), + Set.of("vortex.flat"), ArrayStats.empty(), List.of(chunkedN)); + + InspectorTree sut = new InspectorTree(1, 1024L, + new DType.Struct(List.of("id"), List.of(new DType.Primitive(PType.I64, false)), false), + List.of("vortex.flat"), Set.of(), List.of(), 1000L, rootN); + + // When + String out = VortexInspector.render(sut); + + // Then — min over (10, 5) = 5; max over (50, 100) = 100 + assertThat(out).contains("min=5 max=100"); + } + + @Test + void render_columnWithoutStats_omitsMinMax() { + // Given — default tree has ArrayStats.empty() on every node + 
InspectorTree sut = struct2col(1, 100L, List.of(), Set.of()); + + // When + String out = VortexInspector.render(sut); + + // Then + assertThat(out).doesNotContain("min="); + assertThat(out).doesNotContain("max="); + } + @Test void render_emptyUsedEncodings_omitsBracketSuffix() { // Given — column with no resolved encodings should not emit " []" noise @@ -142,11 +184,12 @@ private static InspectorTree struct2col(int version, long fileSize, List Date: Mon, 8 Jun 2026 19:03:53 +0200 Subject: [PATCH 06/37] ci: run inspector module tests on Windows Adds a windows-latest job that runs `./mvnw test -pl inspector -am` so regressions in the FFM kernel32 bindings surface in CI. Also adds WindowsTerminalSmokeTest which (1) loads WindowsTerminal to force every kernel32 downcallHandle to resolve its symbol (a missing entry point throws UnsatisfiedLinkError during static init) and (2) verifies the input / output console-mode flag math against the values WindowsTerminal applies. The test class is gated on `@EnabledOnOs(OS.WINDOWS)`; it's skipped on the existing Linux job. 
Co-Authored-By: Claude Opus 4.7 --- .github/workflows/ci.yml | 23 +++++++ .../term/WindowsTerminalSmokeTest.java | 63 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1cda504..016a13c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,3 +33,26 @@ jobs: - name: Build and test run: ./mvnw verify + + inspector-windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v6 + + - name: Set up Azul Zulu JDK 25 + uses: actions/setup-java@v5 + with: + distribution: zulu + java-version: '25' + + - name: Cache Maven repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Test inspector module + shell: bash + run: ./mvnw test -pl inspector -am diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java new file mode 100644 index 00000000..3bb9dfbf --- /dev/null +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/term/WindowsTerminalSmokeTest.java @@ -0,0 +1,63 @@ +package io.github.dfa1.vortex.inspect.term; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledOnOs; +import org.junit.jupiter.api.condition.OS; + +import static org.assertj.core.api.Assertions.assertThat; + +/// Smoke test for the FFM-based Windows console binding. +/// +/// Runs only on Windows (other OSes lack kernel32). The goal is to catch +/// missing-symbol / signature-mismatch regressions in CI without requiring +/// a real interactive TTY: +/// +/// - Class load alone forces every {@code Linker.downcallHandle} to resolve +/// its kernel32 symbol. 
A missing entry point throws +/// {@link UnsatisfiedLinkError} during static initialization. +/// - Bit-flag math for the VT mode toggles is verified directly so a typo +/// in a constant fails here, not in a customer's terminal. +class WindowsTerminalSmokeTest { + + @Test + @EnabledOnOs(OS.WINDOWS) + void classLoad_resolvesEveryKernel32Symbol() { + // Given / When — touching the class triggers <clinit>, which calls + // Linker.downcallHandle for every imported kernel32 function. + Class sut = WindowsTerminal.class; + + // Then + assertThat(sut).isNotNull(); + assertThat(sut.getDeclaredMethods()).isNotEmpty(); + } + + @Test + @EnabledOnOs(OS.WINDOWS) + void modeFlagMath_inputModeMasksLineEchoProcessed_andSetsVtInput() { + // Given — typical default cmd.exe input mode: line + echo + processed input enabled + int defaultInMode = 0x0001 | 0x0002 | 0x0004; // PROCESSED | LINE | ECHO + + // When — same transform that WindowsTerminal.open applies + int raw = (defaultInMode & ~(0x0002 | 0x0004 | 0x0001)) | 0x0200; + + // Then — line / echo / processed cleared, VT input set + assertThat(raw & 0x0002).isZero(); + assertThat(raw & 0x0004).isZero(); + assertThat(raw & 0x0001).isZero(); + assertThat(raw & 0x0200).isEqualTo(0x0200); + } + + @Test + @EnabledOnOs(OS.WINDOWS) + void modeFlagMath_outputModeAddsVtProcessing() { + // Given — default output mode + int defaultOutMode = 0x0001; // PROCESSED_OUTPUT only + + // When + int withVt = defaultOutMode | 0x0004 | 0x0001; + + // Then + assertThat(withVt & 0x0004).isEqualTo(0x0004); + assertThat(withVt & 0x0001).isEqualTo(0x0001); + } +} From aaabdd3d56e0e23cb1bba4abfc43808ae691a36b Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:09:20 +0200 Subject: [PATCH 07/37] feat(inspector): progress callback during tree build; bar in tui CLI Building the inspector tree calls handle.slice() once per Flat segment to peek the encoding id and per-array stats.
On VortexHttpReader that slice triggers a separate HTTP range request, so on a remote file with dozens of segments the TUI sits idle for several seconds before the screen appears. Adds InspectorTree.Progress (functional interface, NOOP default) and an InspectorTree.build(handle, progress) overload that fires (current, total) on each peek. VortexInspectorTui.show gains a matching overload. TuiCommand wires a stderr progress bar so the delay is visible. The single-arg variants are kept for callers that don't want a callback. Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/cli/TuiCommand.java | 25 ++++++++- .../dfa1/vortex/inspect/InspectorTree.java | 55 ++++++++++++++++++- .../vortex/inspect/VortexInspectorTui.java | 13 ++++- .../vortex/inspect/InspectorTreeTest.java | 42 ++++++++++++++ 4 files changed, 130 insertions(+), 5 deletions(-) diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java index 586d4eb9..e020d844 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -1,11 +1,13 @@ package io.github.dfa1.vortex.cli; +import io.github.dfa1.vortex.inspect.InspectorTree; import io.github.dfa1.vortex.inspect.VortexInspectorTui; import io.github.dfa1.vortex.io.VortexHandle; import io.github.dfa1.vortex.io.VortexHttpReader; import io.github.dfa1.vortex.io.VortexReader; import java.io.IOException; +import java.io.PrintStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.file.Files; @@ -25,7 +27,7 @@ static int run(String[] args) { if (handle == null) { return ExitStatus.FILE_NOT_FOUND; } - VortexInspectorTui.show(handle); + VortexInspectorTui.show(handle, progressBar(System.err)); return ExitStatus.OK; } catch (IOException | RuntimeException e) { System.err.println("error: " + describe(e)); @@ -53,6 +55,27 @@ private static VortexHandle open(String target) throws 
IOException { return VortexReader.open(path); } + private static InspectorTree.Progress progressBar(PrintStream out) { + int width = 30; + return (current, total) -> { + if (total <= 0) { + return; + } + int filled = (int) ((long) current * width / total); + StringBuilder bar = new StringBuilder(width + 32); + bar.append('\r').append("Loading metadata ["); + for (int i = 0; i < width; i++) { + bar.append(i < filled ? '#' : '-'); + } + bar.append("] ").append(current).append('/').append(total); + if (current == total) { + bar.append('\n'); + } + out.print(bar); + out.flush(); + }; + } + private static String describe(Throwable t) { StringBuilder sb = new StringBuilder(); Throwable cur = t; diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index eedfc97a..3748efdd 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -79,13 +79,28 @@ public record Node( /// @param handle open file handle /// @return immutable inspector tree public static InspectorTree build(VortexHandle handle) { + return build(handle, Progress.NOOP); + } + + /// Builds an inspector tree from an open Vortex file handle, reporting + /// progress on each Flat-segment peek (which on remote-storage handles + /// triggers a separate HTTP range request). + /// + /// @param handle open file handle + /// @param progress progress sink receiving {@code (current, total)} after each segment peek + /// @return immutable inspector tree + public static InspectorTree build(VortexHandle handle, Progress progress) { Footer footer = handle.footer(); Layout layout = handle.layout(); DType dtype = handle.dtype(); + int total = countPeekableSegments(layout, footer); + int[] counter = {0}; + List colNames = (dtype instanceof DType.Struct s) ? 
s.fieldNames() : List.of(); Set overallUsed = new LinkedHashSet<>(); - Node root = buildNode(layout, Optional.empty(), handle, footer.arraySpecs(), overallUsed); + Node root = buildNode(layout, Optional.empty(), handle, footer.arraySpecs(), + overallUsed, progress, counter, total); if (layout.isStruct()) { List namedChildren = new ArrayList<>(root.children().size()); for (int i = 0; i < root.children().size(); i++) { @@ -110,7 +125,8 @@ public static InspectorTree build(VortexHandle handle) { } private static Node buildNode(Layout layout, Optional fieldName, VortexHandle handle, - List arraySpecs, Set overallUsed) { + List arraySpecs, Set overallUsed, + Progress progress, int[] counter, int total) { Set localUsed = new LinkedHashSet<>(); ArrayStats stats = ArrayStats.empty(); if (layout.isFlat() && !layout.segments().isEmpty()) { @@ -124,17 +140,50 @@ private static Node buildNode(Layout layout, Optional fieldName, VortexH overallUsed.add(peek.encoding()); } stats = peek.stats(); + counter[0]++; + progress.update(counter[0], total); } } List children = new ArrayList<>(layout.children().size()); for (Layout child : layout.children()) { - Node n = buildNode(child, Optional.empty(), handle, arraySpecs, overallUsed); + Node n = buildNode(child, Optional.empty(), handle, arraySpecs, overallUsed, + progress, counter, total); localUsed.addAll(n.usedEncodings()); children.add(n); } return new Node(layout, fieldName, Set.copyOf(localUsed), stats, List.copyOf(children)); } + private static int countPeekableSegments(Layout layout, Footer footer) { + int n = 0; + if (layout.isFlat() && !layout.segments().isEmpty()) { + SegmentSpec spec = footer.segmentSpecs().get(layout.segments().getFirst()); + if (spec.compression().code == 0) { + n++; + } + } + for (Layout child : layout.children()) { + n += countPeekableSegments(child, footer); + } + return n; + } + + /// Callback used by [#build(VortexHandle, Progress)] to report how many + /// flat segments have been peeked so far. 
Implementations may render a + /// progress bar, log, or ignore (see [#NOOP]). + @FunctionalInterface + public interface Progress { + /// Sink that discards updates. + Progress NOOP = (current, total) -> { + }; + + /// Reports progress. + /// + /// @param current number of segments peeked so far + /// @param total total peekable segments in the file + void update(int current, int total); + } + private static Peek peekFlatRoot(MemorySegment seg, List arraySpecs) { int segLen = (int) seg.byteSize(); ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 5aca120c..79c9dce0 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -29,7 +29,18 @@ private VortexInspectorTui() { /// @param handle open Vortex file handle /// @throws IOException if the terminal cannot be initialized public static void show(VortexHandle handle) throws IOException { - InspectorTree tree = InspectorTree.build(handle); + show(handle, InspectorTree.Progress.NOOP); + } + + /// Builds an inspector tree (reporting progress on each segment peek) + /// and runs the interactive viewer until quit. Useful for remote files + /// where {@link InspectorTree#build} can take seconds. 
+ /// + /// @param handle open Vortex file handle + /// @param progress progress sink, called once per Flat segment peeked + /// @throws IOException if the terminal cannot be initialized + public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException { + InspectorTree tree = InspectorTree.build(handle, progress); try (RawTerminal term = RawTerminal.open()) { new Loop(term, tree).run(); } diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java index acf32453..a0d2d149 100644 --- a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java @@ -131,6 +131,48 @@ void build_carriesVersionAndFileSize() { assertThat(sut.fileSize()).isEqualTo(123_456L); } + @Test + void build_reportsProgressOncePerPeekedSegment() { + // Given — struct of three Flat columns whose segments are all compressed (skipped). + // Only uncompressed leaves trigger peekFlatRoot, so nothing is peeked here and + // progress must never fire.
+ Layout c1 = new Layout("vortex.flat", 0, null, List.of(), List.of(0)); + Layout c2 = new Layout("vortex.flat", 0, null, List.of(), List.of(1)); + Layout c3 = new Layout("vortex.flat", 0, null, List.of(), List.of(2)); + Layout root = struct(0, List.of(c1, c2, c3)); + DType dtype = new DType.Struct(List.of("a", "b", "c"), + List.of(new DType.Primitive(PType.I32, false), + new DType.Primitive(PType.I32, false), + new DType.Primitive(PType.I32, false)), + false); + List segs = List.of( + new SegmentSpec(0, 1024, (byte) 0, CompressionScheme.ZSTD), // skipped + new SegmentSpec(1024, 1024, (byte) 0, CompressionScheme.LZ4), // skipped + new SegmentSpec(2048, 1024, (byte) 0, CompressionScheme.LZ4)); // skipped + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + java.util.List reports = new java.util.ArrayList<>(); + + // When + InspectorTree.build(handle, (cur, tot) -> reports.add(new int[]{cur, tot})); + + // Then — all three are compressed, so no peeks fire; progress never called + assertThat(reports).isEmpty(); + } + + @Test + void build_progressNoop_isAcceptedAndProducesSameTree() { + // Given + Layout root = struct(0, List.of(leaf("vortex.constant", 0))); + DType dtype = new DType.Struct(List.of("c"), + List.of(new DType.Primitive(PType.I32, false)), false); + givenHandle(dtype, root, List.of("vortex.constant"), List.of()); + + // When / Then — NOOP passes; no NPE + InspectorTree sut = InspectorTree.build(handle, InspectorTree.Progress.NOOP); + assertThat(sut.root().children()).hasSize(1); + } + @Test void build_flatChildWithCompressedSegment_skipsRootEncodingPeek() { // Given — peekRootEncoding() reads the segment as a FlatBuffer; compressed segments From 029762b2922e881fb7f58c4995de810813a5ecab Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:11:19 +0200 Subject: [PATCH 08/37] feat(inspector): xxd-style hex preview in TUI details pane When a Flat node is selected, the details pane now shows the first 256 bytes of its first segment 
alongside encoding / stats / segment metadata. Output mirrors xxd: 8-digit hex offset, 16 hex bytes split in two groups of 8, plus a printable-ASCII column. Bytes are fetched on demand via VortexHandle.slice and cached per node so repeated re-renders on the same selection don't re-trigger an HTTP range request on remote files. Slice failures degrade silently to "no hex preview" rather than crashing the loop. Co-Authored-By: Claude Opus 4.7 --- .../vortex/inspect/VortexInspectorTui.java | 75 ++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 79c9dce0..b2d7a115 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -8,9 +8,12 @@ import io.github.dfa1.vortex.io.VortexHandle; import java.io.IOException; +import java.lang.foreign.MemorySegment; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; /// Interactive viewer for a Vortex file's inspector tree, drawn with raw ANSI @@ -42,20 +45,27 @@ public static void show(VortexHandle handle) throws IOException { public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException { InspectorTree tree = InspectorTree.build(handle, progress); try (RawTerminal term = RawTerminal.open()) { - new Loop(term, tree).run(); + new Loop(term, tree, handle).run(); } } private static final class Loop { + /// Bytes to display per Flat segment in the hex pane. 256 lines up to + /// 16 rows of 16, which fits comfortably under the existing details. 
+ private static final int HEX_PREVIEW_BYTES = 256; + private final RawTerminal term; private final InspectorTree tree; + private final VortexHandle handle; private final Set expanded = new HashSet<>(); + private final Map hexCache = new HashMap<>(); private int selected; private int scrollOffset; - Loop(RawTerminal term, InspectorTree tree) { + Loop(RawTerminal term, InspectorTree tree, VortexHandle handle) { this.term = term; this.tree = tree; + this.handle = handle; this.expanded.add(tree.root()); } @@ -281,9 +291,70 @@ private List detailLines(InspectorTree.Node node) { lines.add(" max: " + node.stats().max()); } } + if (layout.isFlat() && !layout.segments().isEmpty()) { + byte[] preview = loadHexPreview(node); + if (preview.length > 0) { + lines.add(""); + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + lines.add("Hex (first " + preview.length + " B of segment " + + segIdx + ", total " + formatBytes(spec.length()) + "):"); + for (int off = 0; off < preview.length; off += 16) { + lines.add(formatHexRow(preview, off)); + } + } + } return lines; } + private byte[] loadHexPreview(InspectorTree.Node node) { + return hexCache.computeIfAbsent(node, n -> { + Layout layout = n.layout(); + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + int wanted = (int) Math.min((long) HEX_PREVIEW_BYTES, spec.length()); + if (wanted <= 0) { + return new byte[0]; + } + try { + MemorySegment seg = handle.slice(spec.offset(), wanted); + byte[] buf = new byte[wanted]; + MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); + return buf; + } catch (RuntimeException e) { + return new byte[0]; + } + }); + } + + private static String formatHexRow(byte[] data, int offset) { + StringBuilder sb = new StringBuilder(80); + sb.append(String.format("%08x ", offset)); + for (int i = 0; i < 16; i++) { + int idx = offset + i; + if (idx < data.length) { + sb.append(String.format("%02x ", 
data[idx] & 0xff)); + } else { + sb.append(" "); + } + if (i == 7) { + sb.append(' '); + } + } + sb.append(" |"); + for (int i = 0; i < 16; i++) { + int idx = offset + i; + if (idx >= data.length) { + sb.append(' '); + continue; + } + int b = data[idx] & 0xff; + sb.append(b >= 0x20 && b < 0x7f ? (char) b : '.'); + } + sb.append('|'); + return sb.toString(); + } + private record Item(InspectorTree.Node node, int depth) { } From f552a8c80af1d441181c70af30e58916c13aa429 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:19:06 +0200 Subject: [PATCH 09/37] feat(inspector): lazy TUI build + decoded data preview Opens the TUI instantly even on remote files by switching to InspectorTree.buildShallow(handle), which derives the layout tree from the footer alone (no slice calls). Encoding id, per-array stats, and a data preview are now fetched per node on demand the first time the user selects it, then cached for free re-renders. Adds: - InspectorTree.buildShallow(VortexHandle) - structure-only build - InspectorTree.Peek (public record) and InspectorTree.peek(Node, VortexHandle) for one-shot lazy resolution of encoding + stats - VortexInspectorTui now invokes a small scan (limit 32, projected to the selected node's owning column) and formats the resulting Array via a pattern switch on the Array sealed hierarchy. Raw hex remains as a fallback when the selected node isn't inside any column. The existing eager InspectorTree.build(handle, progress) path is kept for the text-mode `inspect` command (and the test suite), so the only behaviour change for non-TUI consumers is the new Peek type. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/inspect/InspectorTree.java | 80 +++++++++- .../vortex/inspect/VortexInspectorTui.java | 150 +++++++++++++++--- .../vortex/inspect/InspectorTreeTest.java | 71 +++++++++ 3 files changed, 280 insertions(+), 21 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index 3748efdd..ca4fae4d 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -82,6 +82,75 @@ public static InspectorTree build(VortexHandle handle) { return build(handle, Progress.NOOP); } + /// Builds an inspector tree without peeking segments — every node starts + /// with an empty encoding set and {@link ArrayStats#empty()} stats. The + /// resulting tree contains only structure derived from the file's footer + /// and layout, so the call is essentially free on remote handles. + /// + /// Use with {@link #peek(Node, VortexHandle)} for lazy on-demand resolution. + /// + /// @param handle open file handle + /// @return immutable shallow inspector tree + public static InspectorTree buildShallow(VortexHandle handle) { + Footer footer = handle.footer(); + Layout layout = handle.layout(); + DType dtype = handle.dtype(); + List colNames = (dtype instanceof DType.Struct s) ? s.fieldNames() : List.of(); + Node root = shallowNode(layout, Optional.empty()); + if (layout.isStruct()) { + List named = new ArrayList<>(root.children().size()); + for (int i = 0; i < root.children().size(); i++) { + Node child = root.children().get(i); + String name = i < colNames.size() ? 
colNames.get(i) : "col" + i; + named.add(new Node(child.layout(), Optional.of(name), + Set.of(), ArrayStats.empty(), child.children())); + } + root = new Node(root.layout(), Optional.empty(), Set.of(), + ArrayStats.empty(), List.copyOf(named)); + } + return new InspectorTree( + handle.version(), + handle.fileSize(), + dtype, + footer.arraySpecs(), + Set.of(), + footer.segmentSpecs(), + layout.rowCount(), + root); + } + + private static Node shallowNode(Layout layout, Optional fieldName) { + List children = new ArrayList<>(layout.children().size()); + for (Layout child : layout.children()) { + children.add(shallowNode(child, Optional.empty())); + } + return new Node(layout, fieldName, Set.of(), ArrayStats.empty(), List.copyOf(children)); + } + + /// Resolves encoding id + stats for one Flat node by reading its first + /// segment. Returns [Peek#EMPTY] for non-Flat nodes, segments under + /// compression, or missing data. + /// + /// Callers should cache the result — every call triggers a fresh + /// {@code handle.slice()}, which is a network round-trip on remote handles. + /// + /// @param node node to resolve + /// @param handle open file handle + /// @return peek result; never {@code null} + public static Peek peek(Node node, VortexHandle handle) { + Layout layout = node.layout(); + if (!layout.isFlat() || layout.segments().isEmpty()) { + return Peek.EMPTY; + } + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = handle.footer().segmentSpecs().get(segIdx); + if (spec.compression().code != 0) { + return Peek.EMPTY; + } + MemorySegment seg = handle.slice(spec.offset(), spec.length()); + return peekFlatRoot(seg, handle.footer().arraySpecs()); + } + /// Builds an inspector tree from an open Vortex file handle, reporting /// progress on each Flat-segment peek (which on remote-storage handles /// triggers a separate HTTP range request). 
@@ -198,6 +267,15 @@ private static Peek peekFlatRoot(MemorySegment seg, List arraySpecs) { return new Peek(arraySpecs.get(root.encoding()), ArrayStats.fromFbs(root.stats())); } - private record Peek(String encoding, ArrayStats stats) { + /// Result of a single Flat segment peek - the resolved encoding id (or + /// {@code null} when the FlatBuffer carried no root) plus the per-array + /// statistics decoded from the same FlatBuffer. + /// + /// @param encoding resolved encoding id from the array spec table, or {@code null} + /// @param stats per-array stats, or {@link ArrayStats#empty()} if unknown + public record Peek(String encoding, ArrayStats stats) { + /// Sentinel returned for non-Flat nodes, compressed segments, or + /// segments that don't carry an array root. + public static final Peek EMPTY = new Peek(null, ArrayStats.empty()); } } diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index b2d7a115..2276964d 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -2,10 +2,22 @@ import io.github.dfa1.vortex.core.Layout; import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.core.array.Array; +import io.github.dfa1.vortex.core.array.BoolArray; +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.DoubleArray; +import io.github.dfa1.vortex.core.array.FloatArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import io.github.dfa1.vortex.core.array.ShortArray; +import io.github.dfa1.vortex.core.array.VarBinArray; import io.github.dfa1.vortex.inspect.term.Ansi; import io.github.dfa1.vortex.inspect.term.Key; import io.github.dfa1.vortex.inspect.term.RawTerminal; import io.github.dfa1.vortex.io.VortexHandle; 
+import io.github.dfa1.vortex.scan.Chunk; +import io.github.dfa1.vortex.scan.ScanIterator; +import io.github.dfa1.vortex.scan.ScanOptions; import java.io.IOException; import java.lang.foreign.MemorySegment; @@ -36,29 +48,37 @@ public static void show(VortexHandle handle) throws IOException { } /// Builds an inspector tree (reporting progress on each segment peek) - /// and runs the interactive viewer until quit. Useful for remote files - /// where {@link InspectorTree#build} can take seconds. + /// and runs the interactive viewer until quit. The TUI now uses the + /// shallow builder so the screen is interactive immediately; encoding, + /// stats and data previews are fetched lazily as the user navigates. + /// The {@code progress} parameter is retained for source compatibility + /// but is no longer invoked - shallow build does no peeks. /// /// @param handle open Vortex file handle - /// @param progress progress sink, called once per Flat segment peeked + /// @param progress unused; kept for API stability /// @throws IOException if the terminal cannot be initialized public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException { - InspectorTree tree = InspectorTree.build(handle, progress); + InspectorTree tree = InspectorTree.buildShallow(handle); try (RawTerminal term = RawTerminal.open()) { new Loop(term, tree, handle).run(); } } private static final class Loop { - /// Bytes to display per Flat segment in the hex pane. 256 lines up to - /// 16 rows of 16, which fits comfortably under the existing details. + /// Bytes shown per Flat segment when falling back to the raw hex view. private static final int HEX_PREVIEW_BYTES = 256; + /// Decoded values shown per column in the data view. 
+ private static final int DATA_PREVIEW_ROWS = 32; + private final RawTerminal term; private final InspectorTree tree; private final VortexHandle handle; private final Set expanded = new HashSet<>(); + private final Map peekCache = new HashMap<>(); private final Map hexCache = new HashMap<>(); + private final Map> dataCache = new HashMap<>(); + private final Map columnOf = new HashMap<>(); private int selected; private int scrollOffset; @@ -67,6 +87,33 @@ private static final class Loop { this.tree = tree; this.handle = handle; this.expanded.add(tree.root()); + indexColumns(tree.root()); + } + + private void indexColumns(InspectorTree.Node root) { + if (!root.layout().isStruct()) { + return; + } + for (InspectorTree.Node colNode : root.children()) { + colNode.fieldName().ifPresent(name -> tagSubtree(colNode, name)); + } + } + + private void tagSubtree(InspectorTree.Node node, String columnName) { + columnOf.put(node, columnName); + for (InspectorTree.Node child : node.children()) { + tagSubtree(child, columnName); + } + } + + private InspectorTree.Peek peek(InspectorTree.Node node) { + return peekCache.computeIfAbsent(node, n -> { + try { + return InspectorTree.peek(n, handle); + } catch (RuntimeException e) { + return InspectorTree.Peek.EMPTY; + } + }); } void run() throws IOException { @@ -254,8 +301,13 @@ private void drawDetails(StringBuilder buf, InspectorTree.Node node, private List detailLines(InspectorTree.Node node) { List lines = new ArrayList<>(); Layout layout = node.layout(); - lines.add("Encoding: " + layout.encodingId()); + InspectorTree.Peek p = peek(node); + lines.add("Encoding: " + (p.encoding() != null ? 
p.encoding() : layout.encodingId())); node.fieldName().ifPresent(name -> lines.add("Field: " + name)); + String col = columnOf.get(node); + if (col != null && !node.fieldName().isPresent()) { + lines.add("Column: " + col); + } lines.add("Rows: " + layout.rowCount()); lines.add("Children: " + layout.children().size()); if (!layout.segments().isEmpty()) { @@ -274,24 +326,26 @@ private List detailLines(InspectorTree.Node node) { } else { lines.add("Segments: 0"); } - if (!node.usedEncodings().isEmpty()) { - lines.add(""); - lines.add("Used encodings:"); - for (String enc : node.usedEncodings()) { - lines.add(" - " + enc); - } - } - if (node.stats().min() != null || node.stats().max() != null) { + if (p.stats().min() != null || p.stats().max() != null) { lines.add(""); lines.add("Stats:"); - if (node.stats().min() != null) { - lines.add(" min: " + node.stats().min()); + if (p.stats().min() != null) { + lines.add(" min: " + p.stats().min()); } - if (node.stats().max() != null) { - lines.add(" max: " + node.stats().max()); + if (p.stats().max() != null) { + lines.add(" max: " + p.stats().max()); } } - if (layout.isFlat() && !layout.segments().isEmpty()) { + if (col != null) { + List values = loadDataPreview(col); + if (!values.isEmpty()) { + lines.add(""); + lines.add("Data (column '" + col + "', first " + values.size() + " rows):"); + for (int i = 0; i < values.size(); i++) { + lines.add(String.format(" [%2d] %s", i, values.get(i))); + } + } + } else if (layout.isFlat() && !layout.segments().isEmpty()) { byte[] preview = loadHexPreview(node); if (preview.length > 0) { lines.add(""); @@ -307,6 +361,62 @@ private List detailLines(InspectorTree.Node node) { return lines; } + private List loadDataPreview(String columnName) { + return dataCache.computeIfAbsent(columnName, name -> { + try { + ScanOptions opts = ScanOptions.columns(name).withLimit(DATA_PREVIEW_ROWS); + try (ScanIterator it = handle.scan(opts)) { + if (!it.hasNext()) { + return List.of(); + } + try (Chunk chunk 
= it.next()) { + Array array = chunk.columns().get(name); + if (array == null) { + return List.of(); + } + int n = (int) Math.min(array.length(), DATA_PREVIEW_ROWS); + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + out.add(formatValue(array, i)); + } + return List.copyOf(out); + } + } + } catch (RuntimeException e) { + return List.of(); + } + }); + } + + private static String formatValue(Array array, int i) { + return switch (array) { + case LongArray a -> Long.toString(a.getLong(i)); + case IntArray a -> Integer.toString(a.getInt(i)); + case ShortArray a -> Short.toString(a.getShort(i)); + case ByteArray a -> Byte.toString(a.getByte(i)); + case DoubleArray a -> Double.toString(a.getDouble(i)); + case FloatArray a -> Float.toString(a.getFloat(i)); + case BoolArray a -> Boolean.toString(a.getBoolean(i)); + case VarBinArray a -> a.dtype() instanceof io.github.dfa1.vortex.core.DType.Utf8 + ? "\"" + a.getString(i) + "\"" + : bytesToShortHex(a.getBytes(i)); + default -> "<" + array.getClass().getSimpleName() + ">"; + }; + } + + private static String bytesToShortHex(byte[] bytes) { + int n = Math.min(bytes.length, 16); + StringBuilder sb = new StringBuilder(n * 3 + 2); + sb.append("0x"); + for (int i = 0; i < n; i++) { + sb.append(String.format("%02x", bytes[i] & 0xff)); + } + if (bytes.length > n) { + sb.append("..."); + } + return sb.toString(); + } + private byte[] loadHexPreview(InspectorTree.Node node) { return hexCache.computeIfAbsent(node, n -> { Layout layout = n.layout(); diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java index a0d2d149..cf4398aa 100644 --- a/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/InspectorTreeTest.java @@ -13,6 +13,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import java.util.List; +import java.util.Set; 
import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.BDDMockito.given; @@ -173,6 +174,76 @@ void build_progressNoop_isAcceptedAndProducesSameTree() { assertThat(sut.root().children()).hasSize(1); } + @Test + void buildShallow_skipsAllSlicesAndStillNamesColumns() { + // Given — shallow build is the path the TUI uses; it must touch zero segment + // bytes (so opening a remote file is instant) yet still populate fieldName on + // top-level struct children. + Layout col0 = new Layout("vortex.flat", 10, null, List.of(), List.of(0)); + Layout col1 = new Layout("vortex.flat", 10, null, List.of(), List.of(1)); + Layout root = struct(10, List.of(col0, col1)); + DType dtype = new DType.Struct(List.of("id", "value"), + List.of(new DType.Primitive(PType.I64, false), + new DType.Primitive(PType.F64, false)), + false); + List segs = List.of( + new SegmentSpec(0, 64, (byte) 0, CompressionScheme.NONE), + new SegmentSpec(64, 64, (byte) 0, CompressionScheme.NONE)); + givenHandle(dtype, root, List.of("vortex.flat"), segs); + + // When + InspectorTree sut = InspectorTree.buildShallow(handle); + + // Then — column names assigned, but no peek fired so stats / usedEncodings empty + assertThat(sut.root().children().get(0).fieldName()).contains("id"); + assertThat(sut.root().children().get(1).fieldName()).contains("value"); + assertThat(sut.usedEncodings()).isEmpty(); + assertThat(sut.root().children().get(0).usedEncodings()).isEmpty(); + assertThat(sut.root().children().get(0).stats()).isEqualTo(io.github.dfa1.vortex.core.ArrayStats.empty()); + // Slice is reserved for lazy peek; shallow build must never call it + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + + @Test + void peek_nonFlatNode_returnsEmptyWithoutSlicing() { + // Given — peek is the lazy hook the TUI uses on the selected node. 
Non-Flat + // layouts (struct, chunked, stats wrappers) carry no array root and must short + // out without slicing, so navigating to them doesn't hit the network. + Layout structLayout = struct(0, List.of()); + InspectorTree.Node node = new InspectorTree.Node(structLayout, java.util.Optional.empty(), + Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of()); + + // When + InspectorTree.Peek result = InspectorTree.peek(node, handle); + + // Then + assertThat(result).isSameAs(InspectorTree.Peek.EMPTY); + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + + @Test + void peek_compressedFlatSegment_returnsEmptyWithoutSlicing() { + // Given — compressed segments would need the encoding to decompress before + // their FlatBuffer can be parsed; peek skips them rather than slicing garbage. + Layout flat = new Layout("vortex.flat", 10, null, List.of(), List.of(0)); + InspectorTree.Node node = new InspectorTree.Node(flat, java.util.Optional.empty(), + Set.of(), io.github.dfa1.vortex.core.ArrayStats.empty(), List.of()); + given(handle.footer()).willReturn(new io.github.dfa1.vortex.core.Footer( + List.of("vortex.flat"), List.of(), + List.of(new SegmentSpec(0, 100, (byte) 0, CompressionScheme.ZSTD)), + List.of())); + + // When + InspectorTree.Peek result = InspectorTree.peek(node, handle); + + // Then + assertThat(result).isSameAs(InspectorTree.Peek.EMPTY); + org.mockito.Mockito.verify(handle, org.mockito.Mockito.never()).slice( + org.mockito.Mockito.anyLong(), org.mockito.Mockito.anyLong()); + } + @Test void build_flatChildWithCompressedSegment_skipsRootEncodingPeek() { // Given — peekRootEncoding() reads the segment as a FlatBuffer; compressed segments From 31749834da022dac6faaf358de33a3f5f9646c08 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:25:13 +0200 Subject: [PATCH 10/37] feat(inspector): non-blocking data fetch with spinner + status line 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Data previews now load on virtual threads, so navigating doesn't stall the input loop on each new column. A small ASCII spinner shows next to "Data (column 'X'): | loading..." in the details pane while the fetch is in flight; once a virtual thread completes, the cache entry flips to Loaded and the next render shows the values. Failed fetches surface as "! " followed by the error message in the same slot. The main loop now polls via RawTerminal.readKey(timeoutMs) every 80 ms, so the spinner animates and completed fetches paint as soon as they land — no need for the user to press a key. A new status row sits between the body and the keybinding footer: green "ready" when idle, blue spinner plus "I/O N pending" while fetches are in flight, red "! " followed by the last error, which stays sticky. Top-level struct columns are pre-fetched in the Loop constructor so the user can scroll through them with cache hits rather than cold misses. Co-Authored-By: Claude Opus 4.7 --- .../vortex/inspect/VortexInspectorTui.java | 168 ++++++++++++++---- .../vortex/inspect/term/PosixTerminal.java | 18 ++ .../dfa1/vortex/inspect/term/RawTerminal.java | 9 + .../vortex/inspect/term/WindowsTerminal.java | 18 ++ 4 files changed, 183 insertions(+), 30 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 2276964d..a4071828 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -26,7 +26,10 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /// Interactive viewer for a Vortex file's inspector tree, drawn with raw ANSI /// escapes — no library 
dependency. @@ -71,14 +74,24 @@ private static final class Loop { /// Decoded values shown per column in the data view. private static final int DATA_PREVIEW_ROWS = 32; + /// Render cadence while idle — drives spinner animation and reaping of + /// background fetches so updates land even when the user isn't typing. + private static final long POLL_INTERVAL_MS = 80; + + /// ASCII spinner frames; cycled by render tick. + private static final char[] SPINNER = {'|', '/', '-', '\\'}; + private final RawTerminal term; private final InspectorTree tree; private final VortexHandle handle; private final Set expanded = new HashSet<>(); private final Map peekCache = new HashMap<>(); private final Map hexCache = new HashMap<>(); - private final Map> dataCache = new HashMap<>(); + private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); private final Map columnOf = new HashMap<>(); + private volatile int pendingLoads; + private volatile String lastError; + private long tick; private int selected; private int scrollOffset; @@ -88,6 +101,16 @@ private static final class Loop { this.handle = handle; this.expanded.add(tree.root()); indexColumns(tree.root()); + prefetchTopColumns(); + } + + private void prefetchTopColumns() { + if (!tree.root().layout().isStruct()) { + return; + } + for (InspectorTree.Node col : tree.root().children()) { + col.fieldName().ifPresent(this::startDataLoad); + } } private void indexColumns(InspectorTree.Node root) { @@ -126,11 +149,17 @@ void run() throws IOException { selected = 0; } render(items); - Key key = term.readKey(); + Optional maybeKey = term.readKey(POLL_INTERVAL_MS); + if (maybeKey.isEmpty()) { + tick++; + continue; + } + Key key = maybeKey.get(); if (isQuit(key)) { return; } handleKey(key, items); + tick++; } } @@ -203,7 +232,7 @@ private void render(List items) throws IOException { int height = size.rows(); int leftWidth = Math.max(20, width / 2); int bodyTop = 2; - int bodyBottom = height - 1; + int bodyBottom = height - 2; int 
bodyHeight = bodyBottom - bodyTop; if (selected < scrollOffset) { @@ -221,12 +250,35 @@ private void render(List items) throws IOException { drawDetails(buf, items.get(selected).node(), leftWidth + 2, bodyTop, width - leftWidth - 2, bodyHeight); } + drawStatus(buf, width, height - 1); drawFooter(buf, width, height); buf.append(Ansi.moveTo(height, 1)); term.write(buf.toString()); term.flush(); } + private void drawStatus(StringBuilder buf, int width, int row) { + int loads = pendingLoads; + String err = lastError; + String text; + int bg; + if (err != null) { + text = " ! " + err; + bg = 41; // red + } else if (loads > 0) { + text = " " + SPINNER[(int) (tick % SPINNER.length)] + + " I/O " + loads + " pending"; + bg = 44; // blue + } else { + text = " ready"; + bg = 42; // green + } + buf.append(Ansi.moveTo(row, 1)); + buf.append(Ansi.bg(bg)).append(Ansi.fg(30)); + buf.append(pad(text, width)); + buf.append(Ansi.RESET); + } + private void drawHeader(StringBuilder buf, int width) { String header = " vortex-inspect — v" + tree.version() + " " + formatBytes(tree.fileSize()) @@ -337,12 +389,19 @@ private List detailLines(InspectorTree.Node node) { } } if (col != null) { - List values = loadDataPreview(col); - if (!values.isEmpty()) { - lines.add(""); - lines.add("Data (column '" + col + "', first " + values.size() + " rows):"); - for (int i = 0; i < values.size(); i++) { - lines.add(String.format(" [%2d] %s", i, values.get(i))); + DataState state = loadDataPreview(col); + lines.add(""); + switch (state) { + case DataState.Pending ignored -> + lines.add("Data (column '" + col + "'): " + + SPINNER[(int) (tick % SPINNER.length)] + " loading..."); + case DataState.Failed(String msg) -> + lines.add("Data (column '" + col + "'): ! 
" + msg); + case DataState.Loaded(List values) -> { + lines.add("Data (column '" + col + "', first " + values.size() + " rows):"); + for (int i = 0; i < values.size(); i++) { + lines.add(String.format(" [%2d] %s", i, values.get(i))); + } } } } else if (layout.isFlat() && !layout.segments().isEmpty()) { @@ -361,31 +420,80 @@ private List detailLines(InspectorTree.Node node) { return lines; } - private List loadDataPreview(String columnName) { - return dataCache.computeIfAbsent(columnName, name -> { - try { - ScanOptions opts = ScanOptions.columns(name).withLimit(DATA_PREVIEW_ROWS); - try (ScanIterator it = handle.scan(opts)) { - if (!it.hasNext()) { - return List.of(); + private DataState loadDataPreview(String columnName) { + DataState existing = dataCache.get(columnName); + if (existing != null) { + return existing; + } + startDataLoad(columnName); + return dataCache.getOrDefault(columnName, DataState.PENDING); + } + + private void startDataLoad(String columnName) { + if (dataCache.putIfAbsent(columnName, DataState.PENDING) != null) { + return; + } + pendingLoads++; + Thread.ofVirtual().name("tui-data-" + columnName).start(() -> runDataLoad(columnName)); + } + + private void runDataLoad(String columnName) { + try { + ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS); + try (ScanIterator it = handle.scan(opts)) { + if (!it.hasNext()) { + dataCache.put(columnName, new DataState.Loaded(List.of())); + return; + } + try (Chunk chunk = it.next()) { + Array array = chunk.columns().get(columnName); + if (array == null) { + dataCache.put(columnName, new DataState.Loaded(List.of())); + return; } - try (Chunk chunk = it.next()) { - Array array = chunk.columns().get(name); - if (array == null) { - return List.of(); - } - int n = (int) Math.min(array.length(), DATA_PREVIEW_ROWS); - List out = new ArrayList<>(n); - for (int i = 0; i < n; i++) { - out.add(formatValue(array, i)); - } - return List.copyOf(out); + int n = (int) Math.min(array.length(), 
DATA_PREVIEW_ROWS); + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + out.add(formatValue(array, i)); } + dataCache.put(columnName, new DataState.Loaded(List.copyOf(out))); } - } catch (RuntimeException e) { - return List.of(); } - }); + } catch (RuntimeException e) { + dataCache.put(columnName, new DataState.Failed(messageOf(e))); + lastError = columnName + ": " + messageOf(e); + } finally { + pendingLoads--; + } + } + + private static String messageOf(Throwable t) { + String m = t.getMessage(); + return m != null ? m : t.getClass().getSimpleName(); + } + + /// Per-column data fetch state — pending while a virtual thread is + /// fetching, loaded with values once decoded, failed with a message + /// on error. Sealed so callers can pattern-match exhaustively. + sealed interface DataState { + /// Singleton state for a fetch in flight. + DataState PENDING = new Pending(); + + /// In-flight fetch. + record Pending() implements DataState { + } + + /// Completed fetch with decoded values. + /// + /// @param values formatted first rows of the column + record Loaded(List values) implements DataState { + } + + /// Failed fetch carrying a short error description. 
+ /// + /// @param message short error string + record Failed(String message) implements DataState { + } } private static String formatValue(Array array, int i) { diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java index bbdb9231..13bd5217 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.PrintStream; +import java.util.Optional; import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; @@ -128,6 +129,23 @@ public Key readKey() throws IOException { return KeyDecoder.next(System.in); } + @Override + public Optional readKey(long timeoutMs) throws IOException { + long deadline = System.nanoTime() + timeoutMs * 1_000_000L; + while (System.in.available() == 0) { + if (System.nanoTime() >= deadline) { + return Optional.empty(); + } + try { + Thread.sleep(20); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return Optional.empty(); + } + } + return Optional.of(KeyDecoder.next(System.in)); + } + @Override public void close() { if (closed) { diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java index 24098807..9e15c602 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.inspect.term; import java.io.IOException; +import java.util.Optional; /// Direct, dependency-free terminal abstraction. 
/// @@ -56,6 +57,14 @@ static RawTerminal open() throws IOException { /// @throws IOException if reading fails Key readKey() throws IOException; + /// Reads a key with a wall-clock deadline. Returns {@link Optional#empty()} + /// if the timeout elapses before any input is available. + /// + /// @param timeoutMs maximum time to wait, in milliseconds + /// @return the decoded key, or empty on timeout + /// @throws IOException if reading fails + Optional readKey(long timeoutMs) throws IOException; + /// Restores the original terminal mode and exits the alternate screen. /// /// Idempotent - safe to call multiple times. diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java index a35da22d..ca2850db 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.PrintStream; +import java.util.Optional; import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; @@ -143,6 +144,23 @@ public Key readKey() throws IOException { return KeyDecoder.next(System.in); } + @Override + public Optional readKey(long timeoutMs) throws IOException { + long deadline = System.nanoTime() + timeoutMs * 1_000_000L; + while (System.in.available() == 0) { + if (System.nanoTime() >= deadline) { + return Optional.empty(); + } + try { + Thread.sleep(20); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return Optional.empty(); + } + } + return Optional.of(KeyDecoder.next(System.in)); + } + @Override public void close() { if (closed) { From db3dd61b4859f6204739cf656c6dd61ea6df35de Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:38:31 +0200 Subject: [PATCH 11/37] fix(inspector): pin TUI I/O to handle's 
owning thread via IoWorker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit moved data fetches onto virtual threads, but VortexReader/VortexHttpReader use Arena.ofConfined() — every slice() and scan() must run on the thread that opened the handle. Virtual threads tripped the FFM scope check ("Attempted access outside owning thread"), so every column showed up as a Failed entry. Adds a single-threaded IoWorker that: - opens the VortexHandle on its own thread (via runAndAwait at startup) so the confined Arena is owned by the worker - executes every subsequent peek / hex slice / scan submitted by the TUI on that same thread - exposes pending() so the status row can show "I/O N pending" without the Loop tracking its own counter The render thread now never touches the handle directly. Peek and hex preview switched from synchronous to fire-and-forget submit; the detail pane shows the spinner until the first result lands. TuiCommand opens, runs, and closes the handle entirely through the worker. 
Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/cli/TuiCommand.java | 41 +++++- .../github/dfa1/vortex/inspect/IoWorker.java | 103 ++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 132 +++++++++++++----- 3 files changed, 239 insertions(+), 37 deletions(-) create mode 100644 inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java index e020d844..602ae982 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.cli; import io.github.dfa1.vortex.inspect.InspectorTree; +import io.github.dfa1.vortex.inspect.IoWorker; import io.github.dfa1.vortex.inspect.VortexInspectorTui; import io.github.dfa1.vortex.io.VortexHandle; import io.github.dfa1.vortex.io.VortexHttpReader; @@ -12,6 +13,7 @@ import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicReference; final class TuiCommand { @@ -23,13 +25,21 @@ static int run(String[] args) { System.err.println("usage: tui "); return ExitStatus.USAGE_ERROR; } - try (VortexHandle handle = open(args[1])) { + try (IoWorker worker = new IoWorker("vortex-tui-io")) { + VortexHandle handle = openOnWorker(worker, args[1]); if (handle == null) { return ExitStatus.FILE_NOT_FOUND; } - VortexInspectorTui.show(handle, progressBar(System.err)); + try { + VortexInspectorTui.show(handle, worker, progressBar(System.err)); + } finally { + closeOnWorker(worker, handle); + } return ExitStatus.OK; - } catch (IOException | RuntimeException e) { + } catch (IOException | RuntimeException | InterruptedException e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } System.err.println("error: " + describe(e)); if (System.getenv("VORTEX_DEBUG") != null) { 
e.printStackTrace(System.err); @@ -38,6 +48,31 @@ static int run(String[] args) { } } + private static VortexHandle openOnWorker(IoWorker worker, String target) + throws InterruptedException, IOException { + AtomicReference handle = new AtomicReference<>(); + AtomicReference failure = new AtomicReference<>(); + worker.runAndAwait(() -> { + try { + handle.set(open(target)); + } catch (IOException e) { + failure.set(e); + } + }); + if (failure.get() != null) { + throw failure.get(); + } + return handle.get(); + } + + private static void closeOnWorker(IoWorker worker, VortexHandle handle) { + try { + worker.runAndAwait(handle::close); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + private static VortexHandle open(String target) throws IOException { if (target.startsWith("http://") || target.startsWith("https://")) { try { diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java new file mode 100644 index 00000000..87580501 --- /dev/null +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/IoWorker.java @@ -0,0 +1,103 @@ +package io.github.dfa1.vortex.inspect; + +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; + +/// Single-threaded I/O executor that owns one {@link io.github.dfa1.vortex.io.VortexHandle}. +/// +/// Vortex readers use a confined {@link java.lang.foreign.Arena}, so every +/// {@code slice()} / {@code scan()} call must happen on the same thread that +/// opened the file. The TUI dispatches all such calls to this worker so the +/// render loop on the main thread never crosses the arena's owning thread. +/// +/// {@link #pending()} drives the status-line counter; callers should check it +/// when computing UI state. 
+public final class IoWorker implements AutoCloseable { + + private final BlockingQueue queue = new LinkedBlockingQueue<>(); + private final Thread thread; + private final AtomicInteger pending = new AtomicInteger(); + private volatile boolean closed; + + /// Creates and starts the worker thread. + /// + /// @param name thread name + public IoWorker(String name) { + this.thread = new Thread(this::loop, name); + this.thread.setDaemon(true); + this.thread.start(); + } + + /// Submits a task to run on the worker thread. Returns immediately. + /// + /// @param task task that performs I/O and updates shared state + public void submit(Runnable task) { + if (closed) { + return; + } + pending.incrementAndGet(); + queue.offer(() -> { + try { + task.run(); + } finally { + pending.decrementAndGet(); + } + }); + } + + /// Runs a task on the worker thread and waits for it to complete. + /// Used at startup to open the handle on the worker's owning thread. + /// + /// @param task task to execute + /// @throws InterruptedException if the calling thread is interrupted while waiting + public void runAndAwait(Runnable task) throws InterruptedException { + Object signal = new Object(); + boolean[] done = {false}; + submit(() -> { + try { + task.run(); + } finally { + synchronized (signal) { + done[0] = true; + signal.notifyAll(); + } + } + }); + synchronized (signal) { + while (!done[0]) { + signal.wait(); + } + } + } + + /// Number of submitted tasks that have not yet finished. + /// + /// @return pending count, including the currently running task + public int pending() { + return pending.get(); + } + + private void loop() { + while (!closed) { + Runnable task; + try { + task = queue.take(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; + } + try { + task.run(); + } catch (RuntimeException ignored) { + // Task is expected to capture its own failures into shared state. 
+ } + } + } + + @Override + public void close() { + closed = true; + thread.interrupt(); + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index a4071828..1ab25488 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -61,9 +61,27 @@ public static void show(VortexHandle handle) throws IOException { /// @param progress unused; kept for API stability /// @throws IOException if the terminal cannot be initialized public static void show(VortexHandle handle, InspectorTree.Progress progress) throws IOException { + show(handle, null, progress); + } + + /// Variant that dispatches every {@code handle} I/O call onto the supplied + /// {@link IoWorker}. Required when the handle was opened on a different + /// thread (Vortex readers use a confined {@link java.lang.foreign.Arena}, + /// so cross-thread access throws {@code WrongThreadException}). + /// + /// Passing {@code null} for {@code worker} falls back to synchronous I/O + /// on the render thread — fine for tests but causes the sluggishness this + /// machinery was built to avoid. 
+ /// + /// @param handle open Vortex file handle + /// @param worker I/O dispatcher that owns the handle's thread; may be {@code null} + /// @param progress unused; kept for API stability + /// @throws IOException if the terminal cannot be initialized + public static void show(VortexHandle handle, IoWorker worker, InspectorTree.Progress progress) + throws IOException { InspectorTree tree = InspectorTree.buildShallow(handle); try (RawTerminal term = RawTerminal.open()) { - new Loop(term, tree, handle).run(); + new Loop(term, tree, handle, worker).run(); } } @@ -84,21 +102,24 @@ private static final class Loop { private final RawTerminal term; private final InspectorTree tree; private final VortexHandle handle; + private final IoWorker worker; private final Set expanded = new HashSet<>(); - private final Map peekCache = new HashMap<>(); - private final Map hexCache = new HashMap<>(); + private final ConcurrentMap peekCache = new ConcurrentHashMap<>(); + private final Set peekInFlight = ConcurrentHashMap.newKeySet(); + private final ConcurrentMap hexCache = new ConcurrentHashMap<>(); + private final Set hexInFlight = ConcurrentHashMap.newKeySet(); private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); private final Map columnOf = new HashMap<>(); - private volatile int pendingLoads; private volatile String lastError; private long tick; private int selected; private int scrollOffset; - Loop(RawTerminal term, InspectorTree tree, VortexHandle handle) { + Loop(RawTerminal term, InspectorTree tree, VortexHandle handle, IoWorker worker) { this.term = term; this.tree = tree; this.handle = handle; + this.worker = worker; this.expanded.add(tree.root()); indexColumns(tree.root()); prefetchTopColumns(); @@ -130,13 +151,34 @@ private void tagSubtree(InspectorTree.Node node, String columnName) { } private InspectorTree.Peek peek(InspectorTree.Node node) { - return peekCache.computeIfAbsent(node, n -> { - try { - return InspectorTree.peek(n, handle); - } catch 
(RuntimeException e) { - return InspectorTree.Peek.EMPTY; - } - }); + InspectorTree.Peek cached = peekCache.get(node); + if (cached != null) { + return cached; + } + if (worker == null) { + InspectorTree.Peek p = safePeek(node); + peekCache.put(node, p); + return p; + } + if (peekInFlight.add(node)) { + worker.submit(() -> { + try { + peekCache.put(node, safePeek(node)); + } finally { + peekInFlight.remove(node); + } + }); + } + return InspectorTree.Peek.EMPTY; + } + + private InspectorTree.Peek safePeek(InspectorTree.Node node) { + try { + return InspectorTree.peek(node, handle); + } catch (RuntimeException e) { + lastError = "peek: " + messageOf(e); + return InspectorTree.Peek.EMPTY; + } } void run() throws IOException { @@ -258,7 +300,7 @@ private void render(List items) throws IOException { } private void drawStatus(StringBuilder buf, int width, int row) { - int loads = pendingLoads; + int loads = worker == null ? 0 : worker.pending(); String err = lastError; String text; int bg; @@ -433,8 +475,11 @@ private void startDataLoad(String columnName) { if (dataCache.putIfAbsent(columnName, DataState.PENDING) != null) { return; } - pendingLoads++; - Thread.ofVirtual().name("tui-data-" + columnName).start(() -> runDataLoad(columnName)); + if (worker == null) { + runDataLoad(columnName); + return; + } + worker.submit(() -> runDataLoad(columnName)); } private void runDataLoad(String columnName) { @@ -462,8 +507,6 @@ private void runDataLoad(String columnName) { } catch (RuntimeException e) { dataCache.put(columnName, new DataState.Failed(messageOf(e))); lastError = columnName + ": " + messageOf(e); - } finally { - pendingLoads--; } } @@ -526,23 +569,44 @@ private static String bytesToShortHex(byte[] bytes) { } private byte[] loadHexPreview(InspectorTree.Node node) { - return hexCache.computeIfAbsent(node, n -> { - Layout layout = n.layout(); - int segIdx = layout.segments().getFirst(); - SegmentSpec spec = tree.segmentSpecs().get(segIdx); - int wanted = (int) 
Math.min((long) HEX_PREVIEW_BYTES, spec.length()); - if (wanted <= 0) { - return new byte[0]; - } - try { - MemorySegment seg = handle.slice(spec.offset(), wanted); - byte[] buf = new byte[wanted]; - MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); - return buf; - } catch (RuntimeException e) { - return new byte[0]; - } - }); + byte[] cached = hexCache.get(node); + if (cached != null) { + return cached; + } + if (worker == null) { + byte[] bytes = fetchHex(node); + hexCache.put(node, bytes); + return bytes; + } + if (hexInFlight.add(node)) { + worker.submit(() -> { + try { + hexCache.put(node, fetchHex(node)); + } finally { + hexInFlight.remove(node); + } + }); + } + return new byte[0]; + } + + private byte[] fetchHex(InspectorTree.Node node) { + Layout layout = node.layout(); + int segIdx = layout.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + int wanted = (int) Math.min((long) HEX_PREVIEW_BYTES, spec.length()); + if (wanted <= 0) { + return new byte[0]; + } + try { + MemorySegment seg = handle.slice(spec.offset(), wanted); + byte[] buf = new byte[wanted]; + MemorySegment.copy(seg, 0, MemorySegment.ofArray(buf), 0, wanted); + return buf; + } catch (RuntimeException e) { + lastError = "hex: " + messageOf(e); + return new byte[0]; + } } private static String formatHexRow(byte[] data, int offset) { From 97ddfdf733bed91641ea0d7a469e463eddb0c10b Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:48:26 +0200 Subject: [PATCH 12/37] fix(inspector): drop withLimit on TUI data scan; rejects GenericArray ScanIterator.truncateArray throws "limit: truncation not supported for GenericArray" for the array shape used by decimal_byte_parts (and likely other fallback dtypes). The TUI's data preview was asking for withLimit(32) on every column, which made decimal columns fail outright with that message landing in the status row. 
Since the slicing happens inside the format loop anyway (Math.min(array.length(), DATA_PREVIEW_ROWS)) and chunks are the natural granularity of a Vortex scan, the withLimit call wasn't actually saving any work. Removing it sidesteps the reader bug without losing functionality. Also makes the default formatValue branch include the dtype so GenericArray cells render as "<GenericArray dtype>" rather than an opaque "<GenericArray>". Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/inspect/VortexInspectorTui.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 1ab25488..1c038392 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -484,7 +484,10 @@ private void startDataLoad(String columnName) { private void runDataLoad(String columnName) { try { - ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS); + // No withLimit: ScanIterator.truncateArray rejects GenericArray + // (decimal_byte_parts columns), and chunks are the granularity + // anyway. We slice to DATA_PREVIEW_ROWS in the format loop below. + ScanOptions opts = ScanOptions.columns(columnName); try (ScanIterator it = handle.scan(opts)) { if (!it.hasNext()) { dataCache.put(columnName, new DataState.Loaded(List.of())); @@ -551,7 +554,7 @@ private static String formatValue(Array array, int i) { case VarBinArray a -> a.dtype() instanceof io.github.dfa1.vortex.core.DType.Utf8 ? 
"\"" + a.getString(i) + "\"" : bytesToShortHex(a.getBytes(i)); - default -> "<" + array.getClass().getSimpleName() + ">"; + default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">"; }; } From bd8b4e748e5c9add2bd2a283713cf1ebf608d986 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:55:09 +0200 Subject: [PATCH 13/37] fix(scan): support GenericArray in ScanIterator.truncateArray; format decimals in TUI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ScanIterator.truncateArray` previously threw "limit: truncation not supported for GenericArray" whenever ScanOptions.withLimit was used on a column decoded into a GenericArray (decimal, ext, datetimeparts, constant). The new branch calls `GenericArray.withLength(rows)`, which reuses the same buffers and children — safe because callers already bound their reads by `length()`. GenericArray gains a small public surface (`withLength`, `bufferCount`, `bufferAt`, `childCount`) so renderers can introspect the underlying buffer without reaching for the package-private accessor. The TUI uses the new accessors to decode Decimal cells properly: read the little-endian two's-complement mantissa from the single buffer at width derived from `precision`, then format via BigDecimal with `scale`. Other GenericArray-shaped cells still fall back to the "" placeholder. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 43 ++++++++++ .../vortex/core/array/GenericArrayTest.java | 84 +++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 51 +++++++++-- .../github/dfa1/vortex/scan/ScanIterator.java | 2 + 4 files changed, 175 insertions(+), 5 deletions(-) create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index d319637c..2a174832 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -48,10 +48,53 @@ public long length() { return length; } + /// Returns a view of this array clamped to {@code newLength} logical rows. + /// Buffers and children are reused as-is; callers are expected to respect + /// {@link #length()} when reading. Used by the scan iterator to honour + /// {@code ScanOptions.limit} for dtypes that don't have a typed array. + /// + /// @param newLength desired logical length; must be {@code <= length()} + /// @return a new {@code GenericArray} sharing this array's buffers and children + /// @throws IllegalArgumentException if {@code newLength} exceeds the current length + public GenericArray withLength(long newLength) { + if (newLength < 0 || newLength > length) { + throw new IllegalArgumentException( + "newLength " + newLength + " out of range [0," + length + "]"); + } + if (newLength == length) { + return this; + } + return new GenericArray(dtype, newLength, buffers, children); + } + MemorySegment buffer(int i) { return buffers[i]; } + /// Returns the number of raw memory buffers backing this array. + /// + /// @return buffer count + public int bufferCount() { + return buffers.length; + } + + /// Returns the raw buffer at position {@code i}. 
Used by callers that need + /// to inspect encoded bytes when no typed accessor exists for the dtype + /// (e.g. the TUI inspector decoding {@code Decimal} cells). + /// + /// @param i buffer index + /// @return the underlying {@link MemorySegment} + public MemorySegment bufferAt(int i) { + return buffers[i]; + } + + /// Returns the number of child arrays. + /// + /// @return child count + public int childCount() { + return children.length; + } + /// Returns the child array at position {@code i}. /// /// @param i child index diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java new file mode 100644 index 00000000..493a2d5e --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -0,0 +1,84 @@ +package io.github.dfa1.vortex.core.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class GenericArrayTest { + + private static final DType DTYPE = new DType.Primitive(PType.I64, false); + + @Test + void withLength_shorterLength_returnsClampedView() { + // Given — full-size array of 10 elements + try (Arena arena = Arena.ofConfined()) { + MemorySegment seg = arena.allocate(80); + GenericArray sut = new GenericArray(DTYPE, 10, seg); + + // When + GenericArray clamped = sut.withLength(4); + + // Then — length reflects new bound; buffer is reused (no copy) + assertThat(clamped.length()).isEqualTo(4); + assertThat(clamped.dtype()).isEqualTo(DTYPE); + } + } + + @Test + void withLength_sameLength_returnsSameInstance() { + // Given + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 10, arena.allocate(80)); + + // When 
/ Then — no-op short-circuits to avoid wrapper allocation + assertThat(sut.withLength(10)).isSameAs(sut); + } + } + + @Test + void withLength_zero_returnsEmptyView() { + // Given — boundary case: truncating to zero must still produce a valid + // GenericArray (length() == 0) rather than throw + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 5, arena.allocate(40)); + + // When + GenericArray clamped = sut.withLength(0); + + // Then + assertThat(clamped.length()).isZero(); + } + } + + @Test + void withLength_greaterThanCurrent_throws() { + // Given — protects against silently extending past the backing buffer + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24)); + + // When / Then + assertThatThrownBy(() -> sut.withLength(4)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("out of range"); + } + } + + @Test + void withLength_negative_throws() { + // Given + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 3, arena.allocate(24)); + + // When / Then + assertThatThrownBy(() -> sut.withLength(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("out of range"); + } + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 1c038392..a38eb78d 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -2,11 +2,13 @@ import io.github.dfa1.vortex.core.Layout; import io.github.dfa1.vortex.core.SegmentSpec; +import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.array.Array; import io.github.dfa1.vortex.core.array.BoolArray; import io.github.dfa1.vortex.core.array.ByteArray; import io.github.dfa1.vortex.core.array.DoubleArray; import 
io.github.dfa1.vortex.core.array.FloatArray; +import io.github.dfa1.vortex.core.array.GenericArray; import io.github.dfa1.vortex.core.array.IntArray; import io.github.dfa1.vortex.core.array.LongArray; import io.github.dfa1.vortex.core.array.ShortArray; @@ -484,10 +486,7 @@ private void startDataLoad(String columnName) { private void runDataLoad(String columnName) { try { - // No withLimit: ScanIterator.truncateArray rejects GenericArray - // (decimal_byte_parts columns), and chunks are the granularity - // anyway. We slice to DATA_PREVIEW_ROWS in the format loop below. - ScanOptions opts = ScanOptions.columns(columnName); + ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS); try (ScanIterator it = handle.scan(opts)) { if (!it.hasNext()) { dataCache.put(columnName, new DataState.Loaded(List.of())); @@ -551,13 +550,55 @@ private static String formatValue(Array array, int i) { case DoubleArray a -> Double.toString(a.getDouble(i)); case FloatArray a -> Float.toString(a.getFloat(i)); case BoolArray a -> Boolean.toString(a.getBoolean(i)); - case VarBinArray a -> a.dtype() instanceof io.github.dfa1.vortex.core.DType.Utf8 + case VarBinArray a -> a.dtype() instanceof DType.Utf8 ? 
"\"" + a.getString(i) + "\"" : bytesToShortHex(a.getBytes(i)); + case GenericArray a when a.dtype() instanceof DType.Decimal d + && a.bufferCount() == 1 -> formatDecimal(a, i, d); default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">"; }; } + private static String formatDecimal(GenericArray a, int i, DType.Decimal d) { + int byteWidth = decimalByteWidth(d.precision()); + java.lang.foreign.MemorySegment buf = a.bufferAt(0); + long offset = (long) i * byteWidth; + try { + java.math.BigInteger mantissa = readSignedInt(buf, offset, byteWidth); + java.math.BigDecimal value = new java.math.BigDecimal(mantissa, d.scale()); + return value.toPlainString(); + } catch (RuntimeException e) { + return ""; + } + } + + private static int decimalByteWidth(int precision) { + if (precision <= 2) { + return 1; + } + if (precision <= 4) { + return 2; + } + if (precision <= 9) { + return 4; + } + if (precision <= 18) { + return 8; + } + return 16; + } + + private static java.math.BigInteger readSignedInt(java.lang.foreign.MemorySegment buf, + long offset, int byteWidth) { + // Little-endian two's-complement; mirror Java arithmetic by building the + // bytes in big-endian order before handing them to BigInteger. 
+ byte[] be = new byte[byteWidth]; + for (int k = 0; k < byteWidth; k++) { + be[byteWidth - 1 - k] = buf.get(java.lang.foreign.ValueLayout.JAVA_BYTE, offset + k); + } + return new java.math.BigInteger(be); + } + private static String bytesToShortHex(byte[] bytes) { int n = Math.min(bytes.length, 16); StringBuilder sb = new StringBuilder(n * 3 + 2); diff --git a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java index 235f7840..e96897ef 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/scan/ScanIterator.java @@ -13,6 +13,7 @@ import io.github.dfa1.vortex.core.array.DoubleArray; import io.github.dfa1.vortex.core.array.EmptyArray; import io.github.dfa1.vortex.core.array.FloatArray; +import io.github.dfa1.vortex.core.array.GenericArray; import io.github.dfa1.vortex.core.array.IntArray; import io.github.dfa1.vortex.core.array.LongArray; import io.github.dfa1.vortex.core.array.MaskedArray; @@ -265,6 +266,7 @@ private static Array truncateArray(Array arr, long rows) { yield new MaskedArray(truncChild, truncValidity); } case EmptyArray a -> a; + case GenericArray a -> a.withLength(rows); default -> throw new VortexException("limit: truncation not supported for " + arr.getClass().getSimpleName()); }; From 341aa9b5edf3d1c443856c092ce7b310efcf079b Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:57:00 +0200 Subject: [PATCH 14/37] refactor(core): move decimal decode into GenericArray.getDecimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit put the BigInteger/BigDecimal decoding helpers inside VortexInspectorTui, which meant only the inspector module could turn a Decimal-shaped GenericArray cell into a number. Anyone consuming vortex-reader directly (CLI export, JDBC, downstream applications) still saw an opaque buffer. 
Promotes the logic to a public method on GenericArray itself: BigDecimal value = a.getDecimal(i); Width is derived from dtype's precision (1 / 2 / 4 / 8 / 16 bytes for precision ≤ 2 / 4 / 9 / 18 / 38) and scaled by dtype's scale. Throws VortexException on misuse (non-decimal dtype, multi-buffer array). TUI's formatValue now calls a.getDecimal(i).toPlainString() instead of its own private helpers. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 53 +++++++++++++++++++ .../vortex/core/array/GenericArrayTest.java | 51 ++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 44 +-------------- 3 files changed, 106 insertions(+), 42 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index 2a174832..446f7595 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -1,8 +1,12 @@ package io.github.dfa1.vortex.core.array; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.math.BigDecimal; +import java.math.BigInteger; /// Fallback [Array] for dtypes that lack a dedicated concrete subtype. /// @@ -95,6 +99,55 @@ public int childCount() { return children.length; } + /// Decodes the decimal value at row {@code i}. + /// + /// Only valid when this array's dtype is {@link DType.Decimal} and the + /// underlying storage is a single buffer of little-endian two's-complement + /// integers (the shape produced by {@code vortex.decimal} decoding). The + /// element width is derived from the dtype's precision: 1 / 2 / 4 / 8 / 16 + /// bytes for precision ≤ 2 / 4 / 9 / 18 / 38 respectively. 
+ /// + /// @param i row index, {@code 0 <= i < length()} + /// @return decoded value as a {@link BigDecimal} with the dtype's scale + /// @throws VortexException if this array isn't a single-buffer decimal + public BigDecimal getDecimal(long i) { + if (!(dtype instanceof DType.Decimal d)) { + throw new VortexException("getDecimal called on non-decimal dtype: " + dtype); + } + if (buffers.length != 1) { + throw new VortexException("getDecimal requires a single-buffer GenericArray; got " + + buffers.length); + } + int width = decimalByteWidth(d.precision()); + BigInteger mantissa = readSignedLe(buffers[0], i * width, width); + return new BigDecimal(mantissa, d.scale()); + } + + private static int decimalByteWidth(int precision) { + if (precision <= 2) { + return 1; + } + if (precision <= 4) { + return 2; + } + if (precision <= 9) { + return 4; + } + if (precision <= 18) { + return 8; + } + return 16; + } + + private static BigInteger readSignedLe(MemorySegment buf, long offset, int width) { + // Little-endian two's-complement on disk; BigInteger expects big-endian. + byte[] be = new byte[width]; + for (int k = 0; k < width; k++) { + be[width - 1 - k] = buf.get(ValueLayout.JAVA_BYTE, offset + k); + } + return new BigInteger(be); + } + /// Returns the child array at position {@code i}. 
/// /// @param i child index diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java index 493a2d5e..749d29a2 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -2,10 +2,13 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; import org.junit.jupiter.api.Test; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.math.BigDecimal; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -81,4 +84,52 @@ void withLength_negative_throws() { .hasMessageContaining("out of range"); } } + + @Test + void getDecimal_i64Buffer_decodesMantissaScaledByDtype() { + // Given — decimal(15,2): precision 15 → 8-byte (I64) mantissa; values + // 1234 / -50 / 0 should render as 12.34 / -0.50 / 0.00. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(24); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -50L); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + + @Test + void getDecimal_smallPrecisionUsesNarrowerBuffer() { + // Given — decimal(4,1): precision 4 → 2-byte (I16) mantissa + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 0, (short) 99); + buf.set(ValueLayout.JAVA_SHORT_UNALIGNED, 2, (short) -1); + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 1, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then — 99 / 10 = 9.9; -1 / 10 = -0.1 (signed extension matters) + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("9.9")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.1")); + } + } + + @Test + void getDecimal_nonDecimalDtype_throws() { + // Given — guards against silently returning garbage on misuse + try (Arena arena = Arena.ofConfined()) { + GenericArray sut = new GenericArray(DTYPE, 1, arena.allocate(8)); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-decimal"); + } + } } diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index a38eb78d..265681e0 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ 
b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -553,52 +553,12 @@ private static String formatValue(Array array, int i) { case VarBinArray a -> a.dtype() instanceof DType.Utf8 ? "\"" + a.getString(i) + "\"" : bytesToShortHex(a.getBytes(i)); - case GenericArray a when a.dtype() instanceof DType.Decimal d - && a.bufferCount() == 1 -> formatDecimal(a, i, d); + case GenericArray a when a.dtype() instanceof DType.Decimal + && a.bufferCount() == 1 -> a.getDecimal(i).toPlainString(); default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">"; }; } - private static String formatDecimal(GenericArray a, int i, DType.Decimal d) { - int byteWidth = decimalByteWidth(d.precision()); - java.lang.foreign.MemorySegment buf = a.bufferAt(0); - long offset = (long) i * byteWidth; - try { - java.math.BigInteger mantissa = readSignedInt(buf, offset, byteWidth); - java.math.BigDecimal value = new java.math.BigDecimal(mantissa, d.scale()); - return value.toPlainString(); - } catch (RuntimeException e) { - return ""; - } - } - - private static int decimalByteWidth(int precision) { - if (precision <= 2) { - return 1; - } - if (precision <= 4) { - return 2; - } - if (precision <= 9) { - return 4; - } - if (precision <= 18) { - return 8; - } - return 16; - } - - private static java.math.BigInteger readSignedInt(java.lang.foreign.MemorySegment buf, - long offset, int byteWidth) { - // Little-endian two's-complement; mirror Java arithmetic by building the - // bytes in big-endian order before handing them to BigInteger. 
- byte[] be = new byte[byteWidth]; - for (int k = 0; k < byteWidth; k++) { - be[byteWidth - 1 - k] = buf.get(java.lang.foreign.ValueLayout.JAVA_BYTE, offset + k); - } - return new java.math.BigInteger(be); - } - private static String bytesToShortHex(byte[] bytes) { int n = Math.min(bytes.length, 16); StringBuilder sb = new StringBuilder(n * 3 + 2); From 90878476224b7d497052fa4d3213534f4a8c173a Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 19:59:12 +0200 Subject: [PATCH 15/37] feat(core): decode decimal_byte_parts shape in GenericArray.getDecimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends getDecimal to handle the second shape Vortex decoders produce for decimal columns: zero buffers, one child carrying the most-significant integer part as a typed array (LongArray / IntArray / ShortArray / ByteArray, optionally wrapped in a MaskedArray). This is the shape vortex.decimal_byte_parts emits when lower_part_count == 0 — i.e. tpch_lineitem.regular's l_quantity, l_extendedprice, l_discount and l_tax columns. The TUI's pattern switch drops its single-buffer guard and now calls getDecimal whenever the dtype is Decimal, falling back to the placeholder only if getDecimal itself rejects the shape. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 45 ++++++++++++++----- .../vortex/core/array/GenericArrayTest.java | 21 +++++++++ .../vortex/inspect/VortexInspectorTui.java | 12 ++++- 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index 446f7595..725cccb4 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -101,28 +101,51 @@ public int childCount() { /// Decodes the decimal value at row {@code i}. 
/// - /// Only valid when this array's dtype is {@link DType.Decimal} and the - /// underlying storage is a single buffer of little-endian two's-complement - /// integers (the shape produced by {@code vortex.decimal} decoding). The - /// element width is derived from the dtype's precision: 1 / 2 / 4 / 8 / 16 - /// bytes for precision ≤ 2 / 4 / 9 / 18 / 38 respectively. + /// Handles the two shapes produced by Vortex decimal decoders: + /// + /// - **single-buffer**: one raw buffer of little-endian two's-complement + /// integers (one element per row); element width derived from the + /// dtype's precision (1 / 2 / 4 / 8 / 16 bytes for precision ≤ 2 / 4 / + /// 9 / 18 / 38). Produced by {@code vortex.decimal}. + /// - **child-array**: zero buffers, one child holding the most-significant + /// integer part as a {@link LongArray}, {@link IntArray}, {@link ShortArray}, + /// or {@link ByteArray}. Produced by {@code vortex.decimal_byte_parts} + /// when {@code lower_part_count == 0}. /// /// @param i row index, {@code 0 <= i < length()} /// @return decoded value as a {@link BigDecimal} with the dtype's scale - /// @throws VortexException if this array isn't a single-buffer decimal + /// @throws VortexException if the dtype isn't decimal or the array shape + /// doesn't match either supported layout public BigDecimal getDecimal(long i) { if (!(dtype instanceof DType.Decimal d)) { throw new VortexException("getDecimal called on non-decimal dtype: " + dtype); } - if (buffers.length != 1) { - throw new VortexException("getDecimal requires a single-buffer GenericArray; got " - + buffers.length); + BigInteger mantissa; + if (buffers.length == 1 && children.length == 0) { + int width = decimalByteWidth(d.precision()); + mantissa = readSignedLe(buffers[0], i * width, width); + } else if (buffers.length == 0 && children.length == 1) { + mantissa = mantissaFromChild(children[0], i); + } else { + throw new VortexException("getDecimal: unsupported decimal shape buffers=" + + 
buffers.length + " children=" + children.length); } - int width = decimalByteWidth(d.precision()); - BigInteger mantissa = readSignedLe(buffers[0], i * width, width); return new BigDecimal(mantissa, d.scale()); } + private static BigInteger mantissaFromChild(Array child, long i) { + return switch (child) { + case LongArray a -> BigInteger.valueOf(a.getLong(i)); + case IntArray a -> BigInteger.valueOf(a.getInt(i)); + case ShortArray a -> BigInteger.valueOf(a.getShort(i)); + case ByteArray a -> BigInteger.valueOf(a.getByte(i)); + case MaskedArray a -> mantissaFromChild(a.inner(), i); + default -> + throw new VortexException("getDecimal: unsupported mantissa child type " + + child.getClass().getSimpleName()); + }; + } + private static int decimalByteWidth(int precision) { if (precision <= 2) { return 1; diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java index 749d29a2..bf0dee68 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -120,6 +120,27 @@ void getDecimal_smallPrecisionUsesNarrowerBuffer() { } } + @Test + void getDecimal_childArrayShape_decodesViaMostSignificantPart() { + // Given — the shape vortex.decimal_byte_parts decoders produce when + // lower_part_count == 0: zero buffers, one LongArray child carrying + // the i64 mantissa. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment mspBuf = arena.allocate(24); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 4321L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, -100L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 16, 0L); + LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 3, mspBuf); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, new MemorySegment[0], new Array[]{msp}); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("43.21")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-1.00")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + @Test void getDecimal_nonDecimalDtype_throws() { // Given — guards against silently returning garbage on misuse diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 265681e0..e3968c24 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -553,12 +553,20 @@ private static String formatValue(Array array, int i) { case VarBinArray a -> a.dtype() instanceof DType.Utf8 ? 
"\"" + a.getString(i) + "\"" : bytesToShortHex(a.getBytes(i)); - case GenericArray a when a.dtype() instanceof DType.Decimal - && a.bufferCount() == 1 -> a.getDecimal(i).toPlainString(); + case GenericArray a when a.dtype() instanceof DType.Decimal -> + tryDecimal(a, i); default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">"; }; } + private static String tryDecimal(GenericArray a, int i) { + try { + return a.getDecimal(i).toPlainString(); + } catch (RuntimeException e) { + return "<" + a.getClass().getSimpleName() + " " + a.dtype() + ">"; + } + } + private static String bytesToShortHex(byte[] bytes) { int n = Math.min(bytes.length, 16); StringBuilder sb = new StringBuilder(n * 3 + 2); From b2f4f539af7af1a88feb98542a0982f2211802ba Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:01:43 +0200 Subject: [PATCH 16/37] feat(core): decode vortex.date cells via Extensions.localDate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a small core helper for the most common Vortex extension dtype. Storage is days since the Unix epoch (Arrow convention) carried in any signed integer primitive array — ByteArray, ShortArray, IntArray, LongArray, or a MaskedArray wrapping one of those. Extensions.DATE holds the canonical "vortex.date" id string so callers don't have to hard-code it. LocalDate d = Extensions.localDate(array, i); The TUI's data preview now calls localDate() for ext columns (l_shipdate / l_commitdate / l_receiptdate in tpch_lineitem), so the values render as 1996-02-12 instead of "9538". Falls back to the generic per-array switch if localDate throws on an unexpected storage shape. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 53 +++++++++++ .../vortex/core/array/ExtensionsTest.java | 87 +++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 9 ++ 3 files changed, 149 insertions(+) create mode 100644 core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java new file mode 100644 index 00000000..5610c895 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -0,0 +1,53 @@ +package io.github.dfa1.vortex.core.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; + +import java.time.LocalDate; + +/// Decoding helpers for Vortex extension dtypes (e.g. {@code vortex.date}, +/// {@code vortex.timestamp}) that ship as primitive storage arrays plus an +/// extension id on the {@link DType}. +/// +/// Lives in core so any reader-jar consumer can decode these cells without +/// reimplementing the storage conventions. +public final class Extensions { + + /// Extension id for date columns - storage is days since the Unix epoch + /// (1970-01-01), Arrow-compatible. + public static final String DATE = "vortex.date"; + + private Extensions() { + } + + /// Decodes a {@code vortex.date} cell to a {@link LocalDate}. + /// + /// The storage array must be one of the integer primitive arrays + /// ({@link ByteArray}, {@link ShortArray}, {@link IntArray}, {@link LongArray}), + /// optionally wrapped in a {@link MaskedArray}. The cell value is read as a + /// signed integer giving days since the Unix epoch. 
+ /// + /// @param array array whose dtype is {@code ext} + /// @param i row index, {@code 0 <= i < array.length()} + /// @return decoded date + /// @throws VortexException if {@code array}'s dtype isn't {@code ext} + /// or its storage isn't an integer primitive + public static LocalDate localDate(Array array, long i) { + if (!(array.dtype() instanceof DType.Extension ext) || !DATE.equals(ext.extensionId())) { + throw new VortexException("localDate called on non-date dtype: " + array.dtype()); + } + return LocalDate.ofEpochDay(epochDay(array, i)); + } + + private static long epochDay(Array array, long i) { + return switch (array) { + case ByteArray a -> a.getByte(i); + case ShortArray a -> a.getShort(i); + case IntArray a -> a.getInt(i); + case LongArray a -> a.getLong(i); + case MaskedArray a -> epochDay(a.inner(), i); + default -> throw new VortexException( + "localDate: unsupported storage type " + array.getClass().getSimpleName()); + }; + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java new file mode 100644 index 00000000..bc49ff2a --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -0,0 +1,87 @@ +package io.github.dfa1.vortex.core.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.time.LocalDate; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class ExtensionsTest { + + private static final DType.Primitive I32 = new DType.Primitive(PType.I32, false); + private static final DType DATE_DTYPE = new DType.Extension(Extensions.DATE, I32, null, false); + + @Test + void 
localDate_zeroIsUnixEpoch() { + // Given — Arrow-compatible: 0 == 1970-01-01 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 0); + IntArray sut = new IntArray(DATE_DTYPE, 1, buf); + + // When / Then + assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1970, 1, 1)); + } + } + + @Test + void localDate_tpchSampleValue_matchesExpected() { + // Given — anchor against a known TPC-H value: 9538 = 1996-02-12. + // Catches accidental epoch-shift regressions (e.g. days-since-2000). + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); + IntArray sut = new IntArray(DATE_DTYPE, 1, buf); + + // When / Then + assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1996, 2, 12)); + } + } + + @Test + void localDate_negativeDays_returnsPreEpoch() { + // Given — defensive: integer storage is signed, so pre-1970 dates must work + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, -1); + IntArray sut = new IntArray(DATE_DTYPE, 1, buf); + + // When / Then + assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1969, 12, 31)); + } + } + + @Test + void localDate_nonDateDtype_throws() { + // Given — guards against silent misinterpretation (e.g. 
plain I32 as days) + try (Arena arena = Arena.ofConfined()) { + IntArray sut = new IntArray(I32, 1, arena.allocate(4)); + + // When / Then + assertThatThrownBy(() -> Extensions.localDate(sut, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-date"); + } + } + + @Test + void localDate_unsupportedStorage_throws() { + // Given — a date dtype on top of a varbin array makes no semantic sense + try (Arena arena = Arena.ofConfined()) { + VarBinArray badStorage = new VarBinArray(DATE_DTYPE, 1, + arena.allocate(0), arena.allocate(8), PType.I32); + + // When / Then + assertThatThrownBy(() -> Extensions.localDate(badStorage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("unsupported storage"); + } + } +} diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index e3968c24..f62745d0 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -7,6 +7,7 @@ import io.github.dfa1.vortex.core.array.BoolArray; import io.github.dfa1.vortex.core.array.ByteArray; import io.github.dfa1.vortex.core.array.DoubleArray; +import io.github.dfa1.vortex.core.array.Extensions; import io.github.dfa1.vortex.core.array.FloatArray; import io.github.dfa1.vortex.core.array.GenericArray; import io.github.dfa1.vortex.core.array.IntArray; @@ -542,6 +543,14 @@ record Failed(String message) implements DataState { } private static String formatValue(Array array, int i) { + if (array.dtype() instanceof DType.Extension ext + && Extensions.DATE.equals(ext.extensionId())) { + try { + return Extensions.localDate(array, i).toString(); + } catch (RuntimeException e) { + // fall through to generic rendering on shape mismatch + } + } return switch (array) { case LongArray a -> Long.toString(a.getLong(i)); case IntArray a -> 
Integer.toString(a.getInt(i)); From b424d9da84133c53bef5b04c662eec6753f383a4 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:08:13 +0200 Subject: [PATCH 17/37] feat(inspector): show dictionary entries when a vortex.dict node is selected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TUI's details pane now decodes child[0] (the values layout) of any selected vortex.dict node and renders the unique entries underneath "Dictionary (N entries):". Same DataState/spinner machinery as the column data preview, so the lookup runs on the IoWorker and the UI stays responsive. To make this possible without duplicating decoder plumbing, VortexHandle gets a single `registry()` accessor — same internal-escape-hatch shape as `slice()`. The actual decode is now one inline call in the TUI: FlatSegmentDecoder(handle.registry()) .decode(handle.slice(...), handle.footer().arraySpecs(), dtype, values.rowCount(), arena); The previous `decodeFlatLayout` method on VortexHandle (added in the same session) and its duplicated impls in VortexReader / VortexHttpReader are gone — that method was leaking encoding-decoder plumbing into the file-handle interface and was duplicated across both readers. 
Co-Authored-By: Claude Opus 4.7 --- .../vortex/inspect/VortexInspectorTui.java | 77 +++++++++++++++++++ .../github/dfa1/vortex/io/VortexHandle.java | 12 +++ .../dfa1/vortex/io/VortexHttpReader.java | 5 ++ .../github/dfa1/vortex/io/VortexReader.java | 5 ++ 4 files changed, 99 insertions(+) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index f62745d0..cab05de5 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -112,6 +112,7 @@ private static final class Loop { private final ConcurrentMap hexCache = new ConcurrentHashMap<>(); private final Set hexInFlight = ConcurrentHashMap.newKeySet(); private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); + private final ConcurrentMap dictCache = new ConcurrentHashMap<>(); private final Map columnOf = new HashMap<>(); private volatile String lastError; private long tick; @@ -433,6 +434,22 @@ private List detailLines(InspectorTree.Node node) { lines.add(" max: " + p.stats().max()); } } + if (layout.isDict() && layout.children().size() >= 1) { + DataState dictState = loadDictPreview(node); + lines.add(""); + switch (dictState) { + case DataState.Pending ignored -> + lines.add("Dictionary: " + SPINNER[(int) (tick % SPINNER.length)] + " loading..."); + case DataState.Failed(String msg) -> + lines.add("Dictionary: ! 
" + msg); + case DataState.Loaded(List values) -> { + lines.add("Dictionary (" + values.size() + " entries):"); + for (int i = 0; i < values.size(); i++) { + lines.add(String.format(" [%2d] %s", i, values.get(i))); + } + } + } + } if (col != null) { DataState state = loadDataPreview(col); lines.add(""); @@ -474,6 +491,66 @@ private DataState loadDataPreview(String columnName) { return dataCache.getOrDefault(columnName, DataState.PENDING); } + private DataState loadDictPreview(InspectorTree.Node dictNode) { + DataState existing = dictCache.get(dictNode); + if (existing != null) { + return existing; + } + if (dictCache.putIfAbsent(dictNode, DataState.PENDING) != null) { + return dictCache.get(dictNode); + } + if (worker == null) { + runDictLoad(dictNode); + } else { + worker.submit(() -> runDictLoad(dictNode)); + } + return dictCache.getOrDefault(dictNode, DataState.PENDING); + } + + private void runDictLoad(InspectorTree.Node dictNode) { + try { + Layout values = dictNode.layout().children().get(0); + DType dtype = columnDtypeFor(dictNode); + if (dtype == null) { + dictCache.put(dictNode, new DataState.Loaded(List.of())); + return; + } + try (java.lang.foreign.Arena arena = java.lang.foreign.Arena.ofConfined()) { + int segIdx = values.segments().getFirst(); + SegmentSpec spec = tree.segmentSpecs().get(segIdx); + java.lang.foreign.MemorySegment seg = handle.slice(spec.offset(), spec.length()); + io.github.dfa1.vortex.core.array.Array arr = + new io.github.dfa1.vortex.encoding.FlatSegmentDecoder(handle.registry()) + .decode(seg, handle.footer().arraySpecs(), + dtype, values.rowCount(), arena); + int n = (int) Math.min(arr.length(), DATA_PREVIEW_ROWS); + List out = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + out.add(formatValue(arr, i)); + } + dictCache.put(dictNode, new DataState.Loaded(List.copyOf(out))); + } + } catch (RuntimeException e) { + dictCache.put(dictNode, new DataState.Failed(messageOf(e))); + lastError = "dict: " + messageOf(e); + } + } + + 
private DType columnDtypeFor(InspectorTree.Node node) { + String col = columnOf.get(node); + DType root = tree.dtype(); + if (col == null) { + return root; + } + if (root instanceof DType.Struct s) { + int idx = s.fieldNames().indexOf(col); + if (idx >= 0) { + return s.fieldTypes().get(idx); + } + } + return root; + } + private void startDataLoad(String columnName) { if (dataCache.putIfAbsent(columnName, DataState.PENDING) != null) { return; diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java index b1391a77..1118cd6b 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHandle.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.Footer; import io.github.dfa1.vortex.core.Layout; +import io.github.dfa1.vortex.encoding.EncodingRegistry; import io.github.dfa1.vortex.scan.ScanIterator; import io.github.dfa1.vortex.scan.ScanOptions; @@ -44,6 +45,17 @@ public interface VortexHandle extends Closeable { ScanIterator scan(ScanOptions options); + /// Returns the {@link EncodingRegistry} this handle was opened with. + /// + /// Internal escape hatch. Exposed for tooling + /// (e.g. the inspector's dictionary preview) that needs to decode an + /// internal subtree node directly via {@link io.github.dfa1.vortex.encoding.FlatSegmentDecoder}. + /// Not part of the supported stability contract; signatures may change + /// without deprecation. + /// + /// @return the registry used to resolve encoding ids during scan + EncodingRegistry registry(); + @Override void close(); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java index 8fe1a037..90f40e85 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexHttpReader.java @@ -240,6 +240,11 @@ public ScanIterator scan(ScanOptions options) { return new ScanIterator(this, registry, options); } + @Override + public EncodingRegistry registry() { + return registry; + } + @Override public void close() { arena.close(); diff --git a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java index cd7fec87..7c8b9ce3 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/io/VortexReader.java @@ -163,6 +163,11 @@ public ScanIterator scan(ScanOptions options) { return new ScanIterator(this, registry, options); } + @Override + public EncodingRegistry registry() { + return registry; + } + /// Aggregated per-column statistics (global min/max across all chunks). /// Returns an empty map if the root layout is not a struct. /// Columns with no embedded stats return [ArrayStats#empty()].
From 7905d5ef8ff175f9fd1f73751eba8ba2d816c13c Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:11:20 +0200 Subject: [PATCH 18/37] fix(inspector): format vortex.date columns using declared dtype ExtEncoding.decode unwraps the storage child and returns it with its primitive dtype (I32 for dates), so the column Array no longer carries the Extension marker by the time it reaches the TUI. The previous guard `array.dtype() instanceof DType.Extension` therefore never matched and dates rendered as raw epoch-day integers (9577, 9606, ...) instead of 1996-03-21. The TUI now threads the column's declared dtype (looked up in the top-level struct schema) through to formatValue, so the date check runs against the schema-level type rather than the post-unwrap array type. The new helper Extensions.localDateFromStorage decodes from any signed-integer storage array without re-checking the dtype, since the caller has already established context. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 15 +++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 21 ++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index 5610c895..59f41c59 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -39,6 +39,21 @@ public static LocalDate localDate(Array array, long i) { return LocalDate.ofEpochDay(epochDay(array, i)); } + /// Decodes a {@code vortex.date} cell directly from its storage array. 
+ /// + /// Use when the caller has already established (via column metadata) that + /// the array represents a date, but the Array itself no longer carries the + /// Extension dtype - the case after {@code vortex.ext}'s decoder unwraps + /// the storage child and returns it with its primitive dtype. + /// + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded date + /// @throws VortexException if {@code storage} isn't an integer primitive + public static LocalDate localDateFromStorage(Array storage, long i) { + return LocalDate.ofEpochDay(epochDay(storage, i)); + } + private static long epochDay(Array array, long i) { return switch (array) { case ByteArray a -> a.getByte(i); diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index cab05de5..d09b15b3 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -526,7 +526,7 @@ private void runDictLoad(InspectorTree.Node dictNode) { int n = (int) Math.min(arr.length(), DATA_PREVIEW_ROWS); List out = new ArrayList<>(n); for (int i = 0; i < n; i++) { - out.add(formatValue(arr, i)); + out.add(formatValue(arr, i, dtype)); } dictCache.put(dictNode, new DataState.Loaded(List.copyOf(out))); } @@ -538,12 +538,16 @@ private void runDictLoad(InspectorTree.Node dictNode) { private DType columnDtypeFor(InspectorTree.Node node) { String col = columnOf.get(node); - DType root = tree.dtype(); if (col == null) { - return root; + return tree.dtype(); } + return columnDtypeByName(col); + } + + private DType columnDtypeByName(String columnName) { + DType root = tree.dtype(); if (root instanceof DType.Struct s) { - int idx = s.fieldNames().indexOf(col); + int idx = s.fieldNames().indexOf(columnName); if (idx >= 0) { return 
s.fieldTypes().get(idx); } @@ -564,6 +568,7 @@ private void startDataLoad(String columnName) { private void runDataLoad(String columnName) { try { + DType declared = columnDtypeByName(columnName); ScanOptions opts = ScanOptions.columns(columnName).withLimit(DATA_PREVIEW_ROWS); try (ScanIterator it = handle.scan(opts)) { if (!it.hasNext()) { @@ -579,7 +584,7 @@ private void runDataLoad(String columnName) { int n = (int) Math.min(array.length(), DATA_PREVIEW_ROWS); List out = new ArrayList<>(n); for (int i = 0; i < n; i++) { - out.add(formatValue(array, i)); + out.add(formatValue(array, i, declared)); } dataCache.put(columnName, new DataState.Loaded(List.copyOf(out))); } @@ -619,11 +624,11 @@ record Failed(String message) implements DataState { } } - private static String formatValue(Array array, int i) { - if (array.dtype() instanceof DType.Extension ext + private static String formatValue(Array array, int i, DType declared) { + if (declared instanceof DType.Extension ext && Extensions.DATE.equals(ext.extensionId())) { try { - return Extensions.localDate(array, i).toString(); + return Extensions.localDateFromStorage(array, i).toString(); } catch (RuntimeException e) { // fall through to generic rendering on shape mismatch } From c51f4b6628479b1606b3d29d7dd0983158a96e5e Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:41:55 +0200 Subject: [PATCH 19/37] feat(inspector): spell out compression label, show bits/elem in TUI segment lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames the segment table's "comp=" column to "compression=" in both the text inspector and the TUI details pane so the field reads as plain English. The TUI segment lines also gain a "bits/elem=N.NN" suffix computed from the owning layout's row count and segment byte length, which makes the encoding's compression ratio obvious at a glance (e.g. bitpacked vs flat for the same column). 
The top-level segment table in the text inspector keeps the same columns minus bits/elem — that table is global and a single segment can be reused across layouts with different row counts, so a single bits/elem number would be misleading. Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/inspect/VortexInspector.java | 2 +- .../github/dfa1/vortex/inspect/VortexInspectorTui.java | 7 ++++++- .../dfa1/vortex/inspect/VortexInspectorTest.java | 10 +++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java index 45b12ebc..e1eee450 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspector.java @@ -61,7 +61,7 @@ private static void appendSegmentTable(StringBuilder sb, List specs sb.append(indent).append('[').append(i).append("] ") .append("off=").append(spec.offset()) .append(" len=").append(formatBytes(spec.length())) - .append(" comp=").append(spec.compression().name()) + .append(" compression=").append(spec.compression().name()) .append('\n'); } } diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index d09b15b3..c46511d3 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -415,11 +415,16 @@ private List detailLines(InspectorTree.Node node) { } lines.add("Segments: " + layout.segments().size() + " (" + formatBytes(subtotal) + ")"); + long rows = layout.rowCount(); for (int idx : layout.segments()) { SegmentSpec spec = tree.segmentSpecs().get(idx); + String bits = rows > 0 + ? 
" bits/elem=" + String.format("%.2f", spec.length() * 8.0 / rows) + : ""; lines.add(" [" + idx + "] off=" + spec.offset() + " len=" + formatBytes(spec.length()) - + " comp=" + spec.compression().name()); + + " compression=" + spec.compression().name() + + bits); } } else { lines.add("Segments: 0"); diff --git a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java index 7dd7eff4..b1926274 100644 --- a/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java +++ b/inspector/src/test/java/io/github/dfa1/vortex/inspect/VortexInspectorTest.java @@ -39,8 +39,8 @@ void render_struct_listsHeaderColumnsAndUsedEncodings() { .contains("Registered encodings: vortex.flat, fastlanes.bitpacked, vortex.constant") .contains("Used encodings: ") .contains("Segments: 2") - .contains("[0] off=0 len=512 B comp=NONE") - .contains("[1] off=512 len=512 B comp=LZ4") + .contains("[0] off=0 len=512 B compression=NONE") + .contains("[1] off=512 len=512 B compression=LZ4") .contains("Layout:") .contains("struct (1000 rows)") .contains("[fastlanes.bitpacked]"); @@ -60,9 +60,9 @@ void render_segmentTable_listsEverySegment() { // Then assertThat(out) - .contains("[0] off=0 len=1.0 KB comp=ZSTD") - .contains("[1] off=1024 len=2.0 KB comp=NONE") - .contains("[2] off=3072 len=4.0 KB comp=LZ4"); + .contains("[0] off=0 len=1.0 KB compression=ZSTD") + .contains("[1] off=1024 len=2.0 KB compression=NONE") + .contains("[2] off=3072 len=4.0 KB compression=LZ4"); } @Test From 2e4c740c9895222d151a533469a611e827818aea Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:48:17 +0200 Subject: [PATCH 20/37] feat(inspector): tag per-chunk stats children in the TUI tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TUI now appends ", stats" to the row count in the tree view for nodes that hold zone-map statistics rather 
than column data: - the second child of any vortex.stats (Zoned) node - the first child of a vortex.chunked layout whose metadata byte 0 is set to 1 (matches the ScanIterator skip rule) So instead of two indistinguishable "vortex.flat (8 rows)" siblings under a vortex.stats node, the stats one renders as "vortex.flat (8 rows, stats)" — explains the seemingly anomalous high bits/elem (it's bits per stats row, not per data value). Co-Authored-By: Claude Opus 4.7 --- .../vortex/inspect/VortexInspectorTui.java | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index c46511d3..9ba785be 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -114,6 +114,7 @@ private static final class Loop { private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); private final ConcurrentMap dictCache = new ConcurrentHashMap<>(); private final Map columnOf = new HashMap<>(); + private final Set statsChildren = new HashSet<>(); private volatile String lastError; private long tick; private int selected; @@ -126,9 +127,29 @@ private static final class Loop { this.worker = worker; this.expanded.add(tree.root()); indexColumns(tree.root()); + indexStatsChildren(tree.root()); prefetchTopColumns(); } + private void indexStatsChildren(InspectorTree.Node node) { + Layout layout = node.layout(); + if (layout.isZoned() && node.children().size() >= 2) { + // Zoned: child[0] = data, child[1] = per-chunk stats payload + statsChildren.add(node.children().get(1)); + } else if (layout.isChunked() && hasLeadingStats(layout) && !node.children().isEmpty()) { + // Chunked with metadata[0] == 1: child[0] is the stats payload + statsChildren.add(node.children().get(0)); + } + for 
(InspectorTree.Node child : node.children()) { + indexStatsChildren(child); + } + } + + private static boolean hasLeadingStats(Layout layout) { + java.nio.ByteBuffer meta = layout.metadata(); + return meta != null && meta.hasRemaining() && meta.get(meta.position()) == 1; + } + private void prefetchTopColumns() { if (!tree.root().layout().isStruct()) { return; @@ -377,8 +398,9 @@ private String renderItem(Item item) { String label = item.depth() == 0 && node.layout().isStruct() ? "struct" : node.fieldName().map(n -> n + ": ").orElse("") + node.layout().encodingId(); + String tag = statsChildren.contains(node) ? ", stats" : ""; return " ".repeat(item.depth() * 2) + marker + label - + " (" + node.layout().rowCount() + " rows)"; + + " (" + node.layout().rowCount() + " rows" + tag + ")"; } private void drawDivider(StringBuilder buf, int col, int top, int bottom) { From 9422ffb52cc09eac64a6de802dcc55555c24c003 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:55:28 +0200 Subject: [PATCH 21/37] fix(core): derive decimal element width from buffer size, not precision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GenericArray.getDecimal previously picked the on-disk integer width from the dtype's precision (1 / 2 / 4 / 8 / 16 bytes for precision ≤ 2 / 4 / 9 / 18 / 38). vortex.decimal is free to pick a narrower valuesType when the actual values fit, so a decimal(15,2) column whose values fit in I32 is stored at 4 bytes per element — but the precision table said 8, and the decoder happily read garbage from the half-element offset. The fix derives the width from the single buffer's byteSize divided by length, then validates the result is 1 / 2 / 4 / 8 / 16. An unalignedbuffer-size now throws VortexException rather than silently truncating. Adds an explicit bounds check on i so callers that don't respect length() fail fast with IndexOutOfBoundsException rather than reading past the buffer. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 48 ++++++++------- .../vortex/core/array/GenericArrayTest.java | 60 +++++++++++++++++++ 2 files changed, 85 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index 725cccb4..644d17d6 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -104,9 +104,11 @@ public int childCount() { /// Handles the two shapes produced by Vortex decimal decoders: /// /// - **single-buffer**: one raw buffer of little-endian two's-complement - /// integers (one element per row); element width derived from the - /// dtype's precision (1 / 2 / 4 / 8 / 16 bytes for precision ≤ 2 / 4 / - /// 9 / 18 / 38). Produced by {@code vortex.decimal}. + /// integers (one element per row). Element width is derived from the + /// buffer's byte size divided by {@link #length()}, not from the + /// dtype's precision — {@code vortex.decimal} writes whatever width + /// the encoder chose in its {@code valuesType} metadata, which can be + /// narrower than the precision alone would allow. /// - **child-array**: zero buffers, one child holding the most-significant /// integer part as a {@link LongArray}, {@link IntArray}, {@link ShortArray}, /// or {@link ByteArray}. 
Produced by {@code vortex.decimal_byte_parts} @@ -114,16 +116,19 @@ public int childCount() { /// /// @param i row index, {@code 0 <= i < length()} /// @return decoded value as a {@link BigDecimal} with the dtype's scale - /// @throws VortexException if the dtype isn't decimal or the array shape - /// doesn't match either supported layout + /// @throws VortexException if the dtype isn't decimal or the array + /// shape doesn't match either supported layout + /// @throws IndexOutOfBoundsException if {@code i} is outside {@code [0, length())} public BigDecimal getDecimal(long i) { + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); + } if (!(dtype instanceof DType.Decimal d)) { throw new VortexException("getDecimal called on non-decimal dtype: " + dtype); } BigInteger mantissa; if (buffers.length == 1 && children.length == 0) { - int width = decimalByteWidth(d.precision()); - mantissa = readSignedLe(buffers[0], i * width, width); + mantissa = readSingleBufferMantissa(buffers[0], length, i); } else if (buffers.length == 0 && children.length == 1) { mantissa = mantissaFromChild(children[0], i); } else { @@ -133,6 +138,19 @@ public BigDecimal getDecimal(long i) { return new BigDecimal(mantissa, d.scale()); } + private static BigInteger readSingleBufferMantissa(MemorySegment buf, long length, long i) { + long bufBytes = buf.byteSize(); + if (length == 0 || bufBytes % length != 0) { + throw new VortexException("getDecimal: buffer size " + bufBytes + + " is not a multiple of length " + length); + } + int width = (int) (bufBytes / length); + if (width != 1 && width != 2 && width != 4 && width != 8 && width != 16) { + throw new VortexException("getDecimal: unsupported element width " + width + " bytes"); + } + return readSignedLe(buf, i * width, width); + } + private static BigInteger mantissaFromChild(Array child, long i) { return switch (child) { case LongArray a -> BigInteger.valueOf(a.getLong(i)); @@ 
-146,22 +164,6 @@ private static BigInteger mantissaFromChild(Array child, long i) { }; } - private static int decimalByteWidth(int precision) { - if (precision <= 2) { - return 1; - } - if (precision <= 4) { - return 2; - } - if (precision <= 9) { - return 4; - } - if (precision <= 18) { - return 8; - } - return 16; - } - private static BigInteger readSignedLe(MemorySegment buf, long offset, int width) { // Little-endian two's-complement on disk; BigInteger expects big-endian. byte[] be = new byte[width]; diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java index bf0dee68..5f3cd8b7 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -141,6 +141,66 @@ void getDecimal_childArrayShape_decodesViaMostSignificantPart() { } } + @Test + void getDecimal_widthDerivedFromBufferNotPrecision() { + // Given — decimal(15,2) is precision 15 (≤18 → "should" be I64), but + // vortex.decimal stores at whatever valuesType the encoder picked. A + // narrower width fits if all values are small. The old precision-based + // table picked 8 bytes here and read garbage. The current impl derives + // width from buffer.byteSize / length, so storing 3 I32 values at the + // same precision 15 decodes correctly. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(12); // 3 × 4 bytes (I32 mantissa) + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 1234); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 4, -50); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 8, 0); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, false); + GenericArray sut = new GenericArray(dec, 3, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal("-0.50")); + assertThat(sut.getDecimal(2)).isEqualByComparingTo(BigDecimal.ZERO); + } + } + + @Test + void getDecimal_unalignedBufferSize_throws() { + // Given — buffer size not a clean multiple of length means we can't + // derive a sensible per-element width; fail fast rather than silently + // reading garbage from a half-element offset. + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(7); // not divisible by length=2 + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(0)) + .isInstanceOf(io.github.dfa1.vortex.core.VortexException.class) + .hasMessageContaining("not a multiple"); + } + } + + @Test + void getDecimal_indexOutOfBounds_throws() { + // Given — explicit bounds check guards against silent garbage reads + // when callers don't respect length() + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + DType.Decimal dec = new DType.Decimal((byte) 4, (byte) 0, false); + GenericArray sut = new GenericArray(dec, 1, buf); + + // When / Then + assertThatThrownBy(() -> sut.getDecimal(-1)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> sut.getDecimal(1)) + .isInstanceOf(IndexOutOfBoundsException.class) + .hasMessageContaining("out of bounds"); + assertThatThrownBy(() -> sut.getDecimal(Long.MAX_VALUE)) + 
.isInstanceOf(IndexOutOfBoundsException.class); + } + } + @Test void getDecimal_nonDecimalDtype_throws() { // Given — guards against silently returning garbage on misuse From 97742ae6fa96d72aa89d8cabcc7780dd0b67a38f Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:57:08 +0200 Subject: [PATCH 22/37] fix(core): localDateFromStorage replaced with extension-typed overload localDateFromStorage took any Array with no dtype check, so passing a plain I32 column would silently render as a date. The doc said the caller had "already established context" but the API didn't enforce it. Replaces it with a localDate(DType.Extension, Array, long) overload: the caller must supply the declared extension dtype, which Extensions then verifies against the vortex.date id before decoding. The inspector's call site already had the column's declared dtype in scope, so threading it through is one line. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 20 ++++++----- .../vortex/core/array/ExtensionsTest.java | 33 +++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 2 +- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index 59f41c59..eea0910e 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -39,18 +39,22 @@ public static LocalDate localDate(Array array, long i) { return LocalDate.ofEpochDay(epochDay(array, i)); } - /// Decodes a {@code vortex.date} cell directly from its storage array. 
- /// - /// Use when the caller has already established (via column metadata) that - /// the array represents a date, but the Array itself no longer carries the - /// Extension dtype - the case after {@code vortex.ext}'s decoder unwraps - /// the storage child and returns it with its primitive dtype. + /// Decodes a {@code vortex.date} cell when the storage array no longer + /// carries the Extension dtype — the case after {@code vortex.ext}'s + /// decoder unwraps the storage child and returns it with its primitive + /// dtype. Caller must supply the original {@link DType.Extension} so the + /// extension id is still verified. /// + /// @param ext the column's declared extension dtype; must be {@code vortex.date} /// @param storage signed-integer storage array /// @param i row index, {@code 0 <= i < storage.length()} /// @return decoded date - /// @throws VortexException if {@code storage} isn't an integer primitive - public static LocalDate localDateFromStorage(Array storage, long i) { + /// @throws VortexException if {@code ext} isn't {@code vortex.date} or + /// {@code storage} isn't an integer primitive + public static LocalDate localDate(DType.Extension ext, Array storage, long i) { + if (!DATE.equals(ext.extensionId())) { + throw new VortexException("localDate called with non-date extension: " + ext.extensionId()); + } return LocalDate.ofEpochDay(epochDay(storage, i)); } diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java index bc49ff2a..423c7bf8 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -71,6 +71,39 @@ void localDate_nonDateDtype_throws() { } } + @Test + void localDate_withExplicitExtAndStorage_decodes() { + // Given — ExtEncoding.decode strips the extension wrapper before the + // TUI gets the array, so the caller threads the 
declared dtype back + // in. This overload must still verify the extension id rather than + // trust any caller-supplied storage as a date. + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); + IntArray storage = new IntArray(I32, 1, buf); + DType.Extension ext = new DType.Extension(Extensions.DATE, I32, null, false); + + // When / Then + assertThat(Extensions.localDate(ext, storage, 0)) + .isEqualTo(LocalDate.of(1996, 2, 12)); + } + } + + @Test + void localDate_withWrongExtensionId_throws() { + // Given — passing some other extension's storage array must not be + // silently interpreted as a date + try (Arena arena = Arena.ofConfined()) { + IntArray storage = new IntArray(I32, 1, arena.allocate(4)); + DType.Extension notDate = new DType.Extension("vortex.something", I32, null, false); + + // When / Then + assertThatThrownBy(() -> Extensions.localDate(notDate, storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-date extension"); + } + } + @Test void localDate_unsupportedStorage_throws() { // Given — a date dtype on top of a varbin array makes no semantic sense diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 9ba785be..96758c8f 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -655,7 +655,7 @@ private static String formatValue(Array array, int i, DType declared) { if (declared instanceof DType.Extension ext && Extensions.DATE.equals(ext.extensionId())) { try { - return Extensions.localDateFromStorage(array, i).toString(); + return Extensions.localDate(ext, array, i).toString(); } catch (RuntimeException e) { // fall through to generic rendering on shape mismatch } From 
9f2b036aa6e403e165d16c470a9f82771e89b190 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 20:59:21 +0200 Subject: [PATCH 23/37] fix(inspector): read Layout.metadata bytes on IoWorker thread Loop.indexStatsChildren reads the first byte of vortex.chunked layout metadata to decide whether child[0] is the per-chunk stats payload. That ByteBuffer wraps a confined-Arena segment owned by the IoWorker thread (the only thread that ever calls VortexReader.open under the TUI), so doing the read on the main render thread tripped WrongThreadException on local files: error: WrongThreadException: Attempted access outside owning thread The fix dispatches indexStatsChildren via worker.runAndAwait so the metadata read happens on the owning thread. The set is populated before runAndAwait returns and only read afterwards, so the synchronized signal in runAndAwait gives us the happens-before we need for the subsequent unsynchronized HashSet reads from the render loop. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/inspect/VortexInspectorTui.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 96758c8f..49879432 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -127,10 +127,22 @@ private static final class Loop { this.worker = worker; this.expanded.add(tree.root()); indexColumns(tree.root()); - indexStatsChildren(tree.root()); + indexStatsChildrenOnWorker(tree.root()); prefetchTopColumns(); } + private void indexStatsChildrenOnWorker(InspectorTree.Node root) { + if (worker == null) { + indexStatsChildren(root); + return; + } + try { + worker.runAndAwait(() -> indexStatsChildren(root)); + } catch (InterruptedException e) { + 
Thread.currentThread().interrupt(); + } + } + private void indexStatsChildren(InspectorTree.Node node) { Layout layout = node.layout(); if (layout.isZoned() && node.children().size() >= 2) { From 52e3078c0031bb6d295933b8ac8ee0b4c7732274 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 23:14:20 +0200 Subject: [PATCH 24/37] debug(cli): unconditionally print stack trace from tui error path Temporary measure to localise the WrongThreadException that still trips on local-file TUI startup after the indexStatsChildren fix. Will revert once the root cause is found. Co-Authored-By: Claude Opus 4.7 --- cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java index 602ae982..7bfdd913 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -41,9 +41,7 @@ static int run(String[] args) { Thread.currentThread().interrupt(); } System.err.println("error: " + describe(e)); - if (System.getenv("VORTEX_DEBUG") != null) { - e.printStackTrace(System.err); - } + e.printStackTrace(System.err); return ExitStatus.ERROR; } } From d2f75dab9e816faa380e8af104a3986050c55108 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 23:17:04 +0200 Subject: [PATCH 25/37] fix(inspector): InspectorTree.Node uses identity equality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layout is a record whose components include a ByteBuffer metadata field. Record-generated equals/hashCode delegate to ByteBuffer's which reads the underlying bytes — when those bytes wrap a confined Arena owned by another thread (the IoWorker, in the TUI), the read throws WrongThreadException. 
The TUI dropped into that path on every local-file open: HashSet.add on the root node → Node.hashCode → Layout.hashCode → ByteBuffer.hashCode → arena byte read on the render thread. Nodes are constructed exactly once per shallow build and used as container keys by reference everywhere in the inspector, so identity semantics are the correct contract anyway. Overriding equals / hashCode on the record sidesteps Layout's metadata entirely and fixes every container — expanded set, peek/hex/dict caches, columnOf map, statsChildren set — in one shot. Also reverts the unconditional stack trace from the previous debug commit; VORTEX_DEBUG=1 gates it again. Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/cli/TuiCommand.java | 4 +++- .../dfa1/vortex/inspect/InspectorTree.java | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java index 7bfdd913..602ae982 100644 --- a/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java +++ b/cli/src/main/java/io/github/dfa1/vortex/cli/TuiCommand.java @@ -41,7 +41,9 @@ static int run(String[] args) { Thread.currentThread().interrupt(); } System.err.println("error: " + describe(e)); - e.printStackTrace(System.err); + if (System.getenv("VORTEX_DEBUG") != null) { + e.printStackTrace(System.err); + } return ExitStatus.ERROR; } } diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index ca4fae4d..972c26f5 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -61,6 +61,14 @@ public long totalSegmentBytes() { /// One layout node in the inspector tree. /// + ///

Uses identity equality and hashing — two Nodes are equal only if they + /// are the same reference. Avoids touching {@link Layout#metadata()} (a + /// {@link java.nio.ByteBuffer} that may wrap an Arena-confined segment, + /// crashing with {@link WrongThreadException} when hashed from any thread + /// other than the handle's owner). Inspector containers — expanded sets, + /// peek/hex/dict caches — key by Node, so identity semantics keep them + /// thread-safe across the IoWorker / render-thread split. + /// /// @param layout underlying [Layout] from the file footer /// @param fieldName column name when this node is a direct child of a top-level struct /// @param usedEncodings encoding IDs referenced by this subtree @@ -72,6 +80,16 @@ public record Node( Set usedEncodings, ArrayStats stats, List children) { + + @Override + public boolean equals(Object o) { + return this == o; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } } /// Builds an inspector tree from an open Vortex file handle. From 5990ec8fb2a2a2a07f2c681f2a76144360ffa45f Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 23:22:05 +0200 Subject: [PATCH 26/37] refactor(inspector): swap identity-Node override for IdentityHashMap containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the equals/hashCode override on InspectorTree.Node — a record's value semantics are part of its contract, and overriding them is surprising to readers and tools. Moves the identity semantics to the call sites instead: every Node-keyed container in Loop now backs onto IdentityHashMap, wrapped in synchronizedMap / synchronizedSet for the caches that the IoWorker writes to. Plain IdentityHashMap is fine for the constructor-populated containers (columnOf, statsChildren, expanded) since they're only accessed on the render thread once IoWorker init returns. dataCache keeps ConcurrentHashMap (String keys, no Node hashing). 
Functionally equivalent to the previous fix — the WrongThreadException that surfaced through Layout's record-auto hashCode never fires because IdentityHashMap and HashSet-of-IdentityHashMap both compare references via System.identityHashCode. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/inspect/InspectorTree.java | 18 ---------- .../vortex/inspect/VortexInspectorTui.java | 34 +++++++++++++------ 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java index 972c26f5..ca4fae4d 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/InspectorTree.java @@ -61,14 +61,6 @@ public long totalSegmentBytes() { /// One layout node in the inspector tree. /// - ///

Uses identity equality and hashing — two Nodes are equal only if they - /// are the same reference. Avoids touching {@link Layout#metadata()} (a - /// {@link java.nio.ByteBuffer} that may wrap an Arena-confined segment, - /// crashing with {@link WrongThreadException} when hashed from any thread - /// other than the handle's owner). Inspector containers — expanded sets, - /// peek/hex/dict caches — key by Node, so identity semantics keep them - /// thread-safe across the IoWorker / render-thread split. - /// /// @param layout underlying [Layout] from the file footer /// @param fieldName column name when this node is a direct child of a top-level struct /// @param usedEncodings encoding IDs referenced by this subtree @@ -80,16 +72,6 @@ public record Node( Set usedEncodings, ArrayStats stats, List children) { - - @Override - public boolean equals(Object o) { - return this == o; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } } /// Builds an inspector tree from an open Vortex file handle. 
diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 49879432..38dadc40 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -25,8 +25,8 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; +import java.util.Collections; +import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -106,15 +106,29 @@ private static final class Loop { private final InspectorTree tree; private final VortexHandle handle; private final IoWorker worker; - private final Set expanded = new HashSet<>(); - private final ConcurrentMap peekCache = new ConcurrentHashMap<>(); - private final Set peekInFlight = ConcurrentHashMap.newKeySet(); - private final ConcurrentMap hexCache = new ConcurrentHashMap<>(); - private final Set hexInFlight = ConcurrentHashMap.newKeySet(); + // Identity-keyed containers throughout: InspectorTree.Node wraps a + // Layout record whose ByteBuffer metadata field crashes with + // WrongThreadException when its hashCode reads arena-confined bytes + // from any thread other than the handle's owner. Identity hashing + // sidesteps that entirely and matches the natural semantics — Nodes + // are constructed exactly once per shallow build and uniquely + // identify a position in the tree. 
+ private final Set expanded = + Collections.newSetFromMap(new IdentityHashMap<>()); + private final Map peekCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Set peekInFlight = + Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); + private final Map hexCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Set hexInFlight = + Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>())); private final ConcurrentMap dataCache = new ConcurrentHashMap<>(); - private final ConcurrentMap dictCache = new ConcurrentHashMap<>(); - private final Map columnOf = new HashMap<>(); - private final Set statsChildren = new HashSet<>(); + private final Map dictCache = + Collections.synchronizedMap(new IdentityHashMap<>()); + private final Map columnOf = new IdentityHashMap<>(); + private final Set statsChildren = + Collections.newSetFromMap(new IdentityHashMap<>()); private volatile String lastError; private long tick; private int selected; From 91b243f484bea8881d52ee17abb581e9c0a4ffb7 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 23:36:21 +0200 Subject: [PATCH 27/37] refactor(inspector): rename RawTerminal to Terminal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Raw" prefix was redundant — every terminal abstraction in the inspect.term package is the raw / low-level one (there's nothing "non-raw" to disambiguate against). Plain Terminal reads more naturally at every call site. 
Co-Authored-By: Claude Opus 4.7 --- .../github/dfa1/vortex/inspect/VortexInspectorTui.java | 10 +++++----- .../github/dfa1/vortex/inspect/term/PosixTerminal.java | 2 +- .../inspect/term/{RawTerminal.java => Terminal.java} | 6 +++--- .../dfa1/vortex/inspect/term/WindowsTerminal.java | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) rename inspector/src/main/java/io/github/dfa1/vortex/inspect/term/{RawTerminal.java => Terminal.java} (93%) diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 38dadc40..163951af 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -16,7 +16,7 @@ import io.github.dfa1.vortex.core.array.VarBinArray; import io.github.dfa1.vortex.inspect.term.Ansi; import io.github.dfa1.vortex.inspect.term.Key; -import io.github.dfa1.vortex.inspect.term.RawTerminal; +import io.github.dfa1.vortex.inspect.term.Terminal; import io.github.dfa1.vortex.io.VortexHandle; import io.github.dfa1.vortex.scan.Chunk; import io.github.dfa1.vortex.scan.ScanIterator; @@ -83,7 +83,7 @@ public static void show(VortexHandle handle, InspectorTree.Progress progress) th public static void show(VortexHandle handle, IoWorker worker, InspectorTree.Progress progress) throws IOException { InspectorTree tree = InspectorTree.buildShallow(handle); - try (RawTerminal term = RawTerminal.open()) { + try (Terminal term = Terminal.open()) { new Loop(term, tree, handle, worker).run(); } } @@ -102,7 +102,7 @@ private static final class Loop { /// ASCII spinner frames; cycled by render tick. 
private static final char[] SPINNER = {'|', '/', '-', '\\'}; - private final RawTerminal term; + private final Terminal term; private final InspectorTree tree; private final VortexHandle handle; private final IoWorker worker; @@ -134,7 +134,7 @@ private static final class Loop { private int selected; private int scrollOffset; - Loop(RawTerminal term, InspectorTree tree, VortexHandle handle, IoWorker worker) { + Loop(Terminal term, InspectorTree tree, VortexHandle handle, IoWorker worker) { this.term = term; this.tree = tree; this.handle = handle; @@ -320,7 +320,7 @@ private void walk(InspectorTree.Node node, int depth, List out) { } private void render(List items) throws IOException { - RawTerminal.Size size = term.size(); + Terminal.Size size = term.size(); int width = size.cols(); int height = size.rows(); int leftWidth = Math.max(20, width / 2); diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java index 13bd5217..8ab92e6f 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/PosixTerminal.java @@ -20,7 +20,7 @@ /// On entry: saves the original {@code termios}, switches to alt screen, hides /// the cursor. On [#close()]: restores everything, even on exceptions, via a /// shutdown hook that fires if the caller skips try-with-resources. 
-public final class PosixTerminal implements RawTerminal { +public final class PosixTerminal implements Terminal { private static final int STDIN_FD = 0; private static final int STDOUT_FD = 1; diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java similarity index 93% rename from inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java rename to inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java index 9e15c602..425af92c 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/RawTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/Terminal.java @@ -11,13 +11,13 @@ /// /// Usage: /// ``` -/// try (RawTerminal term = RawTerminal.open()) { +/// try (Terminal term = Terminal.open()) { /// term.write(Ansi.CLEAR_SCREEN); /// Key k = term.readKey(); /// ... /// } /// ``` -public sealed interface RawTerminal extends AutoCloseable +public sealed interface Terminal extends AutoCloseable permits PosixTerminal, WindowsTerminal { /// Opens the platform-appropriate raw-mode terminal. 
@@ -27,7 +27,7 @@ public sealed interface RawTerminal extends AutoCloseable /// /// @return an open raw terminal handle /// @throws IOException if the OS-level setup fails - static RawTerminal open() throws IOException { + static Terminal open() throws IOException { String os = System.getProperty("os.name", "").toLowerCase(); if (os.contains("win")) { return WindowsTerminal.open(); diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java index ca2850db..5a0c6630 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/term/WindowsTerminal.java @@ -20,7 +20,7 @@ /// /// {@code GetConsoleScreenBufferInfo} drives [#size()]; we report the visible /// window rect, not the scrollback buffer. -public final class WindowsTerminal implements RawTerminal { +public final class WindowsTerminal implements Terminal { private static final long STD_INPUT_HANDLE = -10L; private static final long STD_OUTPUT_HANDLE = -11L; From 90dddfe69f23330ca2508bbc6a52e939e173c875 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Mon, 8 Jun 2026 23:38:42 +0200 Subject: [PATCH 28/37] build(cli): set Enable-Native-Access manifest attribute on the uber-jar PosixTerminal and WindowsTerminal call FFM downcalls (tcgetattr / cfmakeraw / ioctl / SetConsoleMode), which JEP 472 flags as "restricted methods" in JDK 25. Without an explicit opt-in, the JVM prints a four-line "WARNING: restricted method has been called" block on each first invocation and threatens to block such calls entirely in a future release. The standard fix for an uber-jar is the Enable-Native-Access manifest attribute (also from JEP 472): the entry-point module gets native access on launch without the user passing the corresponding command flag. 
Only the cli jar gets the entry; vortex-core / vortex-reader consumers still have to enable native access in their own deployments if they touch FFM directly. Co-Authored-By: Claude Opus 4.7 --- cli/pom.xml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cli/pom.xml b/cli/pom.xml index d5d4eccd..f02df04c 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -75,6 +75,23 @@ io.github.dfa1.vortex.cli.VortexCli + + + ALL-UNNAMED + From 126733db1d0cae0838c3e6c8a525e4c627fe4685 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:10:17 +0200 Subject: [PATCH 29/37] fix(core): reject null cells in GenericArray.getDecimal mantissa path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When vortex.decimal_byte_parts produces a GenericArray whose single child is a MaskedArray (nullable decimal columns), mantissaFromChild used to unwrap the MaskedArray and read straight from a.inner() at index i — silently returning whatever integer happened to occupy the slot for null cells. Now consults a.isValid(i) first and throws a VortexException with a "null cell at index N" message if the bit is clear. The TUI's tryDecimal recognises that message and renders the cell as "null" instead of falling back to the generic "<GenericArray dtype>" placeholder.
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 7 ++++- .../vortex/core/array/GenericArrayTest.java | 29 +++++++++++++++++++ .../vortex/inspect/VortexInspectorTui.java | 4 +++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index 644d17d6..59ab7515 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -157,7 +157,12 @@ private static BigInteger mantissaFromChild(Array child, long i) { case IntArray a -> BigInteger.valueOf(a.getInt(i)); case ShortArray a -> BigInteger.valueOf(a.getShort(i)); case ByteArray a -> BigInteger.valueOf(a.getByte(i)); - case MaskedArray a -> mantissaFromChild(a.inner(), i); + case MaskedArray a -> { + if (!a.isValid(i)) { + throw new VortexException("getDecimal: null cell at index " + i); + } + yield mantissaFromChild(a.inner(), i); + } default -> throw new VortexException("getDecimal: unsupported mantissa child type " + child.getClass().getSimpleName()); diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java index 5f3cd8b7..c5b6a42d 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -201,6 +201,35 @@ void getDecimal_indexOutOfBounds_throws() { } } + @Test + void getDecimal_nullCellInMaskedChild_throws() { + // Given — mantissa-child shape with a MaskedArray wrapping a LongArray; + // the validity bitmap says index 1 is null. Without the validity check + // the previous code would happily decode whatever bytes sat at that + // slot and return a garbage BigDecimal. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment mspBuf = arena.allocate(16); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1234L); + mspBuf.set(ValueLayout.JAVA_LONG_UNALIGNED, 8, 9999L); + LongArray msp = new LongArray(new DType.Primitive(PType.I64, false), 2, mspBuf); + + MemorySegment validityBuf = arena.allocate(1); + // bit 0 set = index 0 valid; bit 1 clear = index 1 null + validityBuf.set(ValueLayout.JAVA_BYTE, 0, (byte) 0b0000_0001); + BoolArray validity = new BoolArray(new DType.Bool(false), 2, validityBuf); + + MaskedArray masked = new MaskedArray(msp, validity); + DType.Decimal dec = new DType.Decimal((byte) 15, (byte) 2, true); + GenericArray sut = new GenericArray(dec, 2, new MemorySegment[0], new Array[]{masked}); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal("12.34")); + assertThatThrownBy(() -> sut.getDecimal(1)) + .isInstanceOf(io.github.dfa1.vortex.core.VortexException.class) + .hasMessageContaining("null cell at index 1"); + } + } + @Test void getDecimal_nonDecimalDtype_throws() { // Given — guards against silently returning garbage on misuse diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 163951af..25cea4d9 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -707,6 +707,10 @@ private static String tryDecimal(GenericArray a, int i) { try { return a.getDecimal(i).toPlainString(); } catch (RuntimeException e) { + String msg = e.getMessage(); + if (msg != null && msg.contains("null cell")) { + return "null"; + } return "<" + a.getClass().getSimpleName() + " " + a.dtype() + ">"; } } From c74c7ead36ca616171eaad6a4cbc848c3237ca56 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:10:51 +0200 Subject: [PATCH 30/37] test(core): 
cover GenericArray.getDecimal i128 (precision > 18) path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GenericArray.getDecimal silently supported 16-byte mantissas for decimal(>18, _) columns but had no test exercising it; the precision-table tests stopped at decimal(15,2) / I64. The new test round-trips ±2^70 through a single-buffer decimal(38,4) so the ValueLayout.JAVA_BYTE loop in readSignedLe gets actually walked end to end and the little-endian -> big-endian flip is verified at i128 width. Co-Authored-By: Claude Opus 4.7 --- .../vortex/core/array/GenericArrayTest.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java index c5b6a42d..f3f56a0a 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/GenericArrayTest.java @@ -141,6 +141,45 @@ void getDecimal_childArrayShape_decodesViaMostSignificantPart() { } } + @Test + void getDecimal_i128Buffer_decodesWideMantissa() { + // Given — decimal(38,4) stores mantissas wider than i64; vortex.decimal + // writes 16-byte little-endian two's-complement. Two values: 2^70 (way + // above I64.MAX) and -2^70 anchor the high-precision path the + // narrower-width tests never exercise. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(32); + java.math.BigInteger pos = java.math.BigInteger.TWO.pow(70); + java.math.BigInteger neg = pos.negate(); + writeI128Le(buf, 0, pos); + writeI128Le(buf, 16, neg); + DType.Decimal dec = new DType.Decimal((byte) 38, (byte) 4, false); + GenericArray sut = new GenericArray(dec, 2, buf); + + // When / Then + assertThat(sut.getDecimal(0)).isEqualByComparingTo(new BigDecimal(pos, 4)); + assertThat(sut.getDecimal(1)).isEqualByComparingTo(new BigDecimal(neg, 4)); + } + } + + private static void writeI128Le(MemorySegment buf, long offset, java.math.BigInteger value) { + // BigInteger.toByteArray() returns minimum-length big-endian two's-complement. + // Pad / sign-extend to 16 bytes, then reverse into the little-endian wire slot. + byte[] be = value.toByteArray(); + byte[] le16 = new byte[16]; + // sign-extend pad in big-endian form + byte sign = (byte) (value.signum() < 0 ? 0xFF : 0x00); + for (int i = 0; i < 16; i++) { + le16[15 - i] = sign; + } + for (int i = 0; i < be.length && i < 16; i++) { + le16[i] = be[be.length - 1 - i]; + } + for (int i = 0; i < 16; i++) { + buf.set(ValueLayout.JAVA_BYTE, offset + i, le16[i]); + } + } + @Test void getDecimal_widthDerivedFromBufferNotPrecision() { // Given — decimal(15,2) is precision 15 (≤18 → "should" be I64), but From dffbb5133820d83ebe3ab0471d500d728e855bae Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:11:45 +0200 Subject: [PATCH 31/37] fix(core): bounds-check Extensions.localDate; trim doc to date-only Both localDate overloads now reject indices outside [0, length()) up front instead of leaking through to storage.getInt and silently reading whatever the typed-array accessor produces past the end (or worse: garbage from a half-element offset when the storage layer doesn't bounds-check). 
The class-level doc previously listed vortex.timestamp / vortex.time as covered types, but only vortex.date was actually implemented. Trims the doc to match reality and notes timestamp / time as TODO gated on a public ScalarUnit type. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 15 +++++++++++--- .../vortex/core/array/ExtensionsTest.java | 20 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index eea0910e..bb718e1a 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -5,9 +5,10 @@ import java.time.LocalDate; -/// Decoding helpers for Vortex extension dtypes (e.g. {@code vortex.date}, -/// {@code vortex.timestamp}) that ship as primitive storage arrays plus an -/// extension id on the {@link DType}. +/// Decoding helpers for Vortex extension dtypes that ship as a primitive +/// storage array plus an extension id on the {@link DType}. Currently covers +/// {@code vortex.date}; {@code vortex.time} / {@code vortex.timestamp} live +/// on the TODO list until a public ScalarUnit type is available. /// /// Lives in core so any reader-jar consumer can decode these cells without /// reimplementing the storage conventions. 
@@ -36,6 +37,7 @@ public static LocalDate localDate(Array array, long i) { if (!(array.dtype() instanceof DType.Extension ext) || !DATE.equals(ext.extensionId())) { throw new VortexException("localDate called on non-date dtype: " + array.dtype()); } + checkBounds(i, array.length()); return LocalDate.ofEpochDay(epochDay(array, i)); } @@ -55,9 +57,16 @@ public static LocalDate localDate(DType.Extension ext, Array storage, long i) { if (!DATE.equals(ext.extensionId())) { throw new VortexException("localDate called with non-date extension: " + ext.extensionId()); } + checkBounds(i, storage.length()); return LocalDate.ofEpochDay(epochDay(storage, i)); } + private static void checkBounds(long i, long length) { + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); + } + } + private static long epochDay(Array array, long i) { return switch (array) { case ByteArray a -> a.getByte(i); diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java index 423c7bf8..d4bc61a1 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -104,6 +104,26 @@ void localDate_withWrongExtensionId_throws() { } } + @Test + void localDate_indexOutOfBounds_throws() { + // Given — both overloads must reject indices past the array length + // rather than silently reading whatever the storage decoder returns + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + IntArray storage = new IntArray(I32, 1, buf); + DType.Extension ext = new DType.Extension(Extensions.DATE, I32, null, false); + IntArray dated = new IntArray(DATE_DTYPE, 1, buf); + + // When / Then + assertThatThrownBy(() -> Extensions.localDate(dated, 1)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> 
Extensions.localDate(dated, -1)) + .isInstanceOf(IndexOutOfBoundsException.class); + assertThatThrownBy(() -> Extensions.localDate(ext, storage, 1)) + .isInstanceOf(IndexOutOfBoundsException.class); + } + } + @Test void localDate_unsupportedStorage_throws() { // Given — a date dtype on top of a varbin array makes no semantic sense From d80fbc49ebeee3481174c87b4077423f49c6f054 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:12:45 +0200 Subject: [PATCH 32/37] perf(core): GenericArray.getDecimal width 1/2/4/8 reads stay allocation-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readSignedLe used to allocate a byte[width] per call and copy bytes one at a time before handing it to BigInteger. Fine for the 32-row TUI preview, painful for any caller decoding a full decimal column. Widths 1 / 2 / 4 / 8 now use the corresponding native ValueLayout (JAVA_BYTE / SHORT_LE / INT_LE / LONG_LE) and feed BigInteger.valueOf, which boxes via the small-integer cache when the value fits. Width 16 keeps the heap-byte-array path under a separate helper — there is no 128-bit ValueLayout and the i128 case only fires for decimal(>18, _) columns, which are rare. LE layouts are constructed explicitly via withOrder(LITTLE_ENDIAN) rather than relying on native byte order, so the code stays correct on big-endian hosts. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/GenericArray.java | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java index 59ab7515..9147f8be 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/GenericArray.java @@ -7,6 +7,7 @@ import java.lang.foreign.ValueLayout; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.ByteOrder; /// Fallback [Array] for dtypes that lack a dedicated concrete subtype. /// @@ -169,11 +170,32 @@ private static BigInteger mantissaFromChild(Array child, long i) { }; } + private static final ValueLayout.OfShort SHORT_LE = + ValueLayout.JAVA_SHORT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfInt INT_LE = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static final ValueLayout.OfLong LONG_LE = + ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + private static BigInteger readSignedLe(MemorySegment buf, long offset, int width) { - // Little-endian two's-complement on disk; BigInteger expects big-endian. 
- byte[] be = new byte[width]; - for (int k = 0; k < width; k++) { - be[width - 1 - k] = buf.get(ValueLayout.JAVA_BYTE, offset + k); + return switch (width) { + case 1 -> BigInteger.valueOf(buf.get(ValueLayout.JAVA_BYTE, offset)); + case 2 -> BigInteger.valueOf(buf.get(SHORT_LE, offset)); + case 4 -> BigInteger.valueOf(buf.get(INT_LE, offset)); + case 8 -> BigInteger.valueOf(buf.get(LONG_LE, offset)); + case 16 -> readSigned128Le(buf, offset); + default -> throw new VortexException("readSignedLe: unsupported width " + width); + }; + } + + private static BigInteger readSigned128Le(MemorySegment buf, long offset) { + // Two's-complement i128 on disk in little-endian; BigInteger ingests big-endian. + // No SIMD intrinsic for 16-byte signed integer, so we materialise into a heap + // buffer here. Only fires for decimal(>18, _) — narrow-precision fast paths above + // stay allocation-free. + byte[] be = new byte[16]; + for (int k = 0; k < 16; k++) { + be[15 - k] = buf.get(ValueLayout.JAVA_BYTE, offset + k); } return new BigInteger(be); } From 7fb14e652ef4a23e284c9dfe452598e5f8e3b37d Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:21:40 +0200 Subject: [PATCH 33/37] docs(compatibility): list Vortex extension types + Java coverage status Adds an "Extension types" section to docs/compatibility.md mirroring the encodings table style. Covers the four extensions the Rust reference defines under vortex-array/src/extension: date, time, timestamp, uuid. Notes the canonical id, storage shape, metadata layout, the matching Java decoder (Extensions.localDate is the only one wired up), and whether it's supported. Includes the TimeUnit metadata-byte enum table referenced from extension/datetime/unit.rs so the precision-byte values aren't a magic constant for future implementers. 
Co-Authored-By: Claude Opus 4.7 --- docs/compatibility.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/compatibility.md b/docs/compatibility.md index 683879b4..ba72079c 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -56,6 +56,36 @@ try (VortexReader vf = VortexReader.open(path, registry)) { } ``` +## Extension types + +Extension dtypes wrap a primitive storage array with a logical-id tag plus optional +metadata. The Rust catalogue lives in +[`vortex-array/src/extension/`](https://github.com/vortex-data/vortex/tree/develop/vortex-array/src/extension); +each subdir below names a canonical extension id and its on-disk shape. + +| Extension id | Storage | Metadata | Java decoder | Status | +|---------------------|-------------------------------------------------|---------------------------------------------|-----------------------------------------------|--------| +| `vortex.date` | I32 (days) or I64 (ms) since Unix epoch | 1 byte: `TimeUnit` (2 = ms, 4 = days) | `Extensions.localDate(Array, long)` | ✅ | +| `vortex.time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | _not yet_ | ❌ | +| `vortex.timestamp` | I64 with `TimeUnit` (s/ms/μs/ns) + optional tz | 1 byte unit + UTF-8 tz string (optional) | _not yet_ | ❌ | +| `vortex.uuid` | `FixedSizeList(Primitive(U8), 16)` | 1 byte UUID version (optional, 0xff = unset) | _not yet_ | ❌ | + +`TimeUnit` (see [`extension/datetime/unit.rs`](https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/extension/datetime/unit.rs)) +encodes precision in the first metadata byte: + +| Value | Unit | +|-------|--------------| +| 0 | Nanoseconds | +| 1 | Microseconds | +| 2 | Milliseconds | +| 3 | Seconds | +| 4 | Days | + +For unsupported extension ids the inspector falls back to a placeholder cell +(`<ClassName dtype>`); the underlying storage array still decodes +correctly via the primitive accessors, callers just have to format the value +themselves.
+ ## S3 Fixture Status (v0.72.0) Cross-language round-trips tested against Rust-written fixture files hosted at From 185cf990ee10e884ece26692a5c45ec78779c162 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:27:07 +0200 Subject: [PATCH 34/37] feat(core): decode vortex.uuid cells via Extensions.uuid Wires up the second Vortex extension type (after vortex.date). Storage shape per the Arrow canonical UUID extension is FixedSizeList(Primitive(U8), 16); each row is 16 contiguous bytes interpreted as a big-endian UUID. Extensions.uuid(Array, long) reads both halves with an explicit & 0xffL mask so ByteArray.getByte's sign-extension doesn't poison the upper bytes of msb / lsb. java.util.UUID id = Extensions.uuid(array, i); The matching uuid(DType.Extension, Array, long) overload guards against a non-uuid extension being silently reinterpreted, mirroring the localDate pattern. Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 57 +++++++++ .../vortex/core/array/ExtensionsTest.java | 111 ++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index bb718e1a..a4fa35bb 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -18,6 +18,10 @@ public final class Extensions { /// (1970-01-01), Arrow-compatible. public static final String DATE = "vortex.date"; + /// Extension id for UUID columns - storage is + /// {@code FixedSizeList(Primitive(U8), 16)}, Arrow-compatible. + public static final String UUID_ID = "vortex.uuid"; + private Extensions() { } @@ -61,6 +65,59 @@ public static LocalDate localDate(DType.Extension ext, Array storage, long i) { return LocalDate.ofEpochDay(epochDay(storage, i)); } + /// Decodes a {@code vortex.uuid} cell. 
+ /// + /// Storage shape per Arrow's canonical UUID extension: a + /// {@link FixedSizeListArray} of {@link ByteArray} (U8) with + /// {@code fixedSize == 16}; row {@code i} is the 16 contiguous bytes + /// {@code [i*16, i*16+16)} interpreted as a big-endian UUID. + /// + /// @param storage UUID extension's storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded {@link java.util.UUID} + /// @throws VortexException if {@code storage} isn't a + /// {@code FixedSizeListArray} of size 16 + public static java.util.UUID uuid(Array storage, long i) { + checkBounds(i, storage.length()); + if (!(storage instanceof FixedSizeListArray fsl)) { + throw new VortexException("uuid: expected FixedSizeListArray storage, got " + + storage.getClass().getSimpleName()); + } + if (fsl.fixedSize() != 16) { + throw new VortexException("uuid: expected fixedSize 16, got " + fsl.fixedSize()); + } + if (!(fsl.elements() instanceof ByteArray bytes)) { + throw new VortexException("uuid: expected ByteArray elements, got " + + fsl.elements().getClass().getSimpleName()); + } + long base = i * 16; + long msb = 0L; + long lsb = 0L; + for (int k = 0; k < 8; k++) { + msb = (msb << 8) | (bytes.getByte(base + k) & 0xffL); + } + for (int k = 0; k < 8; k++) { + lsb = (lsb << 8) | (bytes.getByte(base + 8 + k) & 0xffL); + } + return new java.util.UUID(msb, lsb); + } + + /// Same as {@link #uuid(Array, long)} but verifies the declared extension id. + /// Use after {@code vortex.ext}'s decoder has unwrapped the storage and the + /// Array no longer carries the Extension dtype. 
+ /// + /// @param ext the column's declared extension dtype; must be {@code vortex.uuid} + /// @param storage UUID storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded {@link java.util.UUID} + /// @throws VortexException if {@code ext} isn't {@code vortex.uuid} or storage shape doesn't match + public static java.util.UUID uuid(DType.Extension ext, Array storage, long i) { + if (!UUID_ID.equals(ext.extensionId())) { + throw new VortexException("uuid called with non-uuid extension: " + ext.extensionId()); + } + return uuid(storage, i); + } + private static void checkBounds(long i, long length) { if (i < 0 || i >= length) { throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java index d4bc61a1..dd22e612 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -104,6 +104,117 @@ void localDate_withWrongExtensionId_throws() { } } + @Test + void uuid_roundTripsKnownValue() { + // Given — Arrow canonical layout: FixedSizeList[16]; one well-known UUID + // (RFC 9562 example) plus its inverse, so msb/lsb extraction is exercised in + // both halves rather than only the high bytes. 
+ java.util.UUID expected = java.util.UUID.fromString("123e4567-e89b-12d3-a456-426614174000"); + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + long msb = expected.getMostSignificantBits(); + long lsb = expected.getLeastSignificantBits(); + for (int k = 0; k < 8; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) ((msb >> (56 - 8 * k)) & 0xff)); + buf.set(ValueLayout.JAVA_BYTE, 8 + k, (byte) ((lsb >> (56 - 8 * k)) & 0xff)); + } + ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); + DType.FixedSizeList fslDtype = new DType.FixedSizeList( + new DType.Primitive(PType.U8, false), 16, false); + FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); + + // When / Then + assertThat(Extensions.uuid(sut, 0)).isEqualTo(expected); + } + } + + @Test + void uuid_zeroBytes_decodesToZeroUuid() { + // Given — defensive: all-zero UUID is the most common "null UUID" sentinel + // and a regression test for sign extension on getByte + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); + DType.FixedSizeList fslDtype = new DType.FixedSizeList( + new DType.Primitive(PType.U8, false), 16, false); + FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); + + // When / Then + assertThat(Extensions.uuid(sut, 0)) + .isEqualTo(new java.util.UUID(0L, 0L)); + } + } + + @Test + void uuid_allOnesBytes_decodesWithoutSignExtension() { + // Given — 0xff in every position; if getByte returned a sign-extended int + // and we forgot the & 0xffL mask, msb/lsb would land as 0xff..fff..ff with + // sign bits poisoning the upper longs. Use the highest-bit pattern as the + // sign-extension trap. 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + for (int k = 0; k < 16; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) 0xff); + } + ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); + DType.FixedSizeList fslDtype = new DType.FixedSizeList( + new DType.Primitive(PType.U8, false), 16, false); + FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); + + // When / Then + assertThat(Extensions.uuid(sut, 0)) + .isEqualTo(new java.util.UUID(-1L, -1L)); + } + } + + @Test + void uuid_wrongFixedSize_throws() { + // Given — 8-byte FixedSizeList isn't a UUID; catch the mismatch up front + try (Arena arena = Arena.ofConfined()) { + ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 8, arena.allocate(8)); + DType.FixedSizeList wrongSize = new DType.FixedSizeList( + new DType.Primitive(PType.U8, false), 8, false); + FixedSizeListArray sut = new FixedSizeListArray(wrongSize, 1, inner); + + // When / Then + assertThatThrownBy(() -> Extensions.uuid(sut, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("fixedSize 16"); + } + } + + @Test + void uuid_wrongStorageType_throws() { + // Given — a plain IntArray isn't FixedSizeList; guard against callers + // passing the wrong column by mistake + try (Arena arena = Arena.ofConfined()) { + IntArray notFsl = new IntArray(I32, 1, arena.allocate(4)); + + // When / Then + assertThatThrownBy(() -> Extensions.uuid(notFsl, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("FixedSizeListArray"); + } + } + + @Test + void uuid_explicitExtensionOverload_verifiesId() { + // Given — passing a non-uuid extension dtype must not be silently + // reinterpreted as a uuid storage column + try (Arena arena = Arena.ofConfined()) { + ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, arena.allocate(16)); + DType.FixedSizeList fslDtype = new DType.FixedSizeList( + new DType.Primitive(PType.U8, false), 
16, false); + FixedSizeListArray storage = new FixedSizeListArray(fslDtype, 1, inner); + DType.Extension wrongExt = new DType.Extension("vortex.something", fslDtype, null, false); + + // When / Then + assertThatThrownBy(() -> Extensions.uuid(wrongExt, storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-uuid extension"); + } + } + @Test void localDate_indexOutOfBounds_throws() { // Given — both overloads must reject indices past the array length From c18e6e223c4382da426f7201fdd5f7fd9849b167 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:28:41 +0200 Subject: [PATCH 35/37] feat(core): decode vortex.time cells via Extensions.localTime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the third Vortex extension type. Storage is a signed integer in the TimeUnit recorded in ext.metadata() byte 0: - tag 0 / Nanoseconds -> I64 nanos-of-day - tag 1 / Microseconds -> I64 micros-of-day - tag 2 / Milliseconds -> I32 millis-of-day - tag 3 / Seconds -> I32 seconds-of-day Days (tag 4) is rejected — vortex.time only admits sub-day units. Conversion scales raw to nanos via 1e9 / TimeUnit.divisor() so the existing TimeUnit enum carries all the precision math. LocalTime t = Extensions.localTime(ext, storage, i); Tests cover all four sub-day units, the Days-tag rejection, the missing-metadata path, and the wrong-extension-id guard.
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 46 ++++++++ .../vortex/core/array/ExtensionsTest.java | 104 ++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index a4fa35bb..17474bee 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -2,8 +2,10 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.encoding.TimeUnit; import java.time.LocalDate; +import java.time.LocalTime; /// Decoding helpers for Vortex extension dtypes that ship as a primitive /// storage array plus an extension id on the {@link DType}. Currently covers @@ -22,6 +24,11 @@ public final class Extensions { /// {@code FixedSizeList(Primitive(U8), 16)}, Arrow-compatible. public static final String UUID_ID = "vortex.uuid"; + /// Extension id for time-of-day columns - storage is a signed integer + /// counting seconds / milliseconds (I32) or microseconds / nanoseconds + /// (I64) since midnight; the unit is the first byte of {@code ext.metadata()}. + public static final String TIME = "vortex.time"; + private Extensions() { } @@ -102,6 +109,45 @@ public static java.util.UUID uuid(Array storage, long i) { return new java.util.UUID(msb, lsb); } + /// Decodes a {@code vortex.time} cell to a {@link LocalTime}. + /// + /// The storage array must be a signed integer primitive ({@link IntArray} + /// for second / millisecond precision, {@link LongArray} for microsecond / + /// nanosecond precision), optionally wrapped in {@link MaskedArray}. The + /// {@link TimeUnit} read from {@code ext.metadata()} byte 0 selects the + /// precision; {@link TimeUnit#Days} is not valid for time-of-day and + /// throws. 
+ /// + /// @param ext the column's declared extension dtype; must be {@code vortex.time} + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded time-of-day + /// @throws VortexException if {@code ext} isn't {@code vortex.time}, + /// the metadata unit is {@link TimeUnit#Days}, or {@code storage} + /// isn't an integer primitive + public static LocalTime localTime(DType.Extension ext, Array storage, long i) { + if (!TIME.equals(ext.extensionId())) { + throw new VortexException("localTime called with non-time extension: " + ext.extensionId()); + } + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("localTime: Days unit not valid for vortex.time"); + } + long raw = epochDay(storage, i); + // raw is in `unit`; scale to nanos-of-day. divisor() = units per second. + long nanos = raw * (1_000_000_000L / unit.divisor()); + return LocalTime.ofNanoOfDay(nanos); + } + + private static TimeUnit readUnit(DType.Extension ext) { + java.nio.ByteBuffer meta = ext.metadata(); + if (meta == null || !meta.hasRemaining()) { + throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId()); + } + return TimeUnit.fromTag(meta.get(meta.position())); + } + /// Same as {@link #uuid(Array, long)} but verifies the declared extension id. /// Use after {@code vortex.ext}'s decoder has unwrapped the storage and the /// Array no longer carries the Extension dtype. 
diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java index dd22e612..b14c243a 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -104,6 +104,110 @@ void localDate_withWrongExtensionId_throws() { } } + @Test + void localTime_secondsUnit_decodesViaI32() { + // Given — TimeUnit tag 3 (Seconds), storage I32: 3661 seconds = 01:01:01 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 3661); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extensions.localTime(timeExt((byte) 3), storage, 0)) + .isEqualTo(java.time.LocalTime.of(1, 1, 1)); + } + } + + @Test + void localTime_millisecondsUnit_decodesViaI32() { + // Given — TimeUnit tag 2 (Milliseconds): 3_661_500 ms = 01:01:01.500 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 3_661_500); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extensions.localTime(timeExt((byte) 2), storage, 0)) + .isEqualTo(java.time.LocalTime.of(1, 1, 1, 500_000_000)); + } + } + + @Test + void localTime_microsecondsUnit_decodesViaI64() { + // Given — TimeUnit tag 1 (Microseconds): 1 second + 1 microsecond + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1_000_001L); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When / Then + assertThat(Extensions.localTime(timeExt((byte) 1), storage, 0)) + .isEqualTo(java.time.LocalTime.of(0, 0, 1, 1_000)); + } + } + + @Test + void localTime_nanosecondsUnit_decodesViaI64() { + // Given — TimeUnit tag 0 (Nanoseconds): 42 nanos past midnight + try (Arena 
arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 42L); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When / Then + assertThat(Extensions.localTime(timeExt((byte) 0), storage, 0)) + .isEqualTo(java.time.LocalTime.ofNanoOfDay(42)); + } + } + + @Test + void localTime_daysUnit_throws() { + // Given — Days isn't a sub-second unit, so vortex.time with Days is malformed + try (Arena arena = Arena.ofConfined()) { + IntArray storage = new IntArray(I32, 1, arena.allocate(4)); + + // When / Then + assertThatThrownBy(() -> Extensions.localTime(timeExt((byte) 4), storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("Days unit not valid"); + } + } + + @Test + void localTime_wrongExtensionId_throws() { + // Given — guards against calling with a non-time extension + try (Arena arena = Arena.ofConfined()) { + IntArray storage = new IntArray(I32, 1, arena.allocate(4)); + DType.Extension wrongExt = new DType.Extension("vortex.date", I32, null, false); + + // When / Then + assertThatThrownBy(() -> Extensions.localTime(wrongExt, storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("non-time extension"); + } + } + + @Test + void localTime_missingMetadata_throws() { + // Given — metadata byte must specify TimeUnit; otherwise we can't know + // whether the storage is in seconds, ms, μs, or ns + try (Arena arena = Arena.ofConfined()) { + IntArray storage = new IntArray(I32, 1, arena.allocate(4)); + DType.Extension noMetaExt = new DType.Extension(Extensions.TIME, I32, null, false); + + // When / Then + assertThatThrownBy(() -> Extensions.localTime(noMetaExt, storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("missing TimeUnit metadata"); + } + } + + private static DType.Extension timeExt(byte tag) { + java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(1); + meta.put(0, tag); + return new 
DType.Extension(Extensions.TIME, I32, meta, false); + } + @Test void uuid_roundTripsKnownValue() { // Given — Arrow canonical layout: FixedSizeList[16]; one well-known UUID From 724d0de20ffb89b321b836d7aa914c3ae78f4665 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 07:34:17 +0200 Subject: [PATCH 36/37] feat(core): decode vortex.timestamp via Extensions.instant + zonedDateTime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final Vortex extension type. Two complementary helpers: Instant t = Extensions.instant(ext, storage, i); // tz-less ZonedDateTime z = Extensions.zonedDateTime(ext, storage, i); // tz-aware Storage is an I64 count of the metadata-recorded TimeUnit since the Unix epoch (Days rejected, same as vortex.time). Wire format for the extension metadata, kept binary-compatible with the Rust reference: byte[0] = TimeUnit tag bytes[1..3] = tz_len (u16 LE) bytes[3..3+tz_len] = tz UTF-8 instantFromRaw uses Math.floorDiv / floorMod for the μs and ns paths so negative timestamps (pre-1970) split cleanly across the seconds boundary instead of rounding fractional nanos towards zero. zonedDateTime defaults to UTC when tz_len == 0; Extensions.timezone is exposed so callers can ask the column's recorded zone without materialising an Instant. Truncated metadata (declared tz_len longer than the buffer can carry) throws rather than silently decoding a shorter zone string. docs/compatibility.md now records all four Vortex extensions as implemented in vortex-java. 
Co-Authored-By: Claude Opus 4.7 --- .../dfa1/vortex/core/array/Extensions.java | 106 +++++++++++++- .../vortex/core/array/ExtensionsTest.java | 129 ++++++++++++++++++ docs/compatibility.md | 12 +- 3 files changed, 240 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java index 17474bee..2040aa48 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java @@ -4,8 +4,16 @@ import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.TimeUnit; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Optional; /// Decoding helpers for Vortex extension dtypes that ship as a primitive /// storage array plus an extension id on the {@link DType}. Currently covers @@ -29,6 +37,13 @@ public final class Extensions { /// (I64) since midnight; the unit is the first byte of {@code ext.metadata()}. public static final String TIME = "vortex.time"; + /// Extension id for timestamp columns - storage is an I64 count of the + /// recorded {@link TimeUnit} since the Unix epoch, with an optional + /// IANA-style timezone string carried in the extension metadata. + /// Metadata layout: {@code byte[0] = TimeUnit tag, bytes[1..3] = tz_len + /// (u16 LE), bytes[3..3+tz_len] = tz UTF-8}. 
+ public static final String TIMESTAMP = "vortex.timestamp"; + private Extensions() { } @@ -141,13 +156,102 @@ public static LocalTime localTime(DType.Extension ext, Array storage, long i) { } private static TimeUnit readUnit(DType.Extension ext) { - java.nio.ByteBuffer meta = ext.metadata(); + ByteBuffer meta = ext.metadata(); if (meta == null || !meta.hasRemaining()) { throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId()); } return TimeUnit.fromTag(meta.get(meta.position())); } + /// Decodes a {@code vortex.timestamp} cell to an {@link Instant}, ignoring + /// any timezone the column metadata carries. Use {@link #zonedDateTime} + /// when the timezone matters. + /// + /// Storage is an I64 count of the metadata-recorded {@link TimeUnit} since + /// the Unix epoch. {@link TimeUnit#Days} is invalid for timestamps. + /// + /// @param ext declared extension dtype; must be {@code vortex.timestamp} + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded instant + /// @throws VortexException if {@code ext} isn't {@code vortex.timestamp}, + /// the metadata unit is Days, or storage isn't an integer primitive + public static Instant instant(DType.Extension ext, Array storage, long i) { + if (!TIMESTAMP.equals(ext.extensionId())) { + throw new VortexException("instant called with non-timestamp extension: " + ext.extensionId()); + } + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("instant: Days unit not valid for vortex.timestamp"); + } + return instantFromRaw(epochDay(storage, i), unit); + } + + /// Decodes a {@code vortex.timestamp} cell to a {@link ZonedDateTime} + /// using the timezone carried in the extension metadata. Falls back to + /// {@link ZoneOffset#UTC} when the metadata has no timezone string. 
+ /// + /// @param ext declared extension dtype; must be {@code vortex.timestamp} + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded zoned date-time + /// @throws VortexException if {@code ext} isn't {@code vortex.timestamp}, + /// the metadata unit is Days, or storage isn't an integer primitive + public static ZonedDateTime zonedDateTime(DType.Extension ext, Array storage, long i) { + Instant instant = instant(ext, storage, i); + return instant.atZone(timezone(ext).orElse(ZoneOffset.UTC)); + } + + /// Returns the optional IANA timezone string carried in a + /// {@code vortex.timestamp} extension's metadata. + /// + /// @param ext declared extension dtype + /// @return zone id parsed from {@code ext.metadata()} bytes 3..3+tz_len, + /// or empty when {@code tz_len == 0} + /// @throws VortexException if the metadata is truncated mid-string + public static Optional timezone(DType.Extension ext) { + ByteBuffer meta = ext.metadata(); + if (meta == null || meta.remaining() < 3) { + return Optional.empty(); + } + ByteBuffer le = meta.duplicate().order(ByteOrder.LITTLE_ENDIAN); + int basePos = le.position(); + int tzLen = Short.toUnsignedInt(le.getShort(basePos + 1)); + if (tzLen == 0) { + return Optional.empty(); + } + if (le.remaining() < 3 + tzLen) { + throw new VortexException("timestamp metadata truncated: declared tz_len=" + + tzLen + " but only " + (le.remaining() - 3) + " bytes available"); + } + byte[] tzBytes = new byte[tzLen]; + for (int k = 0; k < tzLen; k++) { + tzBytes[k] = le.get(basePos + 3 + k); + } + return Optional.of(ZoneId.of(new String(tzBytes, StandardCharsets.UTF_8))); + } + + private static Instant instantFromRaw(long raw, TimeUnit unit) { + return switch (unit) { + case Seconds -> Instant.ofEpochSecond(raw); + case Milliseconds -> Instant.ofEpochMilli(raw); + case Microseconds -> { + // floorDiv/floorMod handle negative timestamps (pre-1970) symmetrically; + // plain 
/ and % round towards zero and break the seconds boundary. + long secs = Math.floorDiv(raw, 1_000_000L); + long nanos = Math.floorMod(raw, 1_000_000L) * 1_000L; + yield Instant.ofEpochSecond(secs, nanos); + } + case Nanoseconds -> { + long secs = Math.floorDiv(raw, 1_000_000_000L); + long nanos = Math.floorMod(raw, 1_000_000_000L); + yield Instant.ofEpochSecond(secs, nanos); + } + case Days -> throw new VortexException("Days unit not valid for vortex.timestamp"); + }; + } + /// Same as {@link #uuid(Array, long)} but verifies the declared extension id. /// Use after {@code vortex.ext}'s decoder has unwrapped the storage and the /// Array no longer carries the Extension dtype. diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java index b14c243a..02556faa 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java @@ -208,6 +208,135 @@ private static DType.Extension timeExt(byte tag) { return new DType.Extension(Extensions.TIME, I32, meta, false); } + @Test + void instant_secondsUnit_decodesEpoch() { + // Given — TimeUnit tag 3 (Seconds), no tz; raw 0 = Unix epoch + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 0L); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When / Then + assertThat(Extensions.instant(timestampExt((byte) 3, null), storage, 0)) + .isEqualTo(java.time.Instant.EPOCH); + } + } + + @Test + void instant_microsecondsUnit_handlesNegativeRaw() { + // Given — pre-epoch micros: 1996-02-12T00:00:00Z is 824083200_000_000 micros + // Negate to flip into pre-epoch so the floorDiv path is actually exercised + // (plain / would round the 2-micro remainder towards zero and skew seconds) + long micros = -1_500_001L; // -1.500001s; 
expected: epochSecond = -2, nanos = 499_999_000 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, micros); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When + java.time.Instant got = Extensions.instant(timestampExt((byte) 1, null), storage, 0); + + // Then + assertThat(got.getEpochSecond()).isEqualTo(-2L); + assertThat(got.getNano()).isEqualTo(499_999_000); + } + } + + @Test + void instant_nanosecondsUnit_decodesFullPrecision() { + // Given — TimeUnit tag 0 (Nanoseconds): 1_000_000_001 ns = 1.000_000_001 s + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1_000_000_001L); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When / Then + assertThat(Extensions.instant(timestampExt((byte) 0, null), storage, 0)) + .isEqualTo(java.time.Instant.ofEpochSecond(1, 1)); + } + } + + @Test + void instant_daysUnit_throws() { + // Given — Days isn't valid for timestamps + try (Arena arena = Arena.ofConfined()) { + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, arena.allocate(8)); + + // When / Then + assertThatThrownBy(() -> Extensions.instant(timestampExt((byte) 4, null), storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("Days unit not valid"); + } + } + + @Test + void zonedDateTime_withTimezone_appliesIt() { + // Given — milliseconds since epoch + a Europe/Paris tz string in metadata + long ms = 1_000L; // 1 second past epoch + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, ms); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When + java.time.ZonedDateTime got = Extensions.zonedDateTime( + timestampExt((byte) 2, "Europe/Paris"), storage, 0); + + // Then + 
assertThat(got.getZone()).isEqualTo(java.time.ZoneId.of("Europe/Paris")); + assertThat(got.toInstant()).isEqualTo(java.time.Instant.ofEpochMilli(ms)); + } + } + + @Test + void zonedDateTime_noTimezone_defaultsToUtc() { + // Given — tz_len = 0 in metadata means caller didn't record a zone; default UTC + // is unambiguous and matches the Arrow convention + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 0L); + LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); + + // When + java.time.ZonedDateTime got = Extensions.zonedDateTime( + timestampExt((byte) 2, null), storage, 0); + + // Then + assertThat(got.getZone()).isEqualTo(java.time.ZoneOffset.UTC); + } + } + + @Test + void timezone_truncatedMetadata_throws() { + // Given — metadata claims tz_len=5 but provides only 3 bytes of payload + java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(6).order(java.nio.ByteOrder.LITTLE_ENDIAN); + meta.put(0, (byte) 2); // ms + meta.putShort(1, (short) 5); // tz_len=5 + meta.put(3, (byte) 'U'); + meta.put(4, (byte) 'T'); + meta.put(5, (byte) 'C'); // only 3 of the 5 declared bytes present + DType.Extension ext = new DType.Extension(Extensions.TIMESTAMP, + new DType.Primitive(PType.I64, false), meta, false); + + // When / Then + assertThatThrownBy(() -> Extensions.timezone(ext)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("truncated"); + } + + private static DType.Extension timestampExt(byte tag, String tz) { + byte[] tzBytes = tz == null ? 
new byte[0] : tz.getBytes(java.nio.charset.StandardCharsets.UTF_8); + java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(3 + tzBytes.length) + .order(java.nio.ByteOrder.LITTLE_ENDIAN); + meta.put(0, tag); + meta.putShort(1, (short) tzBytes.length); + for (int k = 0; k < tzBytes.length; k++) { + meta.put(3 + k, tzBytes[k]); + } + return new DType.Extension(Extensions.TIMESTAMP, + new DType.Primitive(PType.I64, false), meta, false); + } + @Test void uuid_roundTripsKnownValue() { // Given — Arrow canonical layout: FixedSizeList[16]; one well-known UUID diff --git a/docs/compatibility.md b/docs/compatibility.md index ba72079c..b2517101 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -63,12 +63,12 @@ metadata. The Rust catalogue lives in [`vortex-array/src/extension/`](https://github.com/vortex-data/vortex/tree/develop/vortex-array/src/extension); each subdir below names a canonical extension id and its on-disk shape. -| Extension id | Storage | Metadata | Java decoder | Status | -|---------------------|-------------------------------------------------|---------------------------------------------|-----------------------------------------------|--------| -| `vortex.date` | I32 (days) or I64 (ms) since Unix epoch | 1 byte: `TimeUnit` (2 = ms, 4 = days) | `Extensions.localDate(Array, long)` | ✅ | -| `vortex.time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | _not yet_ | ❌ | -| `vortex.timestamp` | I64 with `TimeUnit` (s/ms/μs/ns) + optional tz | 1 byte unit + UTF-8 tz string (optional) | _not yet_ | ❌ | -| `vortex.uuid` | `FixedSizeList(Primitive(U8), 16)` | 1 byte UUID version (optional, 0xff = unset) | _not yet_ | ❌ | +| Extension id | Storage | Metadata | Java decoder | Status | +|---------------------|-------------------------------------------------|---------------------------------------------|------------------------------------------------------------------------------|--------| +| `vortex.date` | I32 (days) or I64 (ms) 
since Unix epoch | 1 byte: `TimeUnit` (2 = ms, 4 = days) | `Extensions.localDate(Array, long)` | ✅ | +| `vortex.time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | `Extensions.localTime(DType.Extension, Array, long)` | ✅ | +| `vortex.timestamp` | I64 with `TimeUnit` (s/ms/μs/ns) + optional tz | 1 byte unit + u16 LE tz_len + UTF-8 tz bytes | `Extensions.instant(...)` / `Extensions.zonedDateTime(...)` / `timezone(...)` | ✅ | +| `vortex.uuid` | `FixedSizeList(Primitive(U8), 16)` | 1 byte UUID version (optional, 0xff = unset) | `Extensions.uuid(Array, long)` | ✅ | `TimeUnit` (see [`extension/datetime/unit.rs`](https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/extension/datetime/unit.rs)) encodes precision in the first metadata byte: From 05fd0a68aa5f7765682d85724d8a693ed3e844f9 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Tue, 9 Jun 2026 08:21:32 +0200 Subject: [PATCH 37/37] refactor(core): Extension sealed hierarchy replaces Extensions utility class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the Encoding / EncodingId pattern. The new sealed interface io.github.dfa1.vortex.core.Extension fuses the closed-world id classification with the typed decode behaviour: public sealed interface Extension permits Date, Time, Timestamp, Uuid, Custom { String id(); ... static Extension of(String id); } Each spec-defined variant (Date, Time, Timestamp, Uuid) is a final class with its own statically-typed decode methods — no Object return type, no caller-side downcasts: LocalDate d = Extension.DATE.decode(storage, i); LocalTime t = Extension.TIME.decode(ext, storage, i); Instant ts = Extension.TIMESTAMP.instant(ext, storage, i); java.util.UUID u = Extension.UUID.decode(storage, i); Optional z = Extension.TIMESTAMP.timezone(ext); Custom(String id) carries any non-spec id verbatim so unknown extensions round-trip without loss. 
DType.Extension.kind() returns the matching record so callers pattern-match exhaustively: switch (ext.kind()) { case Extension.Date d -> d.decode(storage, i); case Extension.Time t -> t.decode(ext, storage, i); case Extension.Timestamp ts -> ts.instant(ext, storage, i); case Extension.Uuid u -> u.decode(storage, i); case Extension.Custom c -> renderPlaceholder(c.id()); } Drops core/array/Extensions.java entirely. Its String constants (DATE / TIME / TIMESTAMP / UUID_ID) move onto the records as ID constants; its static helpers move onto the records as instance methods; its shared utilities (epochInteger, readUnit, instantFromRaw, checkBounds) live as private static helpers inside the sealed interface. VortexInspectorTui's date format switch now binds the Extension.Date record and calls date.decode(array, i) directly, replacing the previous Extensions.localDate(ext, array, i) call. docs/compatibility.md updated with the new dispatch example and a table row for Extension.Custom. Co-Authored-By: Claude Opus 4.7 --- .../io/github/dfa1/vortex/core/DType.java | 10 + .../io/github/dfa1/vortex/core/Extension.java | 333 ++++++++++++ .../dfa1/vortex/core/array/Extensions.java | 288 ----------- .../dfa1/vortex/core/ExtensionTest.java | 285 +++++++++++ .../vortex/core/array/ExtensionsTest.java | 484 ------------------ docs/compatibility.md | 26 +- .../vortex/inspect/VortexInspectorTui.java | 5 +- 7 files changed, 650 insertions(+), 781 deletions(-) create mode 100644 core/src/main/java/io/github/dfa1/vortex/core/Extension.java delete mode 100644 core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java delete mode 100644 core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/DType.java b/core/src/main/java/io/github/dfa1/vortex/core/DType.java index d1113e4e..d4133cae 100644 --- 
a/core/src/main/java/io/github/dfa1/vortex/core/DType.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/DType.java @@ -132,6 +132,16 @@ record Extension( ByteBuffer metadata, boolean nullable ) implements DType { + + /// Returns the closed-world classification of this extension's id. + /// Pattern-match exhaustively: known ids resolve to the matching + /// record, anything else lands in {@link io.github.dfa1.vortex.core.Extension.Custom}. + /// + /// @return the {@link io.github.dfa1.vortex.core.Extension} record + /// for this extension's id + public io.github.dfa1.vortex.core.Extension kind() { + return io.github.dfa1.vortex.core.Extension.of(extensionId); + } } /// Variant logical type for semi-structured data (analogous to Parquet variant / JSON). diff --git a/core/src/main/java/io/github/dfa1/vortex/core/Extension.java b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java new file mode 100644 index 00000000..73ce1b83 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/Extension.java @@ -0,0 +1,333 @@ +package io.github.dfa1.vortex.core; + +import io.github.dfa1.vortex.core.array.Array; +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.FixedSizeListArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import io.github.dfa1.vortex.core.array.MaskedArray; +import io.github.dfa1.vortex.core.array.ShortArray; +import io.github.dfa1.vortex.encoding.TimeUnit; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.Optional; + +/// Sealed hierarchy of Vortex extension dtypes — closed-world view of the +/// four spec-defined extensions ({@code vortex.date}, {@code vortex.time}, +/// {@code vortex.timestamp}, 
{@code vortex.uuid}) plus a {@link Custom} +/// fallback record carrying any other id. +/// +///

Mirrors the {@link io.github.dfa1.vortex.encoding.Encoding} / +/// {@link io.github.dfa1.vortex.encoding.EncodingId} pairing in spirit but +/// merges the kind classification with the typed decode behaviour: each +/// record exposes its own statically-typed decode methods rather than a +/// single {@code Object decode(...)} contract that callers would have to +/// downcast. Pattern-match exhaustively to dispatch: +/// +/// ```java +/// switch (ext.kind()) { +/// case Extension.Date d -> d.decode(storage, i); // LocalDate +/// case Extension.Time t -> t.decode(ext, storage, i); // LocalTime +/// case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant +/// case Extension.Uuid u -> u.decode(storage, i); // UUID +/// case Extension.Custom c -> renderPlaceholder(c.id()); +/// } +/// ``` +/// +///

{@link DType.Extension} carries the wire-format id as a {@code String} +/// so unknown ids round-trip without loss; {@link #of(String)} translates to +/// the matching record. +public sealed interface Extension { + + /// Singleton for {@link Date}. + Date DATE = new Date(); + /// Singleton for {@link Time}. + Time TIME = new Time(); + /// Singleton for {@link Timestamp}. + Timestamp TIMESTAMP = new Timestamp(); + /// Singleton for {@link Uuid}. + Uuid UUID = new Uuid(); + + /// Returns the wire-format id string. + /// + /// @return canonical extension id + String id(); + + /// Resolves a wire-format id string to its {@link Extension} record. + /// Unknown ids land in {@link Custom}. + /// + /// @param id raw extension id from the file footer + /// @return matching record, or {@link Custom} when {@code id} isn't recognised + static Extension of(String id) { + return switch (id) { + case Date.ID -> DATE; + case Time.ID -> TIME; + case Timestamp.ID -> TIMESTAMP; + case Uuid.ID -> UUID; + default -> new Custom(id); + }; + } + + /// {@code vortex.date} — days (any signed integer width) since the + /// Unix epoch. Per Arrow's canonical Date type. + final class Date implements Extension { + /// Wire id. + public static final String ID = "vortex.date"; + + private Date() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the date cell at row {@code i}. + /// + /// @param storage signed-integer storage (Byte/Short/Int/Long, possibly Masked) + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded date + /// @throws VortexException if storage isn't an integer primitive + public LocalDate decode(Array storage, long i) { + checkBounds(i, storage.length()); + return LocalDate.ofEpochDay(epochInteger(storage, i)); + } + } + + /// {@code vortex.time} — sub-day count in the {@link TimeUnit} recorded + /// in {@code ext.metadata()} byte 0. + final class Time implements Extension { + /// Wire id. 
+ public static final String ID = "vortex.time"; + + private Time() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the time-of-day cell at row {@code i}. + /// + /// @param ext declared extension dtype carrying the {@link TimeUnit} byte + /// @param storage signed-integer storage (I32 for s/ms, I64 for μs/ns) + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded local time + /// @throws VortexException if the metadata unit is {@link TimeUnit#Days} + /// or storage isn't an integer primitive + public LocalTime decode(DType.Extension ext, Array storage, long i) { + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("Time.decode: Days unit not valid for vortex.time"); + } + long raw = epochInteger(storage, i); + long nanos = raw * (1_000_000_000L / unit.divisor()); + return LocalTime.ofNanoOfDay(nanos); + } + + /// Returns the {@link TimeUnit} recorded in the extension metadata. + /// + /// @param ext extension dtype + /// @return decoded time unit + public TimeUnit unit(DType.Extension ext) { + return readUnit(ext); + } + } + + /// {@code vortex.timestamp} — I64 epoch count plus optional IANA timezone. + /// Metadata layout: {@code byte[0] = TimeUnit tag, bytes[1..3] = tz_len + /// (u16 LE), bytes[3..3+tz_len] = tz UTF-8}. + final class Timestamp implements Extension { + /// Wire id. + public static final String ID = "vortex.timestamp"; + + private Timestamp() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the timestamp cell at row {@code i} to an {@link Instant}, + /// ignoring any timezone the metadata carries. 
+ /// + /// @param ext declared extension dtype + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded instant + /// @throws VortexException if the metadata unit is {@link TimeUnit#Days} + /// or storage isn't an integer primitive + public Instant instant(DType.Extension ext, Array storage, long i) { + checkBounds(i, storage.length()); + TimeUnit unit = readUnit(ext); + if (unit == TimeUnit.Days) { + throw new VortexException("Timestamp.instant: Days unit not valid"); + } + return instantFromRaw(epochInteger(storage, i), unit); + } + + /// Decodes the timestamp cell at row {@code i} to a {@link ZonedDateTime} + /// using the timezone from the metadata, defaulting to UTC when absent. + /// + /// @param ext declared extension dtype + /// @param storage signed-integer storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded zoned date-time + public ZonedDateTime zonedDateTime(DType.Extension ext, Array storage, long i) { + return instant(ext, storage, i).atZone(timezone(ext).orElse(ZoneOffset.UTC)); + } + + /// Returns the IANA timezone string recorded in the extension metadata. 
+ /// + /// @param ext declared extension dtype + /// @return parsed zone id, or empty when {@code tz_len == 0} + /// @throws VortexException if the metadata is truncated mid-string + public Optional timezone(DType.Extension ext) { + ByteBuffer meta = ext.metadata(); + if (meta == null || meta.remaining() < 3) { + return Optional.empty(); + } + ByteBuffer le = meta.duplicate().order(ByteOrder.LITTLE_ENDIAN); + int basePos = le.position(); + int tzLen = Short.toUnsignedInt(le.getShort(basePos + 1)); + if (tzLen == 0) { + return Optional.empty(); + } + if (le.remaining() < 3 + tzLen) { + throw new VortexException("timestamp metadata truncated: declared tz_len=" + + tzLen + " but only " + (le.remaining() - 3) + " bytes available"); + } + byte[] tzBytes = new byte[tzLen]; + for (int k = 0; k < tzLen; k++) { + tzBytes[k] = le.get(basePos + 3 + k); + } + return Optional.of(ZoneId.of(new String(tzBytes, StandardCharsets.UTF_8))); + } + + /// Returns the {@link TimeUnit} recorded in the extension metadata. + /// + /// @param ext extension dtype + /// @return decoded time unit + public TimeUnit unit(DType.Extension ext) { + return readUnit(ext); + } + } + + /// {@code vortex.uuid} — 16-byte UUID stored as + /// {@code FixedSizeList(Primitive(U8), 16)}. + final class Uuid implements Extension { + /// Wire id. + public static final String ID = "vortex.uuid"; + + private Uuid() { + } + + @Override public String id() { + return ID; + } + + /// Decodes the UUID cell at row {@code i}. 
+ /// + /// @param storage UUID storage array + /// @param i row index, {@code 0 <= i < storage.length()} + /// @return decoded {@link java.util.UUID} + /// @throws VortexException if storage isn't a {@code FixedSizeListArray} + /// of size 16 + public java.util.UUID decode(Array storage, long i) { + checkBounds(i, storage.length()); + if (!(storage instanceof FixedSizeListArray fsl)) { + throw new VortexException("Uuid.decode: expected FixedSizeListArray, got " + + storage.getClass().getSimpleName()); + } + if (fsl.fixedSize() != 16) { + throw new VortexException("Uuid.decode: expected fixedSize 16, got " + fsl.fixedSize()); + } + if (!(fsl.elements() instanceof ByteArray bytes)) { + throw new VortexException("Uuid.decode: expected ByteArray elements, got " + + fsl.elements().getClass().getSimpleName()); + } + long base = i * 16; + long msb = 0L; + long lsb = 0L; + for (int k = 0; k < 8; k++) { + msb = (msb << 8) | (bytes.getByte(base + k) & 0xffL); + } + for (int k = 0; k < 8; k++) { + lsb = (lsb << 8) | (bytes.getByte(base + 8 + k) & 0xffL); + } + return new java.util.UUID(msb, lsb); + } + } + + /// Open-world escape hatch for any extension id Vortex-java doesn't + /// know about. Pattern-match branches that need to render or decode an + /// unknown extension read its raw id via {@link #id()}. + /// + /// @param id raw extension id string + record Custom(String id) implements Extension { + } + + // ── Shared helpers ──────────────────────────────────────────────────── + + /// Reads a signed integer from any of the integer primitive arrays as + /// {@code long}. Recurses through {@link MaskedArray}; throws on null + /// cells so callers don't silently get garbage for nullable columns. 
+ private static long epochInteger(Array storage, long i) { + return switch (storage) { + case ByteArray a -> a.getByte(i); + case ShortArray a -> a.getShort(i); + case IntArray a -> a.getInt(i); + case LongArray a -> a.getLong(i); + case MaskedArray a -> { + if (!a.isValid(i)) { + throw new VortexException("null cell at index " + i); + } + yield epochInteger(a.inner(), i); + } + default -> throw new VortexException( + "unsupported storage type " + storage.getClass().getSimpleName()); + }; + } + + /// Reads the {@link TimeUnit} metadata byte at the buffer's current + /// position; throws if the buffer is null or empty. + private static TimeUnit readUnit(DType.Extension ext) { + ByteBuffer meta = ext.metadata(); + if (meta == null || !meta.hasRemaining()) { + throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId()); + } + return TimeUnit.fromTag(meta.get(meta.position())); + } + + private static Instant instantFromRaw(long raw, TimeUnit unit) { + return switch (unit) { + case Seconds -> Instant.ofEpochSecond(raw); + case Milliseconds -> Instant.ofEpochMilli(raw); + case Microseconds -> { + long secs = Math.floorDiv(raw, 1_000_000L); + long nanos = Math.floorMod(raw, 1_000_000L) * 1_000L; + yield Instant.ofEpochSecond(secs, nanos); + } + case Nanoseconds -> { + long secs = Math.floorDiv(raw, 1_000_000_000L); + long nanos = Math.floorMod(raw, 1_000_000_000L); + yield Instant.ofEpochSecond(secs, nanos); + } + case Days -> throw new VortexException("Days unit not valid for instant"); + }; + } + + private static void checkBounds(long i, long length) { + if (i < 0 || i >= length) { + throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); + } + } +} diff --git a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java b/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java deleted file mode 100644 index 2040aa48..00000000 --- 
a/core/src/main/java/io/github/dfa1/vortex/core/array/Extensions.java +++ /dev/null @@ -1,288 +0,0 @@ -package io.github.dfa1.vortex.core.array; - -import io.github.dfa1.vortex.core.DType; -import io.github.dfa1.vortex.core.VortexException; -import io.github.dfa1.vortex.encoding.TimeUnit; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.time.Instant; -import java.time.LocalDate; -import java.time.LocalTime; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.time.ZonedDateTime; -import java.util.Optional; - -/// Decoding helpers for Vortex extension dtypes that ship as a primitive -/// storage array plus an extension id on the {@link DType}. Currently covers -/// {@code vortex.date}; {@code vortex.time} / {@code vortex.timestamp} live -/// on the TODO list until a public ScalarUnit type is available. -/// -/// Lives in core so any reader-jar consumer can decode these cells without -/// reimplementing the storage conventions. -public final class Extensions { - - /// Extension id for date columns - storage is days since the Unix epoch - /// (1970-01-01), Arrow-compatible. - public static final String DATE = "vortex.date"; - - /// Extension id for UUID columns - storage is - /// {@code FixedSizeList(Primitive(U8), 16)}, Arrow-compatible. - public static final String UUID_ID = "vortex.uuid"; - - /// Extension id for time-of-day columns - storage is a signed integer - /// counting seconds / milliseconds (I32) or microseconds / nanoseconds - /// (I64) since midnight; the unit is the first byte of {@code ext.metadata()}. - public static final String TIME = "vortex.time"; - - /// Extension id for timestamp columns - storage is an I64 count of the - /// recorded {@link TimeUnit} since the Unix epoch, with an optional - /// IANA-style timezone string carried in the extension metadata. 
- /// Metadata layout: {@code byte[0] = TimeUnit tag, bytes[1..3] = tz_len - /// (u16 LE), bytes[3..3+tz_len] = tz UTF-8}. - public static final String TIMESTAMP = "vortex.timestamp"; - - private Extensions() { - } - - /// Decodes a {@code vortex.date} cell to a {@link LocalDate}. - /// - /// The storage array must be one of the integer primitive arrays - /// ({@link ByteArray}, {@link ShortArray}, {@link IntArray}, {@link LongArray}), - /// optionally wrapped in a {@link MaskedArray}. The cell value is read as a - /// signed integer giving days since the Unix epoch. - /// - /// @param array array whose dtype is {@code ext} - /// @param i row index, {@code 0 <= i < array.length()} - /// @return decoded date - /// @throws VortexException if {@code array}'s dtype isn't {@code ext} - /// or its storage isn't an integer primitive - public static LocalDate localDate(Array array, long i) { - if (!(array.dtype() instanceof DType.Extension ext) || !DATE.equals(ext.extensionId())) { - throw new VortexException("localDate called on non-date dtype: " + array.dtype()); - } - checkBounds(i, array.length()); - return LocalDate.ofEpochDay(epochDay(array, i)); - } - - /// Decodes a {@code vortex.date} cell when the storage array no longer - /// carries the Extension dtype — the case after {@code vortex.ext}'s - /// decoder unwraps the storage child and returns it with its primitive - /// dtype. Caller must supply the original {@link DType.Extension} so the - /// extension id is still verified. 
- /// - /// @param ext the column's declared extension dtype; must be {@code vortex.date} - /// @param storage signed-integer storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded date - /// @throws VortexException if {@code ext} isn't {@code vortex.date} or - /// {@code storage} isn't an integer primitive - public static LocalDate localDate(DType.Extension ext, Array storage, long i) { - if (!DATE.equals(ext.extensionId())) { - throw new VortexException("localDate called with non-date extension: " + ext.extensionId()); - } - checkBounds(i, storage.length()); - return LocalDate.ofEpochDay(epochDay(storage, i)); - } - - /// Decodes a {@code vortex.uuid} cell. - /// - /// Storage shape per Arrow's canonical UUID extension: a - /// {@link FixedSizeListArray} of {@link ByteArray} (U8) with - /// {@code fixedSize == 16}; row {@code i} is the 16 contiguous bytes - /// {@code [i*16, i*16+16)} interpreted as a big-endian UUID. - /// - /// @param storage UUID extension's storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded {@link java.util.UUID} - /// @throws VortexException if {@code storage} isn't a - /// {@code FixedSizeListArray} of size 16 - public static java.util.UUID uuid(Array storage, long i) { - checkBounds(i, storage.length()); - if (!(storage instanceof FixedSizeListArray fsl)) { - throw new VortexException("uuid: expected FixedSizeListArray storage, got " - + storage.getClass().getSimpleName()); - } - if (fsl.fixedSize() != 16) { - throw new VortexException("uuid: expected fixedSize 16, got " + fsl.fixedSize()); - } - if (!(fsl.elements() instanceof ByteArray bytes)) { - throw new VortexException("uuid: expected ByteArray elements, got " - + fsl.elements().getClass().getSimpleName()); - } - long base = i * 16; - long msb = 0L; - long lsb = 0L; - for (int k = 0; k < 8; k++) { - msb = (msb << 8) | (bytes.getByte(base + k) & 0xffL); - } - for (int k = 0; k < 8; k++) { - lsb = 
(lsb << 8) | (bytes.getByte(base + 8 + k) & 0xffL); - } - return new java.util.UUID(msb, lsb); - } - - /// Decodes a {@code vortex.time} cell to a {@link LocalTime}. - /// - /// The storage array must be a signed integer primitive ({@link IntArray} - /// for second / millisecond precision, {@link LongArray} for microsecond / - /// nanosecond precision), optionally wrapped in {@link MaskedArray}. The - /// {@link TimeUnit} read from {@code ext.metadata()} byte 0 selects the - /// precision; {@link TimeUnit#Days} is not valid for time-of-day and - /// throws. - /// - /// @param ext the column's declared extension dtype; must be {@code vortex.time} - /// @param storage signed-integer storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded time-of-day - /// @throws VortexException if {@code ext} isn't {@code vortex.time}, - /// the metadata unit is {@link TimeUnit#Days}, or {@code storage} - /// isn't an integer primitive - public static LocalTime localTime(DType.Extension ext, Array storage, long i) { - if (!TIME.equals(ext.extensionId())) { - throw new VortexException("localTime called with non-time extension: " + ext.extensionId()); - } - checkBounds(i, storage.length()); - TimeUnit unit = readUnit(ext); - if (unit == TimeUnit.Days) { - throw new VortexException("localTime: Days unit not valid for vortex.time"); - } - long raw = epochDay(storage, i); - // raw is in `unit`; scale to nanos-of-day. divisor() = units per second. 
- long nanos = raw * (1_000_000_000L / unit.divisor()); - return LocalTime.ofNanoOfDay(nanos); - } - - private static TimeUnit readUnit(DType.Extension ext) { - ByteBuffer meta = ext.metadata(); - if (meta == null || !meta.hasRemaining()) { - throw new VortexException("missing TimeUnit metadata byte for " + ext.extensionId()); - } - return TimeUnit.fromTag(meta.get(meta.position())); - } - - /// Decodes a {@code vortex.timestamp} cell to an {@link Instant}, ignoring - /// any timezone the column metadata carries. Use {@link #zonedDateTime} - /// when the timezone matters. - /// - /// Storage is an I64 count of the metadata-recorded {@link TimeUnit} since - /// the Unix epoch. {@link TimeUnit#Days} is invalid for timestamps. - /// - /// @param ext declared extension dtype; must be {@code vortex.timestamp} - /// @param storage signed-integer storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded instant - /// @throws VortexException if {@code ext} isn't {@code vortex.timestamp}, - /// the metadata unit is Days, or storage isn't an integer primitive - public static Instant instant(DType.Extension ext, Array storage, long i) { - if (!TIMESTAMP.equals(ext.extensionId())) { - throw new VortexException("instant called with non-timestamp extension: " + ext.extensionId()); - } - checkBounds(i, storage.length()); - TimeUnit unit = readUnit(ext); - if (unit == TimeUnit.Days) { - throw new VortexException("instant: Days unit not valid for vortex.timestamp"); - } - return instantFromRaw(epochDay(storage, i), unit); - } - - /// Decodes a {@code vortex.timestamp} cell to a {@link ZonedDateTime} - /// using the timezone carried in the extension metadata. Falls back to - /// {@link ZoneOffset#UTC} when the metadata has no timezone string. 
- /// - /// @param ext declared extension dtype; must be {@code vortex.timestamp} - /// @param storage signed-integer storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded zoned date-time - /// @throws VortexException if {@code ext} isn't {@code vortex.timestamp}, - /// the metadata unit is Days, or storage isn't an integer primitive - public static ZonedDateTime zonedDateTime(DType.Extension ext, Array storage, long i) { - Instant instant = instant(ext, storage, i); - return instant.atZone(timezone(ext).orElse(ZoneOffset.UTC)); - } - - /// Returns the optional IANA timezone string carried in a - /// {@code vortex.timestamp} extension's metadata. - /// - /// @param ext declared extension dtype - /// @return zone id parsed from {@code ext.metadata()} bytes 3..3+tz_len, - /// or empty when {@code tz_len == 0} - /// @throws VortexException if the metadata is truncated mid-string - public static Optional timezone(DType.Extension ext) { - ByteBuffer meta = ext.metadata(); - if (meta == null || meta.remaining() < 3) { - return Optional.empty(); - } - ByteBuffer le = meta.duplicate().order(ByteOrder.LITTLE_ENDIAN); - int basePos = le.position(); - int tzLen = Short.toUnsignedInt(le.getShort(basePos + 1)); - if (tzLen == 0) { - return Optional.empty(); - } - if (le.remaining() < 3 + tzLen) { - throw new VortexException("timestamp metadata truncated: declared tz_len=" - + tzLen + " but only " + (le.remaining() - 3) + " bytes available"); - } - byte[] tzBytes = new byte[tzLen]; - for (int k = 0; k < tzLen; k++) { - tzBytes[k] = le.get(basePos + 3 + k); - } - return Optional.of(ZoneId.of(new String(tzBytes, StandardCharsets.UTF_8))); - } - - private static Instant instantFromRaw(long raw, TimeUnit unit) { - return switch (unit) { - case Seconds -> Instant.ofEpochSecond(raw); - case Milliseconds -> Instant.ofEpochMilli(raw); - case Microseconds -> { - // floorDiv/floorMod handle negative timestamps (pre-1970) symmetrically; - // plain 
/ and % round towards zero and break the seconds boundary. - long secs = Math.floorDiv(raw, 1_000_000L); - long nanos = Math.floorMod(raw, 1_000_000L) * 1_000L; - yield Instant.ofEpochSecond(secs, nanos); - } - case Nanoseconds -> { - long secs = Math.floorDiv(raw, 1_000_000_000L); - long nanos = Math.floorMod(raw, 1_000_000_000L); - yield Instant.ofEpochSecond(secs, nanos); - } - case Days -> throw new VortexException("Days unit not valid for vortex.timestamp"); - }; - } - - /// Same as {@link #uuid(Array, long)} but verifies the declared extension id. - /// Use after {@code vortex.ext}'s decoder has unwrapped the storage and the - /// Array no longer carries the Extension dtype. - /// - /// @param ext the column's declared extension dtype; must be {@code vortex.uuid} - /// @param storage UUID storage array - /// @param i row index, {@code 0 <= i < storage.length()} - /// @return decoded {@link java.util.UUID} - /// @throws VortexException if {@code ext} isn't {@code vortex.uuid} or storage shape doesn't match - public static java.util.UUID uuid(DType.Extension ext, Array storage, long i) { - if (!UUID_ID.equals(ext.extensionId())) { - throw new VortexException("uuid called with non-uuid extension: " + ext.extensionId()); - } - return uuid(storage, i); - } - - private static void checkBounds(long i, long length) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } - } - - private static long epochDay(Array array, long i) { - return switch (array) { - case ByteArray a -> a.getByte(i); - case ShortArray a -> a.getShort(i); - case IntArray a -> a.getInt(i); - case LongArray a -> a.getLong(i); - case MaskedArray a -> epochDay(a.inner(), i); - default -> throw new VortexException( - "localDate: unsupported storage type " + array.getClass().getSimpleName()); - }; - } -} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java 
b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java new file mode 100644 index 00000000..ad684466 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/ExtensionTest.java @@ -0,0 +1,285 @@ +package io.github.dfa1.vortex.core; + +import io.github.dfa1.vortex.core.array.ByteArray; +import io.github.dfa1.vortex.core.array.FixedSizeListArray; +import io.github.dfa1.vortex.core.array.IntArray; +import io.github.dfa1.vortex.core.array.LongArray; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class ExtensionTest { + + private static final DType.Primitive I32 = new DType.Primitive(PType.I32, false); + private static final DType.Primitive I64 = new DType.Primitive(PType.I64, false); + private static final DType.Primitive U8 = new DType.Primitive(PType.U8, false); + + @Test + void of_recognisedIds_returnSingletons() { + // Given / When / Then — known ids resolve to the cached singletons so + // identity comparison and pattern-match cases work without per-call alloc + assertThat(Extension.of("vortex.date")).isSameAs(Extension.DATE); + assertThat(Extension.of("vortex.time")).isSameAs(Extension.TIME); + assertThat(Extension.of("vortex.timestamp")).isSameAs(Extension.TIMESTAMP); + assertThat(Extension.of("vortex.uuid")).isSameAs(Extension.UUID); + } + + @Test + void of_unknownId_returnsCustomWithRawString() { + // Given — open-world fallback; the id must round-trip verbatim so callers + // can still apply their own decoding for non-spec extensions + 
Extension sut = Extension.of("acme.geopoint"); + + // Then + assertThat(sut).isInstanceOf(Extension.Custom.class); + assertThat(sut.id()).isEqualTo("acme.geopoint"); + } + + @Test + void kind_onDTypeExtension_dispatchesViaPatternMatch() { + // Given — practical sealed-switch usage that motivates the redesign + DType.Extension date = ext("vortex.date", I32, null); + DType.Extension custom = ext("acme.thing", I32, null); + + // When / Then + assertThat(classify(date)).isEqualTo("date"); + assertThat(classify(custom)).isEqualTo("custom:acme.thing"); + } + + private static String classify(DType.Extension ext) { + return switch (ext.kind()) { + case Extension.Date d -> "date"; + case Extension.Time t -> "time"; + case Extension.Timestamp ts -> "timestamp"; + case Extension.Uuid u -> "uuid"; + case Extension.Custom c -> "custom:" + c.id(); + }; + } + + @Test + void date_decodes_tpchSample() { + // Given — anchor against known TPC-H value 9538 = 1996-02-12 + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extension.DATE.decode(storage, 0)).isEqualTo(LocalDate.of(1996, 2, 12)); + } + } + + @Test + void date_negativeDays_returnsPreEpoch() { + // Given — defensive: signed storage, pre-1970 must work + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, -1); + IntArray storage = new IntArray(I32, 1, buf); + + // When / Then + assertThat(Extension.DATE.decode(storage, 0)).isEqualTo(LocalDate.of(1969, 12, 31)); + } + } + + @Test + void time_eachUnit_decodesCorrectly() { + // Given — round-trip a known time-of-day through every TimeUnit + try (Arena arena = Arena.ofConfined()) { + // Seconds: 3661 s = 01:01:01 + assertThat(Extension.TIME.decode(ext("vortex.time", I32, unitByte((byte) 3)), + i32(arena, 3661), 0)) + .isEqualTo(LocalTime.of(1, 
1, 1)); + // Milliseconds: 3_661_500 = 01:01:01.500 + assertThat(Extension.TIME.decode(ext("vortex.time", I32, unitByte((byte) 2)), + i32(arena, 3_661_500), 0)) + .isEqualTo(LocalTime.of(1, 1, 1, 500_000_000)); + // Microseconds: 1_000_001 = 00:00:01.000001 + assertThat(Extension.TIME.decode(ext("vortex.time", I64, unitByte((byte) 1)), + i64(arena, 1_000_001L), 0)) + .isEqualTo(LocalTime.of(0, 0, 1, 1_000)); + // Nanoseconds: 42 ns past midnight + assertThat(Extension.TIME.decode(ext("vortex.time", I64, unitByte((byte) 0)), + i64(arena, 42L), 0)) + .isEqualTo(LocalTime.ofNanoOfDay(42)); + } + } + + @Test + void time_daysUnit_throws() { + // Given — Days isn't a sub-second unit + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.time", I32, unitByte((byte) 4)); + + // When / Then + assertThatThrownBy(() -> Extension.TIME.decode(ext, i32(arena, 0), 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("Days unit not valid"); + } + } + + @Test + void timestamp_instant_microsecondsPath_handlesNegativeRaw() { + // Given — pre-epoch micros exercise the floorDiv / floorMod path + long micros = -1_500_001L; // -1.500001s + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 1, null)); + + // When + Instant got = Extension.TIMESTAMP.instant(ext, i64(arena, micros), 0); + + // Then + assertThat(got.getEpochSecond()).isEqualTo(-2L); + assertThat(got.getNano()).isEqualTo(499_999_000); + } + } + + @Test + void timestamp_zonedDateTime_withTimezone_appliesIt() { + // Given — ms since epoch + Europe/Paris tz in metadata + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 2, "Europe/Paris")); + + // When + ZonedDateTime got = Extension.TIMESTAMP.zonedDateTime(ext, i64(arena, 1_000L), 0); + + // Then + assertThat(got.getZone()).isEqualTo(ZoneId.of("Europe/Paris")); + 
assertThat(got.toInstant()).isEqualTo(Instant.ofEpochMilli(1_000L)); + } + } + + @Test + void timestamp_zonedDateTime_noTimezone_defaultsToUtc() { + // Given — tz_len = 0 should fall back to UTC for unambiguity + try (Arena arena = Arena.ofConfined()) { + DType.Extension ext = ext("vortex.timestamp", I64, tzMeta((byte) 2, null)); + + // When + ZonedDateTime got = Extension.TIMESTAMP.zonedDateTime(ext, i64(arena, 0L), 0); + + // Then + assertThat(got.getZone()).isEqualTo(ZoneOffset.UTC); + } + } + + @Test + void timestamp_timezone_truncatedMetadata_throws() { + // Given — declared tz_len longer than buffer can carry + ByteBuffer meta = ByteBuffer.allocate(6).order(ByteOrder.LITTLE_ENDIAN); + meta.put(0, (byte) 2); + meta.putShort(1, (short) 5); + meta.put(3, (byte) 'U'); + meta.put(4, (byte) 'T'); + meta.put(5, (byte) 'C'); + DType.Extension truncated = ext("vortex.timestamp", I64, meta); + + // When / Then + assertThatThrownBy(() -> Extension.TIMESTAMP.timezone(truncated)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("truncated"); + } + + @Test + void uuid_roundTripsKnownValue() { + // Given — RFC 9562 example + java.util.UUID expected = java.util.UUID.fromString("123e4567-e89b-12d3-a456-426614174000"); + try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + long msb = expected.getMostSignificantBits(); + long lsb = expected.getLeastSignificantBits(); + for (int k = 0; k < 8; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) ((msb >> (56 - 8 * k)) & 0xff)); + buf.set(ValueLayout.JAVA_BYTE, 8 + k, (byte) ((lsb >> (56 - 8 * k)) & 0xff)); + } + ByteArray inner = new ByteArray(U8, 16, buf); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 16, false), 1, inner); + + // When / Then + assertThat(Extension.UUID.decode(storage, 0)).isEqualTo(expected); + } + } + + @Test + void uuid_allOnes_decodesWithoutSignExtension() { + // Given — 0xff in every byte trips sign-extension bugs in the mask 
+ try (Arena arena = Arena.ofConfined()) { + MemorySegment buf = arena.allocate(16); + for (int k = 0; k < 16; k++) { + buf.set(ValueLayout.JAVA_BYTE, k, (byte) 0xff); + } + ByteArray inner = new ByteArray(U8, 16, buf); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 16, false), 1, inner); + + // When / Then + assertThat(Extension.UUID.decode(storage, 0)) + .isEqualTo(new java.util.UUID(-1L, -1L)); + } + } + + @Test + void uuid_wrongFixedSize_throws() { + // Given — 8 != 16; reject up front + try (Arena arena = Arena.ofConfined()) { + ByteArray inner = new ByteArray(U8, 8, arena.allocate(8)); + FixedSizeListArray storage = new FixedSizeListArray( + new DType.FixedSizeList(U8, 8, false), 1, inner); + + // When / Then + assertThatThrownBy(() -> Extension.UUID.decode(storage, 0)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("fixedSize 16"); + } + } + + // ── helpers ────────────────────────────────────────────────────────── + + private static DType.Extension ext(String id, DType storage, ByteBuffer meta) { + return new DType.Extension(id, storage, meta, false); + } + + private static ByteBuffer unitByte(byte tag) { + ByteBuffer meta = ByteBuffer.allocate(1); + meta.put(0, tag); + return meta; + } + + private static ByteBuffer tzMeta(byte unitTag, String tz) { + byte[] tzBytes = tz == null ? 
new byte[0] : tz.getBytes(StandardCharsets.UTF_8); + ByteBuffer meta = ByteBuffer.allocate(3 + tzBytes.length).order(ByteOrder.LITTLE_ENDIAN); + meta.put(0, unitTag); + meta.putShort(1, (short) tzBytes.length); + for (int k = 0; k < tzBytes.length; k++) { + meta.put(3 + k, tzBytes[k]); + } + return meta; + } + + private static IntArray i32(Arena arena, int value) { + MemorySegment buf = arena.allocate(4); + buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, value); + return new IntArray(I32, 1, buf); + } + + private static LongArray i64(Arena arena, long value) { + MemorySegment buf = arena.allocate(8); + buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, value); + return new LongArray(I64, 1, buf); + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java deleted file mode 100644 index 02556faa..00000000 --- a/core/src/test/java/io/github/dfa1/vortex/core/array/ExtensionsTest.java +++ /dev/null @@ -1,484 +0,0 @@ -package io.github.dfa1.vortex.core.array; - -import io.github.dfa1.vortex.core.DType; -import io.github.dfa1.vortex.core.PType; -import io.github.dfa1.vortex.core.VortexException; -import org.junit.jupiter.api.Test; - -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.ValueLayout; -import java.time.LocalDate; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -class ExtensionsTest { - - private static final DType.Primitive I32 = new DType.Primitive(PType.I32, false); - private static final DType DATE_DTYPE = new DType.Extension(Extensions.DATE, I32, null, false); - - @Test - void localDate_zeroIsUnixEpoch() { - // Given — Arrow-compatible: 0 == 1970-01-01 - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 0); - IntArray sut = new IntArray(DATE_DTYPE, 1, buf); - 
- // When / Then - assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1970, 1, 1)); - } - } - - @Test - void localDate_tpchSampleValue_matchesExpected() { - // Given — anchor against a known TPC-H value: 9538 = 1996-02-12. - // Catches accidental epoch-shift regressions (e.g. days-since-2000). - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); - IntArray sut = new IntArray(DATE_DTYPE, 1, buf); - - // When / Then - assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1996, 2, 12)); - } - } - - @Test - void localDate_negativeDays_returnsPreEpoch() { - // Given — defensive: integer storage is signed, so pre-1970 dates must work - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, -1); - IntArray sut = new IntArray(DATE_DTYPE, 1, buf); - - // When / Then - assertThat(Extensions.localDate(sut, 0)).isEqualTo(LocalDate.of(1969, 12, 31)); - } - } - - @Test - void localDate_nonDateDtype_throws() { - // Given — guards against silent misinterpretation (e.g. plain I32 as days) - try (Arena arena = Arena.ofConfined()) { - IntArray sut = new IntArray(I32, 1, arena.allocate(4)); - - // When / Then - assertThatThrownBy(() -> Extensions.localDate(sut, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("non-date"); - } - } - - @Test - void localDate_withExplicitExtAndStorage_decodes() { - // Given — ExtEncoding.decode strips the extension wrapper before the - // TUI gets the array, so the caller threads the declared dtype back - // in. This overload must still verify the extension id rather than - // trust any caller-supplied storage as a date. 
- try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 9538); - IntArray storage = new IntArray(I32, 1, buf); - DType.Extension ext = new DType.Extension(Extensions.DATE, I32, null, false); - - // When / Then - assertThat(Extensions.localDate(ext, storage, 0)) - .isEqualTo(LocalDate.of(1996, 2, 12)); - } - } - - @Test - void localDate_withWrongExtensionId_throws() { - // Given — passing some other extension's storage array must not be - // silently interpreted as a date - try (Arena arena = Arena.ofConfined()) { - IntArray storage = new IntArray(I32, 1, arena.allocate(4)); - DType.Extension notDate = new DType.Extension("vortex.something", I32, null, false); - - // When / Then - assertThatThrownBy(() -> Extensions.localDate(notDate, storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("non-date extension"); - } - } - - @Test - void localTime_secondsUnit_decodesViaI32() { - // Given — TimeUnit tag 3 (Seconds), storage I32: 3661 seconds = 01:01:01 - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 3661); - IntArray storage = new IntArray(I32, 1, buf); - - // When / Then - assertThat(Extensions.localTime(timeExt((byte) 3), storage, 0)) - .isEqualTo(java.time.LocalTime.of(1, 1, 1)); - } - } - - @Test - void localTime_millisecondsUnit_decodesViaI32() { - // Given — TimeUnit tag 2 (Milliseconds): 3_661_500 ms = 01:01:01.500 - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - buf.set(ValueLayout.JAVA_INT_UNALIGNED, 0, 3_661_500); - IntArray storage = new IntArray(I32, 1, buf); - - // When / Then - assertThat(Extensions.localTime(timeExt((byte) 2), storage, 0)) - .isEqualTo(java.time.LocalTime.of(1, 1, 1, 500_000_000)); - } - } - - @Test - void localTime_microsecondsUnit_decodesViaI64() { - // Given — TimeUnit tag 1 (Microseconds): 1 second + 1 microsecond - try 
(Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1_000_001L); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When / Then - assertThat(Extensions.localTime(timeExt((byte) 1), storage, 0)) - .isEqualTo(java.time.LocalTime.of(0, 0, 1, 1_000)); - } - } - - @Test - void localTime_nanosecondsUnit_decodesViaI64() { - // Given — TimeUnit tag 0 (Nanoseconds): 42 nanos past midnight - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 42L); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When / Then - assertThat(Extensions.localTime(timeExt((byte) 0), storage, 0)) - .isEqualTo(java.time.LocalTime.ofNanoOfDay(42)); - } - } - - @Test - void localTime_daysUnit_throws() { - // Given — Days isn't a sub-second unit, so vortex.time with Days is malformed - try (Arena arena = Arena.ofConfined()) { - IntArray storage = new IntArray(I32, 1, arena.allocate(4)); - - // When / Then - assertThatThrownBy(() -> Extensions.localTime(timeExt((byte) 4), storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("Days unit not valid"); - } - } - - @Test - void localTime_wrongExtensionId_throws() { - // Given — guards against calling with a non-time extension - try (Arena arena = Arena.ofConfined()) { - IntArray storage = new IntArray(I32, 1, arena.allocate(4)); - DType.Extension wrongExt = new DType.Extension("vortex.date", I32, null, false); - - // When / Then - assertThatThrownBy(() -> Extensions.localTime(wrongExt, storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("non-time extension"); - } - } - - @Test - void localTime_missingMetadata_throws() { - // Given — metadata byte must specify TimeUnit; otherwise we can't know - // whether the storage is in seconds, ms, μs, or ns - try (Arena arena = Arena.ofConfined()) { 
- IntArray storage = new IntArray(I32, 1, arena.allocate(4)); - DType.Extension noMetaExt = new DType.Extension(Extensions.TIME, I32, null, false); - - // When / Then - assertThatThrownBy(() -> Extensions.localTime(noMetaExt, storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("missing TimeUnit metadata"); - } - } - - private static DType.Extension timeExt(byte tag) { - java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(1); - meta.put(0, tag); - return new DType.Extension(Extensions.TIME, I32, meta, false); - } - - @Test - void instant_secondsUnit_decodesEpoch() { - // Given — TimeUnit tag 3 (Seconds), no tz; raw 0 = Unix epoch - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 0L); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When / Then - assertThat(Extensions.instant(timestampExt((byte) 3, null), storage, 0)) - .isEqualTo(java.time.Instant.EPOCH); - } - } - - @Test - void instant_microsecondsUnit_handlesNegativeRaw() { - // Given — pre-epoch micros: 1996-02-12T00:00:00Z is 824083200_000_000 micros - // Negate to flip into pre-epoch so the floorDiv path is actually exercised - // (plain / would round the 2-micro remainder towards zero and skew seconds) - long micros = -1_500_001L; // -1.500001s; expected: epochSecond = -2, nanos = 499_999_000 - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, micros); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When - java.time.Instant got = Extensions.instant(timestampExt((byte) 1, null), storage, 0); - - // Then - assertThat(got.getEpochSecond()).isEqualTo(-2L); - assertThat(got.getNano()).isEqualTo(499_999_000); - } - } - - @Test - void instant_nanosecondsUnit_decodesFullPrecision() { - // Given — TimeUnit tag 0 (Nanoseconds): 1_000_000_001 ns = 
1.000_000_001 s - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 1_000_000_001L); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When / Then - assertThat(Extensions.instant(timestampExt((byte) 0, null), storage, 0)) - .isEqualTo(java.time.Instant.ofEpochSecond(1, 1)); - } - } - - @Test - void instant_daysUnit_throws() { - // Given — Days isn't valid for timestamps - try (Arena arena = Arena.ofConfined()) { - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, arena.allocate(8)); - - // When / Then - assertThatThrownBy(() -> Extensions.instant(timestampExt((byte) 4, null), storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("Days unit not valid"); - } - } - - @Test - void zonedDateTime_withTimezone_appliesIt() { - // Given — milliseconds since epoch + a Europe/Paris tz string in metadata - long ms = 1_000L; // 1 second past epoch - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, ms); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When - java.time.ZonedDateTime got = Extensions.zonedDateTime( - timestampExt((byte) 2, "Europe/Paris"), storage, 0); - - // Then - assertThat(got.getZone()).isEqualTo(java.time.ZoneId.of("Europe/Paris")); - assertThat(got.toInstant()).isEqualTo(java.time.Instant.ofEpochMilli(ms)); - } - } - - @Test - void zonedDateTime_noTimezone_defaultsToUtc() { - // Given — tz_len = 0 in metadata means caller didn't record a zone; default UTC - // is unambiguous and matches the Arrow convention - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(8); - buf.set(ValueLayout.JAVA_LONG_UNALIGNED, 0, 0L); - LongArray storage = new LongArray(new DType.Primitive(PType.I64, false), 1, buf); - - // When - java.time.ZonedDateTime got = 
Extensions.zonedDateTime( - timestampExt((byte) 2, null), storage, 0); - - // Then - assertThat(got.getZone()).isEqualTo(java.time.ZoneOffset.UTC); - } - } - - @Test - void timezone_truncatedMetadata_throws() { - // Given — metadata claims tz_len=5 but provides only 3 bytes of payload - java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(6).order(java.nio.ByteOrder.LITTLE_ENDIAN); - meta.put(0, (byte) 2); // ms - meta.putShort(1, (short) 5); // tz_len=5 - meta.put(3, (byte) 'U'); - meta.put(4, (byte) 'T'); - meta.put(5, (byte) 'C'); // only 3 of the 5 declared bytes present - DType.Extension ext = new DType.Extension(Extensions.TIMESTAMP, - new DType.Primitive(PType.I64, false), meta, false); - - // When / Then - assertThatThrownBy(() -> Extensions.timezone(ext)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("truncated"); - } - - private static DType.Extension timestampExt(byte tag, String tz) { - byte[] tzBytes = tz == null ? new byte[0] : tz.getBytes(java.nio.charset.StandardCharsets.UTF_8); - java.nio.ByteBuffer meta = java.nio.ByteBuffer.allocate(3 + tzBytes.length) - .order(java.nio.ByteOrder.LITTLE_ENDIAN); - meta.put(0, tag); - meta.putShort(1, (short) tzBytes.length); - for (int k = 0; k < tzBytes.length; k++) { - meta.put(3 + k, tzBytes[k]); - } - return new DType.Extension(Extensions.TIMESTAMP, - new DType.Primitive(PType.I64, false), meta, false); - } - - @Test - void uuid_roundTripsKnownValue() { - // Given — Arrow canonical layout: FixedSizeList[16]; one well-known UUID - // (RFC 9562 example) plus its inverse, so msb/lsb extraction is exercised in - // both halves rather than only the high bytes. 
- java.util.UUID expected = java.util.UUID.fromString("123e4567-e89b-12d3-a456-426614174000"); - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(16); - long msb = expected.getMostSignificantBits(); - long lsb = expected.getLeastSignificantBits(); - for (int k = 0; k < 8; k++) { - buf.set(ValueLayout.JAVA_BYTE, k, (byte) ((msb >> (56 - 8 * k)) & 0xff)); - buf.set(ValueLayout.JAVA_BYTE, 8 + k, (byte) ((lsb >> (56 - 8 * k)) & 0xff)); - } - ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); - DType.FixedSizeList fslDtype = new DType.FixedSizeList( - new DType.Primitive(PType.U8, false), 16, false); - FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); - - // When / Then - assertThat(Extensions.uuid(sut, 0)).isEqualTo(expected); - } - } - - @Test - void uuid_zeroBytes_decodesToZeroUuid() { - // Given — defensive: all-zero UUID is the most common "null UUID" sentinel - // and a regression test for sign extension on getByte - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(16); - ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); - DType.FixedSizeList fslDtype = new DType.FixedSizeList( - new DType.Primitive(PType.U8, false), 16, false); - FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); - - // When / Then - assertThat(Extensions.uuid(sut, 0)) - .isEqualTo(new java.util.UUID(0L, 0L)); - } - } - - @Test - void uuid_allOnesBytes_decodesWithoutSignExtension() { - // Given — 0xff in every position; if getByte returned a sign-extended int - // and we forgot the & 0xffL mask, msb/lsb would land as 0xff..fff..ff with - // sign bits poisoning the upper longs. Use the highest-bit pattern as the - // sign-extension trap. 
- try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(16); - for (int k = 0; k < 16; k++) { - buf.set(ValueLayout.JAVA_BYTE, k, (byte) 0xff); - } - ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, buf); - DType.FixedSizeList fslDtype = new DType.FixedSizeList( - new DType.Primitive(PType.U8, false), 16, false); - FixedSizeListArray sut = new FixedSizeListArray(fslDtype, 1, inner); - - // When / Then - assertThat(Extensions.uuid(sut, 0)) - .isEqualTo(new java.util.UUID(-1L, -1L)); - } - } - - @Test - void uuid_wrongFixedSize_throws() { - // Given — 8-byte FixedSizeList isn't a UUID; catch the mismatch up front - try (Arena arena = Arena.ofConfined()) { - ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 8, arena.allocate(8)); - DType.FixedSizeList wrongSize = new DType.FixedSizeList( - new DType.Primitive(PType.U8, false), 8, false); - FixedSizeListArray sut = new FixedSizeListArray(wrongSize, 1, inner); - - // When / Then - assertThatThrownBy(() -> Extensions.uuid(sut, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("fixedSize 16"); - } - } - - @Test - void uuid_wrongStorageType_throws() { - // Given — a plain IntArray isn't FixedSizeList; guard against callers - // passing the wrong column by mistake - try (Arena arena = Arena.ofConfined()) { - IntArray notFsl = new IntArray(I32, 1, arena.allocate(4)); - - // When / Then - assertThatThrownBy(() -> Extensions.uuid(notFsl, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("FixedSizeListArray"); - } - } - - @Test - void uuid_explicitExtensionOverload_verifiesId() { - // Given — passing a non-uuid extension dtype must not be silently - // reinterpreted as a uuid storage column - try (Arena arena = Arena.ofConfined()) { - ByteArray inner = new ByteArray(new DType.Primitive(PType.U8, false), 16, arena.allocate(16)); - DType.FixedSizeList fslDtype = new DType.FixedSizeList( - new DType.Primitive(PType.U8, false), 
16, false); - FixedSizeListArray storage = new FixedSizeListArray(fslDtype, 1, inner); - DType.Extension wrongExt = new DType.Extension("vortex.something", fslDtype, null, false); - - // When / Then - assertThatThrownBy(() -> Extensions.uuid(wrongExt, storage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("non-uuid extension"); - } - } - - @Test - void localDate_indexOutOfBounds_throws() { - // Given — both overloads must reject indices past the array length - // rather than silently reading whatever the storage decoder returns - try (Arena arena = Arena.ofConfined()) { - MemorySegment buf = arena.allocate(4); - IntArray storage = new IntArray(I32, 1, buf); - DType.Extension ext = new DType.Extension(Extensions.DATE, I32, null, false); - IntArray dated = new IntArray(DATE_DTYPE, 1, buf); - - // When / Then - assertThatThrownBy(() -> Extensions.localDate(dated, 1)) - .isInstanceOf(IndexOutOfBoundsException.class); - assertThatThrownBy(() -> Extensions.localDate(dated, -1)) - .isInstanceOf(IndexOutOfBoundsException.class); - assertThatThrownBy(() -> Extensions.localDate(ext, storage, 1)) - .isInstanceOf(IndexOutOfBoundsException.class); - } - } - - @Test - void localDate_unsupportedStorage_throws() { - // Given — a date dtype on top of a varbin array makes no semantic sense - try (Arena arena = Arena.ofConfined()) { - VarBinArray badStorage = new VarBinArray(DATE_DTYPE, 1, - arena.allocate(0), arena.allocate(8), PType.I32); - - // When / Then - assertThatThrownBy(() -> Extensions.localDate(badStorage, 0)) - .isInstanceOf(VortexException.class) - .hasMessageContaining("unsupported storage"); - } - } -} diff --git a/docs/compatibility.md b/docs/compatibility.md index b2517101..aad65434 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -63,12 +63,26 @@ metadata. 
The Rust catalogue lives in [`vortex-array/src/extension/`](https://github.com/vortex-data/vortex/tree/develop/vortex-array/src/extension); each subdir below names a canonical extension id and its on-disk shape. -| Extension id | Storage | Metadata | Java decoder | Status | -|---------------------|-------------------------------------------------|---------------------------------------------|------------------------------------------------------------------------------|--------| -| `vortex.date` | I32 (days) or I64 (ms) since Unix epoch | 1 byte: `TimeUnit` (2 = ms, 4 = days) | `Extensions.localDate(Array, long)` | ✅ | -| `vortex.time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | `Extensions.localTime(DType.Extension, Array, long)` | ✅ | -| `vortex.timestamp` | I64 with `TimeUnit` (s/ms/μs/ns) + optional tz | 1 byte unit + u16 LE tz_len + UTF-8 tz bytes | `Extensions.instant(...)` / `Extensions.zonedDateTime(...)` / `timezone(...)` | ✅ | -| `vortex.uuid` | `FixedSizeList(Primitive(U8), 16)` | 1 byte UUID version (optional, 0xff = unset) | `Extensions.uuid(Array, long)` | ✅ | +Extensions are exposed as a sealed `Extension` hierarchy. Each record carries +its own typed decode methods; pattern-match on `ext.kind()` to dispatch: + +```java +switch (ext.kind()) { + case Extension.Date d -> d.decode(storage, i); // LocalDate + case Extension.Time t -> t.decode(ext, storage, i); // LocalTime + case Extension.Timestamp ts -> ts.instant(ext, storage, i); // Instant + case Extension.Uuid u -> u.decode(storage, i); // UUID + case Extension.Custom c -> ... 
// any other id, raw String available +} +``` + +| Extension id | Record | Storage | Metadata | Status | +|---------------------|----------------------|-------------------------------------------------|-------------------------------------------|--------| +| `vortex.date` | `Extension.Date` | Signed integer days since 1970-01-01 | none | ✅ | +| `vortex.time` | `Extension.Time` | I32 (s/ms) or I64 (μs/ns) since midnight | 1 byte: `TimeUnit` | ✅ | +| `vortex.timestamp` | `Extension.Timestamp`| I64 epoch count in the recorded `TimeUnit` | unit byte + u16 LE tz_len + UTF-8 tz | ✅ | +| `vortex.uuid` | `Extension.Uuid` | `FixedSizeList(Primitive(U8), 16)` | none | ✅ | +| _custom ids_ | `Extension.Custom` | _whatever the column declares_ | _opaque bytes_ | passthrough | `TimeUnit` (see [`extension/datetime/unit.rs`](https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/extension/datetime/unit.rs)) encodes precision in the first metadata byte: diff --git a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java index 25cea4d9..aa4d2641 100644 --- a/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java +++ b/inspector/src/main/java/io/github/dfa1/vortex/inspect/VortexInspectorTui.java @@ -7,7 +7,6 @@ import io.github.dfa1.vortex.core.array.BoolArray; import io.github.dfa1.vortex.core.array.ByteArray; import io.github.dfa1.vortex.core.array.DoubleArray; -import io.github.dfa1.vortex.core.array.Extensions; import io.github.dfa1.vortex.core.array.FloatArray; import io.github.dfa1.vortex.core.array.GenericArray; import io.github.dfa1.vortex.core.array.IntArray; @@ -679,9 +678,9 @@ record Failed(String message) implements DataState { private static String formatValue(Array array, int i, DType declared) { if (declared instanceof DType.Extension ext - && Extensions.DATE.equals(ext.extensionId())) { + && ext.kind() instanceof 
io.github.dfa1.vortex.core.Extension.Date date) { try { - return Extensions.localDate(ext, array, i).toString(); + return date.decode(array, i).toString(); } catch (RuntimeException e) { // fall through to generic rendering on shape mismatch }