Summary of this patch (free text before the first "diff --git" header):

  * ListVector / LargeListVector.getFieldBuffers(): when the offset buffer has
    zero capacity, allocate room for a single offset entry before the buffers
    are handed out.
  * ListVector / LargeListVector.setReaderAndWriterIndex(): when valueCount is 0,
    set the offset writer index to min(OFFSET_WIDTH, capacity) instead of 0 so
    that offset[0] is part of the readable region.
  * TestListVector / TestLargeListVector: regression tests for nested lists whose
    inner vectors are never written to.

diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
index 997b5a8b7..29e930d88 100644
--- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
+++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
@@ -275,6 +275,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
   @Override
   public List<ArrowBuf> getFieldBuffers() {
     List<ArrowBuf> result = new ArrayList<>(2);
+
+    // Ensure offset buffer has at least one entry for offset[0].
+    // According to Arrow specification, offset buffer must have N+1 entries,
+    // even when N=0, it should contain [0].
+    if (offsetBuffer.capacity() == 0) {
+      offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
+    }
+
     setReaderAndWriterIndex();
     result.add(validityBuffer);
     result.add(offsetBuffer);
@@ -309,7 +317,8 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
+      // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
+      offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
       offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index 93a313ef4..0890289e5 100644
--- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -233,6 +233,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
   @Override
   public List<ArrowBuf> getFieldBuffers() {
     List<ArrowBuf> result = new ArrayList<>(2);
+
+    // Ensure offset buffer has at least one entry for offset[0].
+    // According to Arrow specification, offset buffer must have N+1 entries,
+    // even when N=0, it should contain [0].
+    if (offsetBuffer.capacity() == 0) {
+      offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
+    }
+
     setReaderAndWriterIndex();
     result.add(validityBuffer);
     result.add(offsetBuffer);
@@ -267,7 +275,8 @@ private void setReaderAndWriterIndex() {
     offsetBuffer.readerIndex(0);
     if (valueCount == 0) {
       validityBuffer.writerIndex(0);
-      offsetBuffer.writerIndex(0);
+      // Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
+      offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
     } else {
       validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
       offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
index 759c84651..93be0228f 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -1100,6 +1100,36 @@ public void testCopyValueSafeForExtensionType() throws Exception {
     }
   }
 
+  @Test
+  public void testNestedEmptyLargeListOffsetBuffer() {
+    // Test that nested LargeListVector properly allocates offset buffer
+    // even when nested writers are never invoked. According to Arrow spec,
+    // offset buffer must have N+1 entries. Even when N=0, it should contain [0].
+    try (LargeListVector outerList = LargeListVector.empty("outer", allocator)) {
+      // Setup LargeList<LargeList<Int>>
+      outerList.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType()));
+      LargeListVector innerList = (LargeListVector) outerList.getDataVector();
+      innerList.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+      // Allocate outer only - simulates case where inner is never written to
+      outerList.allocateNew();
+      outerList.setValueCount(0);
+
+      // Get field buffers - this is what IPC serialization uses
+      List<ArrowBuf> innerBuffers = innerList.getFieldBuffers();
+
+      // Verify inner list offset buffer has at least OFFSET_WIDTH (8) bytes
+      assertTrue(
+          innerBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH,
+          "Inner LargeList offset buffer should have at least "
+              + LargeListVector.OFFSET_WIDTH
+              + " bytes for offset[0]");
+
+      // Verify offset[0] = 0
+      assertEquals(0L, innerList.getOffsetBuffer().getLong(0));
+    }
+  }
+
   private void writeIntValues(UnionLargeListWriter writer, int[] values) {
     writer.startList();
     for (int v : values) {
diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index e96ac3027..0149571e7 100644
--- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -1379,6 +1379,45 @@ public void testCopyValueSafeForExtensionType() throws Exception {
     }
   }
 
+  @Test
+  public void testNestedEmptyListOffsetBuffer() {
+    // Test that 3-level nested ListVector properly allocates offset buffers
+    // even when nested writers are never invoked. According to Arrow spec,
+    // offset buffer must have N+1 entries. Even when N=0, it should contain [0].
+    try (ListVector level0 = ListVector.empty("level0", allocator)) {
+      // Setup List<List<List<Int>>> - 3 levels
+      level0.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
+      ListVector level1 = (ListVector) level0.getDataVector();
+      level1.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
+      ListVector level2 = (ListVector) level1.getDataVector();
+      level2.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+      // Only allocate level0 - simulates case where all nested levels are empty
+      level0.allocateNew();
+      level0.setValueCount(0);
+
+      // Verify all levels have properly allocated offset buffers
+      List<ArrowBuf> level1Buffers = level1.getFieldBuffers();
+      List<ArrowBuf> level2Buffers = level2.getFieldBuffers();
+
+      assertTrue(
+          level1Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
+          "Level1 offset buffer should have at least "
+              + BaseRepeatedValueVector.OFFSET_WIDTH
+              + " bytes for offset[0]");
+
+      assertTrue(
+          level2Buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
+          "Level2 offset buffer should have at least "
+              + BaseRepeatedValueVector.OFFSET_WIDTH
+              + " bytes for offset[0]");
+
+      // Verify offset[0] = 0 for all levels
+      assertEquals(0, level1.getOffsetBuffer().getInt(0));
+      assertEquals(0, level2.getOffsetBuffer().getInt(0));
+    }
+  }
+
   private void writeIntValues(UnionListWriter writer, int[] values) {
     writer.startList();
     for (int v : values) {