From 93368cbd79765f52648f28e3af2ad8c925996120 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Sat, 13 Jun 2026 09:43:28 +0530 Subject: [PATCH 1/7] Core: Add ManifestFile adapter for v4 tracked files Add TrackedFileAdapters.asManifestFile to wrap a DATA_MANIFEST or DELETE_MANIFEST TrackedFile as a ManifestFile. Closes #16227 --- .../apache/iceberg/TrackedFileAdapters.java | 146 ++++++++- .../iceberg/TestTrackedFileAdapters.java | 283 ++++++++++++++++++ 2 files changed, 428 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index e0feafeda246..cb79d8ed66b6 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -22,9 +22,13 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -/** Adapts {@link TrackedFile} entries to the {@link DataFile} and {@link DeleteFile} APIs. */ +/** + * Adapts {@link TrackedFile} entries to the {@link DataFile}, {@link DeleteFile}, and {@link + * ManifestFile} APIs. + */ class TrackedFileAdapters { private TrackedFileAdapters() {} @@ -53,6 +57,15 @@ static DeleteFile asEqualityDeleteFile(TrackedFile file, Map> implements ContentFile { @@ -405,6 +418,137 @@ public DeleteFile copyWithStats(Set requestedColumnIds) { } } + /** + * Adapts a TrackedFile DATA_MANIFEST or DELETE_MANIFEST entry to the {@link ManifestFile} + * interface. + */ + private static class TrackedManifestFile implements ManifestFile { + private final TrackedFile file; + + private TrackedManifestFile(TrackedFile file) { + Preconditions.checkArgument( + file.tracking() != null, "Cannot create manifest file: no tracking"); + Preconditions.checkArgument( + file.tracking().dataSequenceNumber() != null, + "Cannot create manifest file: no data sequence number"); + Preconditions.checkArgument( + file.tracking().snapshotId() != null, "Cannot create manifest file: no snapshot ID"); + Preconditions.checkArgument( + file.manifestInfo() != null, "Cannot create manifest file: no manifest info"); + Preconditions.checkArgument( + file.contentType() != FileContent.DELETE_MANIFEST || file.tracking().firstRowId() == null, + "Delete manifest must not have a first row ID: %s", + file.tracking().firstRowId()); + this.file = file; + } + + @Override + public String path() { + return file.location(); + } + + @Override + public long length() { + return file.fileSizeInBytes(); + } + + @Override + public int partitionSpecId() { + throw new UnsupportedOperationException( + "Tracked manifests are not bound to a single partition spec"); + } + + @Override + public ManifestContent content() { + return file.contentType() == FileContent.DATA_MANIFEST + ? ManifestContent.DATA + : ManifestContent.DELETES; + } + + @Override + public long sequenceNumber() { + return file.tracking().dataSequenceNumber(); + } + + @Override + public long minSequenceNumber() { + return file.manifestInfo().minSequenceNumber(); + } + + @Override + public Long snapshotId() { + return file.tracking().snapshotId(); + } + + @Override + public Integer addedFilesCount() { + return file.manifestInfo().addedFilesCount(); + } + + @Override + public Long addedRowsCount() { + return file.manifestInfo().addedRowsCount(); + } + + @Override + public Integer existingFilesCount() { + return file.manifestInfo().existingFilesCount(); + } + + @Override + public Long existingRowsCount() { + return file.manifestInfo().existingRowsCount(); + } + + @Override + public Integer deletedFilesCount() { + return file.manifestInfo().deletedFilesCount(); + } + + @Override + public Long deletedRowsCount() { + return file.manifestInfo().deletedRowsCount(); + } + + @Override + // v4 does not store partition summaries on manifests, so return null. + public List partitions() { + return null; + } + + @Override + public ByteBuffer keyMetadata() { + return file.keyMetadata(); + } + + @Override + public Long firstRowId() { + return file.tracking().firstRowId(); + } + + @Override + public ManifestFile copy() { + return new TrackedManifestFile(file.copy()); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof TrackedManifestFile)) { + return false; + } + + TrackedManifestFile that = (TrackedManifestFile) other; + return Objects.equal(path(), that.path()); + } + + @Override + public int hashCode() { + return Objects.hashCode(path()); + } + } + private static PartitionSpec resolveSpec( TrackedFile file, Map specsById) { Integer specId = file.specId(); diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index e13a342b8d5a..0ca12b1becd4 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -38,6 +38,7 @@ class TestTrackedFileAdapters { private static final String MANIFEST_LOCATION = "s3://bucket/table/manifest.parquet"; private static final String DATA_FILE_LOCATION = "s3://bucket/data/file.parquet"; private static final String DV_LOCATION = "s3://bucket/puffin/dv-file.bin"; + private static final String MANIFEST_FILE_LOCATION = "s3://bucket/table/manifests/m0.parquet"; // Tracking values that the delegation tests validate. private static final long MANIFEST_POS = 3L; @@ -80,6 +81,7 @@ class TestTrackedFileAdapters { private static final int SORT_ORDER_ID_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "sort_order_id"); private static final int DELETION_VECTOR_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "deletion_vector"); + private static final int MANIFEST_INFO_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "manifest_info"); private static final int KEY_METADATA_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "key_metadata"); private static final int SPLIT_OFFSETS_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "split_offsets"); private static final int EQUALITY_IDS_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "equality_ids"); @@ -289,6 +291,263 @@ void testDVDeleteFileAdapterRejectsNullDeletionVector() { .hasMessage("Cannot create DV delete file: no deletion vector"); } + @Test + void testManifestFileAdapterDelegation() { + Tracking tracking = createTracking(); + ManifestInfo manifestInfo = createManifestInfo(); + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 8L, + 2048L); + file.set(MANIFEST_INFO_ORDINAL, manifestInfo); + file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {7, 8, 9})); + + ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); + + assertThat(manifest.path()).isEqualTo(file.location()); + assertThat(manifest.length()).isEqualTo(file.fileSizeInBytes()); + assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); + assertThat(manifest.sequenceNumber()).isEqualTo(tracking.dataSequenceNumber()); + assertThat(manifest.minSequenceNumber()).isEqualTo(manifestInfo.minSequenceNumber()); + assertThat(manifest.snapshotId()).isEqualTo(tracking.snapshotId()); + assertThat(manifest.addedFilesCount()).isEqualTo(manifestInfo.addedFilesCount()); + assertThat(manifest.addedRowsCount()).isEqualTo(manifestInfo.addedRowsCount()); + assertThat(manifest.existingFilesCount()).isEqualTo(manifestInfo.existingFilesCount()); + assertThat(manifest.existingRowsCount()).isEqualTo(manifestInfo.existingRowsCount()); + assertThat(manifest.deletedFilesCount()).isEqualTo(manifestInfo.deletedFilesCount()); + assertThat(manifest.deletedRowsCount()).isEqualTo(manifestInfo.deletedRowsCount()); + assertThat(manifest.firstRowId()).isEqualTo(tracking.firstRowId()); + assertThat(manifest.keyMetadata()).isEqualTo(file.keyMetadata()); + assertThat(manifest.partitions()).isNull(); + } + + @Test + void testManifestFileAdapterPartitionSpecIdUnsupported() { + TrackedFileStruct file = + new TrackedFileStruct( + requiredOnlyTracking(), + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 8L, + 2048L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); + + assertThatThrownBy(manifest::partitionSpecId) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Tracked manifests are not bound to a single partition spec"); + } + + @Test + void testDeleteManifestContentMapsToDeletes() { + TrackedFileStruct file = + new TrackedFileStruct( + requiredOnlyTracking(), + FileContent.DELETE_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 4L, + 1024L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); + + assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); + assertThat(manifest.copy().content()).isEqualTo(ManifestContent.DELETES); + } + + @Test + void testManifestFileAdapterRejectsDeleteManifestWithFirstRowId() { + TrackedFileStruct file = + new TrackedFileStruct( + createTracking(), + FileContent.DELETE_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 4L, + 1024L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Delete manifest must not have a first row ID: %s", FIRST_ROW_ID); + } + + @Test + void testManifestFileAdapterReturnsNullForUnsetNullableFields() { + TrackedFileStruct file = + new TrackedFileStruct( + requiredOnlyTracking(), + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 8L, + 2048L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); + + assertThat(manifest.firstRowId()).isNull(); + assertThat(manifest.keyMetadata()).isNull(); + } + + @Test + void testManifestFileAdapterEqualsAndHashCode() { + TrackedFileStruct file = + new TrackedFileStruct( + createTracking(), + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 8L, + 2048L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); + + assertThat(manifest).isEqualTo(manifest.copy()); + assertThat(manifest).hasSameHashCodeAs(manifest.copy()); + } + + @Test + void testManifestFileAdapterCopy() { + TrackedFileStruct file = + new TrackedFileStruct( + createTracking(), + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 8L, + 2048L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {7, 8, 9})); + + ManifestFile original = TrackedFileAdapters.asManifestFile(file); + ManifestFile copy = original.copy(); + + assertThat(copy.path()).isEqualTo(original.path()); + assertThat(copy.length()).isEqualTo(original.length()); + assertThat(copy.content()).isEqualTo(original.content()); + assertThat(copy.sequenceNumber()).isEqualTo(original.sequenceNumber()); + assertThat(copy.minSequenceNumber()).isEqualTo(original.minSequenceNumber()); + assertThat(copy.snapshotId()).isEqualTo(original.snapshotId()); + assertThat(copy.addedFilesCount()).isEqualTo(original.addedFilesCount()); + assertThat(copy.addedRowsCount()).isEqualTo(original.addedRowsCount()); + assertThat(copy.existingFilesCount()).isEqualTo(original.existingFilesCount()); + assertThat(copy.existingRowsCount()).isEqualTo(original.existingRowsCount()); + assertThat(copy.deletedFilesCount()).isEqualTo(original.deletedFilesCount()); + assertThat(copy.deletedRowsCount()).isEqualTo(original.deletedRowsCount()); + assertThat(copy.firstRowId()).isEqualTo(original.firstRowId()); + assertThat(copy.keyMetadata()).isEqualTo(original.keyMetadata()); + assertThat(copy.partitions()).isNull(); + assertThat(copy.keyMetadata().array()).isNotSameAs(original.keyMetadata().array()); + } + + @ParameterizedTest + @EnumSource( + value = FileContent.class, + mode = EnumSource.Mode.EXCLUDE, + names = {"DATA_MANIFEST", "DELETE_MANIFEST"}) + void testManifestFileAdapterRejectsNonManifestContent(FileContent contentType) { + TrackedFileStruct file = trackedFile(contentType); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid content type for ManifestFile: %s", contentType); + } + + @Test + void testManifestFileAdapterRejectsNullTracking() { + // trackedFile() builds an entry with no tracking; manifest adapters require it. + TrackedFileStruct file = trackedFile(FileContent.DATA_MANIFEST); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create manifest file: no tracking"); + } + + @Test + void testManifestFileAdapterRejectsNullDataSequenceNumber() { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 4L, + 1024L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create manifest file: no data sequence number"); + } + + @Test + void testManifestFileAdapterRejectsNullSnapshotId() { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); + TrackedFileStruct file = + new TrackedFileStruct( + tracking, + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 4L, + 1024L); + file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create manifest file: no snapshot ID"); + } + + @Test + void testManifestFileAdapterRejectsNullManifestInfo() { + TrackedFileStruct file = + new TrackedFileStruct( + createTracking(), + FileContent.DATA_MANIFEST, + WRITER_FORMAT_VERSION, + MANIFEST_FILE_LOCATION, + FileFormat.PARQUET, + NO_PARTITION, + 4L, + 1024L); + + assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create manifest file: no manifest info"); + } + @Test void testNullContentStatsReturnsNullStats() { TrackedFileStruct file = trackedFile(FileContent.DATA); @@ -423,6 +682,30 @@ private static TrackingStruct createTracking() { return tracking; } + // Tracking with only the fields a manifest adapter requires, leaving the nullable-by-contract + // fields (firstRowId) unset. + private static TrackingStruct requiredOnlyTracking() { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, 42L); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); + return tracking; + } + + private static ManifestInfo createManifestInfo() { + return ManifestInfoStruct.builder() + .addedFilesCount(3) + .existingFilesCount(5) + .deletedFilesCount(2) + .replacedFilesCount(0) + .addedRowsCount(300L) + .existingRowsCount(500L) + .deletedRowsCount(200L) + .replacedRowsCount(0L) + .minSequenceNumber(7L) + .build(); + } + private static DeletionVector deletionVector() { return DeletionVectorStruct.builder() .location(DV_LOCATION) From 9974e9b168e0e88c6c96d865015fc986b11d6475 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 22 Jun 2026 10:37:28 -0700 Subject: [PATCH 2/7] PR feedback --- .../apache/iceberg/TrackedFileAdapters.java | 10 ++--- .../iceberg/TestTrackedFileAdapters.java | 37 +++++++++++-------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index cb79d8ed66b6..137d17dad8b2 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -426,8 +426,6 @@ private static class TrackedManifestFile implements ManifestFile { private final TrackedFile file; private TrackedManifestFile(TrackedFile file) { - Preconditions.checkArgument( - file.tracking() != null, "Cannot create manifest file: no tracking"); Preconditions.checkArgument( file.tracking().dataSequenceNumber() != null, "Cannot create manifest file: no data sequence number"); @@ -436,9 +434,9 @@ private TrackedManifestFile(TrackedFile file) { Preconditions.checkArgument( file.manifestInfo() != null, "Cannot create manifest file: no manifest info"); Preconditions.checkArgument( - file.contentType() != FileContent.DELETE_MANIFEST || file.tracking().firstRowId() == null, - "Delete manifest must not have a first row ID: %s", - file.tracking().firstRowId()); + file.manifestInfo().dv() == null, + "Cannot adapt manifest with a deletion vector to ManifestFile: %s", + file.location()); this.file = file; } @@ -510,8 +508,8 @@ public Long deletedRowsCount() { return file.manifestInfo().deletedRowsCount(); } - @Override // v4 does not store partition summaries on manifests, so return null. + @Override public List partitions() { return null; } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 0ca12b1becd4..6b0874a2afb6 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -369,22 +369,38 @@ void testDeleteManifestContentMapsToDeletes() { } @Test - void testManifestFileAdapterRejectsDeleteManifestWithFirstRowId() { + void testManifestFileAdapterRejectsManifestWithDeletionVector() { TrackedFileStruct file = new TrackedFileStruct( - createTracking(), - FileContent.DELETE_MANIFEST, + requiredOnlyTracking(), + FileContent.DATA_MANIFEST, WRITER_FORMAT_VERSION, MANIFEST_FILE_LOCATION, FileFormat.PARQUET, NO_PARTITION, 4L, 1024L); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + ManifestInfo manifestInfo = + ManifestInfoStruct.builder() + .addedFilesCount(3) + .existingFilesCount(5) + .deletedFilesCount(2) + .replacedFilesCount(0) + .addedRowsCount(300L) + .existingRowsCount(500L) + .deletedRowsCount(200L) + .replacedRowsCount(0L) + .minSequenceNumber(7L) + .dv(ByteBuffer.wrap(new byte[] {1, 2, 3})) + .dvCardinality(4L) + .build(); + file.set(MANIFEST_INFO_ORDINAL, manifestInfo); assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Delete manifest must not have a first row ID: %s", FIRST_ROW_ID); + .hasMessage( + "Cannot adapt manifest with a deletion vector to ManifestFile: %s", + MANIFEST_FILE_LOCATION); } @Test @@ -476,17 +492,6 @@ void testManifestFileAdapterRejectsNonManifestContent(FileContent contentType) { .hasMessage("Invalid content type for ManifestFile: %s", contentType); } - @Test - void testManifestFileAdapterRejectsNullTracking() { - // trackedFile() builds an entry with no tracking; manifest adapters require it. - TrackedFileStruct file = trackedFile(FileContent.DATA_MANIFEST); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); - - assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot create manifest file: no tracking"); - } - @Test void testManifestFileAdapterRejectsNullDataSequenceNumber() { TrackingStruct tracking = new TrackingStruct(); From e3be26b38f3772ee0974d2805dc61a94c20cb0f7 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 23 Jun 2026 11:27:05 -0700 Subject: [PATCH 3/7] Address review comments and adopt TrackedFileBuilder in tests --- .../apache/iceberg/TrackedFileAdapters.java | 39 ++-- .../iceberg/TestTrackedFileAdapters.java | 178 +++++++----------- 2 files changed, 79 insertions(+), 138 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index 137d17dad8b2..fadd41c9f400 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** @@ -427,12 +426,9 @@ private static class TrackedManifestFile implements ManifestFile { private TrackedManifestFile(TrackedFile file) { Preconditions.checkArgument( - file.tracking().dataSequenceNumber() != null, - "Cannot create manifest file: no data sequence number"); + file.tracking().dataSequenceNumber() != null, "Invalid data sequence number: null"); Preconditions.checkArgument( - file.tracking().snapshotId() != null, "Cannot create manifest file: no snapshot ID"); - Preconditions.checkArgument( - file.manifestInfo() != null, "Cannot create manifest file: no manifest info"); + file.tracking().snapshotId() != null, "Invalid snapshot ID: null"); Preconditions.checkArgument( file.manifestInfo().dv() == null, "Cannot adapt manifest with a deletion vector to ManifestFile: %s", @@ -453,14 +449,20 @@ public long length() { @Override public int partitionSpecId() { throw new UnsupportedOperationException( - "Tracked manifests are not bound to a single partition spec"); + "v4 manifests are not bound to a single partition spec"); } @Override public ManifestContent content() { - return file.contentType() == FileContent.DATA_MANIFEST - ? ManifestContent.DATA - : ManifestContent.DELETES; + switch (file.contentType()) { + case DATA_MANIFEST: + return ManifestContent.DATA; + case DELETE_MANIFEST: + return ManifestContent.DELETES; + default: + throw new UnsupportedOperationException( + "Unsupported content type for manifests: " + file.contentType()); + } } @Override @@ -528,23 +530,6 @@ public Long firstRowId() { public ManifestFile copy() { return new TrackedManifestFile(file.copy()); } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } else if (!(other instanceof TrackedManifestFile)) { - return false; - } - - TrackedManifestFile that = (TrackedManifestFile) other; - return Objects.equal(path(), that.path()); - } - - @Override - public int hashCode() { - return Objects.hashCode(path()); - } } private static PartitionSpec resolveSpec( diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index b01d64c447b5..6130a9dd7860 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -45,6 +45,7 @@ class TestTrackedFileAdapters { private static final long DATA_SEQUENCE_NUMBER = 10L; private static final long FILE_SEQUENCE_NUMBER = 11L; private static final long FIRST_ROW_ID = 1000L; + private static final long SNAPSHOT_ID = 42L; private static final long MANIFEST_RECORD_COUNT = 8L; private static final long MANIFEST_FILE_SIZE = 2048L; @@ -61,12 +62,9 @@ class TestTrackedFileAdapters { .withSpecId(PARTITIONED_SPEC_ID) .build(); private static final PartitionData PARTITION = partition("books"); - - // Passed for manifest test files, where there is no partition tuple. - private static final PartitionData NO_PARTITION = null; + private static final PartitionData EMPTY_PARTITION = new PartitionData(Types.StructType.of()); // Tracking field ordinals, looked up from the schema so the tests do not hard-code offsets. - private static final int STATUS_ORDINAL = ordinalOf(Tracking.schema(), "status"); private static final int SNAPSHOT_ID_ORDINAL = ordinalOf(Tracking.schema(), "snapshot_id"); private static final int DATA_SEQUENCE_NUMBER_ORDINAL = ordinalOf(Tracking.schema(), "sequence_number"); @@ -83,8 +81,6 @@ class TestTrackedFileAdapters { private static final int SPEC_ID_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "spec_id"); private static final int DELETION_VECTOR_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "deletion_vector"); - private static final int MANIFEST_INFO_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "manifest_info"); - private static final int KEY_METADATA_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "key_metadata"); @Test void testDataFileAdapterDelegation() { @@ -286,54 +282,50 @@ void testDVDeleteFileAdapterRejectsNullDeletionVector() { .hasMessage("Cannot create DV delete file: no deletion vector"); } - @Test - void testManifestFileAdapterDelegation() { - Tracking tracking = createTracking(); + @ParameterizedTest + @EnumSource( + value = FileContent.class, + names = {"DATA_MANIFEST", "DELETE_MANIFEST"}) + void testManifestFileAdapterDelegation(FileContent contentType) { ManifestInfo manifestInfo = createManifestInfo(); - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, tracking); - file.set(MANIFEST_INFO_ORDINAL, manifestInfo); - file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {7, 8, 9})); + TrackedFile file = + manifestBuilder(contentType) + .manifestInfo(manifestInfo) + .keyMetadata(ByteBuffer.wrap(new byte[] {7, 8, 9})) + .build(); + populateTrackingFields(file); ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); - assertThat(manifest.path()).isEqualTo(file.location()); - assertThat(manifest.length()).isEqualTo(file.fileSizeInBytes()); - assertThat(manifest.content()).isEqualTo(ManifestContent.DATA); - assertThat(manifest.sequenceNumber()).isEqualTo(tracking.dataSequenceNumber()); + ManifestContent expectedContent = + contentType == FileContent.DATA_MANIFEST ? ManifestContent.DATA : ManifestContent.DELETES; + assertThat(manifest.path()).isEqualTo(MANIFEST_FILE_LOCATION); + assertThat(manifest.length()).isEqualTo(MANIFEST_FILE_SIZE); + assertThat(manifest.content()).isEqualTo(expectedContent); + assertThat(manifest.copy().content()).isEqualTo(expectedContent); + assertThat(manifest.sequenceNumber()).isEqualTo(DATA_SEQUENCE_NUMBER); assertThat(manifest.minSequenceNumber()).isEqualTo(manifestInfo.minSequenceNumber()); - assertThat(manifest.snapshotId()).isEqualTo(tracking.snapshotId()); + assertThat(manifest.snapshotId()).isEqualTo(SNAPSHOT_ID); assertThat(manifest.addedFilesCount()).isEqualTo(manifestInfo.addedFilesCount()); assertThat(manifest.addedRowsCount()).isEqualTo(manifestInfo.addedRowsCount()); assertThat(manifest.existingFilesCount()).isEqualTo(manifestInfo.existingFilesCount()); assertThat(manifest.existingRowsCount()).isEqualTo(manifestInfo.existingRowsCount()); assertThat(manifest.deletedFilesCount()).isEqualTo(manifestInfo.deletedFilesCount()); assertThat(manifest.deletedRowsCount()).isEqualTo(manifestInfo.deletedRowsCount()); - assertThat(manifest.firstRowId()).isEqualTo(tracking.firstRowId()); - assertThat(manifest.keyMetadata()).isEqualTo(file.keyMetadata()); + assertThat(manifest.firstRowId()).isEqualTo(FIRST_ROW_ID); + assertThat(manifest.keyMetadata()).isEqualTo(ByteBuffer.wrap(new byte[] {7, 8, 9})); assertThat(manifest.partitions()).isNull(); } @Test void testManifestFileAdapterPartitionSpecIdUnsupported() { - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, requiredOnlyTracking()); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + TrackedFile file = manifestFile(FileContent.DATA_MANIFEST, createManifestInfo()); ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); assertThatThrownBy(manifest::partitionSpecId) .isInstanceOf(UnsupportedOperationException.class) - .hasMessage("Tracked manifests are not bound to a single partition spec"); - } - - @Test - void testDeleteManifestContentMapsToDeletes() { - TrackedFileStruct file = manifestFile(FileContent.DELETE_MANIFEST, requiredOnlyTracking()); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); - - ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); - - assertThat(manifest.content()).isEqualTo(ManifestContent.DELETES); - assertThat(manifest.copy().content()).isEqualTo(ManifestContent.DELETES); + .hasMessage("v4 manifests are not bound to a single partition spec"); } @Test @@ -352,8 +344,7 @@ void testManifestFileAdapterRejectsManifestWithDeletionVector() { .dv(ByteBuffer.wrap(new byte[] {1, 2, 3})) .dvCardinality(4L) .build(); - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, requiredOnlyTracking()); - file.set(MANIFEST_INFO_ORDINAL, manifestInfo); + TrackedFile file = manifestFile(FileContent.DATA_MANIFEST, manifestInfo); assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) .isInstanceOf(IllegalArgumentException.class) @@ -364,8 +355,11 @@ void testManifestFileAdapterRejectsManifestWithDeletionVector() { @Test void testManifestFileAdapterReturnsNullForUnsetNullableFields() { - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, requiredOnlyTracking()); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + TrackedFile file = + manifestBuilder(FileContent.DATA_MANIFEST).manifestInfo(createManifestInfo()).build(); + // Inheritance fills the data sequence number, which the adapter requires; first row ID and + // key metadata stay unset. + ((TrackingStruct) file.tracking()).set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); @@ -373,22 +367,14 @@ void testManifestFileAdapterReturnsNullForUnsetNullableFields() { assertThat(manifest.keyMetadata()).isNull(); } - @Test - void testManifestFileAdapterEqualsAndHashCode() { - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, createTracking()); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); - - ManifestFile manifest = TrackedFileAdapters.asManifestFile(file); - - assertThat(manifest).isEqualTo(manifest.copy()); - assertThat(manifest).hasSameHashCodeAs(manifest.copy()); - } - @Test void testManifestFileAdapterCopy() { - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, createTracking()); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); - file.set(KEY_METADATA_ORDINAL, ByteBuffer.wrap(new byte[] {7, 8, 9})); + TrackedFile file = + manifestBuilder(FileContent.DATA_MANIFEST) + .manifestInfo(createManifestInfo()) + .keyMetadata(ByteBuffer.wrap(new byte[] {7, 8, 9})) + .build(); + populateTrackingFields(file); ManifestFile original = TrackedFileAdapters.asManifestFile(file); ManifestFile copy = original.copy(); @@ -426,36 +412,24 @@ void testManifestFileAdapterRejectsNonManifestContent(FileContent contentType) { @Test void testManifestFileAdapterRejectsNullDataSequenceNumber() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, tracking); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); + // A freshly built manifest entry has no data sequence number until inheritance fills it. + TrackedFile file = + manifestBuilder(FileContent.DATA_MANIFEST).manifestInfo(createManifestInfo()).build(); assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot create manifest file: no data sequence number"); + .hasMessage("Invalid data sequence number: null"); } @Test void testManifestFileAdapterRejectsNullSnapshotId() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); - tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, tracking); - file.set(MANIFEST_INFO_ORDINAL, createManifestInfo()); - - assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot create manifest file: no snapshot ID"); - } - - @Test - void testManifestFileAdapterRejectsNullManifestInfo() { - TrackedFileStruct file = manifestFile(FileContent.DATA_MANIFEST, createTracking()); + TrackedFile file = manifestFile(FileContent.DATA_MANIFEST, createManifestInfo()); + // Clear the snapshot ID to exercise the adapter's required-field guard. + ((TrackingStruct) file.tracking()).set(SNAPSHOT_ID_ORDINAL, null); assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot create manifest file: no manifest info"); + .hasMessage("Invalid snapshot ID: null"); } @Test @@ -574,36 +548,28 @@ private static TrackedFileStruct dummyTrackedFile(FileContent contentType) { return file; } - private static TrackedFileStruct manifestFile(FileContent contentType, Tracking tracking) { - return new TrackedFileStruct( - tracking, - contentType, - WRITER_FORMAT_VERSION, - MANIFEST_FILE_LOCATION, - FileFormat.PARQUET, - NO_PARTITION, - MANIFEST_RECORD_COUNT, - MANIFEST_FILE_SIZE, - null, - null, - null, - null, - null, - null, - null, - null); - } - - private static TrackingStruct createTracking() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); - tracking.set(SNAPSHOT_ID_ORDINAL, 42L); - tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); - tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, FILE_SEQUENCE_NUMBER); - tracking.set(FIRST_ROW_ID_ORDINAL, FIRST_ROW_ID); - tracking.setManifestLocation(MANIFEST_LOCATION); - tracking.set(MANIFEST_POS_ORDINAL, MANIFEST_POS); - return tracking; + // Builder for a manifest entry with the required non-tracking fields set. Callers add the + // manifest info and any optional fields before building. + private static TrackedFileBuilder manifestBuilder(FileContent contentType) { + TrackedFileBuilder builder = + contentType == FileContent.DATA_MANIFEST + ? TrackedFileBuilder.dataManifest(SNAPSHOT_ID) + : TrackedFileBuilder.deleteManifest(SNAPSHOT_ID); + return builder + .writerFormatVersion(WRITER_FORMAT_VERSION) + .location(MANIFEST_FILE_LOCATION) + .fileFormat(FileFormat.PARQUET) + .partition(EMPTY_PARTITION) + .recordCount(MANIFEST_RECORD_COUNT) + .fileSizeInBytes(MANIFEST_FILE_SIZE); + } + + // Builds a manifest entry and simulates inheritance so the tracking fields the adapter requires + // are populated. + private static TrackedFile manifestFile(FileContent contentType, ManifestInfo manifestInfo) { + TrackedFile file = manifestBuilder(contentType).manifestInfo(manifestInfo).build(); + populateTrackingFields(file); + return file; } private static void populateTrackingFields(TrackedFile file) { @@ -615,16 +581,6 @@ private static void populateTrackingFields(TrackedFile file) { tracking.set(MANIFEST_POS_ORDINAL, MANIFEST_POS); } - // Tracking with only the fields a manifest adapter requires, leaving the nullable-by-contract - // fields (firstRowId) unset. - private static TrackingStruct requiredOnlyTracking() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); - tracking.set(SNAPSHOT_ID_ORDINAL, 42L); - tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); - return tracking; - } - private static ManifestInfo createManifestInfo() { return ManifestInfoStruct.builder() .addedFilesCount(3) From ae41f2a3fbe3a3d202ba2f86b12a737cb4c432da Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Fri, 26 Jun 2026 17:53:17 -0700 Subject: [PATCH 4/7] PR feedback --- .../test/java/org/apache/iceberg/TestTrackedFileAdapters.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 6130a9dd7860..5e1151fdcc23 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -412,7 +412,6 @@ void testManifestFileAdapterRejectsNonManifestContent(FileContent contentType) { @Test void testManifestFileAdapterRejectsNullDataSequenceNumber() { - // A freshly built manifest entry has no data sequence number until inheritance fills it. TrackedFile file = manifestBuilder(FileContent.DATA_MANIFEST).manifestInfo(createManifestInfo()).build(); @@ -424,7 +423,6 @@ void testManifestFileAdapterRejectsNullDataSequenceNumber() { @Test void testManifestFileAdapterRejectsNullSnapshotId() { TrackedFile file = manifestFile(FileContent.DATA_MANIFEST, createManifestInfo()); - // Clear the snapshot ID to exercise the adapter's required-field guard. ((TrackingStruct) file.tracking()).set(SNAPSHOT_ID_ORDINAL, null); assertThatThrownBy(() -> TrackedFileAdapters.asManifestFile(file)) From ae9a18e844d8eaa6dbb2a227bbece681cc35d249 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Sat, 27 Jun 2026 10:15:51 -0700 Subject: [PATCH 5/7] Fix rebase error --- .../test/java/org/apache/iceberg/TestTrackedFileAdapters.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index 7d27dd0570cd..394f387fd6a5 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -554,7 +554,7 @@ private static TrackedFileBuilder manifestBuilder(FileContent contentType) { ? TrackedFileBuilder.dataManifest(SNAPSHOT_ID) : TrackedFileBuilder.deleteManifest(SNAPSHOT_ID); return builder - .writerFormatVersion(WRITER_FORMAT_VERSION) + .formatVersion(FORMAT_VERSION_V4) .location(MANIFEST_FILE_LOCATION) .fileFormat(FileFormat.PARQUET) .partition(EMPTY_PARTITION) From f98a53434456299e273ed407a33e1324b1f36685 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 30 Jun 2026 21:31:04 -0700 Subject: [PATCH 6/7] Update core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java Co-authored-by: Ryan Blue --- core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index fadd41c9f400..b52f88f039e7 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -418,7 +418,7 @@ public DeleteFile copyWithStats(Set requestedColumnIds) { } /** - * Adapts a TrackedFile DATA_MANIFEST or DELETE_MANIFEST entry to the {@link ManifestFile} + * Adapts a TrackedFile to {@link ManifestFile} * interface. */ private static class TrackedManifestFile implements ManifestFile { From 74f4995aaffff22d9ca228d9d2d2633ac56b19ff Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 30 Jun 2026 21:32:23 -0700 Subject: [PATCH 7/7] Update core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java Co-authored-by: Ryan Blue --- core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java index b52f88f039e7..24b3c013dc40 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileAdapters.java @@ -419,7 +419,6 @@ public DeleteFile copyWithStats(Set requestedColumnIds) { /** * Adapts a TrackedFile to {@link ManifestFile} - * interface. */ private static class TrackedManifestFile implements ManifestFile { private final TrackedFile file;