diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 3f5be0756fad..04d23fa33abe 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -19,7 +19,6 @@ package org.apache.iceberg; import java.io.Serializable; -import java.util.Objects; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -129,26 +128,6 @@ static Builder builder() { return new Builder(); } - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } else if (!(other instanceof DeletionVectorStruct)) { - return false; - } - - DeletionVectorStruct that = (DeletionVectorStruct) other; - return Objects.equals(location, that.location) - && offset == that.offset - && sizeInBytes == that.sizeInBytes - && cardinality == that.cardinality; - } - - @Override - public int hashCode() { - return Objects.hash(location, offset, sizeInBytes, cardinality); - } - @Override public String toString() { return MoreObjects.toStringHelper(this) diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java index d7aa28c3290f..2cdb5168fcae 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java @@ -23,8 +23,8 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; class TrackedFileBuilder { - private final long snapshotId; private final FileContent contentType; + private final TrackingBuilder trackingBuilder; // Required fields private Integer formatVersion = null; @@ -32,10 +32,10 @@ class TrackedFileBuilder { private FileFormat fileFormat = null; private Long recordCount = null; private Long fileSizeInBytes = null; - private PartitionData partitionData = null; // optional fields private Integer specId = null; + private PartitionData partition = null; private ContentStats contentStats = null; private Integer sortOrderId = null; private DeletionVector deletionVector = null; @@ -44,12 +44,6 @@ class TrackedFileBuilder { private List splitOffsets = null; private List equalityIds = null; - // tracking-related fields - private Tracking sourceTracking = null; - private boolean dvUpdated = false; - private ByteBuffer deletedPositions = null; - private ByteBuffer replacedPositions = null; - /** * Creates a builder for a newly added data file entry. * @@ -111,7 +105,7 @@ static TrackedFile deleted(TrackedFile source, long newSnapshotId) { /** * Returns a REPLACED tracked file derived from {@code source}. * - *

Manifest entries cannot transition to REPLACED. + *

Manifest files cannot transition to REPLACED. * * @param source source tracked file * @param newSnapshotId the snapshot ID in which the new tracked file will be committed @@ -119,9 +113,7 @@ static TrackedFile deleted(TrackedFile source, long newSnapshotId) { static TrackedFile replaced(TrackedFile source, long newSnapshotId) { Preconditions.checkArgument(source != null, "Invalid source: null"); Preconditions.checkArgument( - !isLeafManifest(source.contentType()), - "Manifest entries cannot transition to REPLACED, but entry type is: %s", - source.contentType()); + !isLeafManifest(source.contentType()), "Cannot transition manifest files to REPLACED"); return terminal(source, TrackingBuilder.replaced(source.tracking(), newSnapshotId)); } @@ -147,18 +139,17 @@ private static TrackedFile terminal(TrackedFile source, Tracking tracking) { private TrackedFileBuilder(FileContent contentType, long snapshotId) { this.contentType = contentType; - this.snapshotId = snapshotId; + this.trackingBuilder = TrackingBuilder.added(snapshotId); } private TrackedFileBuilder(TrackedFile source, long snapshotId) { this.contentType = source.contentType(); - this.snapshotId = snapshotId; this.formatVersion = source.formatVersion(); this.location = source.location(); this.fileFormat = source.fileFormat(); this.recordCount = source.recordCount(); this.fileSizeInBytes = source.fileSizeInBytes(); - this.partitionData = (PartitionData) source.partition(); + this.partition = (PartitionData) source.partition(); this.specId = source.specId(); this.contentStats = source.contentStats(); this.sortOrderId = source.sortOrderId(); @@ -167,7 +158,7 @@ private TrackedFileBuilder(TrackedFile source, long snapshotId) { this.keyMetadata = source.keyMetadata(); this.splitOffsets = source.splitOffsets(); this.equalityIds = source.equalityIds(); - this.sourceTracking = source.tracking(); + this.trackingBuilder = TrackingBuilder.from(source.tracking(), snapshotId); } TrackedFileBuilder formatVersion(int newFormatVersion) { @@ -205,15 +196,16 @@ TrackedFileBuilder fileSizeInBytes(long newFileSizeInBytes) { return this; } - TrackedFileBuilder specId(int newSpecId) { - Preconditions.checkArgument(newSpecId >= 0, "Invalid spec ID: %s (must be >= 0)", newSpecId); - this.specId = newSpecId; - return this; - } - - TrackedFileBuilder partition(PartitionData newPartitionData) { - Preconditions.checkArgument(newPartitionData != null, "Invalid partition: null"); - this.partitionData = newPartitionData; + TrackedFileBuilder partition(PartitionSpec spec, StructLike newPartition) { + Preconditions.checkArgument(spec != null, "Invalid spec: null"); + if (spec.isUnpartitioned()) { + Preconditions.checkArgument( + newPartition == null, "Invalid partition: must be null for unpartitioned spec"); + } else { + Preconditions.checkArgument(newPartition != null, "Invalid partition: null"); + this.partition = DataFiles.copyPartitionData(spec, newPartition, null); + } + this.specId = spec.specId(); return this; } @@ -225,9 +217,7 @@ TrackedFileBuilder contentStats(ContentStats newContentStats) { TrackedFileBuilder sortOrderId(int newSortOrderId) { Preconditions.checkArgument( - !isLeafManifest(contentType), - "Sort order ID cannot be added to manifest entries, but entry type is: %s", - contentType); + !isLeafManifest(contentType), "Cannot set sort order for manifest files"); Preconditions.checkArgument( newSortOrderId >= 0, "Invalid sort order ID: %s (must be >= 0)", newSortOrderId); this.sortOrderId = newSortOrderId; @@ -238,13 +228,21 @@ TrackedFileBuilder deletionVector(DeletionVector newDeletionVector) { Preconditions.checkArgument(newDeletionVector != null, "Invalid deletion vector: null"); Preconditions.checkArgument( contentType == FileContent.DATA, - "Deletion vector can only be added to DATA entries, but entry type is: %s", + "Cannot add deletion vector for file with content: %s", contentType); - Preconditions.checkArgument( - this.deletionVector == null || !this.deletionVector.equals(newDeletionVector), - "The same deletion vector already added"); + if (deletionVector != null) { + Preconditions.checkArgument( + deletionVector.cardinality() < newDeletionVector.cardinality(), + "Invalid DV update, cardinality must increase: existing=%s, new=%s", + deletionVector == null ? null : deletionVector.cardinality(), + newDeletionVector.cardinality()); + Preconditions.checkArgument( + !deletionVector.location().equals(newDeletionVector.location()) + || deletionVector.offset() != newDeletionVector.offset(), + "Invalid DV update: same location and offset"); + } this.deletionVector = newDeletionVector; - this.dvUpdated = true; + trackingBuilder.dvUpdated(); return this; } @@ -252,7 +250,7 @@ TrackedFileBuilder manifestInfo(ManifestInfo newManifestInfo) { Preconditions.checkArgument(newManifestInfo != null, "Invalid manifest info: null"); Preconditions.checkArgument( isLeafManifest(contentType), - "Manifest info can only be added to manifests, but entry type is: %s", + "Cannot add manifest info for file with content: %s", contentType); this.manifestInfo = newManifestInfo; return this; @@ -267,9 +265,7 @@ TrackedFileBuilder keyMetadata(ByteBuffer newKeyMetadata) { TrackedFileBuilder splitOffsets(List newSplitOffsets) { Preconditions.checkArgument(newSplitOffsets != null, "Invalid split offsets: null"); Preconditions.checkArgument( - !isLeafManifest(contentType), - "Split offsets cannot be added to manifest entries, but entry type is: %s", - contentType); + !isLeafManifest(contentType), "Cannot set split offsets for manifest files"); this.splitOffsets = newSplitOffsets; return this; } @@ -278,7 +274,7 @@ TrackedFileBuilder equalityIds(List newEqualityIds) { Preconditions.checkArgument(newEqualityIds != null, "Invalid equality IDs: null"); Preconditions.checkArgument( contentType == FileContent.EQUALITY_DELETES, - "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: %s", + "Cannot add equality IDs for file with content: %s", contentType); this.equalityIds = newEqualityIds; return this; @@ -288,9 +284,9 @@ TrackedFileBuilder deletedPositions(ByteBuffer newDeletedPositions) { Preconditions.checkArgument(newDeletedPositions != null, "Invalid deleted positions: null"); Preconditions.checkArgument( isLeafManifest(contentType), - "Deleted positions can only be added to manifest entries, but entry type is: %s", + "Cannot add deleted positions for file with content: %s", contentType); - this.deletedPositions = newDeletedPositions; + trackingBuilder.deletedPositions(newDeletedPositions); return this; } @@ -298,9 +294,9 @@ TrackedFileBuilder replacedPositions(ByteBuffer newReplacedPositions) { Preconditions.checkArgument(newReplacedPositions != null, "Invalid replaced positions: null"); Preconditions.checkArgument( isLeafManifest(contentType), - "Replaced positions can only be added to manifest entries, but entry type is: %s", + "Cannot add replaced positions for file with content: %s", contentType); - this.replacedPositions = newReplacedPositions; + trackingBuilder.replacedPositions(newReplacedPositions); return this; } @@ -315,7 +311,6 @@ TrackedFile build() { Preconditions.checkArgument(recordCount != null, "Missing required field: record count"); Preconditions.checkArgument( fileSizeInBytes != null, "Missing required field: file size in bytes"); - Preconditions.checkArgument(partitionData != null, "Missing required field: partition data"); Preconditions.checkArgument( !isLeafManifest(contentType) || manifestInfo != null, "Missing required field: manifest info"); @@ -323,30 +318,13 @@ TrackedFile build() { contentType != FileContent.EQUALITY_DELETES || equalityIds != null, "Missing required field: equality IDs"); - TrackingBuilder trackingBuilder = - sourceTracking == null - ? TrackingBuilder.added(snapshotId) - : TrackingBuilder.from(sourceTracking, snapshotId); - - if (dvUpdated) { - trackingBuilder.dvUpdated(); - } - - if (deletedPositions != null) { - trackingBuilder.deletedPositions(deletedPositions); - } - - if (replacedPositions != null) { - trackingBuilder.replacedPositions(replacedPositions); - } - return new TrackedFileStruct( trackingBuilder.build(), contentType, formatVersion, location, fileFormat, - partitionData, + partition, recordCount, fileSizeInBytes, specId, diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 0f08b59e150d..8242be38e94a 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -163,64 +163,6 @@ void testBuilderMissingRequiredFields() { .hasMessage("Missing required value: cardinality"); } - @Test - void testDvEquality() { - DeletionVectorStruct dv = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv.puffin") - .offset(256L) - .sizeInBytes(128L) - .cardinality(42L) - .build(); - - DeletionVectorStruct sameDv = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv.puffin") - .offset(256L) - .sizeInBytes(128L) - .cardinality(42L) - .build(); - - DeletionVectorStruct dvWithDifferentLocation = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv2.puffin") - .offset(256L) - .sizeInBytes(128L) - .cardinality(42L) - .build(); - - DeletionVectorStruct dvWithDifferentOffset = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv.puffin") - .offset(1L) - .sizeInBytes(128L) - .cardinality(42L) - .build(); - - DeletionVectorStruct dvWithDifferentSize = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv.puffin") - .offset(256L) - .sizeInBytes(8L) - .cardinality(42L) - .build(); - - DeletionVectorStruct dvWithDifferentCardinality = - DeletionVectorStruct.builder() - .location("s3://bucket/data/dv.puffin") - .offset(256L) - .sizeInBytes(128L) - .cardinality(2L) - .build(); - - assertThat(dv).isEqualTo(dv); - assertThat(dv).isEqualTo(sameDv); - assertThat(dv).isNotEqualTo(dvWithDifferentLocation); - assertThat(dv).isNotEqualTo(dvWithDifferentOffset); - assertThat(dv).isNotEqualTo(dvWithDifferentSize); - assertThat(dv).isNotEqualTo(dvWithDifferentCardinality); - } - @Test void testBuilderRejectsInvalidValuesAtSetter() { assertThatThrownBy(() -> DeletionVectorStruct.builder().location(null)) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java index f4d5675ee94a..809a34a5fbf8 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileAdapters.java @@ -60,6 +60,8 @@ class TestTrackedFileAdapters { private static final PartitionData PARTITION = partition("books"); // Tracking field ordinals, looked up from the schema so the tests do not hard-code offsets. + private static final int STATUS_ORDINAL = ordinalOf(Tracking.schema(), "status"); + private static final int SNAPSHOT_ID_ORDINAL = ordinalOf(Tracking.schema(), "snapshot_id"); private static final int DATA_SEQUENCE_NUMBER_ORDINAL = ordinalOf(Tracking.schema(), "sequence_number"); private static final int FILE_SEQUENCE_NUMBER_ORDINAL = @@ -68,31 +70,26 @@ class TestTrackedFileAdapters { // manifestPos is appended after the tracking schema fields by the manifest reader. private static final int MANIFEST_POS_ORDINAL = Tracking.schema().fields().size(); - // TrackedFile optional field ordinals, looked up from the schema. - private static final Types.StructType TRACKED_FILE_SCHEMA = - TrackedFile.schemaWithContentStats(Types.StructType.of(), Types.StructType.of()); - private static final int CONTENT_TYPE_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "content_type"); - private static final int SPEC_ID_ORDINAL = ordinalOf(TRACKED_FILE_SCHEMA, "spec_id"); - private static final int DELETION_VECTOR_ORDINAL = - ordinalOf(TRACKED_FILE_SCHEMA, "deletion_vector"); - @Test void testDataFileAdapterDelegation() { TrackedFile file = - TrackedFileBuilder.data(42L) - .formatVersion(FORMAT_VERSION_V4) - .location(DATA_FILE_LOCATION) - .fileFormat(FileFormat.PARQUET) - .partition(PARTITION) - .recordCount(100L) - .fileSizeInBytes(1024L) - .specId(PARTITIONED_SPEC_ID) - .contentStats(createContentStats()) - .sortOrderId(3) - .keyMetadata(ByteBuffer.wrap(new byte[] {1, 2, 3})) - .splitOffsets(ImmutableList.of(50L, 100L)) - .build(); - populateTrackingFields(file); + new TrackedFileStruct( + createTracking(), + FileContent.DATA, + FORMAT_VERSION_V4, + DATA_FILE_LOCATION, + FileFormat.PARQUET, + PARTITION, + 100L, + 1024L, + PARTITIONED_SPEC_ID, + createContentStats(), + 3, + null, + null, + ByteBuffer.wrap(new byte[] {1, 2, 3}), + ImmutableList.of(50L, 100L), + null); DataFile dataFile = TrackedFileAdapters.asDataFile(file, specsById(PARTITIONED_SPEC)); @@ -129,7 +126,7 @@ void testDataFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "DATA") void testDataFileAdapterRejectsNonDataContent(FileContent contentType) { - TrackedFileStruct file = dummyTrackedFile(contentType); + TrackedFileStruct file = trackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -139,21 +136,23 @@ void testDataFileAdapterRejectsNonDataContent(FileContent contentType) { @Test void testEqualityDeleteFileAdapterDelegation() { TrackedFile file = - TrackedFileBuilder.equalityDelete(42L) - .formatVersion(FORMAT_VERSION_V4) - .location("s3://bucket/eq-delete.avro") - .fileFormat(FileFormat.AVRO) - .partition(PARTITION) - .recordCount(50L) - .fileSizeInBytes(512L) - .specId(PARTITIONED_SPEC_ID) - .contentStats(createContentStats()) - .sortOrderId(5) - .keyMetadata(ByteBuffer.wrap(new byte[] {4, 5})) - .splitOffsets(ImmutableList.of(200L)) - .equalityIds(ImmutableList.of(1, 2, 3)) - .build(); - populateTrackingFields(file); + new TrackedFileStruct( + createTracking(), + FileContent.EQUALITY_DELETES, + FORMAT_VERSION_V4, + "s3://bucket/eq-delete.avro", + FileFormat.AVRO, + PARTITION, + 50L, + 512L, + PARTITIONED_SPEC_ID, + createContentStats(), + 5, + null, + null, + ByteBuffer.wrap(new byte[] {4, 5}), + ImmutableList.of(200L), + ImmutableList.of(1, 2, 3)); DeleteFile deleteFile = TrackedFileAdapters.asEqualityDeleteFile(file, specsById(PARTITIONED_SPEC)); @@ -191,7 +190,7 @@ void testEqualityDeleteFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "EQUALITY_DELETES") void testEqualityDeleteFileAdapterRejectsNonEqualityContent(FileContent contentType) { - TrackedFileStruct file = dummyTrackedFile(contentType); + TrackedFileStruct file = trackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asEqualityDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -200,26 +199,26 @@ void testEqualityDeleteFileAdapterRejectsNonEqualityContent(FileContent contentT @Test void testDVDeleteFileAdapterDelegation() { - DeletionVector dv = - DeletionVectorStruct.builder() - .location(DV_LOCATION) - .offset(128L) - .sizeInBytes(256L) - .cardinality(10L) - .build(); + DeletionVector dv = deletionVector(); TrackedFile file = - TrackedFileBuilder.data(42L) - .formatVersion(FORMAT_VERSION_V4) - .location(DATA_FILE_LOCATION) - .fileFormat(FileFormat.PARQUET) - .partition(PARTITION) - .recordCount(100L) - .fileSizeInBytes(1024L) - .specId(PARTITIONED_SPEC_ID) - .deletionVector(dv) - .build(); - populateTrackingFields(file); + new TrackedFileStruct( + createTracking(), + FileContent.DATA, + FORMAT_VERSION_V4, + DATA_FILE_LOCATION, + FileFormat.PARQUET, + PARTITION, + 100L, + 1024L, + PARTITIONED_SPEC_ID, + null, + null, + dv, + null, + null, + null, + null); DeleteFile dvFile = TrackedFileAdapters.asDVDeleteFile(file, specsById(PARTITIONED_SPEC)); @@ -260,7 +259,7 @@ void testDVDeleteFileAdapterDelegation() { @ParameterizedTest @EnumSource(value = FileContent.class, mode = EnumSource.Mode.EXCLUDE, names = "DATA") void testDVDeleteFileAdapterRejectsNonDataContent(FileContent contentType) { - TrackedFileStruct file = dummyTrackedFile(contentType); + TrackedFileStruct file = trackedFile(contentType); assertThatThrownBy(() -> TrackedFileAdapters.asDVDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -269,7 +268,7 @@ void testDVDeleteFileAdapterRejectsNonDataContent(FileContent contentType) { @Test void testDVDeleteFileAdapterRejectsNullDeletionVector() { - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); + TrackedFileStruct file = trackedFile(FileContent.DATA); assertThatThrownBy(() -> TrackedFileAdapters.asDVDeleteFile(file, UNPARTITIONED)) .isInstanceOf(IllegalArgumentException.class) @@ -278,7 +277,7 @@ void testDVDeleteFileAdapterRejectsNullDeletionVector() { @Test void testNullContentStatsReturnsNullStats() { - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); + TrackedFileStruct file = trackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, UNPARTITIONED); @@ -294,19 +293,35 @@ void testNullTrackingReturnsNullTrackingFields() { // Files read before manifest inheritance have no tracking; tracking-derived fields must be // null rather than throwing. assertNullTrackingFields( - TrackedFileAdapters.asDataFile(dummyTrackedFile(FileContent.DATA), UNPARTITIONED)); + TrackedFileAdapters.asDataFile(trackedFile(FileContent.DATA), UNPARTITIONED)); assertNullTrackingFields( TrackedFileAdapters.asEqualityDeleteFile( - dummyTrackedFile(FileContent.EQUALITY_DELETES), UNPARTITIONED)); - - TrackedFileStruct dvFile = dummyTrackedFile(FileContent.DATA); - dvFile.set(DELETION_VECTOR_ORDINAL, deletionVector()); - assertNullTrackingFields(TrackedFileAdapters.asDVDeleteFile(dvFile, UNPARTITIONED)); + trackedFile(FileContent.EQUALITY_DELETES), UNPARTITIONED)); + + TrackedFileStruct fileWithDV = + new TrackedFileStruct( + null, + FileContent.DATA, + FORMAT_VERSION_V4, + "s3://bucket/file", + FileFormat.PARQUET, + null, + 1L, + 1L, + null, + null, + null, + deletionVector(), + null, + null, + null, + null); + assertNullTrackingFields(TrackedFileAdapters.asDVDeleteFile(fileWithDV, UNPARTITIONED)); } @Test void testUnpartitionedFilePartitionIsEmpty() { - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); + TrackedFileStruct file = trackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, UNPARTITIONED); @@ -317,7 +332,7 @@ void testUnpartitionedFilePartitionIsEmpty() { @Test void testNullSpecIdResolvesToUnpartitionedSpec() { PartitionSpec unpartitioned = PartitionSpec.builderFor(new Schema()).withSpecId(5).build(); - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); + TrackedFileStruct file = trackedFile(FileContent.DATA); DataFile dataFile = TrackedFileAdapters.asDataFile(file, specsById(unpartitioned)); @@ -328,7 +343,7 @@ void testNullSpecIdResolvesToUnpartitionedSpec() { void testNullSpecIdThrowsWhenNoUnpartitionedSpec() { Schema schema = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get())); PartitionSpec partitioned = PartitionSpec.builderFor(schema).identity("id").build(); - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); + TrackedFileStruct file = trackedFile(FileContent.DATA); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, specsById(partitioned))) .isInstanceOf(IllegalArgumentException.class) @@ -337,8 +352,15 @@ void testNullSpecIdThrowsWhenNoUnpartitionedSpec() { @Test void testUnknownSpecIdThrows() { - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); - file.set(SPEC_ID_ORDINAL, 99); + TrackedFile file = + TrackedFileBuilder.data(42L) + .formatVersion(FORMAT_VERSION_V4) + .location(DATA_FILE_LOCATION) + .fileFormat(FileFormat.PARQUET) + .partition(PARTITIONED_SPEC, PARTITION) + .recordCount(100L) + .fileSizeInBytes(1024L) + .build(); assertThatThrownBy(() -> TrackedFileAdapters.asDataFile(file, ImmutableMap.of())) .isInstanceOf(IllegalArgumentException.class) @@ -348,8 +370,15 @@ void testUnknownSpecIdThrows() { @Test void testSpecIdMismatchThrows() { int mismatchedSpecId = PARTITIONED_SPEC_ID + 1; - TrackedFileStruct file = dummyTrackedFile(FileContent.DATA); - file.set(SPEC_ID_ORDINAL, PARTITIONED_SPEC_ID); + TrackedFile file = + TrackedFileBuilder.data(42L) + .formatVersion(FORMAT_VERSION_V4) + .location(DATA_FILE_LOCATION) + .fileFormat(FileFormat.PARQUET) + .partition(PARTITIONED_SPEC, PARTITION) + .recordCount(100L) + .fileSizeInBytes(1024L) + .build(); PartitionSpec mismatched = PartitionSpec.builderFor(PARTITION_SCHEMA) .identity("category") @@ -386,19 +415,36 @@ private static PartitionData partition(String category) { } /** Minimal file with no tracking, used by the rejection and null-tracking tests. */ - private static TrackedFileStruct dummyTrackedFile(FileContent contentType) { - TrackedFileStruct file = new TrackedFileStruct(); - file.set(CONTENT_TYPE_ORDINAL, contentType.id()); - return file; + private static TrackedFileStruct trackedFile(FileContent contentType) { + return new TrackedFileStruct( + null, + contentType, + FORMAT_VERSION_V4, + "s3://bucket/file", + FileFormat.PARQUET, + null, + 1L, + 1L, + null, + null, + null, + null, + null, + null, + null, + null); } - private static void populateTrackingFields(TrackedFile file) { - TrackingStruct tracking = (TrackingStruct) file.tracking(); + private static TrackingStruct createTracking() { + TrackingStruct tracking = new TrackingStruct(); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, 42L); tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, DATA_SEQUENCE_NUMBER); tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, FILE_SEQUENCE_NUMBER); tracking.set(FIRST_ROW_ID_ORDINAL, FIRST_ROW_ID); tracking.setManifestLocation(MANIFEST_LOCATION); tracking.set(MANIFEST_POS_ORDINAL, MANIFEST_POS); + return tracking; } private static DeletionVector deletionVector() { diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java index d6b0701fdfd7..cd83f34f249f 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java @@ -36,9 +36,12 @@ public class TestTrackedFileBuilder { private static final Schema TABLE_SCHEMA = new Schema( optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); - private static final Types.StructType PARTITION_TYPE = - PartitionSpec.builderFor(TABLE_SCHEMA).identity("id").build().partitionType(); + private static final PartitionSpec PARTITION_SPEC = + PartitionSpec.builderFor(TABLE_SCHEMA).identity("id").build(); + private static final Types.StructType PARTITION_TYPE = PARTITION_SPEC.partitionType(); private static final PartitionData PARTITION_DATA = new PartitionData(PARTITION_TYPE); + private static final PartitionData EMPTY_PARTITION_DATA = + new PartitionData(Types.StructType.of()); private static final ManifestInfo MANIFEST_INFO = ManifestInfoStruct.builder() .addedFilesCount(10) @@ -91,8 +94,7 @@ private static Stream missingRequiredFieldCases() { Arguments.of("location", "Missing required field: location"), Arguments.of("fileFormat", "Missing required field: file format"), Arguments.of("recordCount", "Missing required field: record count"), - Arguments.of("fileSizeInBytes", "Missing required field: file size in bytes"), - Arguments.of("partition", "Missing required field: partition data")); + Arguments.of("fileSizeInBytes", "Missing required field: file size in bytes")); } @ParameterizedTest @@ -139,7 +141,7 @@ private TrackedFileBuilder builderWithMissingRequiredField( builder.fileSizeInBytes(12345L); } if (!"partition".equals(missingField)) { - builder.partition(PARTITION_DATA); + builder.partition(PARTITION_SPEC, PARTITION_DATA); } return builder; } @@ -155,7 +157,7 @@ public void missingFieldsForManifests(TrackedFileBuilder builder, FileContent co .fileFormat(FileFormat.AVRO) .recordCount(420L) .fileSizeInBytes(556L) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Missing required field: manifest info"); @@ -171,7 +173,7 @@ public void missingEqualityIdsForEqualityDeletes() { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Missing required field: equality IDs"); @@ -182,11 +184,15 @@ private static Stream nonEqualityDeleteBuilders() { Arguments.of(TrackedFileBuilder.data(10L), FileContent.DATA), Arguments.of(TrackedFileBuilder.dataManifest(10L), FileContent.DATA_MANIFEST), Arguments.of(TrackedFileBuilder.deleteManifest(10L), FileContent.DELETE_MANIFEST), - Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), FileContent.DATA), Arguments.of( - TrackedFileBuilder.from(sourceDataManifest(21L), 25L), FileContent.DATA_MANIFEST), + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceData(12L), 7L), 20L), + FileContent.DATA), Arguments.of( - TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), FileContent.DELETE_MANIFEST)); + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceDataManifest(21L), 7L), 25L), + FileContent.DATA_MANIFEST), + Arguments.of( + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceDeleteManifest(12L), 7L), 20L), + FileContent.DELETE_MANIFEST)); } @ParameterizedTest @@ -195,9 +201,7 @@ public void invalidEqualityIdsForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.equalityIds(ImmutableList.of(1))) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: " - + contentType); + .hasMessage("Cannot add equality IDs for file with content: " + contentType); } private static Stream nonDataBuilders() { @@ -206,11 +210,14 @@ private static Stream nonDataBuilders() { Arguments.of(TrackedFileBuilder.dataManifest(10L), FileContent.DATA_MANIFEST), Arguments.of(TrackedFileBuilder.deleteManifest(10L), FileContent.DELETE_MANIFEST), Arguments.of( - TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), FileContent.EQUALITY_DELETES), + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceEqualityDelete(12L), 7L), 20L), + FileContent.EQUALITY_DELETES), Arguments.of( - TrackedFileBuilder.from(sourceDataManifest(21L), 25L), FileContent.DATA_MANIFEST), + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceDataManifest(21L), 7L), 25L), + FileContent.DATA_MANIFEST), Arguments.of( - TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), FileContent.DELETE_MANIFEST)); + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceDeleteManifest(12L), 7L), 20L), + FileContent.DELETE_MANIFEST)); } @ParameterizedTest @@ -219,8 +226,7 @@ public void invalidDeletionVectorForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.deletionVector(DELETION_VECTOR)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Deletion vector can only be added to DATA entries, but entry type is: " + contentType); + .hasMessage("Cannot add deletion vector for file with content: %s", contentType); } @ParameterizedTest @@ -229,8 +235,7 @@ public void invalidSortOrderIdForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.sortOrderId(1)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Sort order ID cannot be added to manifest entries, but entry type is: " + contentType); + .hasMessage("Cannot set sort order for manifest files"); } @ParameterizedTest @@ -239,17 +244,19 @@ public void invalidSplitOffsetsForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.splitOffsets(SPLIT_OFFSETS)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Split offsets cannot be added to manifest entries, but entry type is: " + contentType); + .hasMessage("Cannot set split offsets for manifest files"); } private static Stream nonManifestBuilders() { return Stream.of( Arguments.of(TrackedFileBuilder.data(10L), FileContent.DATA), Arguments.of(TrackedFileBuilder.equalityDelete(10L), FileContent.EQUALITY_DELETES), - Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), FileContent.DATA), Arguments.of( - TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), FileContent.EQUALITY_DELETES)); + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceData(12L), 7L), 20L), + FileContent.DATA), + Arguments.of( + TrackedFileBuilder.from(entryWithInheritedSeqNums(sourceEqualityDelete(12L), 7L), 20L), + FileContent.EQUALITY_DELETES)); } @ParameterizedTest @@ -258,8 +265,7 @@ public void invalidManifestInfoForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.manifestInfo(MANIFEST_INFO)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Manifest info can only be added to manifests, but entry type is: " + contentType); + .hasMessage("Cannot add manifest info for file with content: " + contentType); } @ParameterizedTest @@ -268,9 +274,7 @@ public void invalidDeletedPositionsForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.deletedPositions(DELETED_POSITIONS)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Deleted positions can only be added to manifest entries, but entry type is: " - + contentType); + .hasMessage("Cannot add deleted positions for file with content: " + contentType); } @ParameterizedTest @@ -279,9 +283,7 @@ public void invalidReplacedPositionsForContentType( TrackedFileBuilder builder, FileContent contentType) { assertThatThrownBy(() -> builder.replacedPositions(REPLACED_POSITIONS)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Replaced positions can only be added to manifest entries, but entry type is: " - + contentType); + .hasMessage("Cannot add replaced positions for file with content: " + contentType); } @Test @@ -294,7 +296,11 @@ public void invalidNullInputs() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid file format: null"); - assertThatThrownBy(() -> TrackedFileBuilder.data(30L).partition(null)) + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).partition(null, PARTITION_DATA)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid spec: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).partition(PARTITION_SPEC, null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid partition: null"); @@ -351,6 +357,33 @@ public void invalidNullInputs() { .hasMessage("Invalid source: null"); } + @Test + public void unpartitionedSpecWithNonNullPartitionFails() { + assertThatThrownBy( + () -> + TrackedFileBuilder.data(30L) + .partition(PartitionSpec.unpartitioned(), PARTITION_DATA)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid partition: must be null for unpartitioned spec"); + } + + @Test + public void unpartitionedSpecWithNullPartitionSucceeds() { + PartitionSpec unpartitioned = PartitionSpec.unpartitioned(); + TrackedFile trackedFile = + TrackedFileBuilder.data(50L) + .formatVersion(FORMAT_VERSION_V4) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(unpartitioned, null) + .build(); + + assertThat(trackedFile.specId()).isEqualTo(unpartitioned.specId()); + assertThat(trackedFile.partition()).isEqualTo(EMPTY_PARTITION_DATA); + } + @Test public void invalidNegativeInputs() { assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).formatVersion(-1)) @@ -365,10 +398,6 @@ public void invalidNegativeInputs() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid file size in bytes: -1 (must be >= 0)"); - assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).specId(-1)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid spec ID: -1 (must be >= 0)"); - assertThatThrownBy(() -> TrackedFileBuilder.data(40L).sortOrderId(-1)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid sort order ID: -1 (must be >= 0)"); @@ -383,7 +412,6 @@ public void buildDataFileWithRequiredFieldsOnly() { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .partition(PARTITION_DATA) .build(); assertThat(trackedFile.formatVersion()).isEqualTo(FORMAT_VERSION_V4); @@ -392,7 +420,7 @@ public void buildDataFileWithRequiredFieldsOnly() { assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); assertThat(trackedFile.recordCount()).isEqualTo(2000L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.partition()).isEqualTo(EMPTY_PARTITION_DATA); assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); @@ -417,8 +445,7 @@ public void buildDataFileWithAllFields() { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .specId(7) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .contentStats(CONTENT_STATS) .sortOrderId(3) .deletionVector(DELETION_VECTOR) @@ -432,8 +459,8 @@ public void buildDataFileWithAllFields() { assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); assertThat(trackedFile.recordCount()).isEqualTo(2000L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); - assertThat(trackedFile.specId()).isEqualTo(7); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.specId()).isEqualTo(PARTITION_SPEC.specId()); + assertThat(trackedFile.partition()).isEqualTo(PARTITION_DATA); assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); assertThat(trackedFile.sortOrderId()).isEqualTo(3); assertThat(trackedFile.deletionVector()).isSameAs(DELETION_VECTOR); @@ -458,7 +485,6 @@ public void buildEqualityDeleteFileWithRequiredFieldsOnly() { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .partition(PARTITION_DATA) .equalityIds(ImmutableList.of(1)) .build(); @@ -468,7 +494,7 @@ public void buildEqualityDeleteFileWithRequiredFieldsOnly() { assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); assertThat(trackedFile.recordCount()).isEqualTo(2000L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.partition()).isEqualTo(EMPTY_PARTITION_DATA); assertThat(trackedFile.equalityIds()).containsExactly(1); assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); @@ -492,8 +518,7 @@ public void buildEqualityDeleteFileWithAllFields() { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .specId(7) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .contentStats(CONTENT_STATS) .keyMetadata(KEY_METADATA) .splitOffsets(SPLIT_OFFSETS) @@ -507,8 +532,8 @@ public void buildEqualityDeleteFileWithAllFields() { assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); assertThat(trackedFile.recordCount()).isEqualTo(2000L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); - assertThat(trackedFile.specId()).isEqualTo(7); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.specId()).isEqualTo(PARTITION_SPEC.specId()); + assertThat(trackedFile.partition()).isEqualTo(PARTITION_DATA); assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); assertThat(trackedFile.splitOffsets()).isEqualTo(SPLIT_OFFSETS); @@ -540,7 +565,6 @@ public void buildManifestWithRequiredFieldsOnly( .fileFormat(FileFormat.AVRO) .recordCount(420L) .fileSizeInBytes(556L) - .partition(PARTITION_DATA) .manifestInfo(MANIFEST_INFO) .build(); @@ -550,7 +574,7 @@ public void buildManifestWithRequiredFieldsOnly( assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); assertThat(trackedFile.recordCount()).isEqualTo(420L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.partition()).isEqualTo(EMPTY_PARTITION_DATA); assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); @@ -575,8 +599,7 @@ public void buildManifestWithAllFields(TrackedFileBuilder builder, FileContent c .fileFormat(FileFormat.AVRO) .recordCount(420L) .fileSizeInBytes(556L) - .specId(7) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .contentStats(CONTENT_STATS) .keyMetadata(KEY_METADATA) .manifestInfo(MANIFEST_INFO) @@ -588,8 +611,8 @@ public void buildManifestWithAllFields(TrackedFileBuilder builder, FileContent c assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); assertThat(trackedFile.recordCount()).isEqualTo(420L); assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); - assertThat(trackedFile.specId()).isEqualTo(7); - assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.specId()).isEqualTo(PARTITION_SPEC.specId()); + assertThat(trackedFile.partition()).isEqualTo(PARTITION_DATA); assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); @@ -698,7 +721,7 @@ public void updateDVWhenBuildingDataFileFromSource() { } @Test - public void addingSameDeletionVectorFails() { + public void addingInvalidDeletionVectorFails() { TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); DeletionVector dv = @@ -709,16 +732,28 @@ public void addingSameDeletionVectorFails() { .cardinality(40L) .build(); - DeletionVector dvCopy = dv.copy(); + DeletionVector sameDVWithDifferentCardinality = + DeletionVectorStruct.builder() + .location("s3://bucket/data/new_dv.puffin") + .offset(5L) + .sizeInBytes(256L) + .cardinality(50L) + .build(); TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).deletionVector(dv).build(); assertThatThrownBy(() -> TrackedFileBuilder.from(trackedFile, 30L).deletionVector(dv)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("The same deletion vector already added"); - assertThatThrownBy(() -> TrackedFileBuilder.from(trackedFile, 30L).deletionVector(dvCopy)) + .hasMessage( + "Invalid DV update, cardinality must increase: existing=%s, new=%s", + dv.cardinality(), dv.cardinality()); + + assertThatThrownBy( + () -> + TrackedFileBuilder.from(trackedFile, 30L) + .deletionVector(sameDVWithDifferentCardinality)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("The same deletion vector already added"); + .hasMessage("Invalid DV update: same location and offset"); } private static Stream nonManifestSources() { @@ -764,8 +799,7 @@ public void replacedFromManifestSourceFails(TrackedFile source, FileContent cont assertThatThrownBy(() -> TrackedFileBuilder.replaced(source, 20L)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage( - "Manifest entries cannot transition to REPLACED, but entry type is: " + contentType); + .hasMessage("Cannot transition manifest files to REPLACED"); } private static TrackedFile sourceData(long snapshotId) { @@ -775,8 +809,7 @@ private static TrackedFile sourceData(long snapshotId) { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .partition(PARTITION_DATA) - .specId(7) + .partition(PARTITION_SPEC, PARTITION_DATA) .contentStats(CONTENT_STATS) .sortOrderId(3) .deletionVector(DELETION_VECTOR) @@ -792,7 +825,7 @@ private static TrackedFile sourceEqualityDelete(long snapshotId) { .fileFormat(FileFormat.PARQUET) .recordCount(2000L) .fileSizeInBytes(12345L) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .equalityIds(ImmutableList.of(1)) .build(); } @@ -804,7 +837,7 @@ private static TrackedFile sourceDataManifest(long snapshotId) { .fileFormat(FileFormat.PARQUET) .recordCount(420L) .fileSizeInBytes(556L) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .manifestInfo(MANIFEST_INFO) .build(); } @@ -816,7 +849,7 @@ private static TrackedFile sourceDeleteManifest(long snapshotId) { .fileFormat(FileFormat.PARQUET) .recordCount(100L) .fileSizeInBytes(543L) - .partition(PARTITION_DATA) + .partition(PARTITION_SPEC, PARTITION_DATA) .manifestInfo(MANIFEST_INFO) .build(); } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 8e0d5c06824b..4834318758be 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -31,10 +31,13 @@ class TestTrackedFileStruct { private static final int FORMAT_VERSION_V4 = 4; - private static final Types.StructType PARTITION_TYPE = - Types.StructType.of( - Types.NestedField.optional(1000, "id_bucket", Types.IntegerType.get()), - Types.NestedField.optional(1001, "category", Types.StringType.get())); + private static final Schema TABLE_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "category", Types.StringType.get())); + private static final PartitionSpec PARTITION_SPEC = + PartitionSpec.builderFor(TABLE_SCHEMA).bucket("id", 16).identity("category").build(); + private static final Types.StructType PARTITION_TYPE = PARTITION_SPEC.partitionType(); // Ordinals looked up from the TrackedFile schema so tests don't hard-code positions. private static final List SCHEMA_FIELDS = @@ -388,10 +391,9 @@ static TrackedFileStruct createFullTrackedFile() { .formatVersion(FORMAT_VERSION_V4) .location("s3://bucket/data/file.parquet") .fileFormat(FileFormat.PARQUET) - .partition(newPartition(7, "music")) + .partition(PARTITION_SPEC, newPartition(7, "music")) .recordCount(100L) .fileSizeInBytes(1024L) - .specId(0) .sortOrderId(1) .deletionVector(dv) .keyMetadata(ByteBuffer.wrap(new byte[] {1, 2, 3})) @@ -465,10 +467,8 @@ static TrackedFileStruct createTrackedFileWithStats() { .formatVersion(FORMAT_VERSION_V4) .location("s3://bucket/data/file.parquet") .fileFormat(FileFormat.PARQUET) - .partition(new PartitionData(Types.StructType.of())) .recordCount(100L) .fileSizeInBytes(1024L) - .specId(0) .contentStats(stats) .build(); }