-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Core: Adjustments for TrackedFileBuilder #16964
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -23,19 +23,19 @@ | |||||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||||||
|
|
||||||
| class TrackedFileBuilder { | ||||||
| private final long snapshotId; | ||||||
| private final FileContent contentType; | ||||||
| private final TrackingBuilder trackingBuilder; | ||||||
|
|
||||||
| // Required fields | ||||||
| private Integer formatVersion = null; | ||||||
| private String location = null; | ||||||
| private FileFormat fileFormat = null; | ||||||
| private Long recordCount = null; | ||||||
| private Long fileSizeInBytes = null; | ||||||
| private PartitionData partitionData = null; | ||||||
|
|
||||||
| // optional fields | ||||||
| private Integer specId = null; | ||||||
| private PartitionData partition = null; | ||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. carry-over comment: "PartitionData should be StructLike. We don't want to rely on concrete classes here."
|
||||||
| private ContentStats contentStats = null; | ||||||
| private Integer sortOrderId = null; | ||||||
| private DeletionVector deletionVector = null; | ||||||
|
|
@@ -44,12 +44,6 @@ class TrackedFileBuilder { | |||||
| private List<Long> splitOffsets = null; | ||||||
| private List<Integer> equalityIds = null; | ||||||
|
|
||||||
| // tracking-related fields | ||||||
| private Tracking sourceTracking = null; | ||||||
| private boolean dvUpdated = false; | ||||||
| private ByteBuffer deletedPositions = null; | ||||||
| private ByteBuffer replacedPositions = null; | ||||||
|
|
||||||
| /** | ||||||
| * Creates a builder for a newly added data file entry. | ||||||
| * | ||||||
|
|
@@ -111,17 +105,15 @@ static TrackedFile deleted(TrackedFile source, long newSnapshotId) { | |||||
| /** | ||||||
| * Returns a REPLACED tracked file derived from {@code source}. | ||||||
| * | ||||||
| * <p>Manifest entries cannot transition to REPLACED. | ||||||
| * <p>Manifest files cannot transition to REPLACED. | ||||||
| * | ||||||
| * @param source source tracked file | ||||||
| * @param newSnapshotId the snapshot ID in which the new tracked file will be committed | ||||||
| */ | ||||||
| static TrackedFile replaced(TrackedFile source, long newSnapshotId) { | ||||||
| Preconditions.checkArgument(source != null, "Invalid source: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| !isLeafManifest(source.contentType()), | ||||||
| "Manifest entries cannot transition to REPLACED, but entry type is: %s", | ||||||
| source.contentType()); | ||||||
| !isLeafManifest(source.contentType()), "Cannot transition manifest files to REPLACED"); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| return terminal(source, TrackingBuilder.replaced(source.tracking(), newSnapshotId)); | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -147,18 +139,17 @@ private static TrackedFile terminal(TrackedFile source, Tracking tracking) { | |||||
|
|
||||||
| private TrackedFileBuilder(FileContent contentType, long snapshotId) { | ||||||
| this.contentType = contentType; | ||||||
| this.snapshotId = snapshotId; | ||||||
| this.trackingBuilder = TrackingBuilder.added(snapshotId); | ||||||
| } | ||||||
|
|
||||||
| private TrackedFileBuilder(TrackedFile source, long snapshotId) { | ||||||
| this.contentType = source.contentType(); | ||||||
| this.snapshotId = snapshotId; | ||||||
| this.formatVersion = source.formatVersion(); | ||||||
| this.location = source.location(); | ||||||
| this.fileFormat = source.fileFormat(); | ||||||
| this.recordCount = source.recordCount(); | ||||||
| this.fileSizeInBytes = source.fileSizeInBytes(); | ||||||
| this.partitionData = (PartitionData) source.partition(); | ||||||
| this.partition = (PartitionData) source.partition(); | ||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. carry-over comment: "We should not need unchecked casts like this." carry-over answer: "
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Casting PartitionData partition = (PartitionData) file.partition();We can't narrow |
||||||
| this.specId = source.specId(); | ||||||
| this.contentStats = source.contentStats(); | ||||||
| this.sortOrderId = source.sortOrderId(); | ||||||
|
|
@@ -167,7 +158,7 @@ private TrackedFileBuilder(TrackedFile source, long snapshotId) { | |||||
| this.keyMetadata = source.keyMetadata(); | ||||||
| this.splitOffsets = source.splitOffsets(); | ||||||
| this.equalityIds = source.equalityIds(); | ||||||
| this.sourceTracking = source.tracking(); | ||||||
| this.trackingBuilder = TrackingBuilder.from(source.tracking(), snapshotId); | ||||||
| } | ||||||
|
|
||||||
| TrackedFileBuilder formatVersion(int newFormatVersion) { | ||||||
|
|
@@ -205,15 +196,16 @@ TrackedFileBuilder fileSizeInBytes(long newFileSizeInBytes) { | |||||
| return this; | ||||||
| } | ||||||
|
|
||||||
| TrackedFileBuilder specId(int newSpecId) { | ||||||
| Preconditions.checkArgument(newSpecId >= 0, "Invalid spec ID: %s (must be >= 0)", newSpecId); | ||||||
| this.specId = newSpecId; | ||||||
| return this; | ||||||
| } | ||||||
|
|
||||||
| TrackedFileBuilder partition(PartitionData newPartitionData) { | ||||||
| Preconditions.checkArgument(newPartitionData != null, "Invalid partition: null"); | ||||||
| this.partitionData = newPartitionData; | ||||||
| TrackedFileBuilder partition(PartitionSpec spec, StructLike newPartition) { | ||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. carry-over comment: "I think that this should be combined with specId: partition(int newSpecId, StructLike partitionTuple). If the tuple is set, then spec ID is required so that we know how to interpret it. I suspect that this should also be responsible for wrapping the partition tuple with a projection to the union type." carry-over answer: "I probably miss something here, but I see that TrackedFile has a PartitionData member. If we want to produce that, is specId and partitionTuple enough? Don't we need a PartitionSpec to produce PartitionData?" |
||||||
| Preconditions.checkArgument(spec != null, "Invalid spec: null"); | ||||||
| if (spec.isUnpartitioned()) { | ||||||
| Preconditions.checkArgument( | ||||||
| newPartition == null, "Invalid partition: must be null for unpartitioned spec"); | ||||||
| } else { | ||||||
| Preconditions.checkArgument(newPartition != null, "Invalid partition: null"); | ||||||
| this.partition = DataFiles.copyPartitionData(spec, newPartition, null); | ||||||
| } | ||||||
| this.specId = spec.specId(); | ||||||
| return this; | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -225,9 +217,7 @@ TrackedFileBuilder contentStats(ContentStats newContentStats) { | |||||
|
|
||||||
| TrackedFileBuilder sortOrderId(int newSortOrderId) { | ||||||
| Preconditions.checkArgument( | ||||||
| !isLeafManifest(contentType), | ||||||
| "Sort order ID cannot be added to manifest entries, but entry type is: %s", | ||||||
| contentType); | ||||||
| !isLeafManifest(contentType), "Cannot set sort order for manifest files"); | ||||||
| Preconditions.checkArgument( | ||||||
| newSortOrderId >= 0, "Invalid sort order ID: %s (must be >= 0)", newSortOrderId); | ||||||
| this.sortOrderId = newSortOrderId; | ||||||
|
|
@@ -238,21 +228,29 @@ TrackedFileBuilder deletionVector(DeletionVector newDeletionVector) { | |||||
| Preconditions.checkArgument(newDeletionVector != null, "Invalid deletion vector: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| contentType == FileContent.DATA, | ||||||
| "Deletion vector can only be added to DATA entries, but entry type is: %s", | ||||||
| "Cannot add deletion vector for file with content: %s", | ||||||
| contentType); | ||||||
| Preconditions.checkArgument( | ||||||
| this.deletionVector == null || !this.deletionVector.equals(newDeletionVector), | ||||||
| "The same deletion vector already added"); | ||||||
| if (deletionVector != null) { | ||||||
| Preconditions.checkArgument( | ||||||
| deletionVector.cardinality() < newDeletionVector.cardinality(), | ||||||
| "Invalid DV update, cardinality must increase: existing=%s, new=%s", | ||||||
| deletionVector == null ? null : deletionVector.cardinality(), | ||||||
| newDeletionVector.cardinality()); | ||||||
| Preconditions.checkArgument( | ||||||
| !deletionVector.location().equals(newDeletionVector.location()) | ||||||
| || deletionVector.offset() != newDeletionVector.offset(), | ||||||
| "Invalid DV update: same location and offset"); | ||||||
| } | ||||||
| this.deletionVector = newDeletionVector; | ||||||
| this.dvUpdated = true; | ||||||
| trackingBuilder.dvUpdated(); | ||||||
| return this; | ||||||
| } | ||||||
|
|
||||||
| TrackedFileBuilder manifestInfo(ManifestInfo newManifestInfo) { | ||||||
| Preconditions.checkArgument(newManifestInfo != null, "Invalid manifest info: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| isLeafManifest(contentType), | ||||||
| "Manifest info can only be added to manifests, but entry type is: %s", | ||||||
| "Cannot add manifest info for file with content: %s", | ||||||
| contentType); | ||||||
| this.manifestInfo = newManifestInfo; | ||||||
| return this; | ||||||
|
|
@@ -267,9 +265,7 @@ TrackedFileBuilder keyMetadata(ByteBuffer newKeyMetadata) { | |||||
| TrackedFileBuilder splitOffsets(List<Long> newSplitOffsets) { | ||||||
| Preconditions.checkArgument(newSplitOffsets != null, "Invalid split offsets: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| !isLeafManifest(contentType), | ||||||
| "Split offsets cannot be added to manifest entries, but entry type is: %s", | ||||||
| contentType); | ||||||
| !isLeafManifest(contentType), "Cannot set split offsets for manifest files"); | ||||||
| this.splitOffsets = newSplitOffsets; | ||||||
| return this; | ||||||
| } | ||||||
|
|
@@ -278,7 +274,7 @@ TrackedFileBuilder equalityIds(List<Integer> newEqualityIds) { | |||||
| Preconditions.checkArgument(newEqualityIds != null, "Invalid equality IDs: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| contentType == FileContent.EQUALITY_DELETES, | ||||||
| "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: %s", | ||||||
| "Cannot add equality IDs for file with content: %s", | ||||||
| contentType); | ||||||
| this.equalityIds = newEqualityIds; | ||||||
| return this; | ||||||
|
|
@@ -288,19 +284,19 @@ TrackedFileBuilder deletedPositions(ByteBuffer newDeletedPositions) { | |||||
| Preconditions.checkArgument(newDeletedPositions != null, "Invalid deleted positions: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| isLeafManifest(contentType), | ||||||
| "Deleted positions can only be added to manifest entries, but entry type is: %s", | ||||||
| "Cannot add deleted positions for file with content: %s", | ||||||
| contentType); | ||||||
| this.deletedPositions = newDeletedPositions; | ||||||
| trackingBuilder.deletedPositions(newDeletedPositions); | ||||||
| return this; | ||||||
| } | ||||||
|
|
||||||
| TrackedFileBuilder replacedPositions(ByteBuffer newReplacedPositions) { | ||||||
| Preconditions.checkArgument(newReplacedPositions != null, "Invalid replaced positions: null"); | ||||||
| Preconditions.checkArgument( | ||||||
| isLeafManifest(contentType), | ||||||
| "Replaced positions can only be added to manifest entries, but entry type is: %s", | ||||||
| "Cannot add replaced positions for file with content: %s", | ||||||
| contentType); | ||||||
| this.replacedPositions = newReplacedPositions; | ||||||
| trackingBuilder.replacedPositions(newReplacedPositions); | ||||||
| return this; | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -315,38 +311,20 @@ TrackedFile build() { | |||||
| Preconditions.checkArgument(recordCount != null, "Missing required field: record count"); | ||||||
| Preconditions.checkArgument( | ||||||
| fileSizeInBytes != null, "Missing required field: file size in bytes"); | ||||||
| Preconditions.checkArgument(partitionData != null, "Missing required field: partition data"); | ||||||
| Preconditions.checkArgument( | ||||||
| !isLeafManifest(contentType) || manifestInfo != null, | ||||||
| "Missing required field: manifest info"); | ||||||
| Preconditions.checkArgument( | ||||||
| contentType != FileContent.EQUALITY_DELETES || equalityIds != null, | ||||||
| "Missing required field: equality IDs"); | ||||||
|
|
||||||
| TrackingBuilder trackingBuilder = | ||||||
| sourceTracking == null | ||||||
| ? TrackingBuilder.added(snapshotId) | ||||||
| : TrackingBuilder.from(sourceTracking, snapshotId); | ||||||
|
|
||||||
| if (dvUpdated) { | ||||||
| trackingBuilder.dvUpdated(); | ||||||
| } | ||||||
|
|
||||||
| if (deletedPositions != null) { | ||||||
| trackingBuilder.deletedPositions(deletedPositions); | ||||||
| } | ||||||
|
|
||||||
| if (replacedPositions != null) { | ||||||
| trackingBuilder.replacedPositions(replacedPositions); | ||||||
| } | ||||||
|
|
||||||
| return new TrackedFileStruct( | ||||||
| trackingBuilder.build(), | ||||||
| contentType, | ||||||
| formatVersion, | ||||||
| location, | ||||||
| fileFormat, | ||||||
| partitionData, | ||||||
| partition, | ||||||
| recordCount, | ||||||
| fileSizeInBytes, | ||||||
| specId, | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can align my PR with this new shape to avoid the individual setters that you commented.
#16936
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
technically, nothing changes from the user's perspective, it's just how we create Tracking internally.