From a73993d705a8cc87733671477768a6f26183ecbe Mon Sep 17 00:00:00 2001 From: boroknagyz Date: Fri, 12 Jul 2024 14:25:39 +0200 Subject: [PATCH] Backport of apache/iceberg#10680 to openhouse-1.5.2. Backport note: the production fix (PartitionSet.java) was cherry-picked unchanged. The two added tests (testValidateWithNullPartition, testValidateWithVoidTransform) were adapted to the JUnit 4 test harness used by TestReplacePartitions in 1.5.2.x (@TestTemplate -> @Test, static assertThatThrownBy -> Assertions.assertThatThrownBy, Files.createTempDirectory(temp, ...) -> temp.newFolder()). Test logic and assertions are unchanged. Core: Fix NPE during conflict handling of NULL partitions (#10680) * Core: Fix NPE during conflict handling of NULL partitions Partition values can be NULLs, or we can have NULLs because of the VOID transforms. If a conflict is found in such partitions we get a NullPointerException instead of a proper error message. * Fix style issues * Use String.valueOf() * Reduce visibility of constant Co-authored-by: Eduard Tudenhoefner * Indentation * Update core/src/main/java/org/apache/iceberg/util/PartitionSet.java --------- Co-authored-by: Fokko Driesprong Co-authored-by: Eduard Tudenhoefner (cherry picked from commit ed228f79cd3e569e04af8a8ab411811803bf3a29) --- .../org/apache/iceberg/util/PartitionSet.java | 2 +- .../apache/iceberg/TestReplacePartitions.java | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/util/PartitionSet.java b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java index eff37fa5a9..184f38b324 100644 --- a/core/src/main/java/org/apache/iceberg/util/PartitionSet.java +++ b/core/src/main/java/org/apache/iceberg/util/PartitionSet.java @@ -200,7 +200,7 @@ public String toString() { StringBuilder partitionStringBuilder = new StringBuilder(); partitionStringBuilder.append(structType.fields().get(i).name()); partitionStringBuilder.append("="); - partitionStringBuilder.append(s.get(i, Object.class).toString()); + partitionStringBuilder.append(s.get(i, Object.class)); partitionDataJoiner.add(partitionStringBuilder.toString()); } } diff --git a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java index e657e7fc43..430af8d996 100644 --- a/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java +++ b/core/src/test/java/org/apache/iceberg/TestReplacePartitions.java @@ -65,6 +65,34 @@ public class TestReplacePartitions extends TableTestBase { .withRecordCount(1) .build(); + static final DataFile FILE_NULL_PARTITION = + DataFiles.builder(SPEC) + .withPath("/path/to/data-null-partition.parquet") + .withFileSizeInBytes(0) + .withPartitionPath("data_bucket=__HIVE_DEFAULT_PARTITION__") + .withRecordCount(0) + .build(); + + // Partition spec with VOID partition transform ("alwaysNull" in Java code.) + static final PartitionSpec SPEC_VOID = + PartitionSpec.builderFor(SCHEMA).alwaysNull("id").bucket("data", BUCKETS_NUMBER).build(); + + static final DataFile FILE_A_VOID_PARTITION = + DataFiles.builder(SPEC_VOID) + .withPath("/path/to/data-a-void-partition.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("id_null=__HIVE_DEFAULT_PARTITION__/data_bucket=0") + .withRecordCount(1) + .build(); + + static final DataFile FILE_B_VOID_PARTITION = + DataFiles.builder(SPEC_VOID) + .withPath("/path/to/data-b-void-partition.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("id_null=__HIVE_DEFAULT_PARTITION__/data_bucket=1") + .withRecordCount(10) + .build(); + static final DeleteFile FILE_UNPARTITIONED_A_DELETES = FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned()) .ofPositionDeletes() @@ -347,6 +375,55 @@ public void testValidateWithDefaultSnapshotId() { + "[data_bucket=0, data_bucket=1]: [/path/to/data-a.parquet]"); } + @Test + public void testValidateWithNullPartition() { + commit(table, table.newReplacePartitions().addFile(FILE_NULL_PARTITION), branch); + + // Concurrent Replace Partitions should fail with ValidationException + ReplacePartitions replace = table.newReplacePartitions(); + Assertions.assertThatThrownBy( + () -> + commit( + table, + replace + .addFile(FILE_NULL_PARTITION) + .addFile(FILE_B) + .validateNoConflictingData() + .validateNoConflictingDeletes(), + branch)) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Found conflicting files that can contain records matching partitions " + + "[data_bucket=null, data_bucket=1]: [/path/to/data-null-partition.parquet]"); + } + + @Test + public void testValidateWithVoidTransform() throws IOException { + File tableDir = temp.newFolder(); + Assert.assertTrue(tableDir.delete()); + + Table tableVoid = TestTables.create(tableDir, "tablevoid", SCHEMA, SPEC_VOID, formatVersion); + commit(tableVoid, tableVoid.newReplacePartitions().addFile(FILE_A_VOID_PARTITION), branch); + + // Concurrent Replace Partitions should fail with ValidationException + ReplacePartitions replace = tableVoid.newReplacePartitions(); + Assertions.assertThatThrownBy( + () -> + commit( + tableVoid, + replace + .addFile(FILE_A_VOID_PARTITION) + .addFile(FILE_B_VOID_PARTITION) + .validateNoConflictingData() + .validateNoConflictingDeletes(), + branch)) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Found conflicting files that can contain records matching partitions " + + "[id_null=null, data_bucket=1, id_null=null, data_bucket=0]: " + + "[/path/to/data-a-void-partition.parquet]"); + } + @Test public void testConcurrentReplaceConflict() { commit(table, table.newFastAppend().appendFile(FILE_A).appendFile(FILE_B), branch);