From cc7125fb2d8b3fd97342d85d21f8d90f8cc81119 Mon Sep 17 00:00:00 2001 From: Manu Zhang Date: Thu, 21 May 2026 16:31:22 +0800 Subject: [PATCH 1/3] Add DeleteFiles table update API --- src/iceberg/json_serde.cc | 12 ++++++++++++ src/iceberg/table_update.cc | 22 ++++++++++++++++++++++ src/iceberg/table_update.h | 22 ++++++++++++++++++++++ src/iceberg/test/json_serde_test.cc | 18 ++++++++++++++++++ 4 files changed, 74 insertions(+) diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 2d8c22255..0718c7884 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -196,6 +196,7 @@ constexpr std::string_view kActionRemoveStatistics = "remove-statistics"; constexpr std::string_view kActionSetPartitionStatistics = "set-partition-statistics"; constexpr std::string_view kActionRemovePartitionStatistics = "remove-partition-statistics"; +constexpr std::string_view kActionDeleteFiles = "delete-files"; // TableUpdate field constants constexpr std::string_view kUUID = "uuid"; @@ -209,6 +210,7 @@ constexpr std::string_view kSnapshotIds = "snapshot-ids"; constexpr std::string_view kRefName = "ref-name"; constexpr std::string_view kUpdates = "updates"; constexpr std::string_view kRemovals = "removals"; +constexpr std::string_view kFiles = "files"; // TableRequirement type constants constexpr std::string_view kRequirementAssertDoesNotExist = "assert-create"; @@ -1471,6 +1473,12 @@ nlohmann::json ToJson(const TableUpdate& update) { json[kSnapshotId] = u.snapshot_id(); break; } + case TableUpdate::Kind::kDeleteFiles: { + const auto& u = internal::checked_cast(update); + json[kAction] = kActionDeleteFiles; + json[kFiles] = u.files(); + break; + } } return json; } @@ -1672,6 +1680,10 @@ Result> TableUpdateFromJson(const nlohmann::json& j ICEBERG_ASSIGN_OR_RAISE(auto snapshot_id, GetJsonValue(json, kSnapshotId)); return std::make_unique(snapshot_id); } + if (action == kActionDeleteFiles) { + ICEBERG_ASSIGN_OR_RAISE(auto files, GetJsonValue>(json, kFiles)); + return std::make_unique(std::move(files)); + } return JsonParseError("Unknown table update action: {}", action); } diff --git a/src/iceberg/table_update.cc b/src/iceberg/table_update.cc index 01612d765..8083174ee 100644 --- a/src/iceberg/table_update.cc +++ b/src/iceberg/table_update.cc @@ -565,4 +565,26 @@ std::unique_ptr RemovePartitionStatistics::Clone() const { return std::make_unique(snapshot_id_); } +// DeleteFiles + +void DeleteFiles::ApplyTo(TableMetadataBuilder& builder) const { + // DeleteFiles does not alter table metadata. +} + +void DeleteFiles::GenerateRequirements(TableUpdateContext& context) const { + // DeleteFiles does not generate any requirements. +} + +bool DeleteFiles::Equals(const TableUpdate& other) const { + if (other.kind() != Kind::kDeleteFiles) { + return false; + } + const auto& other_delete = internal::checked_cast(other); + return files_ == other_delete.files_; +} + +std::unique_ptr DeleteFiles::Clone() const { + return std::make_unique(files_); +} + } // namespace iceberg::table diff --git a/src/iceberg/table_update.h b/src/iceberg/table_update.h index c75c3fa6a..aaa2d9105 100644 --- a/src/iceberg/table_update.h +++ b/src/iceberg/table_update.h @@ -63,6 +63,7 @@ class ICEBERG_EXPORT TableUpdate { kRemoveStatistics, kSetPartitionStatistics, kRemovePartitionStatistics, + kDeleteFiles, }; virtual ~TableUpdate(); @@ -608,6 +609,27 @@ class ICEBERG_EXPORT RemovePartitionStatistics : public TableUpdate { int64_t snapshot_id_; }; +/// \brief Represents deleting one or more physical files. +class ICEBERG_EXPORT DeleteFiles : public TableUpdate { + public: + explicit DeleteFiles(std::vector files) : files_(std::move(files)) {} + + const std::vector& files() const { return files_; } + + void ApplyTo(TableMetadataBuilder& builder) const override; + + void GenerateRequirements(TableUpdateContext& context) const override; + + Kind kind() const override { return Kind::kDeleteFiles; } + + bool Equals(const TableUpdate& other) const override; + + std::unique_ptr Clone() const override; + + private: + std::vector files_; +}; + } // namespace table } // namespace iceberg diff --git a/src/iceberg/test/json_serde_test.cc b/src/iceberg/test/json_serde_test.cc index f019375d3..1f7abdb09 100644 --- a/src/iceberg/test/json_serde_test.cc +++ b/src/iceberg/test/json_serde_test.cc @@ -649,6 +649,24 @@ TEST(JsonInternalTest, TableUpdateRemovePartitionStatistics) { update); } +TEST(JsonInternalTest, TableUpdateDeleteFiles) { + table::DeleteFiles update( + {"s3://bucket/warehouse/table/metadata/old-1.json", + "s3://bucket/warehouse/table/metadata/old-2.json"}); + nlohmann::json expected = R"({ + "action":"delete-files", + "files":[ + "s3://bucket/warehouse/table/metadata/old-1.json", + "s3://bucket/warehouse/table/metadata/old-2.json" + ] + })"_json; + + EXPECT_EQ(ToJson(update), expected); + auto parsed = TableUpdateFromJson(expected); + ASSERT_THAT(parsed, IsOk()); + EXPECT_EQ(*internal::checked_cast(parsed.value().get()), update); +} + TEST(JsonInternalTest, TableUpdateUnknownAction) { nlohmann::json json = R"({"action":"unknown-action"})"_json; auto result = TableUpdateFromJson(json); From fd64fa47a07a02f96059ccb7e64bd435bdd14c88 Mon Sep 17 00:00:00 2001 From: Manu Zhang Date: Thu, 21 May 2026 17:07:33 +0800 Subject: [PATCH 2/3] Clarify DeleteFiles path matching semantics --- src/iceberg/table_update.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/iceberg/table_update.h b/src/iceberg/table_update.h index aaa2d9105..0385e7335 100644 --- a/src/iceberg/table_update.h +++ b/src/iceberg/table_update.h @@ -610,6 +610,10 @@ class ICEBERG_EXPORT RemovePartitionStatistics : public TableUpdate { }; /// \brief Represents deleting one or more physical files. +/// +/// Similar to Iceberg's Java DeleteFiles API, file paths are treated as exact +/// metadata values. Equivalent but differently-normalized URIs are not +/// considered matches. class ICEBERG_EXPORT DeleteFiles : public TableUpdate { public: explicit DeleteFiles(std::vector files) : files_(std::move(files)) {} From 00cb9fc2ed39f128c7e40b90755664f42b446083 Mon Sep 17 00:00:00 2001 From: Manu Zhang Date: Thu, 21 May 2026 20:25:41 +0800 Subject: [PATCH 3/3] Format DeleteFiles JSON serde changes Co-authored-by: Codex --- src/iceberg/json_serde.cc | 3 ++- src/iceberg/test/json_serde_test.cc | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 0718c7884..34d6351a6 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -1681,7 +1681,8 @@ Result> TableUpdateFromJson(const nlohmann::json& j return std::make_unique(snapshot_id); } if (action == kActionDeleteFiles) { - ICEBERG_ASSIGN_OR_RAISE(auto files, GetJsonValue>(json, kFiles)); + ICEBERG_ASSIGN_OR_RAISE(auto files, + GetJsonValue>(json, kFiles)); return std::make_unique(std::move(files)); } diff --git a/src/iceberg/test/json_serde_test.cc b/src/iceberg/test/json_serde_test.cc index 1f7abdb09..3fe5aaa83 100644 --- a/src/iceberg/test/json_serde_test.cc +++ b/src/iceberg/test/json_serde_test.cc @@ -650,9 +650,8 @@ TEST(JsonInternalTest, TableUpdateRemovePartitionStatistics) { } TEST(JsonInternalTest, TableUpdateDeleteFiles) { - table::DeleteFiles update( - {"s3://bucket/warehouse/table/metadata/old-1.json", - "s3://bucket/warehouse/table/metadata/old-2.json"}); + table::DeleteFiles update({"s3://bucket/warehouse/table/metadata/old-1.json", + "s3://bucket/warehouse/table/metadata/old-2.json"}); nlohmann::json expected = R"({ "action":"delete-files", "files":[