Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 64 additions & 16 deletions crates/iceberg/src/spec/snapshot_summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,22 +506,31 @@ fn update_totals(
},
};

let added = summary
.additional_properties
.get(added_property)
.map_or(0, |value| {
value
.parse::<u64>()
.expect("must be parsable as it was just serialized")
});
let removed = summary
.additional_properties
.get(removed_property)
.map_or(0, |value| {
value
.parse::<u64>()
.expect("must be parsable as it was just serialized")
});
// Parse the added/removed deltas, tolerating an unparsable value by skipping
// the total entirely rather than panicking. Computed metrics always overwrite
// user-supplied summary properties (see `SnapshotProducer::summary`), so a bad
// value should only ever come from a previous snapshot's summary; matching
// iceberg-java's `updateTotal`, we ignore it instead of failing the commit.
let parse_delta = |property: &str| -> Option<u64> {
match summary.additional_properties.get(property) {
None => Some(0),
Some(value) => match value.parse::<u64>() {
Ok(v) => Some(v),
Err(parse_err) => {
tracing::warn!(
"Property '{property}' could not be parsed when computing '{total_property}': {parse_err}. \
Skipping total computation.",
);
None
}
},
}
};

let (Some(added), Some(removed)) = (parse_delta(added_property), parse_delta(removed_property))
else {
return;
};

let new_total = previous_total + added - removed;
summary
Expand Down Expand Up @@ -1156,6 +1165,45 @@ mod tests {
}
}

#[test]
fn test_update_totals_tolerates_unparsable_added_value() {
// A non-integer added value (which can survive in a previous snapshot's
// summary) must not panic the commit. Matching iceberg-java's `updateTotal`
// try/catch, the affected total is skipped while other totals still compute.
let prev_props: HashMap<String, String> = [(TOTAL_DATA_FILES, "8"), (TOTAL_RECORDS, "80")]
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();

let previous_summary = Summary {
operation: Operation::Append,
additional_properties: prev_props,
};

let new_props: HashMap<String, String> =
[(ADDED_DATA_FILES, "not-a-number"), (ADDED_RECORDS, "40")]
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();

let summary = Summary {
operation: Operation::Append,
additional_properties: new_props,
};

// Must not panic.
let updated = update_snapshot_summaries(summary, Some(&previous_summary), false).unwrap();
let props = &updated.additional_properties;

// The total whose added delta was unparsable is skipped...
assert!(
!props.contains_key(TOTAL_DATA_FILES),
"TOTAL_DATA_FILES should be skipped when its added value is unparsable",
);
// ...while a sibling total with valid deltas still computes.
assert_eq!(props.get(TOTAL_RECORDS).unwrap(), "120");
}

#[test]
fn test_update_totals_computed_when_no_previous_summary() {
let new_props: HashMap<String, String> = [
Expand Down
54 changes: 54 additions & 0 deletions crates/iceberg/src/transaction/append.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,60 @@ mod tests {
);
}

#[tokio::test]
async fn test_snapshot_properties_cannot_override_computed_metrics() {
// A user-supplied snapshot property must not shadow a computed metric key
// such as `added-data-files`. Matching iceberg-java, the computed value
// wins, so the summary reflects the real count and a bad value can neither
// corrupt the summary nor panic total computation (see #2184-adjacent fix).
let table = make_v2_minimal_table();
let tx = Transaction::new(&table);

let mut snapshot_properties = HashMap::new();
// Both a benign-but-wrong value and a non-integer value collide with
// computed metric keys; neither should reach the final summary.
snapshot_properties.insert("added-data-files".to_string(), "9999".to_string());
snapshot_properties.insert("added-records".to_string(), "not-a-number".to_string());

let data_file = DataFileBuilder::default()
.content(DataContentType::Data)
.file_path("test/1.parquet".to_string())
.file_format(DataFileFormat::Parquet)
.file_size_in_bytes(100)
.record_count(1)
.partition_spec_id(table.metadata().default_partition_spec_id())
.partition(Struct::from_iter([Some(Literal::long(300))]))
.build()
.unwrap();

let action = tx
.fast_append()
.set_snapshot_properties(snapshot_properties)
.add_data_files(vec![data_file]);
// Must not panic during total computation.
let mut action_commit = Arc::new(action).commit(&table).await.unwrap();
let updates = action_commit.take_updates();

let new_snapshot = if let TableUpdate::AddSnapshot { snapshot } = &updates[0] {
snapshot
} else {
unreachable!()
};
let props = &new_snapshot.summary().additional_properties;

// Computed metric wins over the user's colliding values.
assert_eq!(
props.get("added-data-files").unwrap(),
"1",
"computed added-data-files must override the user-supplied value"
);
assert_eq!(
props.get("added-records").unwrap(),
"1",
"computed added-records must override the user-supplied non-integer value"
);
}

#[tokio::test]
async fn test_append_snapshot_properties() {
let table = make_v2_minimal_table();
Expand Down
14 changes: 12 additions & 2 deletions crates/iceberg/src/transaction/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,18 @@ impl<'a> SnapshotProducer<'a> {

let previous_snapshot = table_metadata.current_snapshot();

let mut additional_properties = summary_collector.build();
additional_properties.extend(self.snapshot_properties.clone());
// User-supplied snapshot properties are applied first, then the computed
// metrics overwrite any colliding keys. This matches iceberg-java
// (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values
// are written after user properties so a user cannot shadow them with a
// bad (or merely wrong) value that would corrupt the snapshot summary.
// User-supplied snapshot properties are applied first, then the computed
// metrics overwrite any colliding keys. This matches iceberg-java
// (`SnapshotProducer.summary`), where computed `added-*`/`total-*` values
// are written after user properties so a user cannot shadow them with a
// bad (or merely wrong) value that would corrupt the snapshot summary.
let mut additional_properties = self.snapshot_properties.clone();
additional_properties.extend(summary_collector.build());

let summary = Summary {
operation: snapshot_produce_operation.operation(),
Expand Down
Loading