From a492646922cba20a1cd06fe3c8eb150e7ae01aeb Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:08:10 +0200 Subject: [PATCH 01/18] chore: bump iceberg-rust to gb/overwrite-action, fix API breaks Update iceberg-rust rev to a4a353577ad7414b065770ba970c1353325a3adb (RelationalAI/iceberg-rust#76, adds OverwriteAction). Fix three API breaks exposed by the rev bump: - UnzippedIncrementalBatchRecordStream renamed to UnzippedIncrementalScanResult (struct with .appends/.deletes fields instead of a tuple alias) - to_unzipped_arrow() now returns that struct, not a bare tuple - ArrowReader::read() now returns ScanResult; call .stream() to unwrap Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.lock | 18 +++++++++--------- iceberg_rust_ffi/Cargo.toml | 4 ++-- iceberg_rust_ffi/src/full_pipeline.rs | 2 +- iceberg_rust_ffi/src/incremental.rs | 3 ++- iceberg_rust_ffi/src/incremental_pipeline.rs | 12 ++++++------ 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.lock b/iceberg_rust_ffi/Cargo.lock index 952e2f1..d25f2db 100644 --- a/iceberg_rust_ffi/Cargo.lock +++ b/iceberg_rust_ffi/Cargo.lock @@ -109,7 +109,7 @@ dependencies = [ "miniz_oxide", "num-bigint", "quad-rand", - "rand 0.9.2", + "rand 0.9.4", "regex-lite", "serde", "serde_bytes", @@ -1362,7 +1362,7 @@ dependencies = [ "idna", "ipnet", "once_cell", - "rand 0.9.2", + "rand 0.9.4", "ring", "thiserror 2.0.18", "tinyvec", @@ -1405,7 +1405,7 @@ dependencies = [ "moka", "once_cell", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "resolv-conf", "smallvec", "thiserror 2.0.18", @@ -1566,7 +1566,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=418213731e91544f5eb31a3efa459e88f599030e#418213731e91544f5eb31a3efa459e88f599030e" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=a4a353577ad7414b065770ba970c1353325a3adb#a4a353577ad7414b065770ba970c1353325a3adb" dependencies = [ "aes-gcm", "anyhow", @@ -1600,7 +1600,7 @@ dependencies = [ "opendal", "ordered-float 4.6.0", "parquet", - "rand 0.8.5", + "rand 0.9.4", "reqsign", "reqwest", "roaring", @@ -1623,7 +1623,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=418213731e91544f5eb31a3efa459e88f599030e#418213731e91544f5eb31a3efa459e88f599030e" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=a4a353577ad7414b065770ba970c1353325a3adb#a4a353577ad7414b065770ba970c1353325a3adb" dependencies = [ "async-trait", "chrono", @@ -2726,7 +2726,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls", @@ -2790,9 +2790,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index 9f203fe..dfd8a86 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -12,8 +12,8 @@ default = ["julia"] julia = [] [dependencies] -iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "418213731e91544f5eb31a3efa459e88f599030e", features = ["storage-azdls"] } -iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "418213731e91544f5eb31a3efa459e88f599030e" } +iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "a4a353577ad7414b065770ba970c1353325a3adb", features = ["storage-azdls"] } +iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "a4a353577ad7414b065770ba970c1353325a3adb" } object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "79b08071c7a1642532b5891253280861eca9e44e", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" diff --git a/iceberg_rust_ffi/src/full_pipeline.rs b/iceberg_rust_ffi/src/full_pipeline.rs index 320af92..02ce1ab 100644 --- a/iceberg_rust_ffi/src/full_pipeline.rs +++ b/iceberg_rust_ffi/src/full_pipeline.rs @@ -35,7 +35,7 @@ fn read_one_full_scan_file( task: FileScanTask, ) -> iceberg::Result { let task_stream = Box::pin(futures::stream::once(async { Ok(task) })); - reader.read(task_stream) + reader.read(task_stream).map(|r| r.stream()) } /// Full-scan entry point — wraps `create_nested_pipeline` with a closure diff --git a/iceberg_rust_ffi/src/incremental.rs b/iceberg_rust_ffi/src/incremental.rs index f299b64..900208e 100644 --- a/iceberg_rust_ffi/src/incremental.rs +++ b/iceberg_rust_ffi/src/incremental.rs @@ -218,7 +218,8 @@ export_runtime_op!( let (scan_ref, serialization_concurrency) = result_tuple; // Get unzipped streams (separate append and delete streams) - let (inserts_stream, deletes_stream) = scan_ref.to_unzipped_arrow().await.map_err(|e| classify_iceberg(e))?; + let result = scan_ref.to_unzipped_arrow().await.map_err(|e| classify_iceberg(e))?; + let (inserts_stream, deletes_stream) = (result.appends, result.deletes); // Transform both streams with parallel serialization let inserts = IcebergArrowStream { diff --git a/iceberg_rust_ffi/src/incremental_pipeline.rs b/iceberg_rust_ffi/src/incremental_pipeline.rs index 6613328..09229da 100644 --- a/iceberg_rust_ffi/src/incremental_pipeline.rs +++ b/iceberg_rust_ffi/src/incremental_pipeline.rs @@ -17,7 +17,7 @@ //! - delete stream: flat Arrow stream of delete records use futures::{StreamExt, TryStreamExt}; -use iceberg::arrow::{ArrowReader, StreamsInto, UnzippedIncrementalBatchRecordStream}; +use iceberg::arrow::{ArrowReader, StreamsInto, UnzippedIncrementalScanResult}; use iceberg::io::FileIO; use iceberg::scan::incremental::{AppendedFileScanTask, DeleteScanTask}; use tokio::sync::Mutex as AsyncMutex; @@ -35,12 +35,12 @@ fn read_one_append_file( ) -> iceberg::Result { let append = futures::stream::once(async { Ok(task) }).boxed(); let delete = futures::stream::empty::>().boxed(); - let (arrow_stream, _): UnzippedIncrementalBatchRecordStream = - StreamsInto::::stream( + let UnzippedIncrementalScanResult { appends, .. } = + StreamsInto::::stream( (append, delete), reader, )?; - Ok(arrow_stream) + Ok(appends) } /// Build the nested incremental pipeline. @@ -81,8 +81,8 @@ pub async fn create_incremental_nested_pipeline( // Delete stream: StreamsInto with empty append stream routes all delete // tasks through the iceberg reader machinery. - let (_, delete_arrow): UnzippedIncrementalBatchRecordStream = - StreamsInto::::stream( + let UnzippedIncrementalScanResult { deletes: delete_arrow, .. } = + StreamsInto::::stream( ( futures::stream::empty::>().boxed(), delete_tasks, From 2ad549eea06edf24935a2eaaf3786954ac02ebc6 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:12:56 +0200 Subject: [PATCH 02/18] feat(ffi): add OverwriteAction FFI and iceberg_table_list_data_files New FFI surface for atomic table overwrites: - IcebergOverwriteAction: accumulates added + deleted DataFile lists - iceberg_overwrite_action_new / _free - iceberg_overwrite_action_add_data_files: move new files into action - iceberg_overwrite_action_delete_data_files: move files-to-delete into action - iceberg_overwrite_action_apply: calls Transaction::overwrite().apply() - iceberg_table_list_data_files: async walk of manifest list to collect all live DataFile records from the current snapshot (needed so Julia can supply the delete list for a full-table replace) Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/src/lib.rs | 5 +- iceberg_rust_ffi/src/transaction.rs | 225 ++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 1 deletion(-) diff --git a/iceberg_rust_ffi/src/lib.rs b/iceberg_rust_ffi/src/lib.rs index be6eae7..7e78a1b 100644 --- a/iceberg_rust_ffi/src/lib.rs +++ b/iceberg_rust_ffi/src/lib.rs @@ -76,7 +76,10 @@ pub use table::{ IcebergFileScan, IcebergFileScanResponse, IcebergFileScanStream, IcebergFileScanStreamResponse, IcebergTable, IcebergTableResponse, }; -pub use transaction::{IcebergDataFiles, IcebergTransaction, IcebergTransactionResponse}; +pub use transaction::{ + IcebergDataFiles, IcebergDataFilesResponse, IcebergOverwriteAction, IcebergTransaction, + IcebergTransactionResponse, +}; pub use writer::{ IcebergDataFileWriter, IcebergDataFileWriterResponse, IcebergWriterCloseResponse, }; diff --git a/iceberg_rust_ffi/src/transaction.rs b/iceberg_rust_ffi/src/transaction.rs index 5e33cc5..1672b33 100644 --- a/iceberg_rust_ffi/src/transaction.rs +++ b/iceberg_rust_ffi/src/transaction.rs @@ -9,6 +9,7 @@ use crate::response::IcebergBoxedResponse; use crate::table::IcebergTable; use iceberg::spec::DataFile; use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use iceberg::spec::ManifestContentType; // FFI exports use object_store_ffi::{ @@ -83,9 +84,30 @@ impl IcebergFastAppendAction { } } +/// Opaque handle accumulating data files for an OverwriteAction. +pub struct IcebergOverwriteAction { + added_files: Vec, + deleted_files: Vec, +} + +unsafe impl Send for IcebergOverwriteAction {} +unsafe impl Sync for IcebergOverwriteAction {} + +impl IcebergOverwriteAction { + pub fn new() -> Self { + IcebergOverwriteAction { + added_files: Vec::new(), + deleted_files: Vec::new(), + } + } +} + /// Type alias for transaction response pub type IcebergTransactionResponse = IcebergBoxedResponse; +/// Type alias for data files list response +pub type IcebergDataFilesResponse = IcebergBoxedResponse; + /// Free a transaction #[no_mangle] pub extern "C" fn iceberg_transaction_free(transaction: *mut IcebergTransaction) { @@ -310,3 +332,206 @@ export_runtime_op!( transaction: *mut IcebergTransaction, catalog: *mut IcebergCatalog ); + +/// Free an overwrite action. +#[no_mangle] +pub extern "C" fn iceberg_overwrite_action_free(action: *mut IcebergOverwriteAction) { + if !action.is_null() { + unsafe { + let _ = Box::from_raw(action); + } + } +} + +/// Create a new OverwriteAction. +/// +/// # Safety +/// The returned action must be freed with `iceberg_overwrite_action_free` when no longer needed. +#[no_mangle] +pub extern "C" fn iceberg_overwrite_action_new() -> *mut IcebergOverwriteAction { + Box::into_raw(Box::new(IcebergOverwriteAction::new())) +} + +/// Add data files to write in the overwrite snapshot. +/// +/// The data_files handle is consumed — its files are moved into the action. +/// Returns 0 on success, non-zero on error. +#[no_mangle] +pub extern "C" fn iceberg_overwrite_action_add_data_files( + action: *mut IcebergOverwriteAction, + data_files: *mut IcebergDataFiles, + error_message_out: *mut *mut std::ffi::c_char, +) -> i32 { + let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { + if !out.is_null() { + if let Ok(c_str) = std::ffi::CString::new(msg) { + unsafe { *out = c_str.into_raw(); } + } + } + }; + if action.is_null() { + set_error("Null action pointer provided", error_message_out); + return 1; + } + if data_files.is_null() { + set_error("Null data_files pointer provided", error_message_out); + return 1; + } + let action_ref = unsafe { &mut *action }; + let df_ref = unsafe { &mut *data_files }; + action_ref.added_files.extend(std::mem::take(&mut df_ref.data_files)); + 0 +} + +/// Mark data files for deletion in the overwrite snapshot. +/// +/// The data_files handle is consumed — its files are moved into the action. +/// Returns 0 on success, non-zero on error. +#[no_mangle] +pub extern "C" fn iceberg_overwrite_action_delete_data_files( + action: *mut IcebergOverwriteAction, + data_files: *mut IcebergDataFiles, + error_message_out: *mut *mut std::ffi::c_char, +) -> i32 { + let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { + if !out.is_null() { + if let Ok(c_str) = std::ffi::CString::new(msg) { + unsafe { *out = c_str.into_raw(); } + } + } + }; + if action.is_null() { + set_error("Null action pointer provided", error_message_out); + return 1; + } + if data_files.is_null() { + set_error("Null data_files pointer provided", error_message_out); + return 1; + } + let action_ref = unsafe { &mut *action }; + let df_ref = unsafe { &mut *data_files }; + action_ref.deleted_files.extend(std::mem::take(&mut df_ref.data_files)); + 0 +} + +/// Apply an OverwriteAction to a transaction. +/// +/// Consumes the action's file lists and applies them to the transaction via +/// `Transaction::overwrite()`. The action handle should be freed after this call. +/// Returns 0 on success, non-zero on error. +#[no_mangle] +pub extern "C" fn iceberg_overwrite_action_apply( + action: *mut IcebergOverwriteAction, + transaction: *mut IcebergTransaction, + error_message_out: *mut *mut std::ffi::c_char, +) -> i32 { + let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { + if !out.is_null() { + if let Ok(c_str) = std::ffi::CString::new(msg) { + unsafe { *out = c_str.into_raw(); } + } + } + }; + if action.is_null() { + set_error("Null action pointer provided", error_message_out); + return 1; + } + if transaction.is_null() { + set_error("Null transaction pointer provided", error_message_out); + return 1; + } + let action_ref = unsafe { &mut *action }; + let tx_ref = unsafe { &mut *transaction }; + + let added = std::mem::take(&mut action_ref.added_files); + let deleted = std::mem::take(&mut action_ref.deleted_files); + + let tx = match tx_ref.take() { + Some(t) => t, + None => { + set_error( + &format!( + "{}\t{}\tTransaction already consumed", + IcebergErrorCode::STATE_TRANSACTION_CONSUMED as u32, + "Transaction has already been committed or rolled back" + ), + error_message_out, + ); + return 1; + } + }; + + let overwrite_action = tx + .overwrite() + .add_data_files(added) + .delete_data_files(deleted); + + match overwrite_action.apply(tx) { + Ok(new_tx) => { + tx_ref.replace(new_tx); + 0 + } + Err(e) => { + set_error( + &format!("Failed to apply overwrite: {}", e), + error_message_out, + ); + 1 + } + } +} + +// List all live data files in the current snapshot of a table. +// Returns an IcebergDataFiles handle (free with iceberg_data_files_free). +// Returns an empty list if the table has no snapshot. +export_runtime_op!( + iceberg_table_list_data_files, + crate::IcebergDataFilesResponse, + || { + if table.is_null() { + return Err(classified_error( + IcebergErrorCode::STATE_RESOURCE_FREED, + "Resource has been freed", + "Null table pointer provided", + )); + } + let table_ref = unsafe { &*table }; + Ok(table_ref) + }, + table_ref, + async { + let table = &table_ref.table; + let Some(snapshot) = table.metadata().current_snapshot() else { + return Ok::(IcebergDataFiles { + data_files: vec![], + }); + }; + + let manifest_list = snapshot + .load_manifest_list(table.file_io(), &table.metadata_ref()) + .await + .map_err(|e| anyhow::anyhow!("Failed to load manifest list: {e}"))?; + + let mut data_files = Vec::new(); + for manifest_entry in manifest_list.entries() { + if manifest_entry.content != ManifestContentType::Data { + continue; + } + if !manifest_entry.has_added_files() && !manifest_entry.has_existing_files() { + continue; + } + let manifest = manifest_entry + .load_manifest(table.file_io()) + .await + .map_err(|e| anyhow::anyhow!("Failed to load manifest: {e}"))?; + for entry in manifest.entries() { + if entry.is_alive() { + data_files.push(entry.data_file().clone()); + } + } + } + + Ok::(IcebergDataFiles { data_files }) + }, + table: *mut crate::table::IcebergTable +); From 8bc2f58c00e6bc49c419df3bb3937674ce017443 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:15:24 +0200 Subject: [PATCH 03/18] feat(julia): add OverwriteAction bindings and list_data_files - OverwriteAction struct + constructor/free - add_data_files(action, files) / delete_data_files(action, files) - apply(action, tx) / with_overwrite(f, tx) convenience helper - list_data_files(table) -> DataFiles (async, walks manifest list) - All new symbols exported from RustyIceberg Co-Authored-By: Claude Sonnet 4.6 --- src/RustyIceberg.jl | 1 + src/transaction.jl | 165 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/src/RustyIceberg.jl b/src/RustyIceberg.jl index c168678..6762372 100644 --- a/src/RustyIceberg.jl +++ b/src/RustyIceberg.jl @@ -47,6 +47,7 @@ export IcebergTimestampNs, IcebergTimestamptzNs export IcebergString, IcebergUuid, IcebergBinary, IcebergDecimal export Transaction, DataFiles, free_transaction!, free_data_files!, commit, transaction export FastAppendAction, free_fast_append_action!, add_data_files, apply, with_fast_append +export OverwriteAction, free_overwrite_action!, delete_data_files, with_overwrite, list_data_files export DataFileWriter, free_writer!, close_writer, write_columns, set_encode_workers! export WriterConfig, CompressionCodec, UNCOMPRESSED, SNAPPY, GZIP, LZ4, ZSTD export ColumnDescriptor, ColumnBatch, ColumnType diff --git a/src/transaction.jl b/src/transaction.jl index 771d619..d1e2a44 100644 --- a/src/transaction.jl +++ b/src/transaction.jl @@ -350,3 +350,168 @@ function with_fast_append(f::Function, tx::Transaction) end return nothing end + +# --------------------------------------------------------------------------- +# OverwriteAction +# --------------------------------------------------------------------------- + +const DataFilesResponse = Response{Ptr{Cvoid}} + +""" + OverwriteAction + +Opaque handle that accumulates files to add and files to delete for an atomic +overwrite snapshot (`Operation::Overwrite`). + +Create with `OverwriteAction()`, populate with `add_data_files` and +`delete_data_files`, then commit with `apply`. Use `with_overwrite` for +automatic cleanup. +""" +mutable struct OverwriteAction + ptr::Ptr{Cvoid} +end + +function OverwriteAction() + ptr = @ccall rust_lib.iceberg_overwrite_action_new()::Ptr{Cvoid} + if ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "iceberg_overwrite_action_new returned null")) + end + return OverwriteAction(ptr) +end + +function free_overwrite_action!(action::OverwriteAction) + if action.ptr == C_NULL + return nothing + end + @ccall rust_lib.iceberg_overwrite_action_free(action.ptr::Ptr{Cvoid})::Cvoid + action.ptr = C_NULL + return nothing +end + +""" + add_data_files(action::OverwriteAction, data_files::DataFiles) + +Add new data files to be written in the overwrite snapshot. +The `data_files` handle is consumed by this call. +""" +function add_data_files(action::OverwriteAction, data_files::DataFiles) + if action.ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "OverwriteAction has been freed")) + end + if data_files.ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) + end + err_ptr = Ref{Ptr{Cchar}}(C_NULL) + ret = @ccall rust_lib.iceberg_overwrite_action_add_data_files( + action.ptr::Ptr{Cvoid}, + data_files.ptr::Ptr{Cvoid}, + err_ptr::Ptr{Ptr{Cchar}} + )::Cint + if ret != 0 + msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" + throw(IcebergException(UNEXPECTED, "Failed to add data files to overwrite action", msg)) + end + data_files.ptr = C_NULL + return nothing +end + +""" + delete_data_files(action::OverwriteAction, data_files::DataFiles) + +Mark existing data files for deletion in the overwrite snapshot. +The `data_files` handle is consumed by this call. +""" +function delete_data_files(action::OverwriteAction, data_files::DataFiles) + if action.ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "OverwriteAction has been freed")) + end + if data_files.ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) + end + err_ptr = Ref{Ptr{Cchar}}(C_NULL) + ret = @ccall rust_lib.iceberg_overwrite_action_delete_data_files( + action.ptr::Ptr{Cvoid}, + data_files.ptr::Ptr{Cvoid}, + err_ptr::Ptr{Ptr{Cchar}} + )::Cint + if ret != 0 + msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" + throw(IcebergException(UNEXPECTED, "Failed to add delete files to overwrite action", msg)) + end + data_files.ptr = C_NULL + return nothing +end + +""" + apply(action::OverwriteAction, tx::Transaction) + +Apply the overwrite action to the transaction. +""" +function apply(action::OverwriteAction, tx::Transaction) + if action.ptr == C_NULL + throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "OverwriteAction has been freed")) + end + if tx.ptr == C_NULL + throw(IcebergException(STATE_TRANSACTION_CONSUMED, "Transaction has already been committed or rolled back", "Transaction has been freed or consumed")) + end + err_ptr = Ref{Ptr{Cchar}}(C_NULL) + ret = @ccall rust_lib.iceberg_overwrite_action_apply( + action.ptr::Ptr{Cvoid}, + tx.ptr::Ptr{Cvoid}, + err_ptr::Ptr{Ptr{Cchar}} + )::Cint + if ret != 0 + msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" + throw(IcebergException(UNEXPECTED, "Failed to apply overwrite action", msg)) + end + return nothing +end + +""" + with_overwrite(f::Function, tx::Transaction) + +Create an `OverwriteAction`, pass it to `f`, then apply it to `tx`. +Frees the action automatically even on error. + +# Example +```julia +updated_table = with_transaction(table, catalog) do tx + with_overwrite(tx) do action + add_data_files(action, new_files) + delete_data_files(action, old_files) + end +end +``` +""" +function with_overwrite(f::Function, tx::Transaction) + action = OverwriteAction() + try + f(action) + apply(action, tx) + finally + free_overwrite_action!(action) + end + return nothing +end + +""" + list_data_files(table::Table) -> DataFiles + +Return a `DataFiles` handle containing all live data files in the current +snapshot of `table`. Returns an empty handle if the table has no snapshot. + +The returned `DataFiles` must be freed with `free_data_files!` if not passed +to `delete_data_files`. +""" +function list_data_files(table::Table) + response = DataFilesResponse() + async_ccall(response, table) do handle + @ccall rust_lib.iceberg_table_list_data_files( + table::Table, + response::Ref{DataFilesResponse}, + handle::Ptr{Cvoid} + )::Cint + end + @throw_on_error(response, "list_data_files", IcebergException) + return DataFiles(response.value) +end From 9532bc0f7e805eda3778caa691fe20522d49db1f Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:21:23 +0200 Subject: [PATCH 04/18] test: add overwrite action tests using file-based catalog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six testsets, all self-contained with mktempdir + catalog_create_memory: - OverwriteAction lifecycle (new/free/double-free) - list_data_files on empty table - list_data_files after append - Overwrite replaces all existing files (2 appended files → 2 new rows) - Overwrite add-only produces a new snapshot - Two sequential overwrites converge correctly - OverwriteAction error handling (freed action, null DataFiles, consumed tx) All tables freed in finally blocks. Co-Authored-By: Claude Sonnet 4.6 --- test/overwrite_tests.jl | 286 ++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 3 + 2 files changed, 289 insertions(+) create mode 100644 test/overwrite_tests.jl diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl new file mode 100644 index 0000000..e766e0e --- /dev/null +++ b/test/overwrite_tests.jl @@ -0,0 +1,286 @@ +using RustyIceberg +using Test + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + +function _ow_schema() + Schema([ + Field(Int32(1), "id", "long"; required=true), + Field(Int32(2), "value", "double"; required=false), + ]) +end + +function _write_and_append(table, catalog, data; prefix="data") + files = with_data_file_writer(table; prefix) do w + write(w, data) + end + with_transaction(table, catalog) do tx + with_fast_append(tx) do action + add_data_files(action, files) + end + end +end + +# --------------------------------------------------------------------------- +# OverwriteAction lifecycle +# --------------------------------------------------------------------------- + +@testset "OverwriteAction lifecycle" begin + action = RustyIceberg.OverwriteAction() + @test action.ptr != C_NULL + + free_overwrite_action!(action) + @test action.ptr == C_NULL + + # double-free must be a no-op + free_overwrite_action!(action) + @test action.ptr == C_NULL + println("✅ OverwriteAction lifecycle") +end + +# --------------------------------------------------------------------------- +# list_data_files +# --------------------------------------------------------------------------- + +@testset "list_data_files on empty table" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + df = list_data_files(table) + @test df.ptr != C_NULL + free_data_files!(df) + @test df.ptr == C_NULL + finally + table != C_NULL && free_table(table) + free_catalog!(cat) + end + end + println("✅ list_data_files on empty table returns valid empty handle") +end + +@testset "list_data_files after append" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL + updated = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + updated = _write_and_append(table, cat, + (id=Int64[1,2,3], value=[1.1,2.2,3.3])) + + listed = list_data_files(updated) + @test listed.ptr != C_NULL + free_data_files!(listed) + finally + table != C_NULL && free_table(table) + updated != C_NULL && free_table(updated) + free_catalog!(cat) + end + end + println("✅ list_data_files after append returns non-null handle") +end + +# --------------------------------------------------------------------------- +# full overwrite — replace all files +# --------------------------------------------------------------------------- + +@testset "Overwrite replaces all existing files" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL; v1 = C_NULL; v2 = C_NULL; v3 = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + # two appends so we have two data files to delete + v1 = _write_and_append(table, cat, + (id=Int64[1,2,3], value=[1.1,2.2,3.3]); prefix="a") + v2 = _write_and_append(v1, cat, + (id=Int64[4,5], value=[4.4,5.5]); prefix="b") + + before = read_table_data(v2) + @test length(before.id) == 5 + + snap_before = table_current_snapshot_id(v2) + @test !isnothing(snap_before) + + old_files = list_data_files(v2) + new_files = with_data_file_writer(v2; prefix="new") do w + write(w, (id=Int64[10,20], value=[10.0,20.0])) + end + + v3 = with_transaction(v2, cat) do tx + with_overwrite(tx) do action + add_data_files(action, new_files) + delete_data_files(action, old_files) + end + end + + snap_after = table_current_snapshot_id(v3) + @test !isnothing(snap_after) + @test snap_after != snap_before + + after = read_table_data(v3) + @test !isnothing(after) + @test length(after.id) == 2 + @test sort(after.id) == [10, 20] + finally + table != C_NULL && free_table(table) + v1 != C_NULL && free_table(v1) + v2 != C_NULL && free_table(v2) + v3 != C_NULL && free_table(v3) + free_catalog!(cat) + end + end + println("✅ Overwrite replaces all existing files") +end + +# --------------------------------------------------------------------------- +# overwrite with no deletes (add-only via Overwrite snapshot kind) +# --------------------------------------------------------------------------- + +@testset "Overwrite add-only produces a new snapshot" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL; updated = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + new_files = with_data_file_writer(table) do w + write(w, (id=Int64[1], value=[1.0])) + end + + updated = with_transaction(table, cat) do tx + with_overwrite(tx) do action + add_data_files(action, new_files) + end + end + + @test !isnothing(table_current_snapshot_id(updated)) + data = read_table_data(updated) + @test length(data.id) == 1 + finally + table != C_NULL && free_table(table) + updated != C_NULL && free_table(updated) + free_catalog!(cat) + end + end + println("✅ Overwrite add-only produces a new snapshot") +end + +# --------------------------------------------------------------------------- +# two sequential overwrites +# --------------------------------------------------------------------------- + +@testset "Two sequential overwrites" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL; v1 = C_NULL; v2 = C_NULL; v3 = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + v1 = _write_and_append(table, cat, + (id=Int64[1,2,3], value=[1.0,2.0,3.0])) + + # first overwrite + old1 = list_data_files(v1) + files2 = with_data_file_writer(v1; prefix="r1") do w + write(w, (id=Int64[10,11], value=[10.0,11.0])) + end + v2 = with_transaction(v1, cat) do tx + with_overwrite(tx) do action + add_data_files(action, files2) + delete_data_files(action, old1) + end + end + @test length(read_table_data(v2).id) == 2 + + # second overwrite + old2 = list_data_files(v2) + files3 = with_data_file_writer(v2; prefix="r2") do w + write(w, (id=Int64[99], value=[99.0])) + end + v3 = with_transaction(v2, cat) do tx + with_overwrite(tx) do action + add_data_files(action, files3) + delete_data_files(action, old2) + end + end + + data = read_table_data(v3) + @test length(data.id) == 1 + @test data.id[1] == 99 + finally + table != C_NULL && free_table(table) + v1 != C_NULL && free_table(v1) + v2 != C_NULL && free_table(v2) + v3 != C_NULL && free_table(v3) + free_catalog!(cat) + end + end + println("✅ Two sequential overwrites converge correctly") +end + +# --------------------------------------------------------------------------- +# error handling +# --------------------------------------------------------------------------- + +@testset "OverwriteAction error handling" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + # apply on freed action must throw + action = RustyIceberg.OverwriteAction() + free_overwrite_action!(action) + tx = RustyIceberg.Transaction(table) + @test_throws RustyIceberg.IcebergException apply(action, tx) + free_transaction!(tx) + println("✅ apply on freed action throws") + + # add_data_files / delete_data_files with null DataFiles must throw + action2 = RustyIceberg.OverwriteAction() + null_df = RustyIceberg.DataFiles(C_NULL) + @test_throws RustyIceberg.IcebergException add_data_files(action2, null_df) + @test_throws RustyIceberg.IcebergException delete_data_files(action2, null_df) + free_overwrite_action!(action2) + println("✅ add/delete_data_files with null DataFiles throw") + + # apply on a consumed transaction must throw + files1 = with_data_file_writer(table) do w + write(w, (id=Int64[1], value=[1.0])) + end + action3 = RustyIceberg.OverwriteAction() + tx2 = RustyIceberg.Transaction(table) + add_data_files(action3, files1) + apply(action3, tx2) # consumes tx2 + free_overwrite_action!(action3) + + action4 = RustyIceberg.OverwriteAction() + files2 = with_data_file_writer(table; prefix="e2") do w + write(w, (id=Int64[2], value=[2.0])) + end + add_data_files(action4, files2) + @test_throws RustyIceberg.IcebergException apply(action4, tx2) + free_overwrite_action!(action4) + free_transaction!(tx2) + println("✅ apply on consumed transaction throws") + finally + table != C_NULL && free_table(table) + free_catalog!(cat) + end + end + println("✅ OverwriteAction error handling tests passed") +end diff --git a/test/runtests.jl b/test/runtests.jl index 0472b42..9e5ca21 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,6 +45,9 @@ include("transaction_tests.jl") # Include writer tests include("writer_tests.jl") +# Include overwrite tests +include("overwrite_tests.jl") + end # End of testset println("\n🎉 All tests completed!") From 506224c5361bbebadbbed2895c6bb51e73571346 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:25:12 +0200 Subject: [PATCH 05/18] fix(test): qualify with_data_file_writer with RustyIceberg module prefix with_data_file_writer is not exported from RustyIceberg, so bare usage in test file caused UndefVarError at runtime. Co-Authored-By: Claude Sonnet 4.6 --- test/overwrite_tests.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index e766e0e..15e5c57 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -13,7 +13,7 @@ function _ow_schema() end function _write_and_append(table, catalog, data; prefix="data") - files = with_data_file_writer(table; prefix) do w + files = RustyIceberg.with_data_file_writer(table; prefix) do w write(w, data) end with_transaction(table, catalog) do tx @@ -112,7 +112,7 @@ end @test !isnothing(snap_before) old_files = list_data_files(v2) - new_files = with_data_file_writer(v2; prefix="new") do w + new_files = RustyIceberg.with_data_file_writer(v2; prefix="new") do w write(w, (id=Int64[10,20], value=[10.0,20.0])) end @@ -154,7 +154,7 @@ end create_namespace(cat, ["ns"]) table = create_table(cat, ["ns"], "t", _ow_schema()) - new_files = with_data_file_writer(table) do w + new_files = RustyIceberg.with_data_file_writer(table) do w write(w, (id=Int64[1], value=[1.0])) end @@ -193,7 +193,7 @@ end # first overwrite old1 = list_data_files(v1) - files2 = with_data_file_writer(v1; prefix="r1") do w + files2 = RustyIceberg.with_data_file_writer(v1; prefix="r1") do w write(w, (id=Int64[10,11], value=[10.0,11.0])) end v2 = with_transaction(v1, cat) do tx @@ -206,7 +206,7 @@ end # second overwrite old2 = list_data_files(v2) - files3 = with_data_file_writer(v2; prefix="r2") do w + files3 = RustyIceberg.with_data_file_writer(v2; prefix="r2") do w write(w, (id=Int64[99], value=[99.0])) end v3 = with_transaction(v2, cat) do tx @@ -259,7 +259,7 @@ end println("✅ add/delete_data_files with null DataFiles throw") # apply on a consumed transaction must throw - files1 = with_data_file_writer(table) do w + files1 = RustyIceberg.with_data_file_writer(table) do w write(w, (id=Int64[1], value=[1.0])) end action3 = RustyIceberg.OverwriteAction() @@ -269,7 +269,7 @@ end free_overwrite_action!(action3) action4 = RustyIceberg.OverwriteAction() - files2 = with_data_file_writer(table; prefix="e2") do w + files2 = RustyIceberg.with_data_file_writer(table; prefix="e2") do w write(w, (id=Int64[2], value=[2.0])) end add_data_files(action4, files2) From 0a4a69e6d15e06aba58b1c2366e1696288782705 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:28:01 +0200 Subject: [PATCH 06/18] fix(test): qualify with_transaction, fix consumed-tx test, add partial overwrite - Qualify with_transaction as RustyIceberg.with_transaction (not exported) - Fix "apply on consumed transaction" test: apply() doesn't consume tx, only commit() does; now we commit first then try apply - Add "partial overwrite" testset: delete all + re-add kept rows + new rows, verifies mixed add_data_files calls and selective deletion Co-Authored-By: Claude Sonnet 4.6 --- test/overwrite_tests.jl | 88 +++++++++++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 12 deletions(-) diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index 15e5c57..70c0655 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -16,7 +16,7 @@ function _write_and_append(table, catalog, data; prefix="data") files = RustyIceberg.with_data_file_writer(table; prefix) do w write(w, data) end - with_transaction(table, catalog) do tx + RustyIceberg.with_transaction(table, catalog) do tx with_fast_append(tx) do action add_data_files(action, files) end @@ -88,7 +88,7 @@ end end # --------------------------------------------------------------------------- -# full overwrite — replace all files +# full overwrite — replace ALL files # --------------------------------------------------------------------------- @testset "Overwrite replaces all existing files" begin @@ -116,7 +116,7 @@ end write(w, (id=Int64[10,20], value=[10.0,20.0])) end - v3 = with_transaction(v2, cat) do tx + v3 = RustyIceberg.with_transaction(v2, cat) do tx with_overwrite(tx) do action add_data_files(action, new_files) delete_data_files(action, old_files) @@ -142,6 +142,67 @@ end println("✅ Overwrite replaces all existing files") end +# --------------------------------------------------------------------------- +# partial overwrite — delete one file, add one file +# --------------------------------------------------------------------------- + +@testset "Overwrite deletes one file and adds one file" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL; v1 = C_NULL; v2 = C_NULL; v3 = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + # append two separate files + v1 = _write_and_append(table, cat, + (id=Int64[1,2,3], value=[1.0,2.0,3.0]); prefix="keep") + v2 = _write_and_append(v1, cat, + (id=Int64[4,5], value=[4.0,5.0]); prefix="replace") + + @test length(read_table_data(v2).id) == 5 + + # list all files, then delete only the second one by rebuilding + # the delete list from the file written in v2 (prefix="replace") + files_to_delete = RustyIceberg.with_data_file_writer(v2; prefix="replace2") do w + write(w, (id=Int64[4,5], value=[4.0,5.0])) + end + # We can't surgically pick one file from list_data_files yet, + # so instead we write a fresh replacement file and delete nothing + # from the "keep" batch. Simulate partial delete by listing + # files from v2, deleting all, and re-appending the kept rows. + old_all = list_data_files(v2) + kept_files = RustyIceberg.with_data_file_writer(v2; prefix="kept") do w + write(w, (id=Int64[1,2,3], value=[1.0,2.0,3.0])) + end + new_replace = RustyIceberg.with_data_file_writer(v2; prefix="newreplace") do w + write(w, (id=Int64[40,50], value=[40.0,50.0])) + end + free_data_files!(files_to_delete) + + v3 = RustyIceberg.with_transaction(v2, cat) do tx + with_overwrite(tx) do action + add_data_files(action, kept_files) + add_data_files(action, new_replace) + delete_data_files(action, old_all) + end + end + + after = read_table_data(v3) + @test !isnothing(after) + @test length(after.id) == 5 + @test sort(after.id) == [1, 2, 3, 40, 50] + finally + table != C_NULL && free_table(table) + v1 != C_NULL && free_table(v1) + v2 != C_NULL && free_table(v2) + v3 != C_NULL && free_table(v3) + free_catalog!(cat) + end + end + println("✅ Overwrite deletes one file and adds one replacement file") +end + # --------------------------------------------------------------------------- # overwrite with no deletes (add-only via Overwrite snapshot kind) # --------------------------------------------------------------------------- @@ -158,7 +219,7 @@ end write(w, (id=Int64[1], value=[1.0])) end - updated = with_transaction(table, cat) do tx + updated = RustyIceberg.with_transaction(table, cat) do tx with_overwrite(tx) do action add_data_files(action, new_files) end @@ -196,7 +257,7 @@ end files2 = RustyIceberg.with_data_file_writer(v1; prefix="r1") do w write(w, (id=Int64[10,11], value=[10.0,11.0])) end - v2 = with_transaction(v1, cat) do tx + v2 = RustyIceberg.with_transaction(v1, cat) do tx with_overwrite(tx) do action add_data_files(action, files2) delete_data_files(action, old1) @@ -209,7 +270,7 @@ end files3 = RustyIceberg.with_data_file_writer(v2; prefix="r2") do w write(w, (id=Int64[99], value=[99.0])) end - v3 = with_transaction(v2, cat) do tx + v3 = RustyIceberg.with_transaction(v2, cat) do tx with_overwrite(tx) do action add_data_files(action, files3) delete_data_files(action, old2) @@ -237,7 +298,7 @@ end @testset "OverwriteAction error handling" begin mktempdir() do warehouse cat = catalog_create_memory(warehouse) - table = C_NULL + table = C_NULL; updated = C_NULL try create_namespace(cat, ["ns"]) table = create_table(cat, ["ns"], "t", _ow_schema()) @@ -258,15 +319,17 @@ end free_overwrite_action!(action2) println("✅ add/delete_data_files with null DataFiles throw") - # apply on a consumed transaction must throw - files1 = RustyIceberg.with_data_file_writer(table) do w + # apply on a transaction consumed by commit must throw + files1 = RustyIceberg.with_data_file_writer(table) do w write(w, (id=Int64[1], value=[1.0])) end action3 = RustyIceberg.OverwriteAction() tx2 = RustyIceberg.Transaction(table) add_data_files(action3, files1) - apply(action3, tx2) # consumes tx2 + apply(action3, tx2) free_overwrite_action!(action3) + # now commit tx2 — this consumes the inner Transaction + updated = commit(tx2, cat) action4 = RustyIceberg.OverwriteAction() files2 = RustyIceberg.with_data_file_writer(table; prefix="e2") do w @@ -276,9 +339,10 @@ end @test_throws RustyIceberg.IcebergException apply(action4, tx2) free_overwrite_action!(action4) free_transaction!(tx2) - println("✅ apply on consumed transaction throws") + println("✅ apply on committed (consumed) transaction throws") finally - table != C_NULL && free_table(table) + table != C_NULL && free_table(table) + updated != C_NULL && free_table(updated) free_catalog!(cat) end end From d59c1dd03746ec6326e6a7b892d4341c12eeda04 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:30:38 +0200 Subject: [PATCH 07/18] test: rewrite partial overwrite test to use snapshot-based file selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of awkwardly re-adding "kept" rows, list_data_files on an earlier snapshot (v1) gives just the first file. The overwrite deletes only those files; the second file (appended in v2) is not in the delete list and survives intact — directly testing the expected semantics. Co-Authored-By: Claude Sonnet 4.6 --- test/overwrite_tests.jl | 43 ++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index 70c0655..596bef5 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -143,10 +143,10 @@ end end # --------------------------------------------------------------------------- -# partial overwrite — delete one file, add one file +# partial overwrite — delete only the first file, second file survives intact # --------------------------------------------------------------------------- -@testset "Overwrite deletes one file and adds one file" begin +@testset "Overwrite only deletes explicitly listed files" begin mktempdir() do warehouse cat = catalog_create_memory(warehouse) table = C_NULL; v1 = C_NULL; v2 = C_NULL; v3 = C_NULL @@ -156,42 +156,33 @@ end # append two separate files v1 = _write_and_append(table, cat, - (id=Int64[1,2,3], value=[1.0,2.0,3.0]); prefix="keep") + (id=Int64[1,2,3], value=[1.0,2.0,3.0]); prefix="first") v2 = _write_and_append(v1, cat, - (id=Int64[4,5], value=[4.0,5.0]); prefix="replace") + (id=Int64[4,5], value=[4.0,5.0]); prefix="second") @test length(read_table_data(v2).id) == 5 - # list all files, then delete only the second one by rebuilding - # the delete list from the file written in v2 (prefix="replace") - files_to_delete = RustyIceberg.with_data_file_writer(v2; prefix="replace2") do w - write(w, (id=Int64[4,5], value=[4.0,5.0])) - end - # We can't surgically pick one file from list_data_files yet, - # so instead we write a fresh replacement file and delete nothing - # from the "keep" batch. Simulate partial delete by listing - # files from v2, deleting all, and re-appending the kept rows. - old_all = list_data_files(v2) - kept_files = RustyIceberg.with_data_file_writer(v2; prefix="kept") do w - write(w, (id=Int64[1,2,3], value=[1.0,2.0,3.0])) - end - new_replace = RustyIceberg.with_data_file_writer(v2; prefix="newreplace") do w - write(w, (id=Int64[40,50], value=[40.0,50.0])) + # list_data_files on v1 returns only the first file + files_from_v1 = list_data_files(v1) + + # replacement for the first file + new_file = RustyIceberg.with_data_file_writer(v2; prefix="new") do w + write(w, (id=Int64[10,11,12], value=[10.0,11.0,12.0])) end - free_data_files!(files_to_delete) + # overwrite: delete only the first file, add replacement + # the second file (rows 4,5) is NOT in the delete list → it survives v3 = RustyIceberg.with_transaction(v2, cat) do tx with_overwrite(tx) do action - add_data_files(action, kept_files) - add_data_files(action, new_replace) - delete_data_files(action, old_all) + add_data_files(action, new_file) + delete_data_files(action, files_from_v1) end end after = read_table_data(v3) @test !isnothing(after) - @test length(after.id) == 5 - @test sort(after.id) == [1, 2, 3, 40, 50] + @test length(after.id) == 5 # 2 surviving + 3 new + @test sort(after.id) == [4, 5, 10, 11, 12] finally table != C_NULL && free_table(table) v1 != C_NULL && free_table(v1) @@ -200,7 +191,7 @@ end free_catalog!(cat) end end - println("✅ Overwrite deletes one file and adds one replacement file") + println("✅ Overwrite only deletes explicitly listed files; others survive") end # --------------------------------------------------------------------------- From 935409e8464ea83fed6b589b470ff662f71d4a66 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:33:24 +0200 Subject: [PATCH 08/18] fix(julia): use parse_and_throw in OverwriteAction FFI wrappers The previous code used bare IcebergException(UNEXPECTED, ...) which references an undefined constant, causing UndefVarError at runtime. Switch to parse_and_throw (same pattern as FastAppendAction) which extracts the error code from the Rust-encoded message string, and use Ref{Ptr{Cchar}} (not Ptr{Ptr{Cchar}}) to match the calling convention. Co-Authored-By: Claude Sonnet 4.6 --- src/transaction.jl | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/transaction.jl b/src/transaction.jl index d1e2a44..8752b7e 100644 --- a/src/transaction.jl +++ b/src/transaction.jl @@ -401,15 +401,14 @@ function add_data_files(action::OverwriteAction, data_files::DataFiles) if data_files.ptr == C_NULL throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) end - err_ptr = Ref{Ptr{Cchar}}(C_NULL) - ret = @ccall rust_lib.iceberg_overwrite_action_add_data_files( + error_message_ptr = Ref{Ptr{Cchar}}(C_NULL) + result = @ccall rust_lib.iceberg_overwrite_action_add_data_files( action.ptr::Ptr{Cvoid}, data_files.ptr::Ptr{Cvoid}, - err_ptr::Ptr{Ptr{Cchar}} + error_message_ptr::Ref{Ptr{Cchar}} )::Cint - if ret != 0 - msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" - throw(IcebergException(UNEXPECTED, "Failed to add data files to overwrite action", msg)) + if result != 0 + parse_and_throw(error_message_ptr[], "overwrite add_data_files") end data_files.ptr = C_NULL return nothing @@ -428,15 +427,14 @@ function delete_data_files(action::OverwriteAction, data_files::DataFiles) if data_files.ptr == C_NULL throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) end - err_ptr = Ref{Ptr{Cchar}}(C_NULL) - ret = @ccall rust_lib.iceberg_overwrite_action_delete_data_files( + error_message_ptr = Ref{Ptr{Cchar}}(C_NULL) + result = @ccall rust_lib.iceberg_overwrite_action_delete_data_files( action.ptr::Ptr{Cvoid}, data_files.ptr::Ptr{Cvoid}, - err_ptr::Ptr{Ptr{Cchar}} + error_message_ptr::Ref{Ptr{Cchar}} )::Cint - if ret != 0 - msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" - throw(IcebergException(UNEXPECTED, "Failed to add delete files to overwrite action", msg)) + if result != 0 + parse_and_throw(error_message_ptr[], "overwrite delete_data_files") end data_files.ptr = C_NULL return nothing @@ -454,15 +452,14 @@ function apply(action::OverwriteAction, tx::Transaction) if tx.ptr == C_NULL throw(IcebergException(STATE_TRANSACTION_CONSUMED, "Transaction has already been committed or rolled back", "Transaction has been freed or consumed")) end - err_ptr = Ref{Ptr{Cchar}}(C_NULL) - ret = @ccall rust_lib.iceberg_overwrite_action_apply( + error_message_ptr = Ref{Ptr{Cchar}}(C_NULL) + result = @ccall rust_lib.iceberg_overwrite_action_apply( action.ptr::Ptr{Cvoid}, tx.ptr::Ptr{Cvoid}, - err_ptr::Ptr{Ptr{Cchar}} + error_message_ptr::Ref{Ptr{Cchar}} )::Cint - if ret != 0 - msg = err_ptr[] != C_NULL ? unsafe_string(err_ptr[]) : "unknown error" - throw(IcebergException(UNEXPECTED, "Failed to apply overwrite action", msg)) + if result != 0 + parse_and_throw(error_message_ptr[], "overwrite apply") end return nothing end From 02750566372459ad391e1b1dc53e4017f4e1b288 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:39:31 +0200 Subject: [PATCH 09/18] fix clippy, cargo fmt, bump FFI version to 0.8.2, add fast-append-after-overwrite test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add #[derive(Default)] to IcebergOverwriteAction to satisfy clippy - cargo fmt reformatting of transaction.rs and incremental_pipeline.rs - Bump crate version 0.8.1 → 0.8.2 - Add testset: fast append after full overwrite clears table then re-populates it Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.lock | 2 +- iceberg_rust_ffi/Cargo.toml | 2 +- iceberg_rust_ffi/src/incremental_pipeline.rs | 29 +++++------ iceberg_rust_ffi/src/transaction.rs | 28 +++++++---- test/overwrite_tests.jl | 52 ++++++++++++++++++++ 5 files changed, 87 insertions(+), 26 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.lock b/iceberg_rust_ffi/Cargo.lock index d25f2db..1e60b77 100644 --- a/iceberg_rust_ffi/Cargo.lock +++ b/iceberg_rust_ffi/Cargo.lock @@ -1641,7 +1641,7 @@ dependencies = [ [[package]] name = "iceberg_rust_ffi" -version = "0.8.1" +version = "0.8.2" dependencies = [ "anyhow", "arrow-array", diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index dfd8a86..b21c455 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "iceberg_rust_ffi" -version = "0.8.1" +version = "0.8.2" edition = "2021" [lib] diff --git a/iceberg_rust_ffi/src/incremental_pipeline.rs b/iceberg_rust_ffi/src/incremental_pipeline.rs index 09229da..283909b 100644 --- a/iceberg_rust_ffi/src/incremental_pipeline.rs +++ b/iceberg_rust_ffi/src/incremental_pipeline.rs @@ -35,11 +35,10 @@ fn read_one_append_file( ) -> iceberg::Result { let append = futures::stream::once(async { Ok(task) }).boxed(); let delete = futures::stream::empty::>().boxed(); - let UnzippedIncrementalScanResult { appends, .. } = - StreamsInto::::stream( - (append, delete), - reader, - )?; + let UnzippedIncrementalScanResult { appends, .. } = StreamsInto::< + ArrowReader, + UnzippedIncrementalScanResult, + >::stream((append, delete), reader)?; Ok(appends) } @@ -81,15 +80,17 @@ pub async fn create_incremental_nested_pipeline( // Delete stream: StreamsInto with empty append stream routes all delete // tasks through the iceberg reader machinery. - let UnzippedIncrementalScanResult { deletes: delete_arrow, .. } = - StreamsInto::::stream( - ( - futures::stream::empty::>().boxed(), - delete_tasks, - ), - build_reader(file_io, batch_size), - ) - .map_err(|e| anyhow::anyhow!("Failed to create delete stream: {e}"))?; + let UnzippedIncrementalScanResult { + deletes: delete_arrow, + .. + } = StreamsInto::::stream( + ( + futures::stream::empty::>().boxed(), + delete_tasks, + ), + build_reader(file_io, batch_size), + ) + .map_err(|e| anyhow::anyhow!("Failed to create delete stream: {e}"))?; let delete_stream = IcebergArrowStream { stream: AsyncMutex::new(crate::transform_stream_with_parallel_serialization( diff --git a/iceberg_rust_ffi/src/transaction.rs b/iceberg_rust_ffi/src/transaction.rs index 1672b33..3ce2553 100644 --- a/iceberg_rust_ffi/src/transaction.rs +++ b/iceberg_rust_ffi/src/transaction.rs @@ -8,8 +8,8 @@ use crate::error_codes::{classified_error, classify_iceberg, IcebergErrorCode}; use crate::response::IcebergBoxedResponse; use crate::table::IcebergTable; use iceberg::spec::DataFile; -use iceberg::transaction::{ApplyTransactionAction, Transaction}; use iceberg::spec::ManifestContentType; +use iceberg::transaction::{ApplyTransactionAction, Transaction}; // FFI exports use object_store_ffi::{ @@ -85,6 +85,7 @@ impl IcebergFastAppendAction { } /// Opaque handle accumulating data files for an OverwriteAction. +#[derive(Default)] pub struct IcebergOverwriteAction { added_files: Vec, deleted_files: Vec, @@ -95,10 +96,7 @@ unsafe impl Sync for IcebergOverwriteAction {} impl IcebergOverwriteAction { pub fn new() -> Self { - IcebergOverwriteAction { - added_files: Vec::new(), - deleted_files: Vec::new(), - } + Self::default() } } @@ -365,7 +363,9 @@ pub extern "C" fn iceberg_overwrite_action_add_data_files( let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { if !out.is_null() { if let Ok(c_str) = std::ffi::CString::new(msg) { - unsafe { *out = c_str.into_raw(); } + unsafe { + *out = c_str.into_raw(); + } } } }; @@ -379,7 +379,9 @@ pub extern "C" fn iceberg_overwrite_action_add_data_files( } let action_ref = unsafe { &mut *action }; let df_ref = unsafe { &mut *data_files }; - action_ref.added_files.extend(std::mem::take(&mut df_ref.data_files)); + action_ref + .added_files + .extend(std::mem::take(&mut df_ref.data_files)); 0 } @@ -396,7 +398,9 @@ pub extern "C" fn iceberg_overwrite_action_delete_data_files( let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { if !out.is_null() { if let Ok(c_str) = std::ffi::CString::new(msg) { - unsafe { *out = c_str.into_raw(); } + unsafe { + *out = c_str.into_raw(); + } } } }; @@ -410,7 +414,9 @@ pub extern "C" fn iceberg_overwrite_action_delete_data_files( } let action_ref = unsafe { &mut *action }; let df_ref = unsafe { &mut *data_files }; - action_ref.deleted_files.extend(std::mem::take(&mut df_ref.data_files)); + action_ref + .deleted_files + .extend(std::mem::take(&mut df_ref.data_files)); 0 } @@ -428,7 +434,9 @@ pub extern "C" fn iceberg_overwrite_action_apply( let set_error = |msg: &str, out: *mut *mut std::ffi::c_char| { if !out.is_null() { if let Ok(c_str) = std::ffi::CString::new(msg) { - unsafe { *out = c_str.into_raw(); } + unsafe { + *out = c_str.into_raw(); + } } } }; diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index 596bef5..88f126d 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -282,6 +282,58 @@ end println("✅ Two sequential overwrites converge correctly") end +# --------------------------------------------------------------------------- +# fast append after full overwrite (table cleared then re-populated) +# --------------------------------------------------------------------------- + +@testset "Fast append after full overwrite" begin + mktempdir() do warehouse + cat = catalog_create_memory(warehouse) + table = C_NULL; v1 = C_NULL; v2 = C_NULL; v3 = C_NULL + try + create_namespace(cat, ["ns"]) + table = create_table(cat, ["ns"], "t", _ow_schema()) + + # seed the table with some rows + v1 = _write_and_append(table, cat, + (id=Int64[1,2,3], value=[1.0,2.0,3.0])) + @test length(read_table_data(v1).id) == 3 + + # overwrite with empty set — delete everything, add nothing + old_files = list_data_files(v1) + v2 = RustyIceberg.with_transaction(v1, cat) do tx + with_overwrite(tx) do action + delete_data_files(action, old_files) + end + end + snap2 = table_current_snapshot_id(v2) + @test !isnothing(snap2) + + # table should now be empty + data2 = read_table_data(v2) + @test length(data2.id) == 0 + + # fast append populates the table again + v3 = _write_and_append(v2, cat, + (id=Int64[10,20], value=[10.0,20.0]); prefix="post") + snap3 = table_current_snapshot_id(v3) + @test !isnothing(snap3) + @test snap3 != snap2 + + data3 = read_table_data(v3) + @test length(data3.id) == 2 + @test sort(data3.id) == [10, 20] + finally + table != C_NULL && free_table(table) + v1 != C_NULL && free_table(v1) + v2 != C_NULL && free_table(v2) + v3 != C_NULL && free_table(v3) + free_catalog!(cat) + end + end + println("✅ Fast append after full overwrite yields only the new rows") +end + # --------------------------------------------------------------------------- # error handling # --------------------------------------------------------------------------- From 4aa32ae399168421372dbb6dea8a847f43f041de Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 08:43:49 +0200 Subject: [PATCH 10/18] fix memory leak: free DataFiles handle after OverwriteAction add/delete Both iceberg_overwrite_action_add/delete_data_files drain the Vec via std::mem::take but leave the IcebergDataFiles box alive. Wrap the ccall in try/finally and call free_data_files! to match the FastAppendAction pattern. Co-Authored-By: Claude Sonnet 4.6 --- src/transaction.jl | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/transaction.jl b/src/transaction.jl index 8752b7e..1996be8 100644 --- a/src/transaction.jl +++ b/src/transaction.jl @@ -402,15 +402,18 @@ function add_data_files(action::OverwriteAction, data_files::DataFiles) throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) end error_message_ptr = Ref{Ptr{Cchar}}(C_NULL) - result = @ccall rust_lib.iceberg_overwrite_action_add_data_files( - action.ptr::Ptr{Cvoid}, - data_files.ptr::Ptr{Cvoid}, - error_message_ptr::Ref{Ptr{Cchar}} - )::Cint - if result != 0 - parse_and_throw(error_message_ptr[], "overwrite add_data_files") + try + result = @ccall rust_lib.iceberg_overwrite_action_add_data_files( + action.ptr::Ptr{Cvoid}, + data_files.ptr::Ptr{Cvoid}, + error_message_ptr::Ref{Ptr{Cchar}} + )::Cint + if result != 0 + parse_and_throw(error_message_ptr[], "overwrite add_data_files") + end + finally + free_data_files!(data_files) end - data_files.ptr = C_NULL return nothing end @@ -428,15 +431,18 @@ function delete_data_files(action::OverwriteAction, data_files::DataFiles) throw(IcebergException(STATE_RESOURCE_FREED, "Resource has been freed", "DataFiles has been freed or consumed")) end error_message_ptr = Ref{Ptr{Cchar}}(C_NULL) - result = @ccall rust_lib.iceberg_overwrite_action_delete_data_files( - action.ptr::Ptr{Cvoid}, - data_files.ptr::Ptr{Cvoid}, - error_message_ptr::Ref{Ptr{Cchar}} - )::Cint - if result != 0 - parse_and_throw(error_message_ptr[], "overwrite delete_data_files") + try + result = @ccall rust_lib.iceberg_overwrite_action_delete_data_files( + action.ptr::Ptr{Cvoid}, + data_files.ptr::Ptr{Cvoid}, + error_message_ptr::Ref{Ptr{Cchar}} + )::Cint + if result != 0 + parse_and_throw(error_message_ptr[], "overwrite delete_data_files") + end + finally + free_data_files!(data_files) end - data_files.ptr = C_NULL return nothing end From 3ed58f10b38a86cb623c1b5dbb2c74067776b1ea Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 09:05:15 +0200 Subject: [PATCH 11/18] bump iceberg-rust rev to 50b335f (allow delete-only overwrite) Picks up the fix that relaxes SnapshotProducer's precondition check to allow Overwrite snapshots that only delete files without adding new ones. Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index b21c455..cb3004b 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -12,8 +12,8 @@ default = ["julia"] julia = [] [dependencies] -iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "a4a353577ad7414b065770ba970c1353325a3adb", features = ["storage-azdls"] } -iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "a4a353577ad7414b065770ba970c1353325a3adb" } +iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "50b335f371ad66a5d9fb9cc0cf4b5906ec88c003", features = ["storage-azdls"] } +iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" } object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "79b08071c7a1642532b5891253280861eca9e44e", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" From c1e6b5438ed609b52c926850ad34c6a4c55ef861 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 09:07:23 +0200 Subject: [PATCH 12/18] Update Cargo.lock --- iceberg_rust_ffi/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.lock b/iceberg_rust_ffi/Cargo.lock index 1e60b77..b5b76ef 100644 --- a/iceberg_rust_ffi/Cargo.lock +++ b/iceberg_rust_ffi/Cargo.lock @@ -1566,7 +1566,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=a4a353577ad7414b065770ba970c1353325a3adb#a4a353577ad7414b065770ba970c1353325a3adb" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=50b335f371ad66a5d9fb9cc0cf4b5906ec88c003#50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" dependencies = [ "aes-gcm", "anyhow", @@ -1623,7 +1623,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=a4a353577ad7414b065770ba970c1353325a3adb#a4a353577ad7414b065770ba970c1353325a3adb" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=50b335f371ad66a5d9fb9cc0cf4b5906ec88c003#50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" dependencies = [ "async-trait", "chrono", From a3f11713b0a2415ec7d3effcc8513ec14e506637 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 09:08:24 +0200 Subject: [PATCH 13/18] fix: expect nothing from read_table_data after delete-only overwrite read_table_data returns nothing when there are no record batches, which is the expected state after clearing a table via delete-only overwrite. Co-Authored-By: Claude Sonnet 4.6 --- test/overwrite_tests.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index 88f126d..30bb9ad 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -309,9 +309,8 @@ end snap2 = table_current_snapshot_id(v2) @test !isnothing(snap2) - # table should now be empty - data2 = read_table_data(v2) - @test length(data2.id) == 0 + # table should now be empty (read_table_data returns nothing when no batches) + @test isnothing(read_table_data(v2)) # fast append populates the table again v3 = _write_and_append(v2, cat, From f77f1da7381f3b8d7ea812faa9c4131eb0f5ea93 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 09:37:38 +0200 Subject: [PATCH 14/18] add DataFiles inspection API: length and data_file_info Rust FFI: - iceberg_data_files_len: return file count without consuming the handle - iceberg_data_files_to_json: serialize all DataFile metadata fields to a JSON array (content, file_path, file_format, record_count, file_size_in_bytes, column/value/null/nan counts, bounds, split_offsets, sort_order_id, equality_ids, first_row_id, referenced_data_file, content_offset, content_size_in_bytes) Julia: - Base.length(df::DataFiles): wraps iceberg_data_files_len - data_file_info(df::DataFiles): returns Vector{Dict{String,Any}} via JSON Tests updated to assert file counts and metadata instead of just checking for non-null handles. Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.lock | 1 + iceberg_rust_ffi/Cargo.toml | 1 + iceberg_rust_ffi/src/transaction.rs | 93 ++++++++++++++++++++++++++++- src/RustyIceberg.jl | 2 +- src/data_file.jl | 43 +++++++++++++ test/overwrite_tests.jl | 24 ++++++-- 6 files changed, 157 insertions(+), 7 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.lock b/iceberg_rust_ffi/Cargo.lock index b5b76ef..ea33946 100644 --- a/iceberg_rust_ffi/Cargo.lock +++ b/iceberg_rust_ffi/Cargo.lock @@ -1658,6 +1658,7 @@ dependencies = [ "object_store_ffi", "once_cell", "parquet", + "serde", "serde_json", "strum 0.26.3", "tempfile", diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index cb3004b..b4f6931 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -31,6 +31,7 @@ async-trait = "0.1" # Pin home to 0.5.9 to support rustc < 1.88 # (0.5.11+ requires rustc 1.88+) home = "=0.5.9" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.142" strum = { version = "0.26", features = ["derive"] } diff --git a/iceberg_rust_ffi/src/transaction.rs b/iceberg_rust_ffi/src/transaction.rs index 3ce2553..4f5734f 100644 --- a/iceberg_rust_ffi/src/transaction.rs +++ b/iceberg_rust_ffi/src/transaction.rs @@ -7,9 +7,9 @@ use crate::catalog::IcebergCatalog; use crate::error_codes::{classified_error, classify_iceberg, IcebergErrorCode}; use crate::response::IcebergBoxedResponse; use crate::table::IcebergTable; -use iceberg::spec::DataFile; -use iceberg::spec::ManifestContentType; +use iceberg::spec::{DataContentType, DataFile, Datum, ManifestContentType}; use iceberg::transaction::{ApplyTransactionAction, Transaction}; +use std::collections::HashMap; // FFI exports use object_store_ffi::{ @@ -126,6 +126,95 @@ pub extern "C" fn iceberg_data_files_free(data_files: *mut IcebergDataFiles) { } } +/// Return the number of data files in the handle (0 if null). +#[no_mangle] +pub extern "C" fn iceberg_data_files_len(data_files: *const IcebergDataFiles) -> usize { + if data_files.is_null() { + return 0; + } + unsafe { (*data_files).data_files.len() } +} + +/// Return a JSON array of objects describing every data file in the handle. +/// +/// Each object contains all metadata fields from the Iceberg DataFile spec: +/// content, file_path, file_format, record_count, file_size_in_bytes, +/// partition_spec_id, column_sizes, value_counts, null_value_counts, +/// nan_value_counts, lower_bounds, upper_bounds, split_offsets, +/// sort_order_id, equality_ids, first_row_id, referenced_data_file, +/// content_offset, content_size_in_bytes. +/// +/// Returns null on error or if `data_files` is null. +/// The caller must free the returned string with `iceberg_destroy_cstring`. +#[no_mangle] +pub extern "C" fn iceberg_data_files_to_json( + data_files: *const IcebergDataFiles, +) -> *mut std::ffi::c_char { + #[derive(serde::Serialize)] + struct DataFileJson<'a> { + content: &'static str, + file_path: &'a str, + file_format: String, + record_count: u64, + file_size_in_bytes: u64, + column_sizes: &'a HashMap, + value_counts: &'a HashMap, + null_value_counts: &'a HashMap, + nan_value_counts: &'a HashMap, + lower_bounds: HashMap, + upper_bounds: HashMap, + split_offsets: Option<&'a [i64]>, + sort_order_id: Option, + equality_ids: Option>, + first_row_id: Option, + referenced_data_file: Option, + content_offset: Option, + content_size_in_bytes: Option, + } + + if data_files.is_null() { + return std::ptr::null_mut(); + } + let df_ref = unsafe { &*data_files }; + + let entries: Vec = df_ref + .data_files + .iter() + .map(|f| DataFileJson { + content: match f.content_type() { + DataContentType::Data => "data", + DataContentType::PositionDeletes => "position_deletes", + DataContentType::EqualityDeletes => "equality_deletes", + }, + file_path: f.file_path(), + file_format: f.file_format().to_string(), + record_count: f.record_count(), + file_size_in_bytes: f.file_size_in_bytes(), + column_sizes: f.column_sizes(), + value_counts: f.value_counts(), + null_value_counts: f.null_value_counts(), + nan_value_counts: f.nan_value_counts(), + lower_bounds: f.lower_bounds().iter().map(|(k, v)| (*k, v)).collect(), + upper_bounds: f.upper_bounds().iter().map(|(k, v)| (*k, v)).collect(), + split_offsets: f.split_offsets(), + sort_order_id: f.sort_order_id(), + equality_ids: f.equality_ids(), + first_row_id: f.first_row_id(), + referenced_data_file: f.referenced_data_file(), + content_offset: f.content_offset(), + content_size_in_bytes: f.content_size_in_bytes(), + }) + .collect(); + + match serde_json::to_string(&entries) { + Ok(json) => match std::ffi::CString::new(json) { + Ok(c_str) => c_str.into_raw(), + Err(_) => std::ptr::null_mut(), + }, + Err(_) => std::ptr::null_mut(), + } +} + /// Free a fast append action #[no_mangle] pub extern "C" fn iceberg_fast_append_action_free(action: *mut IcebergFastAppendAction) { diff --git a/src/RustyIceberg.jl b/src/RustyIceberg.jl index 6762372..c56f4f4 100644 --- a/src/RustyIceberg.jl +++ b/src/RustyIceberg.jl @@ -45,7 +45,7 @@ export IcebergBoolean, IcebergInt, IcebergLong, IcebergFloat, IcebergDouble export IcebergDate, IcebergTime, IcebergTimestamp, IcebergTimestamptz export IcebergTimestampNs, IcebergTimestamptzNs export IcebergString, IcebergUuid, IcebergBinary, IcebergDecimal -export Transaction, DataFiles, free_transaction!, free_data_files!, commit, transaction +export Transaction, DataFiles, free_transaction!, free_data_files!, commit, transaction, data_file_info export FastAppendAction, free_fast_append_action!, add_data_files, apply, with_fast_append export OverwriteAction, free_overwrite_action!, delete_data_files, with_overwrite, list_data_files export DataFileWriter, free_writer!, close_writer, write_columns, set_encode_workers! diff --git a/src/data_file.jl b/src/data_file.jl index db1d3df..1bc2025 100644 --- a/src/data_file.jl +++ b/src/data_file.jl @@ -31,3 +31,46 @@ function free_data_files!(df::DataFiles) df.ptr = C_NULL return nothing end + +""" + Base.length(df::DataFiles) -> Int + +Return the number of data files in the handle (0 if the handle is null/freed). +""" +function Base.length(df::DataFiles) + df.ptr == C_NULL && return 0 + Int(@ccall rust_lib.iceberg_data_files_len(df.ptr::Ptr{Cvoid})::Csize_t) +end + +""" + data_file_info(df::DataFiles) -> Vector{Dict{String,Any}} + +Return a `Vector` of `Dict`s, one per data file, containing all Iceberg +`DataFile` metadata fields: + +- `content` — `"data"`, `"position_deletes"`, or `"equality_deletes"` +- `file_path` — full URI of the file +- `file_format` — `"parquet"`, `"avro"`, `"orc"`, etc. +- `record_count` — number of records in the file +- `file_size_in_bytes` — total file size +- `column_sizes`, `value_counts`, `null_value_counts`, `nan_value_counts` — per-column stats maps (keys are field-id strings) +- `lower_bounds`, `upper_bounds` — per-column min/max bounds +- `split_offsets` — row-group offsets (Parquet), or `nothing` +- `sort_order_id`, `equality_ids`, `first_row_id`, `referenced_data_file`, `content_offset`, `content_size_in_bytes` — optional fields + +Returns an empty vector if the handle is null/freed. +""" +function data_file_info(df::DataFiles) + df.ptr == C_NULL && return Dict{String,Any}[] + ptr = @ccall rust_lib.iceberg_data_files_to_json(df.ptr::Ptr{Cvoid})::Ptr{Cchar} + if ptr == C_NULL + throw(IcebergException( + INTERNAL, + "Internal error (please report this as a bug)", + "iceberg_data_files_to_json returned null", + )) + end + json_str = unsafe_string(ptr) + @ccall rust_lib.iceberg_destroy_cstring(ptr::Ptr{Cchar})::Cint + return JSON.parse(json_str) +end diff --git a/test/overwrite_tests.jl b/test/overwrite_tests.jl index 30bb9ad..98a82f4 100644 --- a/test/overwrite_tests.jl +++ b/test/overwrite_tests.jl @@ -53,7 +53,8 @@ end table = create_table(cat, ["ns"], "t", _ow_schema()) df = list_data_files(table) - @test df.ptr != C_NULL + @test length(df) == 0 + @test isempty(data_file_info(df)) free_data_files!(df) @test df.ptr == C_NULL finally @@ -61,7 +62,7 @@ end free_catalog!(cat) end end - println("✅ list_data_files on empty table returns valid empty handle") + println("✅ list_data_files on empty table returns empty handle") end @testset "list_data_files after append" begin @@ -76,7 +77,17 @@ end (id=Int64[1,2,3], value=[1.1,2.2,3.3])) listed = list_data_files(updated) - @test listed.ptr != C_NULL + @test length(listed) == 1 + + info = data_file_info(listed) + @test length(info) == 1 + f = info[1] + @test f["content"] == "data" + @test f["file_format"] == "parquet" + @test f["record_count"] == 3 + @test f["file_size_in_bytes"] > 0 + @test endswith(f["file_path"], ".parquet") + free_data_files!(listed) finally table != C_NULL && free_table(table) @@ -84,7 +95,7 @@ end free_catalog!(cat) end end - println("✅ list_data_files after append returns non-null handle") + println("✅ list_data_files after append returns correct file metadata") end # --------------------------------------------------------------------------- @@ -112,6 +123,9 @@ end @test !isnothing(snap_before) old_files = list_data_files(v2) + @test length(old_files) == 2 + @test sum(f["record_count"] for f in data_file_info(old_files)) == 5 + new_files = RustyIceberg.with_data_file_writer(v2; prefix="new") do w write(w, (id=Int64[10,20], value=[10.0,20.0])) end @@ -164,6 +178,8 @@ end # list_data_files on v1 returns only the first file files_from_v1 = list_data_files(v1) + @test length(files_from_v1) == 1 + @test data_file_info(files_from_v1)[1]["record_count"] == 3 # replacement for the first file new_file = RustyIceberg.with_data_file_writer(v2; prefix="new") do w From e6fa9b760a6624ae603bcb609dce102bc0a0d85d Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 10:54:06 +0200 Subject: [PATCH 15/18] refactor: move DataFiles FFI code into dedicated data_file.rs module Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/src/data_file.rs | 178 ++++++++++++++++++++++++++++ iceberg_rust_ffi/src/lib.rs | 9 +- iceberg_rust_ffi/src/transaction.rs | 170 +------------------------- iceberg_rust_ffi/src/writer.rs | 2 +- 4 files changed, 186 insertions(+), 173 deletions(-) create mode 100644 iceberg_rust_ffi/src/data_file.rs diff --git a/iceberg_rust_ffi/src/data_file.rs b/iceberg_rust_ffi/src/data_file.rs new file mode 100644 index 0000000..24d6276 --- /dev/null +++ b/iceberg_rust_ffi/src/data_file.rs @@ -0,0 +1,178 @@ +/// DataFiles FFI — opaque handle, inspection helpers, and table listing. +use std::collections::HashMap; +use std::ffi::c_void; + +use iceberg::spec::{DataContentType, DataFile, Datum, ManifestContentType}; +use object_store_ffi::{ + export_runtime_op, with_cancellation, CResult, NotifyGuard, ResponseGuard, RT, +}; + +use crate::error_codes::{classified_error, IcebergErrorCode}; +use crate::response::IcebergBoxedResponse; + +/// Opaque handle for a collection of data files. +/// +/// Produced by writers and by `iceberg_table_list_data_files`. Consumed +/// (files moved out) by `iceberg_fast_append_action_add_data_files`, +/// `iceberg_overwrite_action_add_data_files`, and +/// `iceberg_overwrite_action_delete_data_files`. Free with +/// `iceberg_data_files_free` after use. +pub struct IcebergDataFiles { + pub data_files: Vec, +} + +unsafe impl Send for IcebergDataFiles {} +unsafe impl Sync for IcebergDataFiles {} + +/// Type alias for async operations that return a DataFiles handle. +pub type IcebergDataFilesResponse = IcebergBoxedResponse; + +/// Free a data files handle. +#[no_mangle] +pub extern "C" fn iceberg_data_files_free(data_files: *mut IcebergDataFiles) { + if !data_files.is_null() { + unsafe { + let _ = Box::from_raw(data_files); + } + } +} + +/// Return the number of data files in the handle (0 if null). +#[no_mangle] +pub extern "C" fn iceberg_data_files_len(data_files: *const IcebergDataFiles) -> usize { + if data_files.is_null() { + return 0; + } + unsafe { (*data_files).data_files.len() } +} + +/// Return a JSON array of objects describing every data file in the handle. +/// +/// Each object contains all metadata fields from the Iceberg DataFile spec: +/// content, file_path, file_format, record_count, file_size_in_bytes, +/// column_sizes, value_counts, null_value_counts, nan_value_counts, +/// lower_bounds, upper_bounds, split_offsets, sort_order_id, equality_ids, +/// first_row_id, referenced_data_file, content_offset, content_size_in_bytes. +/// +/// Returns null on error or if `data_files` is null. +/// The caller must free the returned string with `iceberg_destroy_cstring`. +#[no_mangle] +pub extern "C" fn iceberg_data_files_to_json( + data_files: *const IcebergDataFiles, +) -> *mut std::ffi::c_char { + #[derive(serde::Serialize)] + struct DataFileJson<'a> { + content: &'static str, + file_path: &'a str, + file_format: String, + record_count: u64, + file_size_in_bytes: u64, + column_sizes: &'a HashMap, + value_counts: &'a HashMap, + null_value_counts: &'a HashMap, + nan_value_counts: &'a HashMap, + lower_bounds: HashMap, + upper_bounds: HashMap, + split_offsets: Option<&'a [i64]>, + sort_order_id: Option, + equality_ids: Option>, + first_row_id: Option, + referenced_data_file: Option, + content_offset: Option, + content_size_in_bytes: Option, + } + + if data_files.is_null() { + return std::ptr::null_mut(); + } + let df_ref = unsafe { &*data_files }; + + let entries: Vec = df_ref + .data_files + .iter() + .map(|f| DataFileJson { + content: match f.content_type() { + DataContentType::Data => "data", + DataContentType::PositionDeletes => "position_deletes", + DataContentType::EqualityDeletes => "equality_deletes", + }, + file_path: f.file_path(), + file_format: f.file_format().to_string(), + record_count: f.record_count(), + file_size_in_bytes: f.file_size_in_bytes(), + column_sizes: f.column_sizes(), + value_counts: f.value_counts(), + null_value_counts: f.null_value_counts(), + nan_value_counts: f.nan_value_counts(), + lower_bounds: f.lower_bounds().iter().map(|(k, v)| (*k, v)).collect(), + upper_bounds: f.upper_bounds().iter().map(|(k, v)| (*k, v)).collect(), + split_offsets: f.split_offsets(), + sort_order_id: f.sort_order_id(), + equality_ids: f.equality_ids(), + first_row_id: f.first_row_id(), + referenced_data_file: f.referenced_data_file(), + content_offset: f.content_offset(), + content_size_in_bytes: f.content_size_in_bytes(), + }) + .collect(); + + match serde_json::to_string(&entries) { + Ok(json) => match std::ffi::CString::new(json) { + Ok(c_str) => c_str.into_raw(), + Err(_) => std::ptr::null_mut(), + }, + Err(_) => std::ptr::null_mut(), + } +} + +export_runtime_op!( + iceberg_table_list_data_files, + crate::IcebergDataFilesResponse, + || { + if table.is_null() { + return Err(classified_error( + IcebergErrorCode::STATE_RESOURCE_FREED, + "Resource has been freed", + "Null table pointer provided", + )); + } + let table_ref = unsafe { &*table }; + Ok(table_ref) + }, + table_ref, + async { + let table = &table_ref.table; + let Some(snapshot) = table.metadata().current_snapshot() else { + return Ok::(IcebergDataFiles { + data_files: vec![], + }); + }; + + let manifest_list = snapshot + .load_manifest_list(table.file_io(), &table.metadata_ref()) + .await + .map_err(|e| anyhow::anyhow!("Failed to load manifest list: {e}"))?; + + let mut data_files = Vec::new(); + for manifest_entry in manifest_list.entries() { + if manifest_entry.content != ManifestContentType::Data { + continue; + } + if !manifest_entry.has_added_files() && !manifest_entry.has_existing_files() { + continue; + } + let manifest = manifest_entry + .load_manifest(table.file_io()) + .await + .map_err(|e| anyhow::anyhow!("Failed to load manifest: {e}"))?; + for entry in manifest.entries() { + if entry.is_alive() { + data_files.push(entry.data_file().clone()); + } + } + } + + Ok::(IcebergDataFiles { data_files }) + }, + table: *mut crate::table::IcebergTable +); diff --git a/iceberg_rust_ffi/src/lib.rs b/iceberg_rust_ffi/src/lib.rs index 7e78a1b..9eda8c9 100644 --- a/iceberg_rust_ffi/src/lib.rs +++ b/iceberg_rust_ffi/src/lib.rs @@ -34,6 +34,9 @@ mod table; // Transaction module mod transaction; +// DataFiles handle, inspection helpers, and table listing +mod data_file; + // Writer module mod writer; @@ -65,6 +68,7 @@ pub use error_codes::{classified_error, classify, ClassifiedError}; // Re-export types and functions from submodules pub use catalog::{IcebergBoolResponse, IcebergCatalog, IcebergCatalogResponse}; +pub use data_file::{IcebergDataFiles, IcebergDataFilesResponse}; pub use full::IcebergScan; pub use incremental::{IcebergIncrementalScan, IcebergUnzippedStreamsResponse}; pub use response::{ @@ -76,10 +80,7 @@ pub use table::{ IcebergFileScan, IcebergFileScanResponse, IcebergFileScanStream, IcebergFileScanStreamResponse, IcebergTable, IcebergTableResponse, }; -pub use transaction::{ - IcebergDataFiles, IcebergDataFilesResponse, IcebergOverwriteAction, IcebergTransaction, - IcebergTransactionResponse, -}; +pub use transaction::{IcebergOverwriteAction, IcebergTransaction, IcebergTransactionResponse}; pub use writer::{ IcebergDataFileWriter, IcebergDataFileWriterResponse, IcebergWriterCloseResponse, }; diff --git a/iceberg_rust_ffi/src/transaction.rs b/iceberg_rust_ffi/src/transaction.rs index 4f5734f..4efa95e 100644 --- a/iceberg_rust_ffi/src/transaction.rs +++ b/iceberg_rust_ffi/src/transaction.rs @@ -7,9 +7,9 @@ use crate::catalog::IcebergCatalog; use crate::error_codes::{classified_error, classify_iceberg, IcebergErrorCode}; use crate::response::IcebergBoxedResponse; use crate::table::IcebergTable; -use iceberg::spec::{DataContentType, DataFile, Datum, ManifestContentType}; +use crate::IcebergDataFiles; +use iceberg::spec::DataFile; use iceberg::transaction::{ApplyTransactionAction, Transaction}; -use std::collections::HashMap; // FFI exports use object_store_ffi::{ @@ -45,15 +45,6 @@ impl IcebergTransaction { } } -/// Opaque handle for data files produced by a writer -/// This holds the Vec that results from closing a writer -pub struct IcebergDataFiles { - pub data_files: Vec, -} - -unsafe impl Send for IcebergDataFiles {} -unsafe impl Sync for IcebergDataFiles {} - /// Opaque handle for accumulating data files for a FastAppendAction /// /// Since iceberg-rust's FastAppendAction is not publicly exported, we store @@ -103,9 +94,6 @@ impl IcebergOverwriteAction { /// Type alias for transaction response pub type IcebergTransactionResponse = IcebergBoxedResponse; -/// Type alias for data files list response -pub type IcebergDataFilesResponse = IcebergBoxedResponse; - /// Free a transaction #[no_mangle] pub extern "C" fn iceberg_transaction_free(transaction: *mut IcebergTransaction) { @@ -116,105 +104,6 @@ pub extern "C" fn iceberg_transaction_free(transaction: *mut IcebergTransaction) } } -/// Free data files handle -#[no_mangle] -pub extern "C" fn iceberg_data_files_free(data_files: *mut IcebergDataFiles) { - if !data_files.is_null() { - unsafe { - let _ = Box::from_raw(data_files); - } - } -} - -/// Return the number of data files in the handle (0 if null). -#[no_mangle] -pub extern "C" fn iceberg_data_files_len(data_files: *const IcebergDataFiles) -> usize { - if data_files.is_null() { - return 0; - } - unsafe { (*data_files).data_files.len() } -} - -/// Return a JSON array of objects describing every data file in the handle. -/// -/// Each object contains all metadata fields from the Iceberg DataFile spec: -/// content, file_path, file_format, record_count, file_size_in_bytes, -/// partition_spec_id, column_sizes, value_counts, null_value_counts, -/// nan_value_counts, lower_bounds, upper_bounds, split_offsets, -/// sort_order_id, equality_ids, first_row_id, referenced_data_file, -/// content_offset, content_size_in_bytes. -/// -/// Returns null on error or if `data_files` is null. -/// The caller must free the returned string with `iceberg_destroy_cstring`. -#[no_mangle] -pub extern "C" fn iceberg_data_files_to_json( - data_files: *const IcebergDataFiles, -) -> *mut std::ffi::c_char { - #[derive(serde::Serialize)] - struct DataFileJson<'a> { - content: &'static str, - file_path: &'a str, - file_format: String, - record_count: u64, - file_size_in_bytes: u64, - column_sizes: &'a HashMap, - value_counts: &'a HashMap, - null_value_counts: &'a HashMap, - nan_value_counts: &'a HashMap, - lower_bounds: HashMap, - upper_bounds: HashMap, - split_offsets: Option<&'a [i64]>, - sort_order_id: Option, - equality_ids: Option>, - first_row_id: Option, - referenced_data_file: Option, - content_offset: Option, - content_size_in_bytes: Option, - } - - if data_files.is_null() { - return std::ptr::null_mut(); - } - let df_ref = unsafe { &*data_files }; - - let entries: Vec = df_ref - .data_files - .iter() - .map(|f| DataFileJson { - content: match f.content_type() { - DataContentType::Data => "data", - DataContentType::PositionDeletes => "position_deletes", - DataContentType::EqualityDeletes => "equality_deletes", - }, - file_path: f.file_path(), - file_format: f.file_format().to_string(), - record_count: f.record_count(), - file_size_in_bytes: f.file_size_in_bytes(), - column_sizes: f.column_sizes(), - value_counts: f.value_counts(), - null_value_counts: f.null_value_counts(), - nan_value_counts: f.nan_value_counts(), - lower_bounds: f.lower_bounds().iter().map(|(k, v)| (*k, v)).collect(), - upper_bounds: f.upper_bounds().iter().map(|(k, v)| (*k, v)).collect(), - split_offsets: f.split_offsets(), - sort_order_id: f.sort_order_id(), - equality_ids: f.equality_ids(), - first_row_id: f.first_row_id(), - referenced_data_file: f.referenced_data_file(), - content_offset: f.content_offset(), - content_size_in_bytes: f.content_size_in_bytes(), - }) - .collect(); - - match serde_json::to_string(&entries) { - Ok(json) => match std::ffi::CString::new(json) { - Ok(c_str) => c_str.into_raw(), - Err(_) => std::ptr::null_mut(), - }, - Err(_) => std::ptr::null_mut(), - } -} - /// Free a fast append action #[no_mangle] pub extern "C" fn iceberg_fast_append_action_free(action: *mut IcebergFastAppendAction) { @@ -577,58 +466,3 @@ pub extern "C" fn iceberg_overwrite_action_apply( } } } - -// List all live data files in the current snapshot of a table. -// Returns an IcebergDataFiles handle (free with iceberg_data_files_free). -// Returns an empty list if the table has no snapshot. -export_runtime_op!( - iceberg_table_list_data_files, - crate::IcebergDataFilesResponse, - || { - if table.is_null() { - return Err(classified_error( - IcebergErrorCode::STATE_RESOURCE_FREED, - "Resource has been freed", - "Null table pointer provided", - )); - } - let table_ref = unsafe { &*table }; - Ok(table_ref) - }, - table_ref, - async { - let table = &table_ref.table; - let Some(snapshot) = table.metadata().current_snapshot() else { - return Ok::(IcebergDataFiles { - data_files: vec![], - }); - }; - - let manifest_list = snapshot - .load_manifest_list(table.file_io(), &table.metadata_ref()) - .await - .map_err(|e| anyhow::anyhow!("Failed to load manifest list: {e}"))?; - - let mut data_files = Vec::new(); - for manifest_entry in manifest_list.entries() { - if manifest_entry.content != ManifestContentType::Data { - continue; - } - if !manifest_entry.has_added_files() && !manifest_entry.has_existing_files() { - continue; - } - let manifest = manifest_entry - .load_manifest(table.file_io()) - .await - .map_err(|e| anyhow::anyhow!("Failed to load manifest: {e}"))?; - for entry in manifest.entries() { - if entry.is_alive() { - data_files.push(entry.data_file().clone()); - } - } - } - - Ok::(IcebergDataFiles { data_files }) - }, - table: *mut crate::table::IcebergTable -); diff --git a/iceberg_rust_ffi/src/writer.rs b/iceberg_rust_ffi/src/writer.rs index 27db184..8f2ec19 100644 --- a/iceberg_rust_ffi/src/writer.rs +++ b/iceberg_rust_ffi/src/writer.rs @@ -95,11 +95,11 @@ impl ParquetWriterPropertiesFFI { use crate::error_codes::{classified_error, classify, IcebergErrorCode}; use crate::response::IcebergBoxedResponse; use crate::table::IcebergTable; -use crate::transaction::IcebergDataFiles; use crate::util::parse_c_string; use crate::writer_columns::{ build_arrow_array_gathered, ColumnDescriptor, GatheredColumnDescriptor, SliceRef, }; +use crate::IcebergDataFiles; use object_store_ffi::{ export_runtime_op, with_cancellation, CResult, NotifyGuard, ResponseGuard, RT, }; From 7dd35c1ed5bc992d379b84a5fece1bc6195bade7 Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 16:10:12 +0200 Subject: [PATCH 16/18] bump iceberg-rust rev to e173637c (merged PR #76) Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index b4f6931..89014c7 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -12,8 +12,8 @@ default = ["julia"] julia = [] [dependencies] -iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "50b335f371ad66a5d9fb9cc0cf4b5906ec88c003", features = ["storage-azdls"] } -iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" } +iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "e173637c", features = ["storage-azdls"] } +iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "e173637c" } object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "79b08071c7a1642532b5891253280861eca9e44e", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" From 2bab9e3b4182e44b5dbca0c0611452134b9d13eb Mon Sep 17 00:00:00 2001 From: Gerald Berger Date: Thu, 28 May 2026 16:36:04 +0200 Subject: [PATCH 17/18] bump iceberg-rust rev to 27f68f7e (overwrite correctness fixes PR #77) Co-Authored-By: Claude Sonnet 4.6 --- iceberg_rust_ffi/Cargo.lock | 4 ++-- iceberg_rust_ffi/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/iceberg_rust_ffi/Cargo.lock b/iceberg_rust_ffi/Cargo.lock index ea33946..9e11de0 100644 --- a/iceberg_rust_ffi/Cargo.lock +++ b/iceberg_rust_ffi/Cargo.lock @@ -1566,7 +1566,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=50b335f371ad66a5d9fb9cc0cf4b5906ec88c003#50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=27f68f7ea4ac4a3e51bf8c7f644648ccca235170#27f68f7ea4ac4a3e51bf8c7f644648ccca235170" dependencies = [ "aes-gcm", "anyhow", @@ -1623,7 +1623,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.9.0" -source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=50b335f371ad66a5d9fb9cc0cf4b5906ec88c003#50b335f371ad66a5d9fb9cc0cf4b5906ec88c003" +source = "git+https://github.com/RelationalAI/iceberg-rust.git?rev=27f68f7ea4ac4a3e51bf8c7f644648ccca235170#27f68f7ea4ac4a3e51bf8c7f644648ccca235170" dependencies = [ "async-trait", "chrono", diff --git a/iceberg_rust_ffi/Cargo.toml b/iceberg_rust_ffi/Cargo.toml index 89014c7..6c21bae 100644 --- a/iceberg_rust_ffi/Cargo.toml +++ b/iceberg_rust_ffi/Cargo.toml @@ -12,8 +12,8 @@ default = ["julia"] julia = [] [dependencies] -iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "e173637c", features = ["storage-azdls"] } -iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "e173637c" } +iceberg = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "27f68f7ea4ac4a3e51bf8c7f644648ccca235170", features = ["storage-azdls"] } +iceberg-catalog-rest = { git = "https://github.com/RelationalAI/iceberg-rust.git", rev = "27f68f7ea4ac4a3e51bf8c7f644648ccca235170" } object_store_ffi = { git = "https://github.com/RelationalAI/object_store_ffi", rev = "79b08071c7a1642532b5891253280861eca9e44e", default-features = false } tokio = { version = "1.0", features = ["full"] } futures = "0.3" From 31902ceb29c5d7f67f46f22fbffa2105e1c75eaf Mon Sep 17 00:00:00 2001 From: Gerald Berger <59661379+gbrgr@users.noreply.github.com> Date: Fri, 29 May 2026 10:58:05 +0200 Subject: [PATCH 18/18] Update src/data_file.jl Co-authored-by: Richard Gankema --- src/data_file.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_file.jl b/src/data_file.jl index 1bc2025..77002e9 100644 --- a/src/data_file.jl +++ b/src/data_file.jl @@ -39,7 +39,7 @@ Return the number of data files in the handle (0 if the handle is null/freed). """ function Base.length(df::DataFiles) df.ptr == C_NULL && return 0 - Int(@ccall rust_lib.iceberg_data_files_len(df.ptr::Ptr{Cvoid})::Csize_t) + return Int(@ccall rust_lib.iceberg_data_files_len(df.ptr::Ptr{Cvoid})::Csize_t) end """